-- Efficiently obtain the number of digits in the base-b representation -- of a number. In other words, compute ceil(log_b n), -- or one plus the integer logarithm of a number base b. -- The method below avoids any divisions, the most time-consuming operation. module BaseWidth where import GHC.Integer (quotInteger, timesInteger) -- timesInteger is (*) on Integers and quotInteger is essentially -- div on Integers. However, quotInteger seems to be about 50% faster -- than div, for some reason. -- width of a number in base b -- Repeated divisions; possibly a memory leak in the addition basewidth0 :: Integer -> Integer -> Int basewidth0 b n | n < b = 1 basewidth0 b n = 1 + basewidth0 b (n `quotInteger` b) -- Repeated divisions -- basewidth 2 (2 ^ expm48 - 1) doesn't finish -- within an hour, although it runs in constant space basewidth1 :: Integer -> Integer -> Int basewidth1 b n = loop 1 n where loop acc n | n < b = acc loop acc n = (loop \$! succ acc) (n `quotInteger` b) -- A better algorithm data BaseP = BaseP !Integer -- b^k !Int -- k deriving Show mul_bp :: BaseP -> BaseP -> BaseP mul_bp (BaseP bk1 k1) (BaseP bk2 k2) = BaseP (bk1 `timesInteger` bk2) (k1+k2) basewidth2 :: Integer -> Integer -> Int basewidth2 _ 0 = 1 basewidth2 b n = check_lwb (BaseP 1 0) where b1 = BaseP b 1 -- check to see that the lower-bound is precise -- (the first argument is assured to be a lower-bound: n >= bk) check_lwb :: BaseP -> Int check_lwb lwbp = let lp1@(BaseP bk1 k1) = lwbp `mul_bp` b1 in if bk1 > n then k1 else improve_lwb lp1 b1 -- improve the lower bound improve_lwb lwbp bpi@(BaseP _ i) = let lp1@(BaseP bk1 k1) = lwbp `mul_bp` bpi in if bk1 > n then if i == 1 then k1 -- tight bound else check_lwb lwbp else improve_lwb lp1 (bpi `mul_bp` bpi) {- *BaseWidth> basewidth2 2 (2^10000000 -1 ) 10000000 (17.42 secs, 275537312 bytes) -} -- Even better algorithm: obtaining the binary digits of ceil(log_b n) -- one bit at a time -- The algorithm takes 2 * log2 (ceil(log_b n)) multiplications -- To see that, observe that the list 'bases' below has the maximal -- length 2 * log2 (ceil(log_b n)). Each iteration of major_bit -- adds an element to the list and each iteration of other_bits removes -- an element. -- This algorithm only uses multiplications and comparisons basewidth :: Integer -> Integer -> Int basewidth b _ | b <= 1 = error "basewidth: base must be greater than 1" basewidth b n | n < b = 1 basewidth b n | n == b = 2 basewidth b n = major_bit [BaseP b 1] where -- major_bit bases -- where bases is [BaseP b^k k | j <- [d,d-1..0], k=2^j] for some d -- and n > b^(2^d) -- Goal: find such d>0 so that b^(2^(d-1)) < n <= b^(2^d) major_bit :: [BaseP] -> Int major_bit bases@(bp:bps) = let bpnext@(BaseP bk2 k2) = bp `mul_bp` bp in case compare bk2 n of EQ -> k2 + 1 -- n == b^(2k) GT -> other_bits bp bps -- b^(2k) > n LT -> major_bit (bpnext : bases) -- b^(2k) < n -- other_bits (BaseP bi i) bases where -- bases = [BaseP b^k k | j <- [d,d-1..0], k=2^j] for some d, k=2^d -- and b^i < n < b^(i+2k) other_bits (BaseP _ i) [] = i+1 -- b^i < n < b^(i+1) other_bits bp (bphalf:bps) = let bpup@(BaseP bik ik) = bp `mul_bp` bphalf in case compare bik n of EQ -> ik + 1 -- n == b^(i+k) GT -> other_bits bp bps -- b^i < n < b^(i+k) LT -> other_bits bpup bps -- b^(i+k) < n < b^(i+2k) -- This version of the algorithm uses the integer division operation basewidth_div :: Integer -> Integer -> Int basewidth_div b _ | b <= 1 = error "basewidth: base must be greater than 1" basewidth_div b n | n < b = 1 basewidth_div b n | n == b = 2 basewidth_div b n = major_bit [BaseP b 1] where -- major_bit bases -- where bases is [BaseP b^k k | j <- [d,d-1..0], k=2^j] for some d -- and n > b^(2^d) -- Goal: find such d>0 so that b^(2^(d-1)) < n <= b^(2^d) major_bit :: [BaseP] -> Int major_bit bases@(bp@(BaseP bk k):bps) = let bpnext@(BaseP bk2 k2) = bp `mul_bp` bp in case compare bk2 n of EQ -> k2 + 1 -- n == b^(2k) GT -> other_bits k (n `quotInteger` bk) bps -- b^(2k) > n LT -> major_bit (bpnext : bases) -- b^(2k) < n -- other_bits i n bases where -- bases = [BaseP b^k k | j <- [d,d-1..0], k=2^j] for some d, k=2^d -- and b^i < n_orig < b^(i+2k) -- The argument n is n_orig `quotInteger` b^i, so that it always holds -- and 1 <= n < b^2k other_bits i _ [] = i+1 -- 1 <= n < b other_bits i n (bphalf@(BaseP bk k):bps) = case compare bk n of EQ -> i + k + 1 -- n == b^k GT -> other_bits i n bps -- 1 <= n < b^k LT -> (other_bits \$! (i+k)) (n `quotInteger` bk) bps -- b^k < n < b^2k -- An attempt to replace some divisions with multiplications did not -- work out: it was slower -- Slight optimizations: determining the upper by steps of 4 -- The performance seems nearly identical to basewidth_div -- (except for b=10, when basewidth_div4 is for some reason twice slower). basewidth_div4 :: Integer -> Integer -> Int basewidth_div4 b _ | b <= 1 = error "basewidth: base must be greater than 1" basewidth_div4 b n | n < b = 1 basewidth_div4 b n | n == b = 2 basewidth_div4 b n = major_bit [BaseP b 1] where -- major_bit bases -- where bases is [BaseP b^k k | j <- [d,d-1..0], k=2^j] for some d -- and n > b^(2^d) -- Goal: find such d>0 so that b^(2^(d-1)) < n <= b^(2^d) -- Using steps of 4 major_bit :: [BaseP] -> Int major_bit bases@(bp@(BaseP bk k):bps) = let bp2@(BaseP bk2 k2) = bp `mul_bp` bp in let bp4@(BaseP bk4 k4) = bp2 `mul_bp` bp2 in case compare bk4 n of EQ -> k4 + 1 -- n == b^(4k) GT -> case compare bk2 n of -- n < b^(4k) EQ -> k2 + 1 -- n == b^(2k) GT -> other_bits k (n `quotInteger` bk) bps -- n < b^(2k) LT -> other_bits k2 (n `quotInteger` bk2) (bp:bps) LT -> major_bit (bp4 : bp2 : bases) -- b^(4k) < n -- other_bits i n bases where -- bases = [BaseP b^k k | j <- [d,d-1..0], k=2^j] for some d, k=2^d -- and b^i < n_orig < b^(i+2k) -- The argument n is n_orig `quotInteger` b^i, so that it always holds -- and 1 <= n < b^2k -- This function is identical to that of basewidth_div other_bits i _ [] = i+1 -- 1 <= n < b other_bits i n (bphalf@(BaseP bk k):bps) = case compare bk n of EQ -> i + k + 1 -- n == b^k GT -> other_bits i n bps -- 1 <= n < b^k LT -> (other_bits \$! (i+k)) (n `quotInteger` bk) bps -- b^k < n < b^2k test1 = [1,1,2,2,2,3,3,4,4,4,5,5,6,7,7] == map (basewidth 10) [1,9,10,11,99,101,999,1001,1010, 10^4-1,10^4,10^4+1,10^6-1,10^6,10^6+1] test11 = [1,1,2,2,2,3,3,4,4,4,5,5,6,7,7] == map (basewidth_div 10) [1,9,10,11,99,101,999,1001,1010, 10^4-1,10^4,10^4+1,10^6-1,10^6,10^6+1] test12 = [1,1,2,2,2,3,3,4,4,4,5,5,6,7,7] == map (basewidth_div4 10) [1,9,10,11,99,101,999,1001,1010, 10^4-1,10^4,10^4+1,10^6-1,10^6,10^6+1] test2 = [7,8,40,41] == map (basewidth 2) [127,129,2^40-1,2^40] test21 = [7,8,40,41] == map (basewidth_div 2) [127,129,2^40-1,2^40] test22 = [7,8,40,41] == map (basewidth_div4 2) [127,129,2^40-1,2^40] -- [7,8,40,41] {- *BaseWidth> basewidth 2 (2^10000000 -1 ) 10000000 (10.94 secs, 121005792 bytes) -} -- The exponent of the 48th Mersenne prime expm48 = 57885161 :: Integer mersenne48 :: Integer mersenne48 = 2 ^ expm48 - 1 -- First compute test_mersenne_dummy = mersenne48 > 10 -- so to evaluate mersenne48 -- in ghci, interpreted, i386, 640MHz -- True -- (12.52 secs, 129101660 bytes) test_mersenne_bw2 = basewidth 2 mersenne48 -- In ghci, interpreted, i386, 640MHz -- 57885161 -- (63.40 secs, 648892816 bytes) -- amd64, intel Core, 3GHz -- 57885161 -- (4.65 secs, 639884088 bytes) test_mersenne_bw2_div = basewidth_div 2 mersenne48 -- amd64, intel Core, 3GHz -- 57885161 -- (2.39 secs, 303346776 bytes) test_mersenne_bw3_div = basewidth_div 3 mersenne48 -- amd64, intel Core, 3GHz -- 36521471 -- (1.25 secs, 164636216 bytes) test_mersenne_bw10 = basewidth 10 mersenne48 -- In ghci, interpreted, i386, 640MHz -- 17425170 -- (73.52 secs, 750306044 bytes) -- amd64, intel Core, 3GHz -- 17425170 -- (5.59 secs, 744803256 bytes) test_mersenne_bw10_div = basewidth_div 10 mersenne48 -- amd64, intel Core, 3GHz -- 17425170 -- (1.18 secs, 159545832 bytes) {- :set -XMagicHash GHC.Types.I# (GHC.Integer.Logarithms.integerLogBase# 2 mersenne48) 57885160 (2.43 secs, 324014256 bytes) GHC.Types.I# (GHC.Integer.Logarithms.integerLogBase# 3 mersenne48) 36521470 (1.25 secs, 164838024 bytes) GHC.Types.I# (GHC.Integer.Logarithms.integerLogBase# 10 mersenne48) 17425169 (1.18 secs, 159744976 bytes) -}