-- Simple back-tracking search with delimited control
-- Illustrating shift/reset, and the application of different search
-- strategies to the same program without re-writing it
module Searches where
import Control.Monad.Cont hiding (mplus)
import System.Environment (getArgs)
-- Lazy search tree
-- which is the result of a non-deterministic expression.
-- Thunks are important to _prevent_ memoization so that we
-- could examine the tree several times without fully expanding it.
-- Iterative deepening below critically relies on that property.
-- A Leaf carries one solution; a Node carries the subtrees of a choice
-- point as () -> ... thunks.  Being functions, the thunks are re-evaluated
-- on each application, which is what prevents sharing/memoization.
data SearchTree a = Leaf a | Node [() -> SearchTree a]
-- Different search strategies can be implemented as operations
-- on the search tree.
-- Depth-first search
-- Visit leaves left to right, fully expanding each subtree before
-- moving on to its right sibling.  Diverges on infinite left spines.
dfs :: SearchTree a -> [a]
dfs (Leaf v)  = [v]
dfs (Node ts) = [v | t <- ts, v <- dfs (t ())]
-- Breadth-first search
-- Naive version
-- Level-order traversal: the frontier is kept as a plain list used
-- as a FIFO queue (see the complexity discussion below).
bfs :: SearchTree a -> [a]
bfs t = go [\() -> t]
  where
    go []     = []
    go (f:fs) =
      case f () of
        Leaf v   -> v : go fs
        Node sub -> go (fs ++ sub)   -- children go to the back of the queue
-- The problem is that a list is a poor implementation of a queue.
-- The argument of loop is the current frontier, which, generally, grows
-- exponentially with depth. At every recursion call, we perform t ++ b,
-- and then take the head of the result: that is, we have to perform
-- list concatenation on every recursive call. It takes time, and space,
-- proportional to the size of t, which is exponential in depth!
-- Optimized version by a contributor (Jan 2022)
-- It is essentially the level traversal
-- http://okmij.org/ftp/Algorithms/BFN.html#interest
-- Level traversal: split the current frontier into its leaves and the
-- next-level frontier, emit the leaves, recurse on the rest.
-- The lazy foldr lets leaves stream out before the level is fully split.
bfs3 :: SearchTree a -> [a]
bfs3 t = go [\() -> t]
  where
    go []       = []
    go frontier = leaves ++ go deeper
      where (leaves, deeper) = split frontier
    -- Partition one frontier into (leaf values, next-level subtrees).
    split = foldr (step . ($ ())) ([], [])
      where
        step (Leaf v)   (vs, ts) = (v:vs, ts)
        step (Node sub) (vs, ts) = (vs, sub ++ ts)
-- A more idiomatic and conventional level-traversal
-- It also avoids all concatenations
-- the main function
-- forest f curr next
-- takes three arguments. The first f :: [()->SearchTree]
-- is the forest being handled. curr :: [[()->SearchTree]]
-- is the other forests at the same level (the same depth), yet to
-- be handled. Finally, next :: [[()->SearchTree]]
-- is the accumulator for next-level forests (in reverse order)
-- Except for expanding the tree (h ()) and (reverse next) when
-- finishing the level, all operations take constant time and space.
bfsl :: SearchTree a -> [a]
bfsl tree = forest [\() -> tree] [] []
  where
    -- NB: the clause order is significant.  The first three clauses drain
    -- the forest in hand; only when it and the same-level queue are both
    -- empty do we reverse the accumulator and descend one level.
    forest (h:t) curr next = case h () of
      Leaf x  -> x : forest t curr next
      Node [] -> forest t curr next        -- dead end: drop it immediately
      Node ks -> forest t curr (ks : next) -- defer children to next level
    forest [] (h:t) next = forest h t next -- next forest at this depth
    forest [] [] [] = []                   -- nothing deeper: done
    forest [] [] next = forest [] (reverse next) [] -- start next level
-- Iterative deepening
-- It should be obvious that the code repeatedly expands the nodes
-- of the tree as it examines the tree deeper and deeper.
-- One is tempted to eliminate the repetition (e.g., by changing
-- the SearchTree declaration to take advantage of GHC's memoization).
-- However, that will miss the point: re-evaluating the same
-- expressions (nodes) over and over again is the essence of
-- iterative deepening. The algorithm trades time for space:
-- for very large search trees, it is overall better to recompute the
-- nodes than to store them.
-- The algorithm is complete, like BFS -- it always finds a solution if one
-- exists -- and yet is as memory-efficient as DFS.
-- BFS takes so much memory, to store the frontier of the search,
-- that it becomes impractical for even moderate toy problems.
-- Collect the values from the leaves whose distance from
-- the root is exactly d.
-- The clause `depth_search d (Node [])' proved very useful:
-- It plugs the memory leak.
{-
depth_search :: Int -> SearchTree a -> [a]
depth_search 0 (Leaf x) = [x]
depth_search d (Leaf _) = []
depth_search d (Node [])= []
depth_search 0 (Node _) = []
depth_search d (Node b) = concatMap (\t -> depth_search (d-1) (t ())) b
iter_deepening :: Int -> SearchTree a -> [a]
iter_deepening n t = concatMap (\d -> depth_search d t) [0..n]
-}
-- More optimized for space
-- Collect the values of leaves at depth exactly d, prepending them to
-- the accumulator l (which is the continuation of the search: the
-- results of the remaining, deeper iterations).
depth_search :: Int -> [a] -> SearchTree a -> [a]
depth_search 0 l (Leaf x) = x:l
depth_search d l (Leaf _) = l
depth_search 0 l (Node _) = l
depth_search d l (Node ts) = loop (d-1) l ts
  where
    loop d l []    = l
    -- The singleton clause matters for space: the last subtree is searched
    -- in tail position, without wrapping l in another (loop d l t) closure.
    loop d l [h]   = depth_search d l (h ())
    loop d l (h:t) = depth_search d (loop d l t) (h ())
-- Search depths 0, 1, ..., n in turn; the results of deeper iterations
-- are passed to depth_search as the (lazily computed) tail accumulator.
iter_deepening :: Int -> SearchTree a -> [a]
iter_deepening n t = go 0
  where
    go d | d > n     = []
         | otherwise = depth_search d (go (d + 1)) t
-- Other search strategies can be added easily.
-- Reifying a non-deterministic program as a search tree
-- Notably we do _not_ use SearchTree itself as a monad
-- Defining shift/reset in terms of Cont
-- Run a Cont computation whose answer type equals its result type,
-- supplying the trivial top-level continuation.
runC :: Cont w w -> w
runC c = runCont c (\x -> x)
-- Delimit the scope of captured continuations: run the sub-computation
-- to completion and re-inject its answer as a pure value.
reset :: Cont a a -> Cont w a
reset e = return (runC e)
-- Capture the current (delimited) continuation k and hand it to f;
-- the body of f is itself run up to the nearest reset.
shift :: ((a -> w) -> Cont w w) -> Cont w a
shift f = cont (\k -> runC (f k))
-- Non-deterministic choice from a _finite_ list
-- This is the only primitive. Everything else is implemented in terms
-- of choose
-- Non-deterministic choice: capture the continuation and build one
-- thunked branch per alternative.
choose :: [a] -> Cont (SearchTree w) a
choose xs = shift (\k -> return (Node [\() -> k x | x <- xs]))
-- Failing computation
-- Choosing among zero alternatives reifies to Node [] -- a dead end.
failure :: Cont (SearchTree w) a
failure = choose []
-- How to run non-deterministic computation
-- Turn a non-deterministic computation into its lazy search tree:
-- wrap each produced value in a Leaf and run up to the implicit reset.
reify :: Cont (SearchTree a) a -> SearchTree a
reify m = runC (Leaf <$> m)
-- With Cont and reification, we separate the vexing issues of the search
-- strategy from constructing a computation.
-- Examples
-- Example 1: computing Pythagorean triples
-- It is not a very representative example of typical search problems,
-- since the solutions are many and easily found
-- See a better example below
-- Pythagorean triples over 1..10, reified as a search tree.
ex1 = reify $ do
  a <- choose [1..10]
  b <- choose [1..10]
  c <- choose [1..10]
  if a*a + b*b == c*c then return (a,b,c) else failure
-- The same finite tree (ex1) examined under each strategy:
test1d = dfs ex1
-- [(3,4,5),(4,3,5),(6,8,10),(8,6,10)]
test1b = bfs ex1
-- the same, but takes many times more memory
test1b' = bfs3 ex1
test1b'' = bfsl ex1
-- about the same amount of memory as DFS
test1i = iter_deepening 5 ex1
-- about as much memory as DFS
-- Non-deterministic choice from a potentially _infinite_ list
-- First, we define mplus to join two computations together.
-- We define mplus in terms of choose
-- Join two computations as the two branches of a binary choice point.
mplus :: Cont (SearchTree w) a -> Cont (SearchTree w) a ->
         Cont (SearchTree w) a
-- mplus e1 e2 = choose [e1,e2] >>= id
-- or we may inline choose and simplify a bit:
-- NB: keep the (e_i >>= return . f) form inside the thunks; see the
-- note below on the hand-optimized variant that retains the tree.
mplus e1 e2 = shift (\f ->
  return $ Node [\ () -> runCont (e1 >>= return . f) id,
                 \ () -> runCont (e2 >>= return . f) id])
-- We may further hand-optimize that expression to the following.
-- But it leaks memory! It retains the constructed tree
-- mplus e1 e2 = Cont (\k -> Node [\() -> runCont e1 k,
-- \() -> runCont e2 k])
-- See an article on preventing memoization in search problems.
-- Generally speaking, mplus is not associative. It better not be,
-- since associative and non-commutative mplus makes the search
-- strategy incomplete.
-- Consider
-- e1 `mplus` return False >>= (\x -> if x then mzero else return x)
-- where e1 = return True `mplus` e1
-- If mplus is associative and non-commutative, the search will diverge
-- although a solution exists
-- We can now define the general choice
-- General choice from a possibly infinite list, built from binary mplus.
-- The singleton clause avoids a spurious trailing failure branch.
-- (Added the missing type signature, as for every other top-level
-- binding in this file.)
choose' :: [a] -> Cont (SearchTree w) a
choose' []    = failure
choose' [x]   = return x
choose' (h:t) = return h `mplus` choose' t
-- A stream of numbers starting at i, more efficient than choose' [1..].
-- ($!) forces the next index so no chain of (+1) thunks builds up;
-- (>>= id) joins the chosen computation into the stream.
-- (Added the missing type signature, as elsewhere in this file.)
from :: Num a => a -> Cont (SearchTree w) a
from i = choose [return i, from $! i + 1] >>= id
-- Pythagorean triples from the range of numbers
-- Pythagorean triples drawn from an arbitrary (finite or infinite) range.
ex2 range = reify $ do
  a <- choose' range
  b <- choose' range
  c <- choose' range
  if a*a + b*b == c*c then return (a,b,c) else failure
-- Finite range
test2d = dfs $ ex2 [1..10]
-- [(3,4,5),(4,3,5),(6,8,10),(8,6,10)]
test2b = bfsl $ ex2 [1..10]
-- Infinite range
-- DFS expectedly diverges, but BFS and iterative deepening give
-- solutions; iterative deepening takes much less memory than BFS.
test3d = take 5 . dfs $ ex2 [1..]
-- diverges!! (DFS commits to the leftmost infinite branch)
test3b = take 5 . bfs $ ex2 [1..]
-- [(3,4,5),(4,3,5),(6,8,10),(8,6,10),(5,12,13)]
test3b' = take 5 . bfsl $ ex2 [1..]
test3i = take 5 . iter_deepening 100 $ ex2 [1..]
-- [(3,4,5),(4,3,5),(6,8,10),(8,6,10),(5,12,13)]
-- [(3,4,5),(4,3,5),(6,8,10),(8,6,10),(5,12,13)]
-- A better example: simple inductive programming
-- Given a sequence of input-output pairs [(Int,Int)],
-- find an Int->Int function with that behavior
-- A representation of an Int->Int function
data Exp =
    K Int        -- constant function
  | X            -- identity
  | Exp :+ Exp   -- \x -> f x + g x
  | Exp :* Exp   -- \x -> f x * g x
  deriving Show
-- Interpret an Exp as the Int -> Int function it denotes.
eval :: Exp -> Int -> Int
eval e n = case e of
  K c    -> c
  X      -> n
  f :+ g -> eval f n + eval g n
  f :* g -> eval f n * eval g n
-- An Exp
-- Non-deterministically generate an Exp.  NB: with no fixity declaration,
-- the backticked mplus defaults to infixl 9, so this chain groups to the
-- left; since mplus is not associative (see above), the grouping shapes
-- the search tree and must not be changed casually.
an_exp =
  (fmap K $ choose numbers) `mplus`
  (return X) `mplus`
  (liftM2 (:+) an_exp an_exp) `mplus`
  (liftM2 (:*) an_exp an_exp)
  where numbers = [-2..2]
-- Synthesize an Exp consistent with every given (input, output) pair.
induct io = reify $ do
  e <- an_exp
  if all (\(i, o) -> eval e i == o) io then return e else failure
-- testib = take 2 $ bfs3 $ induct [(0,1), (1,2), (2,5)]
-- Find a function through the points (0,1), (1,1), (2,3):
testib = take 1 $ bfs3 $ induct [(0,1), (1,1), (2,3)]
-- [K 1 :+ (X :* (K (-1) :+ X))]
testib' = take 1 $ bfsl $ induct [(0,1), (1,1), (2,3)]
-- the same, but faster and with less memory
testii = take 1 $ iter_deepening 100 $ induct [(0,1), (1,1), (2,3)]
-- a bit worse than testib'
-- Benchmarking
-- Compiling it:
-- ghc -O2 -rtsopts -main-is Searches.main Searches.hs
-- To run this code
-- GHCRTS="-tstderr" ./Searches pyth bfs 30
-- GHCRTS="-tstderr" ./Searches pyth iter 30
-- Pythagorean triples over the infinite stream of naturals, via `from`.
ex3 = reify $ do
  p <- from 1
  q <- from 1
  r <- from 1
  if p*p + q*q == r*r then return (p,q,r) else failure
main :: IO ()
main = getArgs >>= check
  where
    -- Dispatch on the command line: <problem> <strategy> <count>
    check ["pyth",key,ns] | [(n,"")] <- reads ns =
      print $ take 10 $ drop n . select key $ ex3
    check ["ind",key,ns] | [(n,"")] <- reads ns =
      -- cannot find for this example: run out of memory:
      -- BFS runs very fast, but iter also runs out, eventually
      -- -- x^3 + 2*x^2 - x + 1
      -- print $ take n . select key $ induct [(0,1), (1,3), (-1,3), (2,15)]
      print $ take n . select key $ induct [(0,1), (1,1), (2,3), (-1,3)]
    -- Previously, malformed arguments died with an opaque pattern-match
    -- failure; report the expected usage instead.
    check _ = putStrLn usage
    select "bfs"  = bfs
    select "bfs3" = bfs3
    select "bfsl" = bfsl
    select "iter" = iter_deepening 10000
    -- Likewise, an unknown strategy key used to crash; name the culprit.
    select key    = error ("unknown strategy '" ++ key ++ "'; " ++ usage)
    usage = "usage: Searches (pyth|ind) (bfs|bfs3|bfsl|iter) <n>"
{-
GHCRTS="-tstderr" ./Searches pyth bfs 10
[(12,16,20),(16,12,20),(7,24,25),(24,7,25),(10,24,26),(15,20,25),(20,15,25),(24,10,26),(20,21,29),(21,20,29)]
<>
GHCRTS="-tstderr" ./Searches pyth bfs3 10
[(12,16,20),(16,12,20),(7,24,25),(24,7,25),(10,24,26),(15,20,25),(20,15,25),(24,10,26),(20,21,29),(21,20,29)]
<>
GHCRTS="-tstderr" ./Searches pyth bfsl 10
[(12,16,20),(16,12,20),(7,24,25),(24,7,25),(10,24,26),(15,20,25),(20,15,25),(24,10,26),(20,21,29),(21,20,29)]
<>
GHCRTS="-tstderr" ./Searches pyth iter 10
[(12,16,20),(16,12,20),(7,24,25),(24,7,25),(10,24,26),(15,20,25),(20,15,25),(24,10,26),(20,21,29),(21,20,29)]
<>
GHCRTS="-tstderr" ./Searches pyth bfs3 200
[(49,168,175),(168,49,175),(33,180,183),(72,154,170),(99,132,165),(132,99,165),(154,72,170),(180,33,183),(80,150,170),(150,80,170)]
<>
GHCRTS="-tstderr" ./Searches pyth bfsl 200
[(49,168,175),(168,49,175),(33,180,183),(72,154,170),(99,132,165),(132,99,165),(154,72,170),(180,33,183),(80,150,170),(150,80,170)]
<>
GHCRTS="-tstderr" ./Searches pyth iter 200
[(49,168,175),(168,49,175),(33,180,183),(72,154,170),(99,132,165),(132,99,165),(154,72,170),(180,33,183),(80,150,170),(150,80,170)]
<>
GHCRTS="-tstderr" ./Searches ind bfs3 4
[K 1 :+ (X :* (K (-1) :+ X)),K 1 :+ (X :* (X :+ K (-1))),K 1 :+ ((K (-1) :+ X) :* X),K 1 :+ ((X :+ K (-1)) :* X)]
<>
GHCRTS="-tstderr" ./Searches ind bfsl 4
<>
GHCRTS="-tstderr" ./Searches ind iter 4
<>
-}