{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE FlexibleContexts #-} -- A set of simple examples for the introduction module Y1 where import Control.Monad.Reader import Control.Monad.State import Control.Monad.Error import Control.Monad.Trans import Control.Monad.Identity import qualified Data.ByteString.Char8 as B import Data.Char import qualified Data.Sequence as S import Data.Sequence (ViewL(..),(|>)) import System.IO import System.IO.Error (try) -- --------------------------------------------------------------------------- -- Example 1: Tab expansion -- Using laziness -- First version: expand each tab with 8 spaces tabX0,tabX1 :: String -> String tabX0 [] = [] tabX0 ('\t':rest) = replicate 8 ' ' ++ tabX0 rest tabX0 (c:rest) = c : tabX0 rest -- Second version: advance the tab to the next tab position -- This version needs a local state, to keep the current position tabX1 = go 0 where go pos [] = [] go pos ('\t':rest) = let pos' = (pos + 8) - pos `mod` 8 in replicate (pos' - pos) ' ' ++ go pos' rest go pos (c:rest) = c : go (if c == '\n' then 0 else pos + 1) rest -- Expanding the tabs in the file expandFile_lazy = do h <- openFile "/tmp/testf.txt" ReadMode str <- hGetContents h let str' = tabX0 str putStr str' -- The code is modular: we can easily replace tabX0 with tabX1, -- or even abstract out the processor expandFile_lazy' transf = do h <- openFile "/tmp/testf.txt" ReadMode str <- hGetContents h let str' = transf str putStr str' make_test = do h <- openFile "/tmp/testf.txt" WriteMode hPutStr h $ "\tTest of\ttabs\n" ++ "1\t12\t123\t1234\t12345\t123456\t1234567\t12345678\t\n" ++ "\t1\t12\tx\n" hClose h putStrLn "Made test" tExp0 = expandFile_lazy' tabX0 tExp1 = expandFile_lazy' tabX1 -- That was cheating: using lazy IO (and implicitly assuming that putStr -- is incremental: it is not). -- We didn't close the file. We didn't handle errors; we did not even report -- them. -- Using strict IO, naive tab expansion expandFile_strict = do h <- openFile "/tmp/testf.txt" ReadMode loop h hClose h where loop h = do done <- hIsEOF h if done then return () else hGetChar h >>= check >> loop h check '\t' = putStr (replicate 8 ' ') check c = putStr [c] -- We can distinguish EOF from other errors. -- But the tab expansion and reading (and checking for the end) are intertwined. -- Although we can abstract out processing of a character, we can't -- abstract out the processor of the stream. We cannot easily -- replace the naive tab expansion algorithm with a smart tab expansion, -- since there is no place to keep the state (the current position). -- We raise the level of abstraction, using generators. -- First we abstract out the processor -- expandFile_gen1 :: (Char -> IO a) -> IO () expandFile_gen0 consumer = do h <- openFile "/tmp/testf.txt" ReadMode; loop h; hClose h where loop h = do done <- hIsEOF h if done then return () else hGetChar h >>= consumer >> loop h -- Take a look at the inferred type -- Let us introduce the abbreviation for it type GenT' e m a = (e -> m ()) -> m a -- Meaning: a computation that produces a in the monad m and -- along the way produces elements e and gets the consumer -- to handle them. So, expandFile_gen0 :: GenT' Char IO () -- This is the type of ReaderT! -- the consumer is in the environment, we can hide it. So, we can write -- the generator thusly type GenT e m = ReaderT (e -> m ()) m -- Strictly speaking, we should make GenT to be -- GenT e m = forall m'. m' >= m => ReaderT (e -> m' ()) m' -- where the partial order on monads >= is established by monad transformation -- Alternatively, -- GenT e (c :: (* -> *) -> Constraint) = forall m. c m => ReaderT (e -> m ()) m -- Perhaps these more complex (but more precise) types can be left for -- another time. fileGen1 :: MonadIO m => GenT Char m () fileGen1 = do h <- liftIO $ openFile "/tmp/testf.txt" ReadMode loop h liftIO $ hClose h where loop h = do done <- liftIO $ hIsEOF h if done then return () else liftIO (hGetChar h) >>= yield >> loop h yield x = do consumer <- ask lift $ consumer x -- But yield is completely general! yield :: Monad m => e -> GenT e m () yield x = do consumer <- ask lift $ consumer x fileGen :: MonadIO m => GenT Char m () fileGen = do h <- liftIO $ openFile "/tmp/testf.txt" ReadMode loop h liftIO $ hClose h where loop h = do done <- liftIO $ hIsEOF h if done then return () else liftIO (hGetChar h) >>= yield >> loop h runGenT :: GenT e m () -> (e -> m ()) -> m () runGenT m f = runReaderT m f -- Naive tab expansion -- First version: expand each tab with 8 spaces tabY0 :: MonadIO m => Char -> m () tabY0 '\t' = liftIO $ putStr (replicate 8 ' ') tabY0 c = liftIO $ putStr [c] tExpS0 :: IO () tExpS0 = runGenT fileGen tabY0 -- Can we use the same fileGen with a different tab-expansion function? -- That is, have we gained modularity? -- Yes, as we shall see: -- Let the consumer have other effects (e.g., keep its own state) -- The inferred signature is as follows: -- tabY1 :: (MonadState Int m, MonadIO m) => Char -> m () -- But we can write the signature directly: tabY1 :: MonadIO m => Char -> StateT Int m () tabY1 c = get >>= (\pos -> go pos c) >>= put where go pos '\t' = let pos' = (pos + 8) - pos `mod` 8 in liftIO (putStr (replicate (pos' - pos) ' ')) >> return pos' go pos c = liftIO (putStr [c]) >> return (if c == '\n' then 0 else pos + 1) -- We have used exactly the same fileGen as before! tExpS1 :: IO () tExpS1 = evalStateT (runGenT fileGen tabY1) 0 -- Let us abstract the pattern seen in tabY1. The go function is a state -- transition function. Its type is very similar to that in the first argument -- of fold -- For example, -- List.foldl :: (a -> b -> a) -> a -> [b] -> a -- We may consider generator to be a `ephemeral' collection -- That is, GenT e m () is sort of like [e] -- Since we have effects, we should consider `effectful' folds: -- Monad.foldM_ :: Monad m => (a -> b -> m a) -> a -> [b] -> m () -- Foldable.foldl :: Foldable t => (a -> b -> a) -> a -> t b -> a -- -- The particularly close is -- Foldable.foldlM :: (Foldable t, Monad m) => -- (a -> b -> m a) -> a -> t b -> m a -- So, GenT e m () looks like a `monadic Foldable': Foldable -- for which only foldlM is defined (but not a simple foldMap). -- Let us call Foldable.foldlM as foldG (to avoid name clashes) -- and implement as follows foldG :: Monad m => (s -> e -> m s) -> s -> GenT e (StateT s m) () -> m s foldG f s0 gen = execStateT (runGenT gen consumer) s0 where consumer x = get >>= (\s -> lift $ f s x) >>= put -- A variant of foldG that returns no final state foldG_ :: Monad m => (s -> e -> m s) -> s -> GenT e (StateT s m) () -> m () foldG_ f s0 gen = evalStateT (runGenT gen consumer) s0 where consumer x = get >>= (\s -> lift $ f s x) >>= put -- BTW, also very appropriate: -- traverse_ :: (Foldable t, Applicative f) => (a -> f b) -> t a -> f () -- We re-write tExpS1 and tabY1 using foldG as follows tabY1' :: MonadIO m => Int -> Char -> m Int tabY1' pos '\t' = let pos' = (pos + 8) - pos `mod` 8 in liftIO (putStr (replicate (pos' - pos) ' ')) >> return pos' tabY1' pos c = liftIO (putStr [c]) >> return (if c == '\n' then 0 else pos + 1) tExpS1' :: IO () tExpS1' = foldG_ tabY1' 0 fileGen -- We can generalize even further: we can separate the tab expansion -- from writing, as we did in tabX1 case. tabY1'' :: Monad m => Int -> Char -> GenT String m Int tabY1'' pos '\t' = let pos' = (pos + 8) - pos `mod` 8 in yield (replicate (pos' - pos) ' ') >> return pos' tabY1'' pos c = yield [c] >> return (if c == '\n' then 0 else pos + 1) tExpS1'' :: IO () tExpS1'' = runGenT (foldG_ tabY1'' 0 fileGen) putStr {- Test of tabs 1 12 123 1234 12345 123456 1234567 12345678 1 12 x -} -- Thus tabY1'' has become a stream transformer (transducer). -- We can compose multiple transformers. -- tExpS1'' shows that production (fileGen), transforming tabY'' and -- final consumption (putStr) are fully separted. -- More involved example: -- replacing \r\n with \n -- It requires look-ahead, or emulating iteratees. -- --------------------------------------------------------------------------- -- Example II: Trees data Tree = Leaf Int | Node Tree Tree deriving Show tree1 = Node (Node (Node (Leaf 3) (Leaf 4)) (Node (Leaf 1) (Leaf 2))) (Leaf 5) -- The essence of repmin: -- if we do not traverse the transformed tree completely, -- the remaining nodes are not even constructed (the original -- tree has to be traversed completely if we demand a single -- node of the transformed tree). -- The nodes of the transformed tree are constructed only once, -- no matter how many times the tree is processed. repmin :: Tree -> Tree repmin t = tr where (m, tr) = walk m t walk m (Leaf n) = (n, Leaf m) walk m (Node t1 t2) = (n1 `min` n2, Node tr1 tr2) where (n1,tr1) = walk m t1 (n2,tr2) = walk m t2 -- In its original formulation, repmin is out of scope for generators. -- Generators are used for incremental stream processing or traversal. -- In repmin, the point is to construct a tree. It is not clear -- what it means for a tree to be constructed incrementally -- (and in any case, repmin does not do that). -- Repmin cleverly exploits memoization present in lazy evaluation. -- (without memoization, repmin is grossly inefficient, repeatedly -- re-traversing the tree). -- We can however transform repmin into the form that is in the scope -- for using generators. -- We se-reialize the tree afterwards, converting the tree into a -- stylized XML data TreeStream = BegNode | EndNode | LeafData Int deriving Show serializeX :: Tree -> [TreeStream] serializeX (Leaf x) = [LeafData x] serializeX (Node t1 t2) = [BegNode] ++ serializeX t1 ++ serializeX t2 ++ [EndNode] ts1 = serializeX tree1 {- [BegNode,BegNode,BegNode,LeafData 3,LeafData 4, EndNode,BegNode,LeafData 1,LeafData 2,EndNode,EndNode,LeafData 5,EndNode] -} ts2 = serializeX (repmin tree1) {- [BegNode,BegNode,BegNode,LeafData 1,LeafData 1,EndNode,BegNode, LeafData 1,LeafData 1,EndNode,EndNode,LeafData 1,EndNode] -} -- We can't do any effects in repmin. -- If we wish to have effects, we have to use the recursive do -- and monad must be MonadFix. The monadic computation is also -- too eager: with pure repmin, we can determine if -- (repmin tree) is a Leaf or Node without traversing the tree. -- With monadic repmin, we cannot do that. However, we can do -- such a simple test with generators below. -- The problem: can we generalize repmin so we can have effects -- (printing the trace of nodes as they are traversed) -- and produce [TreeStream] with the same efficiency gurantees -- as repmin does? -- traversal generator traverse :: Monad m => Tree -> GenT TreeStream m () traverse (Leaf i) = yield (LeafData i) traverse (Node t1 t2) = do yield BegNode traverse t1; traverse t2 yield EndNode -- debug printing transformer trace :: (MonadIO m, Show e) => String -> e -> GenT e m () trace header x = liftIO (putStrLn $ header ++ show x) >> yield x -- collector of stream in a list collect :: Monad m => GenT e (StateT [e] m) () -> m [e] collect g = foldG (\s e -> return $ e:s) [] g >>= return . reverse tg1 :: [TreeStream] tg1 = runIdentity $ collect . traverse $ tree1 -- The same with debug printing tg2 :: IO [TreeStream] tg2 = collect (runGenT (traverse tree1) (trace "init: ")) -- we added the debugging without chaging the generator or the collector. -- We now implement the repmin-like transformer -- We need a look-ahead buffer -- BTW, the example looks quite alike the code that appears in -- the pretty-printing code repminT gen = do (m,buf) <- foldG go (0,[]) gen mapM_ (flush m) (reverse buf) where go (m, []) BegNode = yield BegNode >> return (m, []) go (m, []) EndNode = yield EndNode >> return (m, []) go (m, []) (LeafData x) = return (x,[LeafData x]) go (m, b) (LeafData x) = return (x `min` m,LeafData x : b) go (m, b) e = return (m, e:b) flush m (LeafData _) = yield (LeafData m) flush _ e = yield e -- Note composing of various transformers, adding debugging printing -- here and there tg3 :: IO [TreeStream] tg3 = collect . flip runGenT (trace "after: ") . repminT . flip runGenT (trace "init: ") $ traverse tree1 -- We observe the large latency and the need for the unbounded look-ahead -- buffer -- But (serializeX . repmin) had the same latency and space requirements! -- Only it was much harder to see.