{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE FlexibleContexts #-}
-- A set of simple examples for the introduction
module Y1 where
import Control.Monad.Reader
import Control.Monad.State
import Control.Monad.Error
import Control.Monad.Trans
import Control.Monad.Identity
import qualified Data.ByteString.Char8 as B
import Data.Char
import qualified Data.Sequence as S
import Data.Sequence (ViewL(..),(|>))
import System.IO
import System.IO.Error (try)
-- ---------------------------------------------------------------------------
-- Example 1: Tab expansion
-- Using laziness
-- First version: expand each tab with 8 spaces
tabX0,tabX1 :: String -> String
tabX0 [] = []
tabX0 ('\t':rest) = replicate 8 ' ' ++ tabX0 rest
tabX0 (c:rest) = c : tabX0 rest
-- Second version: advance the tab to the next tab position
-- This version needs a local state, to keep the current position
tabX1 = go 0 where
go pos [] = []
go pos ('\t':rest) = let pos' = (pos + 8) - pos `mod` 8 in
replicate (pos' - pos) ' ' ++ go pos' rest
go pos (c:rest) = c : go (if c == '\n' then 0 else pos + 1) rest
-- Expanding the tabs in the file
expandFile_lazy = do
h <- openFile "/tmp/testf.txt" ReadMode
str <- hGetContents h
let str' = tabX0 str
putStr str'
-- The code is modular: we can easily replace tabX0 with tabX1,
-- or even abstract out the processor
expandFile_lazy' transf = do
h <- openFile "/tmp/testf.txt" ReadMode
str <- hGetContents h
let str' = transf str
putStr str'
make_test = do
h <- openFile "/tmp/testf.txt" WriteMode
hPutStr h $ "\tTest of\ttabs\n" ++
"1\t12\t123\t1234\t12345\t123456\t1234567\t12345678\t\n" ++
"\t1\t12\tx\n"
hClose h
putStrLn "Made test"
tExp0 = expandFile_lazy' tabX0
tExp1 = expandFile_lazy' tabX1
-- That was cheating: using lazy IO (and implicitly assuming that putStr
-- is incremental: it is not).
-- We didn't close the file. We didn't handle errors; we did not even report
-- them.
-- Using strict IO, naive tab expansion
expandFile_strict = do
h <- openFile "/tmp/testf.txt" ReadMode
loop h
hClose h
where loop h = do done <- hIsEOF h
if done then return ()
else hGetChar h >>= check >> loop h
check '\t' = putStr (replicate 8 ' ')
check c = putStr [c]
-- We can distinguish EOF from other errors.
-- But the tab expansion and reading (and checking for the end) are intertwined.
-- Although we can abstract out processing of a character, we can't
-- abstract out the processor of the stream. We cannot easily
-- replace the naive tab expansion algorithm with a smart tab expansion,
-- since there is no place to keep the state (the current position).
-- We raise the level of abstraction, using generators.
-- First we abstract out the processor
-- expandFile_gen1 :: (Char -> IO a) -> IO ()
expandFile_gen0 consumer = do
h <- openFile "/tmp/testf.txt" ReadMode; loop h; hClose h
where loop h = do done <- hIsEOF h
if done then return ()
else hGetChar h >>= consumer >> loop h
-- Take a look at the inferred type
-- Let us introduce the abbreviation for it
type GenT' e m a = (e -> m ()) -> m a
-- Meaning: a computation that produces a in the monad m and
-- along the way produces elements e and gets the consumer
-- to handle them. So, expandFile_gen0 :: GenT' Char IO ()
-- This is the type of ReaderT!
-- the consumer is in the environment, we can hide it. So, we can write
-- the generator thusly
type GenT e m = ReaderT (e -> m ()) m
-- Strictly speaking, we should make GenT to be
-- GenT e m = forall m'. m' >= m => ReaderT (e -> m' ()) m'
-- where the partial order on monads >= is established by monad transformation
-- Alternatively,
-- GenT e (c :: (* -> *) -> Constraint) = forall m. c m => ReaderT (e -> m ()) m
-- Perhaps these more complex (but more precise) types can be left for
-- another time.
fileGen1 :: MonadIO m => GenT Char m ()
fileGen1 = do
h <- liftIO $ openFile "/tmp/testf.txt" ReadMode
loop h
liftIO $ hClose h
where loop h = do done <- liftIO $ hIsEOF h
if done then return ()
else liftIO (hGetChar h) >>= yield >> loop h
yield x = do
consumer <- ask
lift $ consumer x
-- But yield is completely general!
yield :: Monad m => e -> GenT e m ()
yield x = do consumer <- ask
lift $ consumer x
fileGen :: MonadIO m => GenT Char m ()
fileGen = do
h <- liftIO $ openFile "/tmp/testf.txt" ReadMode
loop h
liftIO $ hClose h
where loop h = do done <- liftIO $ hIsEOF h
if done then return ()
else liftIO (hGetChar h) >>= yield >> loop h
runGenT :: GenT e m () -> (e -> m ()) -> m ()
runGenT m f = runReaderT m f
-- Naive tab expansion
-- First version: expand each tab with 8 spaces
tabY0 :: MonadIO m => Char -> m ()
tabY0 '\t' = liftIO $ putStr (replicate 8 ' ')
tabY0 c = liftIO $ putStr [c]
tExpS0 :: IO ()
tExpS0 = runGenT fileGen tabY0
-- Can we use the same fileGen with a different tab-expansion function?
-- That is, have we gained modularity?
-- Yes, as we shall see:
-- Let the consumer have other effects (e.g., keep its own state)
-- The inferred signature is as follows:
-- tabY1 :: (MonadState Int m, MonadIO m) => Char -> m ()
-- But we can write the signature directly:
tabY1 :: MonadIO m => Char -> StateT Int m ()
tabY1 c = get >>= (\pos -> go pos c) >>= put
where
go pos '\t' =
let pos' = (pos + 8) - pos `mod` 8 in
liftIO (putStr (replicate (pos' - pos) ' ')) >> return pos'
go pos c = liftIO (putStr [c]) >> return (if c == '\n' then 0 else pos + 1)
-- We have used exactly the same fileGen as before!
tExpS1 :: IO ()
tExpS1 = evalStateT (runGenT fileGen tabY1) 0
-- Let us abstract the pattern seen in tabY1. The go function is a state
-- transition function. Its type is very similar to that in the first argument
-- of fold
-- For example,
-- List.foldl :: (a -> b -> a) -> a -> [b] -> a
-- We may consider generator to be a `ephemeral' collection
-- That is, GenT e m () is sort of like [e]
-- Since we have effects, we should consider `effectful' folds:
-- Monad.foldM_ :: Monad m => (a -> b -> m a) -> a -> [b] -> m ()
-- Foldable.foldl :: Foldable t => (a -> b -> a) -> a -> t b -> a
--
-- The particularly close is
-- Foldable.foldlM :: (Foldable t, Monad m) =>
-- (a -> b -> m a) -> a -> t b -> m a
-- So, GenT e m () looks like a `monadic Foldable': Foldable
-- for which only foldlM is defined (but not a simple foldMap).
-- Let us call Foldable.foldlM as foldG (to avoid name clashes)
-- and implement as follows
foldG :: Monad m =>
(s -> e -> m s) -> s -> GenT e (StateT s m) () -> m s
foldG f s0 gen = execStateT (runGenT gen consumer) s0
where consumer x = get >>= (\s -> lift $ f s x) >>= put
-- A variant of foldG that returns no final state
foldG_ :: Monad m =>
(s -> e -> m s) -> s -> GenT e (StateT s m) () -> m ()
foldG_ f s0 gen = evalStateT (runGenT gen consumer) s0
where consumer x = get >>= (\s -> lift $ f s x) >>= put
-- BTW, also very appropriate:
-- traverse_ :: (Foldable t, Applicative f) => (a -> f b) -> t a -> f ()
-- We re-write tExpS1 and tabY1 using foldG as follows
tabY1' :: MonadIO m => Int -> Char -> m Int
tabY1' pos '\t' =
let pos' = (pos + 8) - pos `mod` 8 in
liftIO (putStr (replicate (pos' - pos) ' ')) >> return pos'
tabY1' pos c = liftIO (putStr [c]) >> return (if c == '\n' then 0 else pos + 1)
tExpS1' :: IO ()
tExpS1' = foldG_ tabY1' 0 fileGen
-- We can generalize even further: we can separate the tab expansion
-- from writing, as we did in tabX1 case.
tabY1'' :: Monad m => Int -> Char -> GenT String m Int
tabY1'' pos '\t' =
let pos' = (pos + 8) - pos `mod` 8 in
yield (replicate (pos' - pos) ' ') >> return pos'
tabY1'' pos c = yield [c] >> return (if c == '\n' then 0 else pos + 1)
tExpS1'' :: IO ()
tExpS1'' = runGenT (foldG_ tabY1'' 0 fileGen) putStr
{-
Test of tabs
1 12 123 1234 12345 123456 1234567 12345678
1 12 x
-}
-- Thus tabY1'' has become a stream transformer (transducer).
-- We can compose multiple transformers.
-- tExpS1'' shows that production (fileGen), transforming tabY'' and
-- final consumption (putStr) are fully separted.
-- More involved example:
-- replacing \r\n with \n
-- It requires look-ahead, or emulating iteratees.
-- ---------------------------------------------------------------------------
-- Example II: Trees
data Tree = Leaf Int | Node Tree Tree
deriving Show
tree1 = Node
(Node
(Node (Leaf 3) (Leaf 4))
(Node (Leaf 1) (Leaf 2)))
(Leaf 5)
-- The essence of repmin:
-- if we do not traverse the transformed tree completely,
-- the remaining nodes are not even constructed (the original
-- tree has to be traversed completely if we demand a single
-- node of the transformed tree).
-- The nodes of the transformed tree are constructed only once,
-- no matter how many times the tree is processed.
repmin :: Tree -> Tree
repmin t = tr
where (m, tr) = walk m t
walk m (Leaf n) = (n, Leaf m)
walk m (Node t1 t2) = (n1 `min` n2, Node tr1 tr2)
where
(n1,tr1) = walk m t1
(n2,tr2) = walk m t2
-- In its original formulation, repmin is out of scope for generators.
-- Generators are used for incremental stream processing or traversal.
-- In repmin, the point is to construct a tree. It is not clear
-- what it means for a tree to be constructed incrementally
-- (and in any case, repmin does not do that).
-- Repmin cleverly exploits memoization present in lazy evaluation.
-- (without memoization, repmin is grossly inefficient, repeatedly
-- re-traversing the tree).
-- We can however transform repmin into the form that is in the scope
-- for using generators.
-- We se-reialize the tree afterwards, converting the tree into a
-- stylized XML
data TreeStream = BegNode
| EndNode
| LeafData Int
deriving Show
serializeX :: Tree -> [TreeStream]
serializeX (Leaf x) = [LeafData x]
serializeX (Node t1 t2) =
[BegNode] ++ serializeX t1 ++ serializeX t2 ++ [EndNode]
ts1 = serializeX tree1
{-
[BegNode,BegNode,BegNode,LeafData 3,LeafData 4,
EndNode,BegNode,LeafData 1,LeafData 2,EndNode,EndNode,LeafData 5,EndNode]
-}
ts2 = serializeX (repmin tree1)
{-
[BegNode,BegNode,BegNode,LeafData 1,LeafData 1,EndNode,BegNode,
LeafData 1,LeafData 1,EndNode,EndNode,LeafData 1,EndNode]
-}
-- We can't do any effects in repmin.
-- If we wish to have effects, we have to use the recursive do
-- and monad must be MonadFix. The monadic computation is also
-- too eager: with pure repmin, we can determine if
-- (repmin tree) is a Leaf or Node without traversing the tree.
-- With monadic repmin, we cannot do that. However, we can do
-- such a simple test with generators below.
-- The problem: can we generalize repmin so we can have effects
-- (printing the trace of nodes as they are traversed)
-- and produce [TreeStream] with the same efficiency gurantees
-- as repmin does?
-- traversal generator
traverse :: Monad m => Tree -> GenT TreeStream m ()
traverse (Leaf i) = yield (LeafData i)
traverse (Node t1 t2) = do
yield BegNode
traverse t1; traverse t2
yield EndNode
-- debug printing transformer
trace :: (MonadIO m, Show e) => String -> e -> GenT e m ()
trace header x = liftIO (putStrLn $ header ++ show x) >> yield x
-- collector of stream in a list
collect :: Monad m => GenT e (StateT [e] m) () -> m [e]
collect g = foldG (\s e -> return $ e:s) [] g >>= return . reverse
tg1 :: [TreeStream]
tg1 = runIdentity $ collect . traverse $ tree1
-- The same with debug printing
tg2 :: IO [TreeStream]
tg2 = collect (runGenT (traverse tree1) (trace "init: "))
-- we added the debugging without chaging the generator or the collector.
-- We now implement the repmin-like transformer
-- We need a look-ahead buffer
-- BTW, the example looks quite alike the code that appears in
-- the pretty-printing code
repminT gen = do
(m,buf) <- foldG go (0,[]) gen
mapM_ (flush m) (reverse buf)
where
go (m, []) BegNode = yield BegNode >> return (m, [])
go (m, []) EndNode = yield EndNode >> return (m, [])
go (m, []) (LeafData x) = return (x,[LeafData x])
go (m, b) (LeafData x) = return (x `min` m,LeafData x : b)
go (m, b) e = return (m, e:b)
flush m (LeafData _) = yield (LeafData m)
flush _ e = yield e
-- Note composing of various transformers, adding debugging printing
-- here and there
tg3 :: IO [TreeStream]
tg3 = collect .
flip runGenT (trace "after: ") .
repminT .
flip runGenT (trace "init: ") $ traverse tree1
-- We observe the large latency and the need for the unbounded look-ahead
-- buffer
-- But (serializeX . repmin) had the same latency and space requirements!
-- Only it was much harder to see.