{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances, FlexibleContexts #-}
-- Compositionality and reuse
-- of lazy evaluation and of simple generators
-- The main example from
-- Lennart Augustsson: More points for lazy evaluation, May 2011.
-- http://augustss.blogspot.com/2011/05/more-points-for-lazy-evaluation-in.html
module YAny where
import Control.Monad.Error
import Control.Monad.Reader
import Control.Monad.State
import Control.Exception
import System.IO
import GenT
type Producer m e = GenT e m ()
type Consumer m e = e -> m ()
type Transducer m1 m2 e1 e2 = Producer m1 e1 -> Producer m2 e2
-- The function 'any' can be defined by composition of two already written
-- functions 'or' and 'map'
anyl :: (a -> Bool) -> [a] -> Bool
anyl f = or . map f
-- To demonstrate that it really stops as soon as the element satisfying
-- the predicate is found, we use it with an infinite list
t1 = any (>10) [1..]
-- True
-- In a strict language,
-- any f xs === or (map f xs)
-- and so 'or' does not get to work before (map f xs) finishes.
-- Which, if it is applied to an infinite list, never happens.
-- We can grow the chain farther.
-- For example, the input list of numbers could be the result of
-- parsing a column of text
-- Splitting into lines, parsing, comparing -- all happens
-- incrementally and on demand.
t2 = any (>10) . map read . lines $ "2\n3\n5\n7\n11\n13\nINVALID"
-- True
-- Even though INVALID is unparseable as a number.
-- Therefore, evaluation really stopped before the entire list is consumed.
-- But what if we want to read the column of text from a file?
-- Or a communication pipe?
t2_bad = (any (>10) . map read . lines) `fmap` read_string test_file
where
read_string :: FilePath -> IO String
read_string = undefined
-- where read_string is a function to read file data. But how much to read?
-- Mainly, the parsing and `any' evaluation happens only after
-- read_string action finished and returns the value, that
-- is, some content from the file. But it may have read too much
-- or too little -- we can't know until we parse data and check
-- the predicate. So, we lost incrementality and termination right
-- after the satisfying element is found.
test_file = "/tmp/testf.txt"
make_test = do
h <- openFile test_file WriteMode
hPutStr h $ "2\n3\n5\n7\n11\n13\nINVALID"
hClose h
putStrLn "Made test"
-- 1. Use lazy IO. It has many problems, including with resource usage
t2_lazy = do
h <- openFile test_file ReadMode
str <- hGetContents h
let result = any (>10) . map read . lines $ str
return result
-- True
-- However, when the handle h is closed? When GC gets around to
-- it: leak of a scarce resource (file handle).
-- Mainly, Lazy IO has big problems if the input comes from a pipe.
-- It is not specified how much hGetContents really reads:
-- It reads by buffer-full and it may read-ahead by arbitrary amounts,
-- which for communication channels can (and does) lead to a deadlock.
-- For the channels, we need to know exactly how much we read.
-- 2. Read strictly
t2_strict =
bracket (openFile test_file ReadMode) hClose (loop (>10))
where
loop f h = do
r <- liftM f $ liftM read $ hGetLine h
if r then return True else loop f h
-- True
-- Here we lost compositionality and have to program the iteration
-- explicitly, essentially inlining 'or'. See Augustsson's post
-- for more discussion why we have to do this.
-- On the plus side, the handle is definitely closed,
-- immediately right after the result is obtained.
-- 3. Roll out the explicit stream: monadic list
data ListV m a = Nil | Cons a (List m a)
type List m a = m (ListV m a)
mapL :: Monad m => (a -> b) -> List m a -> List m b
mapL f l = check `liftM` l
where
check Nil = Nil
check (Cons x l) = Cons (f x) (mapL f l)
orL :: Monad m => List m Bool -> m Bool
orL l = l >>= check
where
check Nil = return False
check (Cons True _) = return True
check (Cons _ l) = orL l
linesL :: FilePath -> List IO String
linesL fname = openFile fname ReadMode >>= loop
where
loop h = do
b <- hIsEOF h
if b then hClose h >> return Nil
else do
l <- hGetLine h
return (Cons l (loop h))
-- Here, the handle is closed only when we see EOF
anyL :: Monad m => (a -> Bool) -> List m a -> m Bool
anyL f = orL . mapL f
-- Almost like regular any
t2_stream = anyL (>10) . mapL read $ linesL test_file
-- True
-- This is great -- the code looks just as the same as in the
-- lazy version. And we never read the INVALID data.
-- But since we never finish reading the file, and so never see EOF,
-- who closes the handle?
-- Only GC -- which may occur really late. So, we leak a
-- scarce resource, just as we did with Lazy IO!
-- 4. Use simple generators
-- A sample producer. It only produces file data, without caring
-- who will consume them and how.
fileG :: (GBracket m, MonadIO m) => FilePath -> Producer m Char
fileG fname =
gbracket (liftIO $ openFile fname ReadMode) (liftIO . hClose) loop
where
loop h = do
b <- liftIO $ hIsEOF h
if b then return () else liftIO (hGetChar h) >>= yield >> loop h
-- Now the handle is closed not only on EOF but also on any exception!
-- One consumer is putChar, whose type is indeed Consumer IO Char.
-- Here is the simplest hook-up of fileG and putChar, copying
-- the file to the standard output.
catG :: FilePath -> IO ()
catG fname = fileG fname `runGenT` putChar
-- catG test_file
--
-- The function |or| is a more interesting consumer
orG :: Monad m => Producer (ErrT Bool m) Bool -> m Bool
orG gen = either id (const False) `liftM` runErrT (runGenT gen orC)
where
orC :: MonadError Bool m => Consumer m Bool
orC True = throwError True
orC False = return ()
anyG :: Monad m =>
(a -> Bool) -> Producer (GenT Bool (ErrT Bool m)) a -> m Bool
anyG f = orG . mapG f
-- The code is quite like the lazy case: functional composition
linesG :: Monad m => Transducer (StateT String (GenT String m)) m Char String
linesG gen = foldG tr [] gen >>= at_end
where
tr s '\n' = yield (reverse s) >> return []
tr s c = return (c:s)
at_end [] = return ()
at_end s = yield (reverse s)
check_linesG :: IO ()
check_linesG = linesG (fileG test_file) `runGenT` putStrLn
{-
2
3
5
7
11
13
INVALID
-}
-- The last line in the file was not terminated by \n
t2_gen :: IO Bool
t2_gen = anyG (>10) . mapG read . linesG $ fileG test_file
-- True
-- and now we know that the handle will be closed
-- ------------------------------------------------------------------------
-- Utilities
class Monad m => GBracket m where
gbracket :: m a -> (a -> m b) -> (a -> m c) -> m c
instance GBracket IO where
gbracket = bracket
instance GBracket m => GBracket (ReaderT e m) where
gbracket pre post body = ReaderT $ \env ->
gbracket (runReaderT pre env)
(\a -> runReaderT (post a) env)
(\a -> runReaderT (body a) env)
instance GBracket m => GBracket (StateT s m) where
gbracket pre post body = StateT $ \s ->
gbracket (runStateT pre s)
(\(a,s) -> runStateT (post a) s)
(\(a,s) -> runStateT (body a) s)
-- Alas, in my version of GHC ErrorT still has the
-- annoying Error e constraint!
-- The following code is written to compensate
newtype ErrT e m a = ErrT{runErrT :: m (Either e a)}
instance Monad m => Monad (ErrT e m) where
return = ErrT . return . Right
ErrT m >>= f = ErrT (m >>= \x -> case x of
Right x -> runErrT (f x)
Left e -> return (Left e))
instance Monad m => MonadError e (ErrT e m) where
throwError = ErrT . return . Left
catchError m h = reifyErr m >>= \x -> case x of
Left e -> h e
Right x -> return x
reifyErr :: Monad m => ErrT e m a -> ErrT e m (Either e a)
reifyErr (ErrT m) = ErrT (m >>= return . Right)
reflectErr :: Monad m => Either e a -> ErrT e m a
reflectErr = ErrT . return
instance Monad m => GBracket (ErrT e m) where
gbracket pre post body = do
a <- pre
r <- reifyErr (body a)
post a
reflectErr r
instance MonadTrans (ErrT e) where
lift m = ErrT $ liftM Right m
instance MonadIO m => MonadIO (ErrT e m) where
liftIO = lift . liftIO