{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances, FlexibleContexts #-} -- Compositionality and reuse -- of lazy evaluation and of simple generators -- The main example from -- Lennart Augustsson: More points for lazy evaluation, May 2011. -- http://augustss.blogspot.com/2011/05/more-points-for-lazy-evaluation-in.html module YAny where import Control.Monad.Error import Control.Monad.Reader import Control.Monad.State import Control.Exception import System.IO import GenT type Producer m e = GenT e m () type Consumer m e = e -> m () type Transducer m1 m2 e1 e2 = Producer m1 e1 -> Producer m2 e2 -- The function 'any' can be defined by composition of two already written -- functions 'or' and 'map' anyl :: (a -> Bool) -> [a] -> Bool anyl f = or . map f -- To demonstrate that it really stops as soon as the element satisfying -- the predicate is found, we use it with an infinite list t1 = any (>10) [1..] -- True -- In a strict language, -- any f xs === or (map f xs) -- and so 'or' does not get to work before (map f xs) finishes. -- Which, if it is applied to an infinite list, never happens. -- We can grow the chain farther. -- For example, the input list of numbers could be the result of -- parsing a column of text -- Splitting into lines, parsing, comparing -- all happens -- incrementally and on demand. t2 = any (>10) . map read . lines $ "2\n3\n5\n7\n11\n13\nINVALID" -- True -- Even though INVALID is unparseable as a number. -- Therefore, evaluation really stopped before the entire list is consumed. -- But what if we want to read the column of text from a file? -- Or a communication pipe? t2_bad = (any (>10) . map read . lines) `fmap` read_string test_file where read_string :: FilePath -> IO String read_string = undefined -- where read_string is a function to read file data. But how much to read? -- Mainly, the parsing and `any' evaluation happens only after -- read_string action finished and returns the value, that -- is, some content from the file. But it may have read too much -- or too little -- we can't know until we parse data and check -- the predicate. So, we lost incrementality and termination right -- after the satisfying element is found. test_file = "/tmp/testf.txt" make_test = do h <- openFile test_file WriteMode hPutStr h $ "2\n3\n5\n7\n11\n13\nINVALID" hClose h putStrLn "Made test" -- 1. Use lazy IO. It has many problems, including with resource usage t2_lazy = do h <- openFile test_file ReadMode str <- hGetContents h let result = any (>10) . map read . lines $ str return result -- True -- However, when the handle h is closed? When GC gets around to -- it: leak of a scarce resource (file handle). -- Mainly, Lazy IO has big problems if the input comes from a pipe. -- It is not specified how much hGetContents really reads: -- It reads by buffer-full and it may read-ahead by arbitrary amounts, -- which for communication channels can (and does) lead to a deadlock. -- For the channels, we need to know exactly how much we read. -- 2. Read strictly t2_strict = bracket (openFile test_file ReadMode) hClose (loop (>10)) where loop f h = do r <- liftM f $ liftM read $ hGetLine h if r then return True else loop f h -- True -- Here we lost compositionality and have to program the iteration -- explicitly, essentially inlining 'or'. See Augustsson's post -- for more discussion why we have to do this. -- On the plus side, the handle is definitely closed, -- immediately right after the result is obtained. -- 3. Roll out the explicit stream: monadic list data ListV m a = Nil | Cons a (List m a) type List m a = m (ListV m a) mapL :: Monad m => (a -> b) -> List m a -> List m b mapL f l = check `liftM` l where check Nil = Nil check (Cons x l) = Cons (f x) (mapL f l) orL :: Monad m => List m Bool -> m Bool orL l = l >>= check where check Nil = return False check (Cons True _) = return True check (Cons _ l) = orL l linesL :: FilePath -> List IO String linesL fname = openFile fname ReadMode >>= loop where loop h = do b <- hIsEOF h if b then hClose h >> return Nil else do l <- hGetLine h return (Cons l (loop h)) -- Here, the handle is closed only when we see EOF anyL :: Monad m => (a -> Bool) -> List m a -> m Bool anyL f = orL . mapL f -- Almost like regular any t2_stream = anyL (>10) . mapL read $ linesL test_file -- True -- This is great -- the code looks just as the same as in the -- lazy version. And we never read the INVALID data. -- But since we never finish reading the file, and so never see EOF, -- who closes the handle? -- Only GC -- which may occur really late. So, we leak a -- scarce resource, just as we did with Lazy IO! -- 4. Use simple generators -- A sample producer. It only produces file data, without caring -- who will consume them and how. fileG :: (GBracket m, MonadIO m) => FilePath -> Producer m Char fileG fname = gbracket (liftIO $ openFile fname ReadMode) (liftIO . hClose) loop where loop h = do b <- liftIO $ hIsEOF h if b then return () else liftIO (hGetChar h) >>= yield >> loop h -- Now the handle is closed not only on EOF but also on any exception! -- One consumer is putChar, whose type is indeed Consumer IO Char. -- Here is the simplest hook-up of fileG and putChar, copying -- the file to the standard output. catG :: FilePath -> IO () catG fname = fileG fname `runGenT` putChar -- catG test_file -- -- The function |or| is a more interesting consumer orG :: Monad m => Producer (ErrT Bool m) Bool -> m Bool orG gen = either id (const False) `liftM` runErrT (runGenT gen orC) where orC :: MonadError Bool m => Consumer m Bool orC True = throwError True orC False = return () anyG :: Monad m => (a -> Bool) -> Producer (GenT Bool (ErrT Bool m)) a -> m Bool anyG f = orG . mapG f -- The code is quite like the lazy case: functional composition linesG :: Monad m => Transducer (StateT String (GenT String m)) m Char String linesG gen = foldG tr [] gen >>= at_end where tr s '\n' = yield (reverse s) >> return [] tr s c = return (c:s) at_end [] = return () at_end s = yield (reverse s) check_linesG :: IO () check_linesG = linesG (fileG test_file) `runGenT` putStrLn {- 2 3 5 7 11 13 INVALID -} -- The last line in the file was not terminated by \n t2_gen :: IO Bool t2_gen = anyG (>10) . mapG read . linesG $ fileG test_file -- True -- and now we know that the handle will be closed -- ------------------------------------------------------------------------ -- Utilities class Monad m => GBracket m where gbracket :: m a -> (a -> m b) -> (a -> m c) -> m c instance GBracket IO where gbracket = bracket instance GBracket m => GBracket (ReaderT e m) where gbracket pre post body = ReaderT $ \env -> gbracket (runReaderT pre env) (\a -> runReaderT (post a) env) (\a -> runReaderT (body a) env) instance GBracket m => GBracket (StateT s m) where gbracket pre post body = StateT $ \s -> gbracket (runStateT pre s) (\(a,s) -> runStateT (post a) s) (\(a,s) -> runStateT (body a) s) -- Alas, in my version of GHC ErrorT still has the -- annoying Error e constraint! -- The following code is written to compensate newtype ErrT e m a = ErrT{runErrT :: m (Either e a)} instance Monad m => Monad (ErrT e m) where return = ErrT . return . Right ErrT m >>= f = ErrT (m >>= \x -> case x of Right x -> runErrT (f x) Left e -> return (Left e)) instance Monad m => MonadError e (ErrT e m) where throwError = ErrT . return . Left catchError m h = reifyErr m >>= \x -> case x of Left e -> h e Right x -> return x reifyErr :: Monad m => ErrT e m a -> ErrT e m (Either e a) reifyErr (ErrT m) = ErrT (m >>= return . Right) reflectErr :: Monad m => Either e a -> ErrT e m a reflectErr = ErrT . return instance Monad m => GBracket (ErrT e m) where gbracket pre post body = do a <- pre r <- reifyErr (body a) post a reflectErr r instance MonadTrans (ErrT e) where lift m = ErrT $ liftM Right m instance MonadIO m => MonadIO (ErrT e m) where liftIO = lift . liftIO