-- Another sequence of simple examples demonstrating Iteratees -- in comparison with Lazy and Handle IO -- The code from IterDemo1, a bit simplified for the talk -- We stress it uses the full-blown library IterateeM, putting a -- thin veneer on the top of it. -- later: make the class RunPromote, use Rules to specialize run -- to runI module IterDem where import System.IO import System.IO.Error (isEOFError) import Data.Char (isSpace) import Data.List (isPrefixOf) import Control.Exception (try, throw, IOException, fromException, bracket) import Control.Monad.Trans (lift) import IterateeM as IM import ComposeAdv (R, L(..), (:>=:)(..)) import Data.Monoid -- ------------------------------------------------------------------------ -- We start with a trivial example: counting white space -- characters in a file. -- Lazy IO example -- Standard pattern-matching on a string -- (see for a better code below) countWS_lazy :: String -> Int countWS_lazy "" = 0 countWS_lazy (c:str) | isSpace c = 1 + countWS_lazy str countWS_lazy (_:str) = countWS_lazy str -- The complete example, counting white-space characters in a file run_countWSL fname = readFile fname >>= print . countWS_lazy twsl = run_countWSL "/etc/motd" -- 190 -- Handle-based IO -- We now differentiate EOF from other IO errors: -- we could not do that with the Lazy IO. -- The code is much more explicit -- with error handling and -- ensuring that the handle is always closed -- -- but is still quite simple. countWS_handle :: Handle -> IO Int countWS_handle h = loop 0 where loop n = try (hGetChar h) >>= check n check n (Right c) = loop (if isSpace c then n+1 else n) check n (Left e) | Just ioe <- fromException e, isEOFError ioe = return n check _ (Left e) = throw e run_countWSH fname = print =<< bracket (openFile fname ReadMode) hClose countWS_handle twsh = run_countWSH "/etc/motd" -- The Iteratee IO -- We use pattern-matching on the stream, like in the Lazy IO case. -- The stream is implicit here. The code is tail-recursive. -- Again, this is not the best code we can write: see below for -- a better version. -- We should make n+1 strict -- as usual in Haskell. countWS_iter :: Monad m => Iteratee Char m Int countWS_iter = loop 0 where loop n = getchar >>= check n check n (Just c) = loop (if isSpace c then n+1 else n) check n Nothing = return n -- The code is close to that for Lazy IO and Handle IO. -- We did not have to deal with errors: getchar will -- take care for that. IO Errors other than EOF will be reported -- explicitly (like with the Handle IO) run_countWSI fname = print =<< fileL fname >$< countWS_iter -- No bracketing is needed: there is no handle to leak twsi = run_countWSI "/etc/motd" -- ------------------------------------------------------------------------ -- More elegant solution to the white-space counting problem -- Lazy IO permits a far more elegant solution -- It is a one-liner, using the standard Prelude functions on lists countWS'_lazy :: String -> Int countWS'_lazy = length . filter isSpace run_countWSL' fname = readFile fname >>= print . countWS'_lazy twsl' = run_countWSL' "/etc/motd" -- 54 -- Iteratee IO affords the same elegance -- We use nested streams: filterL is what it sounds like, the analogue -- of List.filter countWS'_iter :: Monad m => Iteratee Char m Int countWS'_iter = filterL isSpace >$< count_i run_countWSI' fname = print =<< fileL fname >$< countWS'_iter twsi' = run_countWSI' "/etc/motd" -- Two different ways of writing it: -- More symmetric than LazyIO run_countWSI2 fname = print =<< fileL fname >$< (filterL isSpace >$< count_i) twsi2 = run_countWSI2 "/etc/motd" -- >.< relates to >$< as (.) relates to ($) -- And implemented similarly... -- Show the types run_countWSI3 fname = print =<< (fileL fname >.< filterL isSpace) >$< count_i twsi3 = run_countWSI3 "/etc/motd" -- A producer that gives all spaces from a file fileSpaceL :: FilePath -> L Char IO IO fileSpaceL fname = fileL fname >.< filterL isSpace run_countWSI4 fname = print =<< fileSpaceL fname >$< count_i twsi4 = run_countWSI4 "/etc/motd" -- ------------------------------------------------------------------------ -- A more elaborate problem: counting the occurrences of the word ``the'' -- (assuming the input is text with words of bounded size) -- We must count ``the'' as the word by itself, not being a part of -- another word. That is, in order for ``the'' to be counted, the -- character before and after (if exist) must be whitespace. -- Lazy IO -- We use the standard library function `words' to parse the input string -- into a list of words. We filter out the words other than ``the'' -- and count the remainder. countTHE_lazy :: String -> Int countTHE_lazy = length . filter (=="the") . words run_countTHEL fname = readFile fname >>= print . countTHE_lazy tthel = run_countTHEL "/etc/motd" -- The IterateeIO does the same stream processing as Lazy IO, -- converting one stream to another (characters to words, words to -- filtered words) countTHE_iter :: Monad m => Iteratee Char m Int countTHE_iter = wordsL >$< filterL (=="the") >$< count_i run_countTHEI fname = print =<< fileL fname >$< countTHE_iter tthei = run_countTHEI "/etc/motd" -- ------------------------------------------------------------------------ -- counting the occurrences of the word ``the'' in the sequence of files -- assuming the files are concatenated together. -- Therefore, we will count the word `the' if the letter `t' is in one -- file but `he' is in the next one. -- Lazy IO: we re-use the previously written counting function countTHE_lazy -- We will pass it a string that is the concatenation of files' contents. run_manyTHEL fnames = mapM readFile fnames >>= print . countTHE_lazy . concat tmanyl = run_manyTHEL ["/etc/motd","/etc/motd"] -- The code is elegant. -- The files are read incrementally; the second file will be read only after -- the first one finished. Alas, we have to open all of the files first! -- (the readFile action is performed first, which opens the file and prepares -- it for lazy reading). -- Therefore, we need as many descriptors as there are files in the fnames -- list. If the list is obtained from scanning a directory tree, -- we may run out of file descriptors! That is particularly disturbing given -- that we only need one file descriptor, opening and closing it as we go. -- We get the first intimation how Lazy IO mis-manages resources -- sometimes -- taking much more than needed, and giving the programmer no facilities -- to control the resources. -- The Handle IO solution -- The first approximation -- run_manyTHEH fnames = mapM counter fnames >>= print . sum -- where counter fname = -- bracket (openFile fname ReadMode) hClose countTHE_handle -- We only need one file descriptor for the whole operation: the next -- file is opened only after the previous file is closed. -- Alas, this solution is deficient since EOF is counted as a word -- terminator. We can't handle the case of the word `the' split across the -- files. We have to re-write our state machine to perform an action -- upon the detection of EOF to re-associate the handle with another file. -- This re-writing is left as an exercises to the reader, to drive down -- the point of how low-level Handle IO is. -- tmanyh = run_manyTHEH ["/etc/motd","/etc/motd"] -- Iteratee IO -- Mappending producers concatenates their sources run_manyTHEI fnames = print =<< foldr1 mappend (map fileL fnames) >$< countTHE_iter -- The iteratee code again looks pretty much like Lazy IO code. However, -- only one file descriptor is used for all processing. -- fileL prints the debugging message when the file is opened -- and closed. We clearly see that, unlike Lazy IO, only one file is -- open at any given time. tmanyi = run_manyTHEI ["/etc/motd","/etc/motd"] -- ------------------------------------------------------------------------ -- Counting the occurrences of the word ``the'' and the white space, together -- Lazy IO -- We just combine the previously written counters. -- It looks great! run_countPairL fname = do str <- readFile fname print (countWS'_lazy str, countTHE_lazy str) twsthel = run_countPairL "/etc/motd" -- (190,8) -- Alas, it doesn't work great! Now, the whole file is loaded in memory. -- The processing is no longer incremental. We have come across one -- of many surprises of Lazy IO. -- The Iteratee code -- Like Lazy IO, we re-use the previously written counters, pairing them. -- Unlike Lazy IO, the processing remains incremental. As we read a -- block from file, we send the block to two iteratees for handling. run_countPairI fname = print =<< fileL fname >$< (countWS'_iter `en_pair` countTHE_iter) twsthei = run_countPairI "/etc/motd" -- (190,8) -- ------------------------------------------------------------------------ -- Early termination: -- Counting the occurrences of the word ``the'' and the white space -- within the prefix of the stream of the size at most N. The example -- is abstracted from reading HTTP request content with the -- explicitly specified Content-Length. -- We should not attempt to read even a single byte after N since -- the web client expects the reply first, before it will send -- the next request. If we attempt to read ahead after N bytes, -- the deadlock ensues. run_ntermL n fname = do str0 <- readFile fname let str = Prelude.take n str0 print (countWS_lazy str, countTHE_lazy str) tnterml = run_ntermL 160 "/etc/motd" -- As before, run_countPairL, this code does not run in constant memory. -- There is another problem: since we may read only a part of the file, -- the file descriptor will not be closed (until a finalizer is run, which -- may happen very late). There is a real danger of running out of -- file descriptors (which regularly happens in practice with Lazy IO). -- There is the third problem: the run-time system may speculatively -- read-ahead, at any time and for any reason. The programmer has -- no way whatsoever to control this read-ahead or even be informed about it. -- Therefore, deadlock may happen (and does routinely happen in practice, when -- using lazy IO for interactive services). -- Lazy IO was designed to give the impression that IO is not even happening. -- Alas, when dealing with request-response servers and multiple IO -- operations, even reading is an observable effect. -- Iteratee IO run_ntermI n fname = print =<< fileL fname >$< takeL n >$< (countWS_iter `en_pair` countTHE_iter) tntermi = run_ntermI 160 "/etc/motd" -- The enumeratee takeL will ensure that no more than n characters -- are read; afterwards, it will not speculatively ask for any further -- file data. -- Early termination: -- Counting the occurrences of the word ``the'' and the white space -- up to the occurrence of the terminating string ``the end'' -- (``the'' in the terminating string should be counted). -- Exercise: don't count ``the'' occurring in the terminator. -- The example is abstracted from reading HTTP multi-part messages, -- which are terminated by a so-called boundary string. -- See IterDemo1.hs -- ------------------------------------------------------------------------ -- The following functions are general and should be in an iteratee -- library (and in some libraries, they are) -- Count the stream -- This iteratee does not do any IO (betrayed by its type, polymorphic -- over the monad m). The counting iteratee is also polymorphic over stream -- elements: it can count any streams. count_i :: Monad m => Iteratee el m Int count_i = ie_cont $ step 0 where step acc (Chunk []) = ie_contM (step acc) step acc (Chunk [_]) = ie_contM (step $! succ acc) step acc (Chunk ls) = ie_contM (step $! acc + length ls) step acc stream = ie_doneM acc stream -- Like a combination of head and peek -- (Some Iteratee libraries may provide this function) -- It is also easy to write. getchar :: Monad m => Iteratee el m (Maybe el) getchar = ie_cont step where step (Chunk []) = ie_contM step step (Chunk (c:rest)) = ie_doneM (Just c) (Chunk rest) step s@(EOF Nothing) = ie_doneM Nothing s step s@(EOF (Just e)) = return (throwErr e, s) -- Plugging a consumer into a producer: almost like the -- regular functional application -- We can almost read L e mi mo as (e mi -> mo) infixr 1 >$< {-# INLINE (>$<) #-} (>$<) :: (mo :>=: mi) => L e mi mo -> R e mi a -> mo a L f >$< x = f x >>= promote . run -- The following operator >.< relates to >$< as composition relates -- to a functional application infixr 2 >.< -- The code says, in particular, that composing an enumerator with -- an enumeratee gives an enumerator, and composing two enumeratees -- gives an enumeratee. -- The code also gives all the intermediate cases. (>.<) :: (mo :>=: m) => L e m mo -> L e' m (R e m) -> L e' m mo e12 >.< e23 = L (\ie' -> e12 >$< unL e23 ie') -- A few standard enumerators and enumeratees in the L notation takeL :: Monad m => Int -> L e m (R e m) takeL n = L (IM.take n) wordsL :: Monad m => L String m (R Char m) wordsL = L IM.enum_words fileL :: FilePath -> L Char IO IO fileL fname = L (IM.enum_file fname) filterL :: Monad m => (el -> Bool) -> L el m (R el m) filterL test = L go where go (IE_cont Nothing k) = ie_cont (step k) go i = return i step k (Chunk l) = case filter test l of [] -> ie_contM (step k) l -> feedI k (Chunk l) >>= \i -> return (go i, empty_stream) step k s = ie_doneM (ie_cont k) s -- Parallel composition of Iteratees -- The following enumeratee enum2' applies two iteratees to the same stream, -- in effect, splitting the stream in two. -- There is no buffering however: enum2' receives a chunk and passes -- it to both iteratees, before asking for the next chunk. -- The enumeratee enum2' finishes either when the stream is over or -- when at least one of the given iteratees finish. -- The IterateeM.hs library has the similar combinator enum2, -- which continues requesting stream data so long as at least -- one of the iteratees wants them. enum2' :: Monad m => Iteratee el m a -> Iteratee el m b -> Iteratee el m (Iteratee el m a, Iteratee el m b) enum2' (IE_cont Nothing k1) (IE_cont Nothing k2) = ie_cont step where step (Chunk []) = ie_contM step step str@Chunk{} = feedI k1 str >>= \i1 -> feedI k2 str >>= \i2 -> ie_ret $ enum2' i1 i2 step str = ie_doneM (ie_cont k1, ie_cont k2) str -- at least one iteratee is finished and doesn't want any data enum2' i1 i2 = return (i1,i2) -- A convenient combinator for the frequently occurring pattern en_pair :: Monad m => Iteratee el m a -> Iteratee el m b -> Iteratee el m (a,b) en_pair i1 i2 = enum2' i1 i2 >>= runI2 -- ------------------------------------------------------------------------ -- Formulation of the equational laws -- A producer whose source is a given finite string strL :: Monad m => [e] -> L e m m strL str = L (enum_pure_1chunk str) -- Read a stream to the end and return all of its elements as a list -- This primitive iteratee is quite useful when writing test cases. strR :: Monad m => R e m [e] strR = stream2list infixl 0 === -- `Propositional equality' -- It should really be proven than `computed'. Therefore, -- the computation below is trivial (===) :: a -> a -> a (===) = const -- Equational laws -- The law of composition pr_comp s1 s2 = strL (s1 ++ s2) === strL s1 `mappend` strL s2 t_report e1 e2 = do r1 <- e1 r2 <- e2 print (r1, r2, r1 == r2) -- The spot-check tcomp = t_report (strL ("123" ++ "456") >$< strR) ((strL "123" `mappend` strL "456") >$< strR) -- The law of chaining -- (provided i properly recognizes s1) pr_chain s1 s2 i f = (strL (s1 ++ s2) >$< (i >>= f)) === ((strL s1 >$< i) >>= \x -> strL s2 >$< f x) tchain = t_report (strL (s1 ++ s2) >$< (i >>= f)) ((strL s1 >$< i) >>= \x -> strL s2 >$< f x) where s1 = "1" s2 = "23" i = getchar f x = strR >>= \y -> return (x,y) -- Failure is the zero of bind failure :: Monad m => R e m a failure = throwErrStr "failure" pr_zero f = failure >>= f === failure {- i >>= \x -> (k1 x >= k1) >= k2) -}