-- A set of simple examples demonstrating Iteratees -- -- We implement progressively more complex examples -- by freely combining independently written, primitive iteratees. -- We stress assembling of the whole program from independent -- building blocks, replacing and reshuffling the blocks to -- suit the task. -- -- Each primitive block may be stateful; it encapsulates and manages -- its state without leaking it. The overall program state -- -- composed of the state of the participating building blocks -- -- is left implicit. We never have to worry about initializing -- the state at the beginning of the program; each iteratee -- will take care of the initialization, accumulation and -- finalization of its own state. -- -- The examples revolve around reading text data and -- counting lines and words, or searching for a specific -- word. The final example emulates "grep -w -n" -- -- To compile this code -- ghc --make -O2 -main-is IterDemo.tlc2' IterDemo.hs -- To run this code -- GHCRTS="-tstderr" /usr/bin/time ./IterDemo module IterDemo where import IterateeM import Control.Monad.Trans -- ------------------------------------------------------------------------ -- The first example: counting the number of lines in a file -- Lazy IO solution, for comparison count_lines fname = do str <- readFile fname print . length . lines $ str -- Iteratee solution -- The example illustrates the nested stream: en_lines is -- the enumeratee, consuming chunks of characters and -- generating the stream of lines count_lines_i fname = print =<< run =<< enum_file fname .| en_lines count_i -- wc /etc/motd -- 24 161 1112 /etc/motd -- wc /usr/share/dict/words -- 235923 235923 2493500 /usr/share/dict/words tl1 = count_lines "/etc/motd" -- 24 tl1' = count_lines "/usr/share/dict/words" -- 235923 tl2 = count_lines_i "/etc/motd" tl2' = count_lines_i "/usr/share/dict/words" -- 235923 -- Counting lines from several sources, effectively -- concatenating them. One source can be a file, the other -- -- a given string (or UDP datagrams, etc.) -- The example demonstrates that the Kleisli composition (>>>) of -- enumerators effectively concatenates their sources. count_lines_many fname = print =<< run =<< (enum_file fname >>> enum_pure_1chunk "\naaa\n") .| en_lines count_i -- 26 -- ------------------------------------------------------------------------ -- Counting lines and characters -- The lazy IO solution, for comparison. -- It does NOT run in constant memory count_lines_chars fname = do str <- readFile fname print $ (length (lines str), length str) -- The Iteratee example illustrates pairing of iteratees to -- run in parallel. The same count_i counts the outer stream -- (chunks of characters) and the inner stream (chunks of lines). count_lines_chars_i fname = print =<< run =<< enum_file fname ((id .| en_lines count_i) `en_pair` count_i) tlc1 = count_lines_chars "/etc/motd" -- (24,1112) tlc1' = count_lines_chars "/usr/share/dict/words" -- (235923,2493500) -- Needs 43MB, thus loading the whole file into memory tlc2 = count_lines_chars_i "/etc/motd" -- (24,1112) tlc2' = count_lines_chars_i "/usr/share/dict/words" -- (235923,2493500) -- counts in constant memory -- Counting of lines and words -- The example nests streams even deeper, transforming -- the stream of characters to the stream of lines and then to -- the stream of words. -- The enumeratee en_words (see the end of the file) -- consumes lines and produces words. -- The familiar count_i does all the counting. count_lines_words_i fname = print =<< run =<< enum_file fname .| en_lines ((id .| en_words count_i) `en_pair` count_i) tlw = count_lines_words_i "/etc/motd" -- ------------------------------------------------------------------------ -- Searching the stream -- Stopping the stream processing once the desired word is found. -- Stopping the line counter, which was running in parallel, -- gives the line number for the line where the word was found. -- This example illustrates the exception handling. -- The exception is raised by Iteratee.head when IterateeM.dropWhile -- below dropped the whole stream while looking for the given word. search_word word fname = let search = IterateeM.dropWhile (/= word) >> IterateeM.head handle = en_handle (\_ -> "not found:" ++ word) in print =<< run =<< enum_file fname .| en_lines ((id .| en_words (handle search)) `en_pair` count_i) ts1 = search_word "the" "/etc/motd" -- (Right "the",3) -- That is, the word `the' has occurred on line 3 ts2 = search_word "NO" "/etc/motd" ts3 = search_word "Linux" "/etc/motd" ts4 = search_word "nonexistent" "/etc/motd" -- (Left "not found:nonexistent",8) -- The word was not found; 8 lines were read -- We modify search_word to print not only the number for the line -- with the found word, but also the line itself. -- We thus implement "grep -w -n -m 1 " -- The only difference from search_word is in the last line, the -- additional pairing with en_last. grep1_word word fname = let search = IterateeM.dropWhile (/= word) >> IterateeM.head handle = en_handle (\_ -> "not found:" ++ word) in print =<< run =<< enum_file fname .| en_lines ((id .| en_words (handle search)) `en_pair` (count_i `en_pair` en_last)) tg1 = grep1_word "the" "/etc/motd" {- (Right "the", (3,Just "The programs included with the Debian GNU/Linux system are free software;")) grep -w -n -m 1 the /etc/motd prints the same information -} tg2 = grep1_word "the" "/dev/null" -- (Left "not found:the",(0,Nothing)) tg3 = grep1_word "XXX" "/etc/motd" -- (Left "not found:XXX",(8,Just "permitted by applicable law.")) -- Printing not only the line with the found word, but also -- 4 preceding lines. -- We thus implement "grep -w -n -m 1 -B 4 " -- The only modification is the replacement of en_last with -- en_lastN 5. -- The state needed for the context accumulation is encapsulated -- in en_lastN, therefore, no other changes are needed. grep5_word word fname = let search = IterateeM.dropWhile (/= word) >> IterateeM.head handle = en_handle (\_ -> "not found:" ++ word) in print =<< run =<< enum_file fname .| en_lines ((id .| en_words (handle search)) `en_pair` (count_i `en_pair` (en_lastN 5))) tg5 = grep5_word "the" "/etc/motd" {- (Right "the",(5, ["FreeBSD 8.2-RELEASE (Adric) #0: Sat Apr 9 03:56:26 PDT 2011", "", "Welcome to FreeBSD!", "", "Before seeking technical support, please use the following resources:"])) grep -w -n -m 1 -B 4 the /etc/motd prints the same information -} -- ------------------------------------------------------------------------ -- Repeated stream searching: emulating grep -w -- Emulating "grep -w -n " -- We print all lines that contain the given word, with the -- corresponding line number -- The example demonstrates generators: converting an -- iterative process to an enumerator. Since iterations consume -- stream data, we get in effect an enumeratee (see unfold_stream below) -- -- The example shows the duplication of an iteratee, seducing it -- to reveal the intermediate result. We feed an iteratee EOF, -- (getting it to give the result so far), _and_ we feed -- the iteratee more stream data, continuing with the accumulated state. -- -- Instead of stream2list below (which accumulates stream data in a list) -- we could have used an iteratee that just prints stream data as they -- are received. grep_word word fname = print =<< run =<< enum_file fname .| en_lines .| unfold_stream (unfolder acc) stream2list where search = IterateeM.dropWhile (/= word) >> IterateeM.head guard = id .| en_words (en_handle (\_ -> "not found:" ++ word) search) acc = count_i `en_pair` en_last -- Building the unfolder: -- acc is the accumulating iteratee; -- guard is the guard iteratee, returning (Left _) to stop -- and (Right _) to continue. -- We duplicate the unfinished iteratee acc: we feed it EOF -- to obtain the result so far, and we use it again with more stream data. unfolder state = Unfolder (enum2' guard state >>= check) check r@(_,state') = runI2 r >>= \x -> case x of (Right _,v) -> return $ Just (v, unfolder state') (Left _,_) -> return Nothing th1 = grep_word "the" "/etc/motd" {- [(3,Just "The programs included with the Debian GNU/Linux system are free software;"), (4,Just "the exact distribution terms for each program are described in the"), (7,Just "Debian GNU/Linux comes with ABSOLUTELY NO WARRANTY, to the extent")] grep -w -n the /etc/motd prints the same information -} th2 = grep_word "the" "/dev/null" -- [] th3 = grep_word "XXX" "/etc/motd" -- [] -- ------------------------------------------------------------------------ -- The following functions are general and should be in an iteratee -- library (and in some libraries, they are) -- Count the stream -- This iteratee does not do any IO (betrayed by its type, polymorphic -- over the monad m). The counting iteratee is also polymorphic over stream -- elements: it can count any streams. count_i :: Monad m => Iteratee el m Int count_i = ie_cont $ step 0 where step acc (Chunk []) = ie_contM (step acc) step acc (Chunk [_]) = ie_contM (step $! succ acc) step acc (Chunk ls) = ie_contM (step $! acc + length ls) step acc stream = ie_doneM acc stream -- Return the last received element -- The analogue of List.last en_last :: Monad m => Iteratee el m (Maybe el) en_last = ie_cont $ step Nothing where step prev (Chunk []) = ie_contM (step prev) step prev (Chunk ls) = ie_contM (step . Just $! last ls) step prev stream = ie_doneM prev stream -- Return N last received elements -- The acc below is the list of N last elements in the reverse -- order en_lastN :: Monad m => Int -> Iteratee el m [el] en_lastN n = ie_cont $ step [] where step acc (Chunk []) = ie_contM (step acc) step acc (Chunk ls) = ie_contM (step $! Prelude.take n $ reverse ls ++ acc) step acc stream = ie_doneM (reverse acc) stream -- For testing tlastN n = print =<< run =<< enum_file "/etc/motd" (en_lastN n) -- Convert the stream of characters into the stream of lines -- The iteratee library has an enumeratee enum_lines, which -- however stops at the empty line. -- We define an enumeratee here that keeps on reading -- till EOF en_lines :: Monad m => Enumeratee Char Line m a en_lines (IE_cont Nothing k) = line >>= check_line where check_line (Right l) = lift (feedI k (Chunk [l])) >>= en_lines check_line (Left l) = lift (feedI k (if null l then Chunk [] else Chunk [l])) en_lines i = return i -- Parallel composition of Iteratees -- The following enumeratee enum2' applies two iteratees to the same stream, -- in effect, splitting the stream in two. -- There is no buffering however: enum2' receives a chunk and passes -- it to both iteratees, before asking for the next chunk. -- The enumeratee enum2' finishes either when the stream is over or -- when at least one of the given iteratees finish. -- The IterateeM.hs library has the similar combinator enum2, -- which continues requesting stream data so long as at least -- one of the iteratees wants them. enum2' :: Monad m => Iteratee el m a -> Iteratee el m b -> Iteratee el m (Iteratee el m a, Iteratee el m b) enum2' (IE_cont Nothing k1) (IE_cont Nothing k2) = ie_cont (step k1 k2) where step k1 k2 (Chunk []) = ie_contM (step k1 k2) step k1 k2 str@Chunk{} = feedI k1 str >>= \i1 -> feedI k2 str >>= \i2 -> ie_ret $ enum2' i1 i2 step k1 k2 str = ie_doneM (ie_cont k1, ie_cont k2) str -- at least one iteratee is finished and doesn't want any data enum2' i1 i2 = return (i1,i2) -- A convenient combinator for the frequently occurring pattern en_pair :: Monad m => Iteratee el m a -> Iteratee el m b -> Iteratee el m (a,b) en_pair i1 i2 = enum2' i1 i2 >>= runI2 -- Lines-to-words enumeratee -- (The IterateeM.hs has the similar enum_words, which parses -- words directly from the character stream) en_words :: Monad m => Enumeratee Line String m a en_words i@(IE_cont Nothing k) = ie_cont (step k) where step k (Chunk []) = ie_contM (step k) step k (Chunk ls) = feedI k (Chunk (words . concat $ ls)) >>= \i -> ie_ret $ en_words i step k1 str = ie_doneM (ie_cont k) str en_words i = return i -- The analogue of the while-loop: -- read the stream until the guard tells us to stop. -- This is the analogue of List.unfoldr. The result is not a list -- however; the produced elements are fed to the given iteratee. -- This enumeratee generalizes IterateeM.sequence_stream -- co-inductive, as expected newtype Unfolder m a = Unfolder (m (Maybe (a, Unfolder m a))) unfold_stream :: (Monad m, MonadTrans t, Monad (t m)) => Unfolder (t m) el -> -- producer Iteratee el m w -> -- consumer t m (Iteratee el m w) unfold_stream (Unfolder p) i@(IE_cont Nothing k) = p >>= check where -- check (Just (v,p)) = lift (feedI k (Chunk [v])) >>= unfold_stream p check (Just (v,p)) = yield_to k v >>= unfold_stream p check Nothing = return i -- we define yield_to to make the analogy with generators clearer yield_to k v = lift (feedI k (Chunk [v])) unfold_stream _ i = return i