-- Deriving Effectful Iteratees module IterDerivM where import Control.Monad type LChar = Maybe Char -- lifted character -- We re-do the derivation in IterDeriv.hs, this time allowing -- iteratees arbitrary effects in some monad m. -- A process like IterDeriv.I, which can be either finished or -- inputting a character is modeled as a final co-algebra -- (cf. Hughes and Hinze) as follows. -- The process may now do arbitrary side-effects in some monad m. data I m a = Done a | GetC (LChar -> m (I m a)) -- A sample process: reading a line until '\n' -- The process does no effects. getline :: Monad m => I m String getline = loop "" where loop acc = GetC (check acc) check acc (Just c) | c /= '\n' = return (loop (c:acc)) check acc _ = return (Done (reverse acc)) -- Here is one effect: outputting a string class Monad m => PutS m where putS :: String -> m () instance PutS IO where putS = putStrLn -- This getline process writes the trace of all received -- characters getlineT :: (PutS m, Monad m) => I m String getlineT = loop "" where loop acc = GetC (trace acc) trace acc c = putS ("got " ++ show c) >> check acc c check acc (Just c) | c /= '\n' = return (loop (c:acc)) check acc _ = return (Done (reverse acc)) -- We combine I m a processes using monadic bind: -- I m a is a monad -- a free monad, actually. instance Monad m => Monad (I m) where return = Done Done x >>= f = f x GetC k >>= f = GetC (k >>> (return . (>>= f))) -- Kleisli composition -- `Lifting' function composition to monadic functions -- The operator (>>>) here is a special -- cases of that in Control.Category (>>>):: Monad m => (a -> m b) -> (b -> m c) -> (a -> m c) f1 >>> f2 = \x -> f1 x >>= f2 -- In fact, m (I m a) is also a monad. We can use the following -- bind to chain two (m (I m a)) processes -- Alas, we can't define "instance Monad (IM m)" -- because partially-applied type synonyms are (not yet) allowed -- in instance heads. type IM m a = m (I m a) infixl 1 `bind` bind :: Monad m => IM m a -> (a -> IM m b) -> IM m b bind m f = m >>= check where check (Done x) = f x check (GetC k) = return (GetC (\c -> bind (k c) f)) -- A sample composition of getlineT processes, -- reading several lines, until the empty line -- (and producing the input trace, as a side effect) getlinesT :: (PutS m, Monad m) => I m [String] getlinesT = loop [] where loop acc = getlineT >>= check acc check acc "" = return (reverse acc) check acc l = loop (l:acc) -- The interpreters of the I m a processes -- The enumerator, feeding a process a string en_str :: Monad m => String -> I m a -> m (I m a) en_str "" i = return i en_str (c:t) (GetC k) = en_str t =<< k (Just c) en_str _ (Done x) = return (Done x) -- The way to run I run :: Monad m => I m a -> m a run (Done x) = return x run (GetC k) = run =<< k Nothing -- A more practically useful function -- If the process does not want to finish upon receiving the EOF -- we should report an error rather than diverge. -- Denotationally, run' is the same as run; practically run' -- is quite more helpful. run' :: Monad m => I m a -> m a run' (GetC k) = k Nothing >>= \r -> case r of Done x -> return x _ -> fail "divergent iteratee" run' (Done x) = return x -- tests t110 = print =<< run =<< en_str "abd\nxxx\nf" getlineT t111 = print =<< run =<< en_str "abd\nxxx\nf" getlinesT -- ["abd","xxx","f"] -- ------------------------------------------------------------------------ -- Proving the equational laws -- The law of composition -- en_str (s1 ++ s2) === en_str s1 >>> en_str s2 -- Here, s1 must be a finite string; s2 is arbitrary -- If the enumeratee applied to (Done x) iteratee, the enumerators -- on both sides obviously return it. We assume then that enumerators apply to -- the (GetC k) iteratee. -- By induction on s1 -- Base case: s1 === "" -- then en_str s1 === en_str "" === return, which is the unit -- of (>>>) -- Inductive case: let s1 === c:s1' proof_composition c s1' s2 k = en_str ((c:s1') ++ s2) (GetC k) === -- property of list append en_str (c:(s1' ++ s2)) (GetC k) === -- second clause of en_str en_str (s1'++s2) =<< k (Just c) === -- inductive hypothesis (en_str s1' >>> en_str s2) =<< k (Just c) === -- definition of (>>>) k (Just c) >>= (\x -> en_str s1' x >>= en_str s2) === -- associativity of bind (k (Just c) >>= en_str s1') >>= en_str s2 === -- second clause of en_str en_str (c:s1') (GetC k) >>= en_str s2 === -- definition of (>>>) (en_str (c:s1') >>> en_str s2) (GetC k) -- Our equational proof was written in Haskell! -- It was not mechanically checked. But at least it is well-typed. -- GHC (at present) cannot verify that -- each step in the proof is valid; we need a real theorem prover for -- that. However GHC does verify that the premise is well-typed -- and each step in the proof is type preserving. -- GHC thus already catches many silly errors during equational -- re-writing: mis-substitutions tend to break type preservation. infixl 0 === -- `Propositional equality' -- It should really be proven than `computed'. Therefore, -- the computation below is trivial (===) :: a -> a -> a (===) = const -- Proof of the law of effectful chaining: -- 1. If the iteratee i properly recognizes s1, then -- en_str (s1++s2) (i >>= f) === en_str s1 i `bind` en_str s2 . f -- 2. If the iteratee i does not recognize s, then -- en_str s (i >>= f) === en_str s i >>= (return . (>>= f)) -- Part 1: The proof is by induction on s1, which must be finite -- by the definition of proper recognition. -- If s1 === "", then i, which properly recognizes the empty string, -- must be Done x. It is easy to see that return (Done x) is the -- left unit of `bind`: proof_left_unit_bind x f = return (return x) `bind` f === return (Done x) `bind` f === -- definition of bind f x -- Suppose s1 === c:s1' -- Then i must have the form (GetC k) where (k (Just c)) is an -- action that produces the iteratee properly recognizing s1' proof_chaining c s1' s2 k f = en_str ((c:s1') ++ s2) (GetC k >>= f) === -- property of list append en_str (c:(s1' ++ s2)) (GetC k >>= f) === -- definition of bind of (I m) en_str (c:(s1' ++ s2)) (GetC (k >>> (return . (>>= f)))) === -- second clause of en_str definition en_str (s1'++s2) =<< (k >>> (return . (>>= f))) (Just c) === -- definition of (>>>) en_str (s1'++s2) =<< (k (Just c) >>= (return . (>>= f))) === -- rearrangements (k (Just c) >>= (return . (>>= f))) >>= en_str (s1'++s2) === -- associativity of bind k (Just c) >>= (\i' -> return (i' >>= f) >>= en_str (s1'++s2)) === -- left unit law k (Just c) >>= (\i' -> en_str (s1'++s2) (i' >>= f)) === -- inductive hypothesis: i' does properly recognize s1' k (Just c) >>= (\i' -> en_str s1' i' `bind` en_str s2 . f) === -- a property of bind: m `bind` f === m >>= (\x -> return x `bind` f) k (Just c) >>= (\i' -> en_str s1' i' >>= (\x -> return x `bind` en_str s2 . f)) === -- associativity (k (Just c) >>= en_str s1') >>= (\x -> return x `bind` en_str s2 . f) === -- second clause of en_str definition en_str (c:s1') (GetC k) >>= (\x -> return x `bind` en_str s2 . f) === -- the same property of bind en_str (c:s1') (GetC k) `bind` en_str s2 . f -- Part 2: The proof is by induction on s, assuming s finite. -- In the base case, s === "", en_str s is return -- and return (i >>= f) === return i >>= (return . (>>= f)) -- using monad laws. -- Inductive case, s === c:s'. Since i does not recognize (c:s') -- i must be of the form GetC k. proof_chaining2 c s' k f = en_str (c:s') ((GetC k) >>= f) === -- definition of bind of (I m) en_str (c:s') (GetC (k >>> (return . (>>= f)))) === -- second clause of en_str definition en_str s' =<< (k >>> (return . (>>= f))) (Just c) === -- definition of (>>>) en_str s' =<< (k (Just c) >>= (return . (>>= f))) === -- rearrangements (k (Just c) >>= (return . (>>= f))) >>= en_str s' === -- associativity of bind k (Just c) >>= (\i' -> return (i' >>= f) >>= en_str s') === -- left unit law k (Just c) >>= (\i' -> en_str s' (i' >>= f)) === -- inductive hypothesis: i' does not recognize s' k (Just c) >>= (\i' -> en_str s' i' >>= (return . (>>= f))) === -- associativity of bind (k (Just c) >>= en_str s') >>= (return . (>>= f)) === -- second clause of en_str definition en_str (c:s') (GetC k) >>= (return . (>>= f)) -- ---------------------------------------------------------------------- -- Parsing combinator library, this time with effects -- primitive parsers -- An iteratee that does not recognize anything (never gets Done) failure :: Monad m => I m a failure = GetC (const (return failure)) -- The law Zero -- failure === failure >>= f -- The proof is a trivial bi-simulation. Since (I m a) is a -- final co-algebra, bi-similarity implies equality. -- Consider the relation on iteratees -- R = { (failure, failure >>= f) for all f} -- and its observations. failure is always of the form (GetC k); -- passing k any LChar gives, after a trivial effect, failure again. -- If failure = GetC k, then -- failure >>= f = GetC (\c -> k c >>= (return . (>>= f))) -- so feeding the same LChar (or any LChar) gives back failure >>= f. -- So R is a bi-simulation indeed. -- An iteratee (Done v) recognizes the empty string empty :: Monad m => a -> I m a empty v = Done v -- A parser for one lifted character oneL :: Monad m => I m LChar oneL = GetC (return . Done) -- Left-biased alternation -- Informally, it races the two processes until one process -- finishes. If both finish at the same time, the first process wins. infix 1 I m a -> I m a -> I m a Done x liftM2 (>= \x -> return (x,x) === -- en_str s i >>= \x -> en_str s i >>= \y -> return (x,y) -- for any string s -- Right-distributivity law -- i >>= \x -> (k1 x >= k1) >= k2) -- provided that i is the idempotent iteratee. -- -- We prove by bi-similarity. -- Let R be a relation on iteratees: -- R = { (i >>= \x -> (k1 x >= k1) >= k2)) } -- and consider the observations on a pair of related iteratees -- iA =def= i >>= \x -> (k1 x >= k1) >= k2) -- -- case A. If i is of the form Done x, then monad laws give -- iA = iB = (k1 x) >= \i1' -> k12 c >>= \i2' -> return $ i1' >=) and (>= \x -> (k1 x kA c === -- definition on (>>=) (k >>> (return . (>>= \x -> (k1 x >= \i' -> return (i' >>= \x -> (k1 x >= k1) >= k2) of GetC kB -> kB c === -- definition of (>>=) case GetC (k >>> (return . (>>= k1))) >> (return . (>>= k2))) of GetC kB -> kB c === -- definition of (>= \ix -> return (ix >>= k1)) (k c >>= \iy -> return (iy >>= k2)) === -- definition of liftM2 (k c >>= \ix -> return (ix >>= k1)) >>= \i1 -> (k c >>= \iy -> return (iy >>= k2)) >>= \i2 -> return (i1 >= \ix -> k c >>= \iy -> return ((ix >>= k1) >= k2)) === -- monad unit law k c >>= \ix -> k c >>= \iy -> return (ix,iy) >>= \ (ix,iy) -> return ((ix >>= k1) >= k2)) === -- idempotence, given the string [c] k c >>= \ix -> return (ix,ix) >>= \ (ix,iy) -> return ((ix >>= k1) >= k2)) === -- monad laws k c >>= \i' -> return ((i' >>= k1) >= k2)) -- Thus feeding c to iA and iB incurs the same effect -- (associated with k c) and produces the iteratees that are also -- related by R. -- R is thus a bi-simulation. Since I m a is a final co-algebra, -- bisimilar iteratees are equal. -- Another interesting property -- If s is properly recognized by i1 or i2, or s is not recognized -- by either i1 or i2, and the monad m is commutative, then -- en_str s (i1 Monad (PutB m) where return = PBDone PBDone x >>= f = f x PutB str k >>= f = PutB str (liftM (>>= f) k) instance Monad m => PutS (PutB m) where putS str = PutB str (return $ PBDone ()) -- The handler of PutB requests. The types make clear that -- en_pb takes an iteratee that can make a PutB request, and -- returns the iteratee that doesn't make such request. -- We have indeed `handled' PutB. The type of en_pb is -- reminiscent of Enumerator and Enumeratee, but differs -- from either. -- The first argument to en_pb is the put-back buffer en_pb :: Monad m => String -> I (PutB m) a -> m (I m a) en_pb _ (Done x) = return $ Done x en_pb pb (GetC k) = case pb of [] -> return $ GetC (check pb . k) c:t -> check t (k (Just c)) where check pb (PBDone x) = en_pb pb x check pb (PutB s m) = m >>= check (pb ++ s) -- We now repeat the examples from IterDeriv.hs -- using parsing combinators with look-ahead. -- Parsing (abab)* -- or -- S ::= eps | "abab" S -- Unlike the combinators in IterDeriv.hs, we do the longest match here -- Check to see if a given string occurs in the stream -- If it does, we consume the string (that is, advance the stream) and -- return True. Otherwise, we return False. The stream remains positioned -- where it was at the beginning of lookahead. lookahead :: Monad m => String -> I (PutB m) Bool lookahead str = loop "" str where loop acc "" = return True loop acc str = GetC (check acc str) check acc (c:t) (Just c') | c == c' = return (loop (c:acc) t) check acc _ (Just c) = putS (reverse (c:acc)) >> return (return False) check acc _ Nothing = putS (reverse acc) >> return (return False) -- A few tests of lookahead tl1 = print =<< run =<< en_str "abX" =<< en_pb [] (liftM2 (\x y -> (x,y)) (lookahead "ab") oneL) -- (True,Just 'X') tl2 = print =<< run =<< en_str "aX" =<< en_pb [] (liftM2 (\x y -> (x,y)) (lookahead "ab") oneL) -- (False,Just 'a') tl3 = print =<< run =<< en_str "X" =<< en_pb [] (liftM2 (\x y -> (x,y)) (lookahead "ab") oneL) -- (False,Just 'X') -- parse str* finding the longest possible match manystr :: Monad m => String -> I (PutB m) [String] manystr str = lookahead str >>= \x -> if x then liftM (str:) (manystr str) else return [] -- The following basic combinators work in any monad (including -- the monad with look-ahead) -- recognizes one character string that satisfies the given pred pSat :: Monad m => (LChar -> Bool) -> I m LChar pSat pred = oneL >>= \c -> if pred c then return c else failure -- The same but for the proper character pSat' :: Monad m => (Char -> Bool) -> I m Char pSat' pred = oneL >>= \c -> case c of Just c | pred c -> return c _ -> failure pEOF :: Monad m => a -> I m a pEOF x = pSat (==Nothing) >> return x tab1 = print =<< run =<< en_str "abababab" =<< en_pb [] ((manystr "abab") >>= pEOF) -- ["abab","abab"] tab2 = print =<< run =<< en_str "ababababX" =<< en_pb [] ((manystr "abab") >>= pEOF) -- *** Exception: divergent iteratee -- We find the longest match indeed: the first iteratee ate as -- much as it could tab3 = print =<< run =<< en_str "abababab" =<< en_pb [] (manystr "abab" >>= \s1 -> manystr "abab" >>= \s2 -> pEOF [s1,s2]) -- [["abab","abab"],[]] tab4 = print =<< run =<< en_str "ababababcd" =<< en_pb [] (manystr "ab" >>= \s1 -> manystr "abcd" >>= \s2 -> pEOF [s1,s2]) -- divergence tab5 = print =<< run =<< en_str "abababacd" =<< en_pb [] (manystr "ab" >>= \s1 -> manystr "acd" >>= \s2 -> pEOF [s1,s2]) -- [["ab","ab","ab"],["acd"]] -- Exercise to the reader: implement the parser for Exp -- Exp ::= Num | Exp + Exp | ( Exp )