-- Haskell98!

-- Handle and Lazy IO: the baseline to contrast with the Iteratee IO
--
-- The running example, part 1.
-- Reading headers: a sequence of lines terminated by an empty line.
-- Each line is terminated by CRLF (in the more general case, by any of
-- CR, LF, or CRLF).
-- We should return the headers in order. In the case of an error, we
-- should return the headers read so far and a description of the error.
--
-- Here we use the standard GHC buffered IO. None of the functions in this
-- file report the precise IO error.

module GHCBufferIO where

import System.IO
import Control.Exception (bracket)
import Control.Monad (when)
import Data.List (last, init)

-- | The header lines, in the order they appear in the input, with the
-- line terminators removed.
type Headers = [String]

-- | A human-readable description of what went wrong.
type ErrMsg = String

-- | The result of reading headers.
data HResult
  = HR Headers             -- ^ Success: the blank line was found.
  | HRFail ErrMsg Headers  -- ^ Failure: the error, and the headers so far.
  deriving Show

-- | The first attempt: read the headers line by line with 'hGetLine'.
--
-- Handles LF and CRLF terminators, but cannot handle a bare CR as the
-- line terminator. Returns @HR@ once an empty line is read and
-- @HRFail "EOF"@ if the input ends first. A final line that lacks a
-- terminator is still returned by 'hGetLine' and is therefore included
-- in the headers reported alongside the failure.
line_read :: Handle -> IO HResult
line_read h = doread []
  where
    -- acc holds the headers read so far, most recent first.
    doread acc = do
      eof <- hIsEOF h
      if eof
        then return $ HRFail "EOF" (reverse acc)
        else do
          -- Don't forget that GHC does not count CR as the line
          -- terminator: a CRLF-terminated line still carries its CR.
          l <- fmap strip_cr (hGetLine h)
          if null l
            then return $ HR (reverse acc)
            else doread (l : acc)

    -- Drop a single trailing CR, if there is one. The emptiness guard
    -- keeps the partial 'last' and 'init' safe.
    strip_cr s
      | not (null s) && last s == '\r' = init s
      | otherwise = s

-- | Read the headers character by character, treating all three of CR,
-- LF and CRLF as line terminators. Correct, but very ugly...
--
-- Returns @HR@ once an empty line is read and @HRFail "EOF"@ if the
-- input ends first. Unlike 'line_read', the characters of an
-- unterminated last line are dropped from the failure report.
line_read_cr :: Handle -> IO HResult
line_read_cr h = doread [] []
  where
    -- acc: completed headers, most recent first.
    -- curr_line: characters of the line being read, most recent first.
    doread acc curr_line = do
      eof <- hIsEOF h
      if eof
        then return $ HRFail "EOF" (reverse acc)
        else hGetChar h >>= check_term acc curr_line

    check_term acc curr_line '\n' = finish acc curr_line
    check_term acc curr_line '\r' = do
      eof <- hIsEOF h
      if eof
        -- CR was the very last character. If the line is non-empty,
        -- the EOF will be caught by the next doread.
        then finish acc curr_line
        else do
          -- Swallow the LF of a CRLF pair; leave anything else unread.
          c <- hLookAhead h
          when (c == '\n') (hGetChar h >> return ())
          finish acc curr_line
    check_term acc curr_line c = doread acc (c : curr_line)

    -- An empty line ends the headers; any other line is recorded.
    finish acc "" = return $ HR (reverse acc)
    finish acc line = doread (reverse line : acc) ""

-- | Lazy IO: pattern-matching makes for convenient parsing.
--
-- We could have used 'Prelude.lines', but the latter can't handle CRLF
-- and can't tell whether the last line was terminated or not.
-- The price of 'hGetContents': we can't do any IO on the handle
-- afterwards, and the result must be consumed before the handle is
-- closed.
line_lazy :: Handle -> IO HResult
line_lazy h = fmap (doparse []) (hGetContents h)
  where
    -- acc holds the headers parsed so far, most recent first.
    doparse acc str =
      case break (\c -> c == '\r' || c == '\n') str of
        -- No terminator left: the input ended in the middle of a line.
        (_, "") -> HRFail "EOF" (reverse acc)
        (l, '\r' : '\n' : rest) -> finish acc l rest
        (l, _ : rest) -> finish acc l rest

    -- An empty line ends the headers; any other line is recorded.
    finish acc "" _ = HR (reverse acc)
    finish acc l rest = doparse (l : acc) rest

-- Tests

-- | Test driver: open @filepath@, run @reader@ on the handle, show what
-- follows the headers, and print the result.
--
-- The handle is released by 'bracket', so it is closed even when the
-- reader or the post-header probe throws. The result is printed inside
-- the bracket so that a lazily produced result is consumed while the
-- handle is still open.
test_driver :: FilePath -> (Handle -> IO HResult) -> IO ()
test_driver filepath reader =
  bracket (openFile filepath ReadMode) hClose $ \h -> do
    putStrLn "About to read headers"
    result <- reader h
    putStrLn "Finished reading headers"
    -- For lazy IO the whole block below must be commented out: no IO
    -- can be done on the handle once hGetContents has taken it over.
    -- (Delete the leading "-- " of the next line to disable the block.)
    -- {-
    eof <- hIsEOF h
    if eof
      then putStrLn "EOF after reading headers"
      else hGetLine h >>= putStrLn . ("The line after headers is: " ++)
    -- -}
    putStrLn "The headers are"
    case result of
      HR hdrs -> print hdrs
      HRFail err hdrs -> do
        putStrLn $ "Detected error: " ++ err
        putStrLn "Headers so far"
        print hdrs

test11, test12, test13, test14 :: IO ()
test11 = test_driver "test1.txt" line_read
test12 = test_driver "test2.txt" line_read -- can't handle CR terminator
test13 = test_driver "test3.txt" line_read
test14 = test_driver "/dev/null" line_read

test21, test22, test23, test24 :: IO ()
test21 = test_driver "test1.txt" line_read_cr
test22 = test_driver "test2.txt" line_read_cr -- can handle CR terminator
test23 = test_driver "test3.txt" line_read_cr
test24 = test_driver "/dev/null" line_read_cr

-- Have to disable reading after the headers...
test31, test32, test33, test34 :: IO ()
test31 = test_driver "test1.txt" line_lazy
test32 = test_driver "test2.txt" line_lazy
test33 = test_driver "test3.txt" line_lazy
test34 = test_driver "/dev/null" line_lazy