-- Haskell98! -- The running example, part 1 -- Reading headers, the sequence of lines terminated by an -- empty line. Each line is terminated by CRLF (in a more general -- case, by either of CR, LF, or CRLF). -- We should return the headers in order. In the case of error, -- we should return the headers read so far and the description of the error. -- Manual buffering; suitable for layered IO (reading from an encrypted -- stream, etc). Although the code below is fairly efficient, -- it is a burden to write. -- The present code assumes that a line is terminated with CRLF or -- just LF. The case of a sole CR as terminator is not handled. -- The code is complex already (which is the point); handling the -- sole CR terminator will make the code even more complex. -- The size of the buffer is set to 5, just for testing purposes -- (to get the good distribution of special cases of CRLF and the buffer -- boundary) module NaiveBufferIO where import System.Posix.IO import Foreign.C import Foreign.Ptr import Foreign.Storable import Foreign.Marshal.Alloc import Data.List import LowLevelIO type Headers = [String] type ErrMsg = String -- The result of reading headers data HResult = HR Headers Block -- successful; return the remainder, too | HRFail ErrMsg Headers -- give the headers so far deriving Show data Block = Block (Ptr CChar) CSize deriving Show -- Here we try to be more modular: separating reading of the headers -- into reading of a single line and then assembling the result... -- An auxiliary function: -- Read a (possibly incomplete) line from the block, -- return the line and the remainder, if any type Line = String -- A complete line, CRLF or CR are -- stripped off type ILine = String -- Not a complete line, the prefix -- of a complete line. The LF terminator -- of the line is yet to be seen -- ILine may end in CR data ReadLineResult = RLIncomplete ILine -- we have not seen NL yet | RLDone Line Block -- String now is complete line cchar_nl, cchar_cr :: CChar cchar_nl = fromIntegral 10 cchar_cr = fromIntegral 13 read_line :: ILine -> Block -> IO ReadLineResult read_line prev (Block _ 0) = return $ RLIncomplete prev read_line prev (Block p n) = do tpos <- find_terminator p n 0 case tpos of Just 0 -> do -- saw NL right away let line = case () of _ | null prev -> "" _ | last prev == '\r' -> init prev _ -> prev return $ RLDone line (Block (plusPtr p 1) (pred n)) Just len -> do c <- peek (p `plusPtr` (pred len)) let lcontent = if c == cchar_cr then pred len else len s <- peekCAStringLen (p,lcontent) let l1 = succ len -- account for the NL return $ RLDone (prev ++ s) (Block (plusPtr p l1) (n- fromIntegral l1)) Nothing -> peekCAStringLen (p,fromIntegral n) >>= return . RLIncomplete . (prev ++) where find_terminator p 0 acc = return Nothing find_terminator p n acc = do c <- peek p if c == cchar_nl then return $ Just acc else find_terminator (p `plusPtr` 1) (pred n) (succ acc) read_headers fd (Block p n) = loop "" [] where loop before acc = do n' <- myfdRead fd p n case n' of Left errno -> return $ HRFail "IO Error" (reverse acc) Right 0 -> return $ HRFail "EOF" (reverse acc) Right n -> read_line before (Block p n) >>= dobuff acc dobuff acc (RLDone "" b) = return $ HR (reverse acc) b -- CRLF or LF dobuff acc (RLDone s b) = read_line "" b >>= dobuff (s:acc) dobuff acc (RLIncomplete s) = loop s acc test_driver filepath = do fd <- openFd filepath ReadOnly Nothing defaultFileFlags putStrLn "About to read headers" allocaBytes (fromIntegral buffer_size) (\p -> read_print_headers fd (Block p buffer_size)) closeFd fd putStrLn "Done" where buffer_size = 5 -- for tests; in real life, there should be 1024 or so read_print_headers fd buf = do headers <- read_headers fd buf putStrLn "Finished reading headers" putStrLn "The headers are" case headers of HR headers (Block p n) -> do print headers after <- peekCAStringLen (p,fromIntegral n) print_after_headers fd after buf HRFail err headers -> do putStrLn $ "Detected error: "++ err putStrLn "Headers so far" print headers print_after_headers fd after (Block p n) = do n' <- myfdRead fd p n case n' of Left errno -> putStrLn $ "IO Error following `" ++ after ++ "'" Right 0 -> putStrLn $ "EOF following `" ++ after ++ "'" Right n -> do after' <- peekCAStringLen (p,fromIntegral n) putStrLn $ "Data following `" ++ after ++ after' ++ "'" test11 = test_driver "test1.txt" test12 = test_driver "test2.txt" -- can't handle CR terminator test13 = test_driver "test3.txt" test14 = test_driver "/dev/null"