-- Haskell98! -- Random and Binary IO with IterateeM module RandomIO ( -- IO monad tracking endianness EIO -- data constructor is not exported , Endianess(..) , eio_msb_first , eio_set_endian , runEIO -- Iteratees to read unsigned integers written -- in Big- or Little-endian ways , endian_read2 , endian_read4 -- enumerators supporting seekable streams , seek_stream , enum_fd_random -- Utility functions , takeR -- specialized `take' , test_driver_random -- test case runner ) where import IterateeM hiding (test1, test2, test3) import System.Posix import Foreign.C import Foreign.Ptr import Foreign.Marshal.Alloc import Foreign.Marshal.Array import Control.Monad.Trans import Data.Word import Data.Bits import Data.IORef import Text.Printf import Data.Typeable import Control.Exception import Prelude hiding (head, drop, dropWhile, take, break, catch) import qualified Prelude import System.IO (SeekMode(..)) import LowLevelIO -- The type of the IO monad supporting endianness -- Since the base monad is IO, it seems simpler to implement endianness -- as an IORef cell, propagated as the `environment' data Endianess = Big_endian | Little_endian deriving (Show, Eq) newtype EIO a = EIO{unEIO:: IORef Endianess -> IO a} instance Monad EIO where return = EIO . const . return m >>= f = EIO( \env -> unEIO m env >>= (\x -> unEIO (f x) env) ) instance MonadIO EIO where liftIO = EIO . const -- The EIO data constructor should not be exported -- The programmer should use the following functions to access -- endianness eio_msb_first :: EIO Bool eio_msb_first = EIO action where action env = readIORef env >>= return . (== Big_endian) eio_set_endian :: Endianess -> EIO () eio_set_endian flag = EIO action where action env = writeIORef env flag runEIO:: Endianess -> EIO a -> IO a runEIO endn m = newIORef endn >>= unEIO m -- ------------------------------------------------------------------------ -- Binary Random IO Iteratees -- Seek is implemented as a restartable exception with a specific -- exception code data SeekException = SeekException FileOffset deriving Show instance Typeable SeekException where typeOf _ = mkTyConApp (mkTyCon "SeekException") [] instance Exception SeekException seek_stream :: Monad m => FileOffset -> Iteratee el m () seek_stream off = throwRecoverableErr (toException . SeekException $ off) (ie_doneM ()) -- Read at most n elements from a stream and apply the given iteratee to the -- stream of the read elements. If the given iteratee accepted fewer -- elements, we stop. -- This is the variation of `take' with the early termination -- of processing of the outer stream once the processing of the inner stream -- finished early. This variation is particularly useful for randomIO, -- where we do not have to care to `drain the input stream'. takeR :: Monad m => Int -> Enumeratee el el m a takeR 0 i = return i takeR n (IE_cont Nothing k) = ie_cont (step n k) where step n k (Chunk []) = ie_contM (step n k) step n k chunk@(Chunk str) | length str < n = feedI k chunk >>= ie_ret . takeR (n - length str) step n k (Chunk str) = feedI k (Chunk s1) >>= \i -> ie_doneM i (Chunk s2) where (s1,s2) = splitAt n str step n k stream = feedI k stream >>= \i -> ie_doneM i stream takeR n i = return i -- Iteratees to read unsigned integers written in Big- or Little-endian ways endian_read2 :: Iteratee Word8 EIO Word16 endian_read2 = do c1 <- head c2 <- head flag <- lift eio_msb_first return $ if flag then (fromIntegral c1 `shiftL` 8) .|. fromIntegral c2 else (fromIntegral c2 `shiftL` 8) .|. fromIntegral c1 endian_read4 :: Iteratee Word8 EIO Word32 endian_read4 = do c1 <- head c2 <- head c3 <- head c4 <- head flag <- lift eio_msb_first return $ if flag then (((((fromIntegral c1 `shiftL` 8) .|. fromIntegral c2) `shiftL` 8) .|. fromIntegral c3) `shiftL` 8) .|. fromIntegral c4 else (((((fromIntegral c4 `shiftL` 8) .|. fromIntegral c3) `shiftL` 8) .|. fromIntegral c2) `shiftL` 8) .|. fromIntegral c1 -- ------------------------------------------------------------------------ -- Binary Random IO enumerators -- The enumerator of a POSIX Fd: a variation of enum_fd that -- supports seek requests (the restartable SeekException) -- We assume here that all exceptional conditions should be reported -- through Iteratee, rather than by throwing IO Exceptions enum_fd_random :: MonadIO m => Fd -> Enumerator Word8 m a enum_fd_random _ i@IE_done{} = return i -- i doesn't want any input enum_fd_random fd i = do p <- liftIO $ mallocBytes (fromIntegral buffer_size) r <- check p (0,0) i liftIO $ free p return r where -- buffer_size = 4096 buffer_size = 5 -- for tests; in real life, there should be 1024 or so -- The second argument, pos, of do_loop is (off,len), describing which part -- of the file is currently in the buffer 'p' check :: MonadIO m => Ptr Word8 -> (FileOffset,Int) -> Iteratee Word8 m a -> m (Iteratee Word8 m a) check p pos (IE_cont Nothing k) = do_read p pos k check p pos (IE_cont (Just e) k) | Just (SeekException off) <- fromException e = do_seek p pos k off check p pos iter = return iter -- Thanks to John Lato for the strictness annotation -- Otherwise, the `off + fromIntegral len' below accumulates thunks do_read p (off,len) k | off `seq` len `seq` False = undefined do_read p (off,len) k = do n <- liftIO $ myfdRead fd (castPtr p) buffer_size liftIO $ putStrLn $ "Read buffer, size " ++ either (const "IO err") show n case n of Left errno -> feedI k (EOF (Just (exc_IOErr errno))) Right 0 -> return $ ie_cont k Right n -> do str <- liftIO $ peekArray (fromIntegral n) p feedI k (Chunk str) >>= check p (off + fromIntegral len,fromIntegral n) do_seek p pos@(off,len) k off' | off <= off' && off' < off + fromIntegral len = -- Seek within buffer p do let local_off = fromIntegral $ off' - off str <- liftIO $ peekArray (len - local_off) (p `plusPtr` local_off) feedI k (Chunk str) >>= check p pos do_seek p pos k off' = do -- Seek outside the buffer off' <- liftIO $ myfdSeek fd AbsoluteSeek (fromIntegral off') liftIO $ putStrLn $ "Read buffer, offset " ++ either (const "IO err") show off' case off' of Left errno -> feedI k (EOF (Just (exc_IOErr errno))) Right off -> do_read p (off,0) k -- ------------------------------------------------------------------------ -- Tests -- Tests of takeR vs take -- take 4 always reads 4 elements, regardless of chunking or early termination test_take = t 3 >> t 4 >> t 5 where t n = (print . (== ('1','5'))) =<< run =<< enum_pure_nchunk "1234567" n iter iter = do c <- runI =<< take 4 head r <- head return (c,r) test_takeR = t3 >> t4 where t3 = (print . (== ('1','4'))) =<< run =<< enum_pure_nchunk "1234567" 3 iter t4 = (print . (== ('1','5'))) =<< run =<< enum_pure_nchunk "1234567" 4 iter iter = do c <- runI =<< takeR 4 head r <- head return (c,r) test1 () = do s1 <- head s2 <- head seek_stream 0 s3 <- head seek_stream 100 s4 <- head s5 <- head seek_stream 101 s6 <- head seek_stream 1 s7 <- head return [s1,s2,s3,s4,s5,s6,s7] test2 () = do seek_stream 100 seek_stream 0 seek_stream 100 s4 <- head s5 <- head seek_stream 101 s6 <- head seek_stream 1 s7 <- head seek_stream 0 s1 <- head s2 <- head seek_stream 0 s3 <- head return [s1,s2,s3,s4,s5,s6,s7] test3 () = do let show_x fmt = map (\x -> (printf fmt x)::String) lift $ eio_set_endian Big_endian ns1 <- endian_read2 ns2 <- endian_read2 ns3 <- endian_read2 ns4 <- endian_read2 seek_stream 0 nl1 <- endian_read4 nl2 <- endian_read4 seek_stream 4 lift $ eio_set_endian Little_endian ns3' <- endian_read2 ns4' <- endian_read2 seek_stream 0 ns1' <- endian_read2 ns2' <- endian_read2 seek_stream 0 nl1' <- endian_read4 nl2' <- endian_read4 return [show_x "%04x" [ns1,ns2,ns3,ns4], show_x "%08x" [nl1,nl2], show_x "%04x" [ns1',ns2',ns3',ns4'], show_x "%08x" [nl1',nl2']] test4 () = do lift $ eio_set_endian Big_endian ns1 <- endian_read2 ns2 <- endian_read2 throwErrStr "Error" ns3 <- endian_read2 return (ns1,ns2,ns3) test_driver_random iter filepath = do fd <- openFd filepath ReadOnly Nothing defaultFileFlags putStrLn "About to read file" result <- runEIO Big_endian $ run =<< enum_fd_random fd iter closeFd fd putStrLn "Finished reading file" print_res result where print_res a = print a >> return a {- print_res (IE_done a (Err err)) = print a >> putStrLn ("Stream error: " ++ err) >> return a -} test1r = test_driver_random (test1 ()) "test_full1.txt" >>= return . (== [104,101,104,13,10,10,101]) test2r = test_driver_random (test2 ()) "test_full1.txt" >>= return . (== [104,101,104,13,10,10,101]) test3r = test_driver_random (test3 ()) "test4.txt" >>= return . (== [["0001","0203","fffe","fdfc"], ["00010203","fffefdfc"], ["0100","0302","feff","fcfd"], ["03020100","fcfdfeff"]]) test4r = test_driver_random (test4 ()) "test4.txt" -- >>= -- return . (== (1,515,Nothing)) {- Before: About to read file Read buffer, size 5 Finished reading file (1,515,Nothing) Stream error: Error Now: About to read file Read buffer, size 5 *** Exception: control message: Error -}