-- Haskell98! -- Random and Binary IO with IterateeM module RandomIO where import IterateeM import System.Posix import Foreign.C import Foreign.Ptr import Foreign.Marshal.Alloc import Foreign.Marshal.Array import Control.Monad.Trans import Data.Word import Data.Bits import Data.IORef import Text.Printf import Data.Typeable import Control.Exception import Prelude hiding (head, drop, dropWhile, take, break, catch) import qualified Prelude import System.IO (SeekMode(..)) import LowLevelIO -- The type of the IO monad supporting endianness -- Since the base monad is IO, it seems simpler to implement endianness -- as an IORef cell, propagated as the `environment' data Endianess = Big_endian | Little_endian deriving (Show, Eq) newtype EIO a = EIO{unEIO:: IORef Endianess -> IO a} instance Monad EIO where return = EIO . const . return m >>= f = EIO( \env -> unEIO m env >>= (\x -> unEIO (f x) env) ) instance MonadIO EIO where liftIO = EIO . const -- The EIO data constructor should not be exported -- The programmer should use the following functions to access -- endianness eio_msb_first :: EIO Bool eio_msb_first = EIO action where action env = readIORef env >>= return . (== Big_endian) eio_set_endian :: Endianess -> EIO () eio_set_endian flag = EIO action where action env = writeIORef env flag runEIO:: Endianess -> EIO a -> IO a runEIO endn m = newIORef endn >>= unEIO m -- ------------------------------------------------------------------------ -- Binary Random IO Iteratees -- Seek is implemented as a restartable exception with a specific -- exception code data SeekException = SeekException FileOffset deriving Show instance Typeable SeekException where typeOf _ = mkTyConApp (mkTyCon "SeekException") [] instance Exception SeekException seek_stream :: Monad m => FileOffset -> Iteratee el m () seek_stream off = throwRecoverableErr (toException . SeekException $ off) (ie_done ()) iter_err msg = throwErr (toException (ErrorCall msg)) -- Read n elements from a stream and apply the given iteratee to the -- stream of the read elements. If the given iteratee accepted fewer -- elements, we stop. -- This is the variation of `take' with the early termination -- of processing of the outer stream once the processing of the inner stream -- finished early. This variation is particularly useful for randomIO, -- where we do not have to care to `drain the input stream'. takeR :: Monad m => Int -> Enumeratee el el m a takeR 0 = return takeR n = enee_check_if_done (ie_cont . step n) where step n k (Chunk []) = ie_cont (step n k) step n k chunk@(Chunk str) | length str < n = k chunk >>== takeR (n - length str) step n k (Chunk str) = k (Chunk s1) >>== \i -> ie_done i (Chunk s2) where (s1,s2) = splitAt n str step n k stream = k stream >>== \i -> ie_done i stream -- Iteratees to read unsigned integers written in Big- or Little-endian ways endian_read2 :: Iteratee Word8 EIO Word16 endian_read2 = do c1 <- head c2 <- head flag <- lift eio_msb_first return $ if flag then (fromIntegral c1 `shiftL` 8) .|. fromIntegral c2 else (fromIntegral c2 `shiftL` 8) .|. fromIntegral c1 endian_read4 :: Iteratee Word8 EIO Word32 endian_read4 = do c1 <- head c2 <- head c3 <- head c4 <- head flag <- lift eio_msb_first return $ if flag then (((((fromIntegral c1 `shiftL` 8) .|. fromIntegral c2) `shiftL` 8) .|. fromIntegral c3) `shiftL` 8) .|. fromIntegral c4 else (((((fromIntegral c4 `shiftL` 8) .|. fromIntegral c3) `shiftL` 8) .|. fromIntegral c2) `shiftL` 8) .|. fromIntegral c1 -- ------------------------------------------------------------------------ -- Binary Random IO enumerators -- The enumerator of a POSIX Fd: a variation of enum_fd that -- supports seek requests (the restartable SeekException) -- We assume here that all exceptional conditions should be reported -- through Iteratee, rather than by throwing IO Exceptions enum_fd_random :: MonadIO m => Fd -> Enumerator Word8 m a enum_fd_random fd iv = Iteratee $ do p <- liftIO $ mallocBytes (fromIntegral buffer_size) r <- loop p (0,0) iv liftIO $ free p return r where -- buffer_size = 4096 buffer_size = 5 -- for tests; in real life, there should be 1024 or so -- the second argument of loop is (off,len), describing which part -- of the file is currently in the buffer 'p' loop :: MonadIO m => Ptr Word8 -> (FileOffset,Int) -> IterV Word8 m a -> m (IterV Word8 m a) loop p pos (IE_cont k Nothing) = do_read p pos k loop p pos (IE_cont k (Just e)) | Just (SeekException off) <- fromException e = do_seek p pos k off loop p pos iter = return iter do_seek p pos@(off,len) k off' | off <= off' && off' < off + fromIntegral len = -- Seek within buffer p do let local_off = fromIntegral $ off' - off str <- liftIO $ peekArray (len - local_off) (p `plusPtr` local_off) runIter (k (Chunk str)) >>= loop p pos do_seek p pos k off' = do -- Seek outside the buffer off' <- liftIO $ myfdSeek fd AbsoluteSeek (fromIntegral off') liftIO $ putStrLn $ "Read buffer, offset " ++ either (const "IO err") show off' case off' of Left errno -> runIter $ k (EOF (Just (toException (ErrorCall "IO error")))) Right off -> do_read p (off,0) k -- Thanks to John Lato for the strictness annotation -- Otherwise, the `off + fromIntegral len' below accumulates thunks do_read p (off,len) k | off `seq` len `seq` False = undefined do_read p (off,len) k = do n <- liftIO $ myfdRead fd (castPtr p) buffer_size liftIO $ putStrLn $ "Read buffer, size " ++ either (const "IO err") show n case n of Left errno -> runIter $ k (EOF (Just (toException (ErrorCall "IO error")))) Right 0 -> return $ IE_cont k Nothing Right n -> do str <- liftIO $ peekArray (fromIntegral n) p runIter (k (Chunk str)) >>= loop p (off + fromIntegral len,fromIntegral n) -- ------------------------------------------------------------------------ -- Tests test1 () = do s1 <- head s2 <- head seek_stream 0 s3 <- head seek_stream 100 s4 <- head s5 <- head seek_stream 101 s6 <- head seek_stream 1 s7 <- head return [s1,s2,s3,s4,s5,s6,s7] test2 () = do seek_stream 100 seek_stream 0 seek_stream 100 s4 <- head s5 <- head seek_stream 101 s6 <- head seek_stream 1 s7 <- head seek_stream 0 s1 <- head s2 <- head seek_stream 0 s3 <- head return [s1,s2,s3,s4,s5,s6,s7] test3 () = do let show_x fmt = map (\x -> (printf fmt x)::String) lift $ eio_set_endian Big_endian ns1 <- endian_read2 ns2 <- endian_read2 ns3 <- endian_read2 ns4 <- endian_read2 seek_stream 0 nl1 <- endian_read4 nl2 <- endian_read4 seek_stream 4 lift $ eio_set_endian Little_endian ns3' <- endian_read2 ns4' <- endian_read2 seek_stream 0 ns1' <- endian_read2 ns2' <- endian_read2 seek_stream 0 nl1' <- endian_read4 nl2' <- endian_read4 return [show_x "%04x" [ns1,ns2,ns3,ns4], show_x "%08x" [nl1,nl2], show_x "%04x" [ns1',ns2',ns3',ns4'], show_x "%08x" [nl1',nl2']] test4 () = do lift $ eio_set_endian Big_endian ns1 <- endian_read2 ns2 <- endian_read2 iter_err "Error" ns3 <- endian_read2 return (ns1,ns2,ns3) test_driver_random iter filepath = do fd <- openFd filepath ReadOnly Nothing defaultFileFlags putStrLn "About to read file" result <- runEIO Big_endian $ run $ enum_fd_random fd $$ iter closeFd fd putStrLn "Finished reading file" print_res result where print_res a = print a >> return a {- print_res (IE_done a (Err err)) = print a >> putStrLn ("Stream error: " ++ err) >> return a -} test1r = test_driver_random (test1 ()) "test_full1.txt" >>= return . (== [104,101,104,13,10,10,101]) test2r = test_driver_random (test2 ()) "test_full1.txt" >>= return . (== [104,101,104,13,10,10,101]) test3r = test_driver_random (test3 ()) "test4.txt" >>= return . (== [["0001","0203","fffe","fdfc"], ["00010203","fffefdfc"], ["0100","0302","feff","fcfd"], ["03020100","fcfdfeff"]]) test4r = test_driver_random (test4 ()) "test4.txt" -- >>= -- return . (== (1,515,Nothing)) {- About to read file Read buffer, size 5 Finished reading file (1,515,Nothing) Stream error: Error -}