Am I lazy enough?

Joel Reymont
Folks,

I'm looking to use the following code to process a multi-GB text  
file. I am using ByteStrings but there was a discussion today on IRC  
about tail recursion, laziness and accumulators that made me wonder.

Is fixLines below lazy enough? Can it be made lazier?

     Thanks, Joel

---

module Main where

import IO
import System
import Numeric
import Data.Char
import Data.Word
import qualified Data.Map as M
import qualified Data.ByteString.Char8 as B
import Prelude hiding (lines)

grabTableInfo x = (tableId', (tableType, tableStakes))
     where (tableId:tableType:_:tableStakes:_) =
               B.split ',' x
           Just (tableId', _) = B.readInt tableId
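--- e.g. for a made-up row "17,HOLDEM,x,1/2,y" this should give
---   grabTableInfo (B.pack "17,HOLDEM,x,1/2,y")
---     == (17, (B.pack "HOLDEM", B.pack "1/2"))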

lines = B.split '\n'

--- My Oracle ASCII dump is 80 characters wide, so some lines
--- are split. I need to skip empty lines and join lines
--- containing fewer than the required number of commas.

fixLines 0 lines = lines
fixLines _ [] = []
fixLines n (line:lines) =
     fixLines' lines line []
         where fixLines' [] str acc
                   | B.count ',' str == n
                       = acc ++ [str]
                   | otherwise
                       = acc
               fixLines' (x:xs) str acc
                   | B.null str -- skip
                       = fixLines' xs x acc
                   | B.count ',' str < n -- join with next line
                       = fixLines' xs (B.append str x) acc
                   | otherwise
                       = fixLines' xs x (acc ++ [str])
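
--- For example, with n = 2 this should give
---   fixLines 2 (map B.pack ["a,b", "c,d", "", "e,f,g"])
---     == map B.pack ["a,bc,d", "e,f,g"]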

mkMap = M.fromList . map grabTableInfo . fixLines 20

loadTableInfo = do
   bracket (openFile "game_info_tbl.csv" ReadMode)
           (hClose)
           (\h -> do
              c <- B.hGetContents h
              return $ mkMap $ lines c)
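
--- For testing, a minimal main could be e.g.
---   main = loadTableInfo >>= print . M.size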

--
http://wagerlabs.com/






Re: Am I lazy enough?

Spencer Janssen
ByteStrings are strict in their contents, so when you do an
hGetContents you'll read the entire file into memory!  This negates
any laziness benefit right off the bat.  The trickiest part is the
lazy IO: you have to use unsafeInterleaveIO or something similar.

Below is a program that does approximately the same thing as yours.
Note the getLinesLazily function.  I've only tested that it
typechecks; I haven't run it yet.


Spencer Janssen

-- Program begins here

import System.IO
import System.IO.Unsafe (unsafeInterleaveIO)

import qualified Data.ByteString.Char8 as B
import Data.ByteString.Char8 (ByteString)

main =
    getLinesLazily stdin >>= mapM_ B.putStrLn . relines 8

relines :: Int -> [ByteString] -> [ByteString]
relines n = go . map (\s -> (s, B.count ',' s))
 where
    go []       = []
    go [(s, _)] = [s]
    go ((s, x) : (t, y) : ss)
     | x + y > n = s : go ((t, y) : ss)
     | otherwise = go ((B.append s t, x + y) : ss)
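
-- For example, with n = 2 this should give
--   relines 2 (map B.pack ["a,b", "", "c,d", "e,f,g"])
--     == map B.pack ["a,bc,d", "e,f,g"]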

getLinesLazily :: Handle -> IO [ByteString]
getLinesLazily h = do
    eof <- hIsEOF h
    if eof
        then return []
        else do
            l <- B.hGetLine h
            ls <- unsafeInterleaveIO $ getLinesLazily h
            return (l:ls)

-- Program ends here
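
To plug this back into your pipeline, something along these lines
should work in place of loadTableInfo (again untested; it assumes
grabTableInfo, the qualified Data.Map import and the bracket/openFile
code from your module, and uses relines 20 to match your fixLines 20).
Two caveats: M.fromList still has to consume every line, so the
finished Map holds all the parsed data anyway, and the Map has to be
forced before bracket closes the handle, otherwise the lazily read
lines would be demanded after hClose:

loadTableInfo =
    bracket (openFile "game_info_tbl.csv" ReadMode)
            hClose
            (\h -> do
                ls <- getLinesLazily h
                let m = M.fromList (map grabTableInfo (relines 20 ls))
                -- forcing the Map makes getLinesLazily read everything
                -- before hClose runs
                m `seq` return m)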

On 5/3/06, Joel Reymont <[hidden email]> wrote:

> Folks,
>
> I'm looking to use the following code to process a multi-GB text
> file. I am using ByteStrings but there was a discussion today on IRC
> about tail recursion, laziness and accumulators that made me wonder.
>
> Is fixLines below lazy enough? Can it be made lazier?
>
>      Thanks, Joel
>