import Parsing

{-
  A small CSV grammar written with the combinators from the Parsing module.

  A CSV file contains 0 or more lines, each of which is terminated by the
  newline character.
-}

-- The whole file: zero or more lines.
csv = zeroOrMore line

-- One line: cells separated by commas (per the grammar, one or more of
-- them), followed by the terminating newline.  The result is the cells
-- collected before that newline.
line =
  chain cell (char comma) >>= \row ->
    char newline >> return row

-- A cell is either a quoted cell or an ordinary cell; the quoted
-- alternative is listed first.
cell = quotedcell +++ ordinarycell

-- An ordinary cell is a (possibly empty) run of characters that are
-- neither a comma nor a newline.
ordinarycell = zeroOrMore (sat isPlain)
  where
    isPlain c = c /= comma && c /= newline

-- A quoted cell "thus": an opening double quote, zero or more characters
-- other than double quote, and a closing double quote.  Only the
-- characters between the quotes are returned.
quotedcell = char quote >> contentsThenClose
  where
    contentsThenClose =
      zeroOrMore (sat (/= quote)) >>= \body ->
        char quote >> return body

-- The three characters that carry meaning in the grammar.
comma, quote, newline :: Char
comma   = ','
quote   = '\"'
newline = '\n'

-- Sample input: the five lines below, each terminated by a newline
-- (courtesy of unlines).
wikiExample :: String
wikiExample =
  unlines
    [ l1
    , l2
    , l3
    , l4
    , l5
    ]

l1, l2, l3, l4, l5 :: String
-- Header row.
l1 = "Year,Make,Model,Description,Price"
-- Contains a quoted cell with embedded commas.
l2 = "1997,Ford,E350,\"ac, abs, moon\",3000.00"
-- Contains double quotes inside an ordinary cell, and an empty quoted cell.
l3 = "1999,Chevy,Venture \"Extended Edition\",\"\",4900.00"
-- Contains an empty ordinary cell.
l4 = "1999,Chevy,Venture \"Extended Edition, Very Large\",,5000.00"
-- Contains a quoted cell with an embedded newline.
l5 = "1996,Jeep,Grand Cherokee,\"MUST SELL!\nair, moon roof, loaded\",4799.00"

{-
-- The whole thing in 4 lines:

csv  = zeroOrMore line
line = chain cell (char comma) <-< char newline
cell = char quote >-> zeroOrMore (sat (/= quote)) <-< char quote
       +++ zeroOrMore (sat (`notElem` [comma,newline]))
[comma,quote,newline] = ",\"\n"
-}