lectures/lecture4/Parser1.hs

{-# LANGUAGE GADTs #-}

-- | Naive deep embedding of the parsing library.
module Parser1
  ( P           -- :: * -> * -> *
  , symbol      -- :: P s s
  , pfail       -- :: P s a
  , (+++)       -- :: P s a -> P s a -> P s a
  , ParseResult -- ParseResult s a = [(a, [s])]
  , parse       -- :: P s a -> [s] -> ParseResult s a
  ) where

type ParseResult s a = [(a, [s])]

-- | Each operation is turned into a constructor.
data P s a where
  Symbol :: P s s
  Fail   :: P s a
  (:+++) :: P s a -> P s a -> P s a
  Return :: a -> P s a
  (:>>=) :: P s a -> (a -> P s b) -> P s b

symbol :: P s s
symbol = Symbol

pfail :: P s a
pfail  = Fail

(+++) :: P s a -> P s a -> P s a
(+++)  = (:+++)

instance Monad (P s) where
  return = Return
  (>>=)  = (:>>=)

-- | The parse function corresponds exactly to the semantics, except that
--   we use lists instead of multisets.
parse :: P s a -> [s] -> ParseResult s a
parse Symbol     (c : s) = [(c, s)]
parse Symbol     []      = []
parse Fail       _       = []
parse (p :+++ q) s       = parse p s ++ parse q s
parse (Return x) s       = [(x, s)]
parse (p :>>= f) s       = [ (y, s'') | (x, s')  <- parse p s
                                      , (y, s'') <- parse (f x) s'
                           ]

{- There are three sources of inefficiency that we can identify:

   1. The list comprehension builds a lot of intermediate lists which might be
      costly.

   2. List append (++) is linear in its first argument which means that left
      nested applications of (+++) get a quadratic behaviour.

   3. (+++) is treated in a depth first way, first computing the results of the
      left parser, then computing the results of the second parser. This leads
      to a space leak since we have to hang on to the input string to feed to
      the second parser, while traversing the string with the first parser.

  To fix these problems we'll use the laws stated in Parser0 to simplify our
  implementation.

-}