-- | The naive shallow embedding following the semantics, using lists
--   to implement the multisets (also called bags).
module Parser0
  ( P           -- :: * -> * -> *
  , symbol      -- :: P s s
  , pfail       -- :: P s a
  , (+++)       -- :: P s a -> P s a -> P s a
  , ParseResult -- ParseResult s a =  [(a, [s])]
  , parse       -- :: P s a -> [s] -> ParseResult s a
  ) where

{-

  The semantics of a parser of type 'P s a' is a function from a
  string of 's' to a multiset of results paired with the remaining
  parts of the input string. We use a multiset to capture the fact
  that we don't care about the order of the results.

  The semantic function [| _ |] is defined as follows (we use {| |} to
  denote multisets and \/ for multiset union).

  [| _ |] :: P s a -> [s] -> {| (a, [s]) |}
  [| symbol   |] (c : s) = {| (c, s) |}
  [| symbol   |] []      = {| |}
  [| pfail    |] s       = {| |}
  [| p +++ q  |] s       = [| p |] s  \/  [| q |] s
  [| return x |] s       = {| (x, s) |}
  [| p >>= f  |] s       = {| (y, s'') | (x, s')  <- [| p   |] s
                                       , (y, s'') <- [| f x |] s'
                           |}

  Using this semantics we can prove (exercise) a number of useful laws
  about parsers. We will use these laws later to derive an efficient
  implementation of the library.

  For two parsers p and q we define
  
    p == q  iff  forall s. [| p |] s == [| q |] s

  Monad Laws

    L1.  return x >>= f   ==  f x
    L2.  p >>= return     ==  p
    L3.  (p >>= f) >>= g  ==  p >>= (\x -> f x >>= g)

  More laws about >>=

    L4.  fail >>= f       ==  fail
    L5.  (p +++ q) >>= f  ==  (p >>= f) +++ (q >>= f)

  Laws about (+++) and fail

    L6.  fail +++ q       ==  q
    L7.  p +++ fail       ==  p

  Laws about (+++)

    L8.  (p +++ q) +++ r  ==  p +++ (q +++ r)
    L9.  p +++ q          ==  q +++ p    -- bag => order irrelevant

  Laws about symbol

    L10. (symbol >>= f) +++ (symbol >>= g)
                          ==  symbol >>= (\c -> f c +++ g c)

  Here is the proof of L10 for the case of a non-empty input string:

    [| (symbol >>= f) +++ (symbol >>= g) |] (c:s)         
  ==  { Def. of [| p +++ q |] }
    [| symbol >>= f |] (c:s)  \/  [| symbol >>= g |] (c:s)  
  ==  { Def. of [| p >>= f |] and [| symbol |] }
    [| f c |] s  \/  [| g c |] s                            
  ==  { Def. of [| p +++ q |] "backwards" }
    [| f c +++ g c |] s                                   
  ==  { Def. of [| p >>= f |] and [| symbol |] "backwards" }
    [| symbol >>= (\c -> f c +++ g c) |] (c:s)

  We can make a shallow embedding following the semantics, using lists
  for the multisets.

-}

type ParseResult s a  =  [(a, [s])]
type PSem s a  =  [s] -> ParseResult s a

-- | We implement parsers by their semantics.
newtype P s a = P (PSem s a)

-- | The 'parse' function is trivial.
parse :: P s a -> PSem s a
parse (P p) s  =  p s

-- The operations simply follow the semantics inserting the newtype
-- constructor where appropriate.

symbol :: P s s
symbol = P symbolP 

symbolP :: PSem s s -- [s]   ->  ParseResult s s
symbolP  (c : s)  =  [(c, s)]
symbolP  []       =  []

pfail :: P s a
pfail = P $ \_ -> []

(+++) :: P s a -> P s a -> P s a
P p  +++  P q  =  P (mplusP p q)

mplusP :: PSem s a -> PSem s a -> PSem s a
mplusP p q  =  \s ->  p s ++ q s

returnP :: a -> PSem s a 
returnP x = \s -> [(x, s)]

bind :: P s a -> (a -> P s b) -> P s b
bind p f  =  P $ \s -> [ (y, s'') 
                       | (x, s')  <- parse p s
                       , (y, s'') <- parse (f x) s'
                       ]

instance Monad (P s) where
  return x  =  P $ returnP x
  p >>= f   =  bind p f
  fail _    =  pfail


-- There are a number of efficiency problems with this implementation.
-- To solve them we'll start with a naive deep embedding and
-- successively refine it to get something efficient.