------------------------------------------------------------------------
-- A breadth-first backend which uses the derivative operator
------------------------------------------------------------------------

module TotalParserCombinators.BreadthFirst where

open import Data.List
open import Data.List.Membership.Propositional
open import Data.Product
open import Function
open import Relation.Binary.HeterogeneousEquality as H
  using () renaming (_≅_ to _≅H_)
open import Relation.Binary.PropositionalEquality as P using (_≡_)

open import TotalParserCombinators.Congruence using (_≅P_; _∎)
import TotalParserCombinators.Congruence.Sound as CS
open import TotalParserCombinators.Derivative as D using (D)
import TotalParserCombinators.InitialBag as I
open import TotalParserCombinators.Parser
open import TotalParserCombinators.Semantics
open import TotalParserCombinators.Simplification as S using (simplify)

------------------------------------------------------------------------
-- A parametrised backend

-- The function f is applied before the derivative.

module Parse
  {Tok}
  (f :  {R xs}  Parser Tok R xs   λ xs′  Parser Tok R xs′)
  (f-correct :  {R xs} (p : Parser Tok R xs)  proj₂ (f p) ≅P p)
  where

  -- The parsing function.

  parse :  {R xs}  Parser Tok R xs  List Tok  List R
  parse {xs = xs} p []      = xs
  parse           p (t  s) = parse (D t $ proj₂ $ f p) s

  -- A variant of f-correct.

  f-correct′ :  {R xs} (p : Parser Tok R xs)  proj₂ (f p)  p
  f-correct′ = CS.sound  f-correct

  -- The backend is sound with respect to the semantics.

  sound :  {R xs x} {p : Parser Tok R xs} (s : List Tok) 
          x  parse p s  x  p · s
  sound []      x∈p = I.sound _ x∈p
  sound (t  s) x∈p =
    Inverse.to (f-correct′ _) $ D.sound _ (sound s x∈p)

  -- The backend is complete with respect to the semantics.

  complete :  {R xs x} {p : Parser Tok R xs} (s : List Tok) 
             x  p · s  x  parse p s
  complete []      x∈p = I.complete x∈p
  complete (t  s) x∈p =
    complete s $ D.complete $ Inverse.from (f-correct′ _) $ x∈p

  -- The backend does not introduce any unneeded ambiguity.
  --
  -- The proof complete is a left inverse of sound, so the (finite) type
  -- x ∈ parse p s contains at most as many proofs as x ∈ p · s. In
  -- other words, the output of parse p s can only contain n copies of x
  -- if there are at least n distinct parse trees in x ∈ p · s.

  complete∘sound :  {R xs x} s
                   (p : Parser Tok R xs) (x∈p : x  parse p s) 
                   complete s (sound s x∈p)  x∈p
  complete∘sound []      p x∈p = I.complete∘sound p x∈p
  complete∘sound (t  s) p x∈p
    rewrite Inverse.inverseʳ (f-correct′ p)
              {x = D.sound (proj₂ (f p)) (sound s x∈p)} P.refl
          | D.complete∘sound (proj₂ (f p)) (sound s x∈p) =
    complete∘sound s (D t $ proj₂ $ f p) x∈p

  -- The backend does not remove any ambiguity.
  --
  -- The proof complete is a right inverse of sound, which implies that
  -- the (finite) type x ∈ parse p s contains at least as many proofs as
  -- x ∈ p · s. In other words, if the output of parse p s contains n
  -- copies of x, then there are at most n distinct parse trees in
  -- x ∈ p · s.

  sound∘complete :  {R xs x} {p : Parser Tok R xs}
                   (s : List Tok) (x∈p : x  p · s) 
                   sound s (complete s x∈p)  x∈p
  sound∘complete []      x∈p = I.sound∘complete x∈p
  sound∘complete (t  s) x∈p
    rewrite sound∘complete s $
                  D.complete $ Inverse.from (f-correct′ _) $ x∈p
          | D.sound∘complete $ Inverse.from (f-correct′ _) $ x∈p
    = Inverse.inverseˡ (f-correct′ _) {x = x∈p} P.refl

  -- The backend is correct.

  correct :  {R xs x s} {p : Parser Tok R xs} 
            x  p · s  x  parse p s
  correct {s = s} {p} =
    mk↔ {to = complete s}
      (  { P.refl  complete∘sound s p _ })
      ,  { P.refl  sound∘complete s _ })
      )

------------------------------------------------------------------------
-- Specific instantiations

-- Parsing without simplification.

parse :  {Tok R xs}  Parser Tok R xs  List Tok  List R
parse = Parse.parse -,_ _∎

parse-correct :  {Tok R xs x s} {p : Parser Tok R xs} 
                x  p · s  x  parse p s
parse-correct = Parse.correct -,_ _∎

-- Parsing with simplification.

parse-with-simplification :
   {Tok R xs}  Parser Tok R xs  List Tok  List R
parse-with-simplification = Parse.parse  p  -, simplify p) S.correct

parse-with-simplification-correct :
   {Tok R xs x s} {p : Parser Tok R xs} 
  x  p · s  x  parse-with-simplification p s
parse-with-simplification-correct = Parse.correct _ S.correct

------------------------------------------------------------------------
-- An observation

-- The worst-case complexity of parse (without simplification) is /at
-- least/ exponential in the size of the input string. There is a
-- (finite) parser p whose derivative is p ∣ p (for any token). The
-- n-th derivative thus contains (at least) 2^n outermost occurrences
-- of _∣_, and these occurrences have to be traversed to compute the
-- initial bag of the n-th derivative.

parse-inefficient :
   {Tok R}   λ (p : Parser Tok R [])   t  D t p ≅H p  p
parse-inefficient {R = R} =
  (fail {R = R} >>=  _  fail) , λ t  H.refl)