------------------------------------------------------------------------
-- Total parser combinators
------------------------------------------------------------------------

module TotalParserCombinators where

open import Algebra
open import Coinduction
open import Data.Bool using (Bool; true; false; not; _∨_)
import Data.Bool.Properties as Bool
private
  module BoolCS = CommutativeSemiring Bool.commutativeSemiring-∧-∨
open import Data.Function using (_∘_; _$_)
open import Data.List as List using (List; []; _∷_; _++_; [_])
private
  module ListMonoid {A} = Monoid (List.monoid A)
open import Data.Product
open import Relation.Binary
open import Relation.Binary.PropositionalEquality
open import Relation.Nullary
open import Data.String using () renaming (String to Tok)
open DecSetoid Data.String.decSetoid using (_≟_)

------------------------------------------------------------------------
-- A "right-strict" variant of _∧_

-- If the left-strict variant of _∧_ were used to type _·_ below, then
-- the inferred definition of ∂n would not be total; it would contain
-- expressions of the form "∂n (♭ p₁) t ∧ false". With the
-- right-strict definition of _∧_ such expressions reduce to "false".

infixr 6 _∧_

_∧_ : Bool  Bool  Bool
b  true  = b
b  false = false

------------------------------------------------------------------------
-- Conditional coinduction

-- Coinductive if the index is /false/.

data ∞? (A : Set) : Bool  Set where
  ⟨_⟩ : (x :   A)  ∞? A true
  ⟪_⟫ : (x :  A)  ∞? A false

♯? :  {b A}  A  ∞? A b
♯? {true}  x =    x 
♯? {false} x =   x 

♭? :  {A b}  ∞? A b  A
♭?  x  =   x
♭?  x  =  x

-- A lemma.

♭?♯? :  {A} b {x : A}  ♭? (♯? {b} x)  x
♭?♯? true  = refl
♭?♯? false = refl

------------------------------------------------------------------------
-- Parser combinators

infixl 10 _·_
infixl  5 _∣_

-- The index is true if the corresponding language contains the empty
-- string (is nullable).

data P : Bool  Set where
          : P false
  ε        : P true
  tok      : Tok  P false
  _∣_      :  {n₁ n₂}      P n₁          P n₂      P (n₁  n₂)
  _·_      :  {n₁ n₂}  ∞? (P n₁) n₂  ∞? (P n₂) n₁  P (n₁  n₂)
  nonempty :  {n}  P n  P false

-- Note that, as discussed below, ∅ and nonempty could be defined as
-- derived combinators.

------------------------------------------------------------------------
-- Semantics

-- The semantics is defined inductively: s ∈ p iff the string s is
-- contained in the language defined by p.

infix 4 _∈_

data _∈_ :  {n}  List Tok  P n  Set where
  ε        : []  ε
  tok      :  {t}  [ t ]  tok t
  ∣ˡ       :  {s n₁ n₂} {p₁ : P n₁} {p₂ : P n₂} 
             s  p₁  s  p₁  p₂
  ∣ʳ       :  {s n₁ n₂} {p₁ : P n₁} {p₂ : P n₂} 
             s  p₂  s  p₁  p₂
  _·_      :  {s₁ s₂ n₁ n₂} {p₁ : ∞? (P n₁) n₂} {p₂ : ∞? (P n₂) n₁} 
             s₁  ♭? p₁  s₂  ♭? p₂  s₁ ++ s₂  p₁ · p₂
  nonempty :  {n t s} {p : P n} 
             t  s  p  t  s  nonempty p

-- A lemma.

cast :  {n} {p p′ : P n} {s s′}  s  s′  p  p′  s  p  s′  p′
cast refl refl s∈ = s∈

------------------------------------------------------------------------
-- Example: A definition which is left and right recursive

leftRight : P false
leftRight =   leftRight  ·   leftRight 

-- Note that leftRight is equivalent to ∅, so ∅ does not need to be a
-- primitive combinator.

leftRight≈∅ :  {s}  ¬ s  leftRight
leftRight≈∅ (∈₁ · ∈₂) = leftRight≈∅ ∈₁

------------------------------------------------------------------------
-- Example: Kleene star

-- The intended semantics of the Kleene star.

infixr 5 _∷_
infix  4 _∈[_]⋆

data _∈[_]⋆ {n} : List Tok  P n  Set where
  []  :  {p}        [] ∈[ p ]⋆
  _∷_ :  {s₁ s₂ p}  s₁  p  s₂ ∈[ p ]⋆  s₁ ++ s₂ ∈[ p ]⋆

module Variant₁ where

  -- This definition requires that the argument parsers are not
  -- nullable.

  _⋆ : P false  P true
  p  = ε   p  ·   (p ) 

  -- The definition of _⋆ above is correct.

  ⋆-sound :  {s p}  s  p   s ∈[ p ]⋆
  ⋆-sound (∣ˡ ε)           = []
  ⋆-sound (∣ʳ (pr₁ · pr₂)) = pr₁  ⋆-sound pr₂

  ⋆-complete :  {s p}  s ∈[ p ]⋆  s  p 
  ⋆-complete []                    = ∣ˡ ε
  ⋆-complete (_∷_ {[]}    pr₁ pr₂) = ⋆-complete pr₂
  ⋆-complete (_∷_ {_  _} pr₁ pr₂) =
    ∣ʳ {n₁ = true} (pr₁ · ⋆-complete pr₂)

module Variant₂ where

  -- This definition works for any argument parser.

  _⋆ :  {n}  P n  P true
  p  = ε   nonempty p  ·   (p ) 

  -- The definition of _⋆ above is correct.

  ⋆-sound :  {s n} {p : P n}  s  p   s ∈[ p ]⋆
  ⋆-sound (∣ˡ ε)                    = []
  ⋆-sound (∣ʳ (nonempty pr₁ · pr₂)) = pr₁  ⋆-sound pr₂

  ⋆-complete :  {s n} {p : P n}  s ∈[ p ]⋆  s  p 
  ⋆-complete []                    = ∣ˡ ε
  ⋆-complete (_∷_ {[]}    pr₁ pr₂) = ⋆-complete pr₂
  ⋆-complete (_∷_ {_  _} pr₁ pr₂) =
    ∣ʳ {n₁ = true} (nonempty pr₁ · ⋆-complete pr₂)

  -- Note, however, that for actual parsing the corresponding
  -- definition would not be correct. The reason is that p would give
  -- a result also when the empty string was accepted, and these
  -- results are ignored by the definition above. In the case of
  -- actual parsing the result of p ⋆, when p is nullable, should be a
  -- stream and not a finite list.

------------------------------------------------------------------------
-- Nullability

-- The nullability index is correct.

 :  {n} {p : P n}  []  p  n  true
 pr = ⇒′ pr refl
  where
  ⇒′ :  {n s} {p : P n}  s  p  s  []  n  true
  ⇒′ ε                     refl = refl
  ⇒′ tok                   ()
  ⇒′ (∣ˡ pr₁)              refl with  pr₁
  ⇒′ (∣ˡ pr₁)              refl | refl = refl
  ⇒′ (∣ʳ pr₂)              refl with  pr₂
  ⇒′ (∣ʳ {n₁ = n₁} pr₂)    refl | refl = proj₂ BoolCS.zero n₁
  ⇒′ (_·_ {[]}    pr₁ pr₂) refl = cong₂ _∧_ ( pr₁) ( pr₂)
  ⇒′ (_·_ {_  _} pr₁ pr₂) ()
  ⇒′ (nonempty p)          ()

 :  {n} (p : P n)  n  true  []  p
                             ()
 ε                            refl = ε
 (tok t)                      ()
 (_∣_ {true}           p₁ p₂) refl = ∣ˡ ( p₁ refl)
 (_∣_ {false} {true}   p₁ p₂) refl = ∣ʳ {p₁ = p₁} ( p₂ refl)
 (_∣_ {false} {false}  p₁ p₂) ()
 ( p₁  ·  p₂ )            refl =  p₁ refl ·  p₂ refl
 ( p₁  ·  p₂ )            ()
 ( p₁  ·  p₂ )            ()
 ( p₁  ·  p₂ )            ()
 (nonempty p)                 ()

-- We can decide if the empty string belongs to a given language.

nullable? :  {n} (p : P n)  Dec ([]  p)
nullable? {true}  p = yes ( p refl)
nullable? {false} p = no helper
  where
  helper : ¬ []  p
  helper []∈p with  []∈p
  ... | ()

------------------------------------------------------------------------
-- Derivative

-- The index of the derivative. The right-hand sides (excluding
-- t′ ≟ t) are inferable, but included here so that they can easily be
-- inspected.

∂n :  {n}  P n  Tok  Bool
∂n                  t = false
∂n ε                 t = false
∂n (tok t′)          t with t′  t
∂n (tok t′)          t | yes t′≡t = true
∂n (tok t′)          t | no  t′≢t = false
∂n (p₁  p₂)         t = ∂n p₁ t  ∂n p₂ t
∂n ( p₁  ·  p₂ ) t = ∂n p₁ t  ∂n p₂ t
∂n ( p₁  ·  p₂ ) t = ∂n p₂ t
∂n ( p₁  ·  p₂ ) t = ∂n p₁ t
∂n ( p₁  ·  p₂ ) t = false
∂n (nonempty p)      t = ∂n p t

-- ∂ p t is the "derivative" of p with respect to t. It is specified
-- by the equivalence s ∈ ∂ p t ⇔ t ∷ s ∈ p (proved below).

 :  {n} (p : P n) (t : Tok)  P (∂n p t)
                  t = 
 ε                 t = 
 (tok t′)          t with t′  t
 (tok t′)          t | yes t′≡t = ε
 (tok t′)          t | no  t′≢t = 
 (p₁  p₂)         t =  p₁ t   p₂ t
 ( p₁  ·  p₂ ) t =        p₁  t  · ♯?    p₂   p₂  t
 ( p₁  ·  p₂ ) t =    ( p₁) t  · ♯?    p₂   p₂  t
 ( p₁  ·  p₂ ) t =        p₁  t  · ♯? ( p₂)
 ( p₁  ·  p₂ ) t =    ( p₁) t  · ♯? ( p₂)
 (nonempty p)      t =  p t

-- ∂ is correct.

∂-sound :  {s n} {p : P n} {t}  s   p t  t  s  p
∂-sound s∈ = ∂-sound′ _ _ s∈
  where
  ∂-sound′ :  {s n} (p : P n) t  s   p t  t  s  p
  ∂-sound′                  t ()
  ∂-sound′ ε                 t ()
  ∂-sound′ (tok t′)          t _              with t′  t
  ∂-sound′ (tok .t)          t ε              | yes refl = tok
  ∂-sound′ (tok t′)          t ()             | no  t′≢t
  ∂-sound′ (p₁  p₂)         t (∣ˡ ∈₁)        = ∣ˡ (∂-sound′ p₁ t ∈₁)
  ∂-sound′ (p₁  p₂)         t (∣ʳ ∈₂)        = ∣ʳ {p₁ = p₁} (∂-sound′ p₂ t ∈₂)
  ∂-sound′ ( p₁  ·  p₂ ) t (∣ˡ (∈₁ · ∈₂)) = ∂-sound ∈₁ · cast refl (♭?♯? (∂n p₁ t)) ∈₂
  ∂-sound′ ( p₁  ·  p₂ ) t (∣ʳ ∈₂)        =  p₁ refl · ∂-sound′ p₂ t ∈₂
  ∂-sound′ ( p₁  ·  p₂ ) t (∈₁ · ∈₂)      = ∂-sound ∈₁ · cast refl (♭?♯? (∂n p₁ t)) ∈₂
  ∂-sound′ ( p₁  ·  p₂ ) t (∣ˡ (∈₁ · ∈₂)) = ∂-sound ∈₁ · cast refl (♭?♯? (∂n ( p₁) t)) ∈₂
  ∂-sound′ ( p₁  ·  p₂ ) t (∣ʳ ∈₂)        =  ( p₁) refl · ∂-sound′ p₂ t ∈₂
  ∂-sound′ ( p₁  ·  p₂ ) t (∈₁ · ∈₂)      = ∂-sound ∈₁ · cast refl (♭?♯? (∂n ( p₁) t)) ∈₂
  ∂-sound′ (nonempty p)      t               = nonempty (∂-sound )

∂-complete :  {s n} {p : P n} {t}  t  s  p  s   p t
∂-complete {t = t} t∷s∈ = ∂-complete′ _ t∷s∈ refl
  where
  ∂-complete′ :  {s s′ n} (p : P n)  s′  p  s′  t  s  s   p t
  ∂-complete′                 ()  refl
  ∂-complete′         ε        ()  refl
  ∂-complete′         (tok t′) _   refl with t′  t
  ∂-complete′         (tok .t) tok refl | yes refl = ε
  ∂-complete′ {[]}    (tok .t) tok refl | no  t′≢t with t′≢t refl
  ∂-complete′ {[]}    (tok .t) tok refl | no  t′≢t | ()
  ∂-complete′ {_  _} (tok t′) ()  refl | no  t′≢t
  ∂-complete′ (p₁  p₂)         (∣ˡ ∈₁)              refl = ∣ˡ (∂-complete ∈₁)
  ∂-complete′ (p₁  p₂)         (∣ʳ ∈₂)              refl = ∣ʳ {p₁ =  p₁ t} (∂-complete ∈₂)
  ∂-complete′ ( p₁  ·  p₂ ) (_·_ {[]}     ∈₁ ∈₂) refl = ∣ʳ {p₁ =   p₁ t  · _} (∂-complete ∈₂)
  ∂-complete′ ( p₁  ·  p₂ ) (_·_ {._  _} ∈₁ ∈₂) refl = ∣ˡ (∂-complete ∈₁ · cast refl (sym (♭?♯? (∂n p₁ t))) ∈₂)
  ∂-complete′ ( p₁  ·  p₂ ) (_·_ {[]}     ∈₁ ∈₂) refl with  ∈₁
  ∂-complete′ ( p₁  ·  p₂ ) (_·_ {[]}     ∈₁ ∈₂) refl | ()
  ∂-complete′ ( p₁  ·  p₂ ) (_·_ {._  _} ∈₁ ∈₂) refl = ∂-complete ∈₁ · cast refl (sym (♭?♯? (∂n p₁ t))) ∈₂
  ∂-complete′ ( p₁  ·  p₂ ) (_·_ {[]}     ∈₁ ∈₂) refl = ∣ʳ {p₁ =  _  · _} (∂-complete ∈₂)
  ∂-complete′ ( p₁  ·  p₂ ) (_·_ {._  _} ∈₁ ∈₂) refl = ∣ˡ (∂-complete ∈₁ · cast refl (sym (♭?♯? (∂n ( p₁) t))) ∈₂)
  ∂-complete′ ( p₁  ·  p₂ ) (_·_ {[]}     ∈₁ ∈₂) refl with  ∈₁
  ∂-complete′ ( p₁  ·  p₂ ) (_·_ {[]}     ∈₁ ∈₂) refl | ()
  ∂-complete′ ( p₁  ·  p₂ ) (_·_ {._  _} ∈₁ ∈₂) refl = ∂-complete ∈₁ · cast refl (sym (♭?♯? (∂n ( p₁) t))) ∈₂
  ∂-complete′ (nonempty p)      (nonempty )         refl = ∂-complete 

------------------------------------------------------------------------
-- _∈_ is decidable

-- _∈?_ runs a parser (recogniser). Note that the result is yes or no
-- plus a /proof/ verifying that the answer is correct.

infix 4 _∈?_

_∈?_ :  {n} (s : List Tok) (p : P n)  Dec (s  p)
[]    ∈? p = nullable? p
t  s ∈? p with s ∈?  p t
t  s ∈? p | yes s∈∂pt = yes (∂-sound s∈∂pt)
t  s ∈? p | no  s∉∂pt = no  (s∉∂pt  ∂-complete)

------------------------------------------------------------------------
-- The combinator nonempty does not need to be primitive

-- The variant of nonempty which is defined below (nonempty′) makes
-- many parsers larger, though.

module AlternativeNonempty where

  nonempty′ :  {n}  P n  P false
  nonempty′                  = 
  nonempty′ ε                 = 
  nonempty′ (tok t)           = tok t
  nonempty′ (p₁  p₂)         = nonempty′ p₁  nonempty′ p₂
  nonempty′ ( p₁  ·   p₂  ) =  p₁  ·   p₂
  nonempty′ ( p₁  ·  p₂ ) =  p₁  ·  p₂ 
  nonempty′ ( p₁  ·  p₂ ) = nonempty′ p₁  nonempty′ p₂
                               ♯? (nonempty′ p₁) · ♯? (nonempty′ p₂)
  nonempty′ (nonempty p)      = nonempty′ p

  sound :  {n s} {p : P n}  s  nonempty′ p  s  nonempty p
  sound {s = []}    pr with  pr
  ... | ()
  sound {s = _  _} pr = nonempty (sound′ _ pr refl)
    where
    sound′ :  {n t s s′} (p : P n) 
             s′  nonempty′ p  s′  t  s  t  s  p
    sound′                  ()                         refl
    sound′ ε                 ()                         refl
    sound′ (tok t)           tok                        refl = tok
    sound′ (p₁  p₂)         (∣ˡ pr)                    refl = ∣ˡ           (sound′ p₁ pr refl)
    sound′ (p₁  p₂)         (∣ʳ pr)                    refl = ∣ʳ {p₁ = p₁} (sound′ p₂ pr refl)
    sound′ ( p₁  ·   p₂  ) pr                         refl = pr
    sound′ ( p₁  ·  p₂ ) pr                         refl = pr
    sound′ ( p₁  ·  p₂ ) (∣ˡ (∣ˡ pr))               refl = cast (proj₂ ListMonoid.identity _) refl $
                                                                 sound′ p₁ pr refl ·  p₂ refl
    sound′ ( p₁  ·  p₂ ) (∣ˡ (∣ʳ pr))               refl =  p₁ refl · sound′ p₂ pr refl
    sound′ ( p₁  ·  p₂ ) (∣ʳ (_·_ {[]}    pr₁ pr₂)) refl with  pr₁
    ... | ()
    sound′ ( p₁  ·  p₂ ) (∣ʳ (_·_ {_  _} pr₁ pr₂)) refl with sound {p = p₂} pr₂
    ... | nonempty pr₂′ = sound′ p₁ pr₁ refl · pr₂′
    sound′ (nonempty p)      pr                         refl = nonempty (sound′ p pr refl)

  complete :  {n s} {p : P n}  s  nonempty p  s  nonempty′ p
  complete (nonempty pr) = complete′ _ pr refl
    where
    complete′ :  {n t s s′} (p : P n) 
                s  p  s  t  s′  t  s′  nonempty′ p
    complete′                  ()                            refl
    complete′ ε                 ()                            refl
    complete′ (tok t)           tok                           refl = tok
    complete′ (p₁  p₂)         (∣ˡ pr)                       refl = ∣ˡ              (complete′ p₁ pr refl)
    complete′ (p₁  p₂)         (∣ʳ pr)                       refl = ∣ʳ {n₁ = false} (complete′ p₂ pr refl)
    complete′ ( p₁  ·   p₂  ) pr                            refl = pr
    complete′ ( p₁  ·  p₂ ) pr                            refl = pr
    complete′ ( p₁  ·  p₂ ) (_·_ {[]}            pr₁ pr₂) refl = ∣ˡ (∣ʳ {n₁ = false} (complete′ p₂ pr₂ refl))
    complete′ ( p₁  ·  p₂ ) (_·_ {_  _} {[]}    pr₁ pr₂) refl = cast (sym $ proj₂ ListMonoid.identity _) refl $
                                                                       ∣ˡ (∣ˡ {n₂ = false} (complete′ p₁ pr₁ refl))
    complete′ ( p₁  ·  p₂ ) (_·_ {_  _} {_  _} pr₁ pr₂) refl = ∣ʳ {n₁ = false} (complete′ p₁ pr₁ refl ·
                                                                                      complete′ p₂ pr₂ refl)
    complete′ (nonempty p)      (nonempty pr)                 refl = complete′ p pr refl