TotalParserCombinators

------------------------------------------------------------------------
-- Total parser combinators
------------------------------------------------------------------------

module TotalParserCombinators where

open import Algebra
open import Coinduction
open import Data.Bool using (Bool; true; false; not; _∨_)
import Data.Bool.Properties as Bool
private
  module BoolCS = CommutativeSemiring Bool.commutativeSemiring-∧-∨
open import Data.Function using (_∘_; _$_)
open import Data.List as List using (List; []; _∷_; _++_; [_])
private
  module ListMonoid {A} = Monoid (List.monoid A)
open import Data.Product
open import Relation.Binary
open import Relation.Binary.PropositionalEquality
open import Relation.Nullary
open import Data.String using () renaming (String to Tok)
open DecSetoid Data.String.decSetoid using (_≟_)

------------------------------------------------------------------------
-- A "right-strict" variant of _∧_

-- If the left-strict variant of _∧_ were used to type _·_ below, then
-- the inferred definition of ∂n would not be total; it would contain
-- expressions of the form "∂n (♭ p₁) t ∧ false". With the
-- right-strict definition of _∧_ such expressions reduce to "false".

infixr 6 _∧_

_∧_ : Bool → Bool → Bool
b ∧ true  = b
b ∧ false = false

------------------------------------------------------------------------
-- Conditional coinduction

-- Coinductive if the index is /false/.

data ∞? (A : Set) : Bool → Set where
  ⟨_⟩ : (x :   A) → ∞? A true
  ⟪_⟫ : (x : ∞ A) → ∞? A false

♯? : ∀ {b A} → A → ∞? A b
♯? {true}  x = ⟨   x ⟩
♯? {false} x = ⟪ ♯ x ⟫

♭? : ∀ {A b} → ∞? A b → A
♭? ⟨ x ⟩ =   x
♭? ⟪ x ⟫ = ♭ x

-- A lemma.

♭?♯? : ∀ {A} b {x : A} → ♭? (♯? {b} x) ≡ x
♭?♯? true  = refl
♭?♯? false = refl

------------------------------------------------------------------------
-- Parser combinators

infixl 10 _·_
infixl  5 _∣_

-- The index is true if the corresponding language contains the empty
-- string (is nullable).

data P : Bool → Set where
  ∅        : P false
  ε        : P true
  tok      : Tok → P false
  _∣_      : ∀ {n₁ n₂} →     P n₁     →     P n₂     → P (n₁ ∨ n₂)
  _·_      : ∀ {n₁ n₂} → ∞? (P n₁) n₂ → ∞? (P n₂) n₁ → P (n₁ ∧ n₂)
  nonempty : ∀ {n} → P n → P false

-- Note that, as discussed below, ∅ and nonempty could be defined as
-- derived combinators.

------------------------------------------------------------------------
-- Semantics

-- The semantics is defined inductively: s ∈ p iff the string s is
-- contained in the language defined by p.

infix 4 _∈_

data _∈_ : ∀ {n} → List Tok → P n → Set where
  ε        : [] ∈ ε
  tok      : ∀ {t} → [ t ] ∈ tok t
  ∣ˡ       : ∀ {s n₁ n₂} {p₁ : P n₁} {p₂ : P n₂} →
             s ∈ p₁ → s ∈ p₁ ∣ p₂
  ∣ʳ       : ∀ {s n₁ n₂} {p₁ : P n₁} {p₂ : P n₂} →
             s ∈ p₂ → s ∈ p₁ ∣ p₂
  _·_      : ∀ {s₁ s₂ n₁ n₂} {p₁ : ∞? (P n₁) n₂} {p₂ : ∞? (P n₂) n₁} →
             s₁ ∈ ♭? p₁ → s₂ ∈ ♭? p₂ → s₁ ++ s₂ ∈ p₁ · p₂
  nonempty : ∀ {n t s} {p : P n} →
             t ∷ s ∈ p → t ∷ s ∈ nonempty p

-- A lemma.

cast : ∀ {n} {p p′ : P n} {s s′} → s ≡ s′ → p ≡ p′ → s ∈ p → s′ ∈ p′
cast refl refl s∈ = s∈

------------------------------------------------------------------------
-- Example: A definition which is left and right recursive

leftRight : P false
leftRight = ⟪ ♯ leftRight ⟫ · ⟪ ♯ leftRight ⟫

-- Note that leftRight is equivalent to ∅, so ∅ does not need to be a
-- primitive combinator.

leftRight≈∅ : ∀ {s} → ¬ s ∈ leftRight
leftRight≈∅ (∈₁ · ∈₂) = leftRight≈∅ ∈₁

------------------------------------------------------------------------
-- Example: Kleene star

-- The intended semantics of the Kleene star.

infixr 5 _∷_
infix  4 _∈[_]⋆

data _∈[_]⋆ {n} : List Tok → P n → Set where
  []  : ∀ {p}       → [] ∈[ p ]⋆
  _∷_ : ∀ {s₁ s₂ p} → s₁ ∈ p → s₂ ∈[ p ]⋆ → s₁ ++ s₂ ∈[ p ]⋆

module Variant₁ where

  -- This definition requires that the argument parsers are not
  -- nullable.

  _⋆ : P false → P true
  p ⋆ = ε ∣ ⟨ p ⟩ · ⟪ ♯ (p ⋆) ⟫

  -- The definition of _⋆ above is correct.

  ⋆-sound : ∀ {s p} → s ∈ p ⋆ → s ∈[ p ]⋆
  ⋆-sound (∣ˡ ε)           = []
  ⋆-sound (∣ʳ (pr₁ · pr₂)) = pr₁ ∷ ⋆-sound pr₂

  ⋆-complete : ∀ {s p} → s ∈[ p ]⋆ → s ∈ p ⋆
  ⋆-complete []                    = ∣ˡ ε
  ⋆-complete (_∷_ {[]}    pr₁ pr₂) = ⋆-complete pr₂
  ⋆-complete (_∷_ {_ ∷ _} pr₁ pr₂) =
    ∣ʳ {n₁ = true} (pr₁ · ⋆-complete pr₂)

module Variant₂ where

  -- This definition works for any argument parser.

  _⋆ : ∀ {n} → P n → P true
  p ⋆ = ε ∣ ⟨ nonempty p ⟩ · ⟪ ♯ (p ⋆) ⟫

  -- The definition of _⋆ above is correct.

  ⋆-sound : ∀ {s n} {p : P n} → s ∈ p ⋆ → s ∈[ p ]⋆
  ⋆-sound (∣ˡ ε)                    = []
  ⋆-sound (∣ʳ (nonempty pr₁ · pr₂)) = pr₁ ∷ ⋆-sound pr₂

  ⋆-complete : ∀ {s n} {p : P n} → s ∈[ p ]⋆ → s ∈ p ⋆
  ⋆-complete []                    = ∣ˡ ε
  ⋆-complete (_∷_ {[]}    pr₁ pr₂) = ⋆-complete pr₂
  ⋆-complete (_∷_ {_ ∷ _} pr₁ pr₂) =
    ∣ʳ {n₁ = true} (nonempty pr₁ · ⋆-complete pr₂)

  -- Note, however, that for actual parsing the corresponding
  -- definition would not be correct. The reason is that p would give
  -- a result also when the empty string was accepted, and these
  -- results are ignored by the definition above. In the case of
  -- actual parsing the result of p ⋆, when p is nullable, should be a
  -- stream and not a finite list.

------------------------------------------------------------------------
-- Nullability

-- The nullability index is correct.

⇒ : ∀ {n} {p : P n} → [] ∈ p → n ≡ true
⇒ pr = ⇒′ pr refl
  where
  ⇒′ : ∀ {n s} {p : P n} → s ∈ p → s ≡ [] → n ≡ true
  ⇒′ ε                     refl = refl
  ⇒′ tok                   ()
  ⇒′ (∣ˡ pr₁)              refl with ⇒ pr₁
  ⇒′ (∣ˡ pr₁)              refl | refl = refl
  ⇒′ (∣ʳ pr₂)              refl with ⇒ pr₂
  ⇒′ (∣ʳ {n₁ = n₁} pr₂)    refl | refl = proj₂ BoolCS.zero n₁
  ⇒′ (_·_ {[]}    pr₁ pr₂) refl = cong₂ _∧_ (⇒ pr₁) (⇒ pr₂)
  ⇒′ (_·_ {_ ∷ _} pr₁ pr₂) ()
  ⇒′ (nonempty p)          ()

⇐ : ∀ {n} (p : P n) → n ≡ true → [] ∈ p
⇐ ∅                            ()
⇐ ε                            refl = ε
⇐ (tok t)                      ()
⇐ (_∣_ {true}           p₁ p₂) refl = ∣ˡ (⇐ p₁ refl)
⇐ (_∣_ {false} {true}   p₁ p₂) refl = ∣ʳ {p₁ = p₁} (⇐ p₂ refl)
⇐ (_∣_ {false} {false}  p₁ p₂) ()
⇐ (⟨ p₁ ⟩ · ⟨ p₂ ⟩)            refl = ⇐ p₁ refl · ⇐ p₂ refl
⇐ (⟪ p₁ ⟫ · ⟨ p₂ ⟩)            ()
⇐ (⟨ p₁ ⟩ · ⟪ p₂ ⟫)            ()
⇐ (⟪ p₁ ⟫ · ⟪ p₂ ⟫)            ()
⇐ (nonempty p)                 ()

-- We can decide if the empty string belongs to a given language.

nullable? : ∀ {n} (p : P n) → Dec ([] ∈ p)
nullable? {true}  p = yes (⇐ p refl)
nullable? {false} p = no helper
  where
  helper : ¬ [] ∈ p
  helper []∈p with ⇒ []∈p
  ... | ()

------------------------------------------------------------------------
-- Derivative

-- The index of the derivative. The right-hand sides (excluding
-- t′ ≟ t) are inferable, but included here so that they can easily be
-- inspected.

∂n : ∀ {n} → P n → Tok → Bool
∂n ∅                 t = false
∂n ε                 t = false
∂n (tok t′)          t with t′ ≟ t
∂n (tok t′)          t | yes t′≡t = true
∂n (tok t′)          t | no  t′≢t = false
∂n (p₁ ∣ p₂)         t = ∂n p₁ t ∨ ∂n p₂ t
∂n (⟨ p₁ ⟩ · ⟨ p₂ ⟩) t = ∂n p₁ t ∨ ∂n p₂ t
∂n (⟪ p₁ ⟫ · ⟨ p₂ ⟩) t = ∂n p₂ t
∂n (⟨ p₁ ⟩ · ⟪ p₂ ⟫) t = ∂n p₁ t
∂n (⟪ p₁ ⟫ · ⟪ p₂ ⟫) t = false
∂n (nonempty p)      t = ∂n p t

-- ∂ p t is the "derivative" of p with respect to t. It is specified
-- by the equivalence s ∈ ∂ p t ⇔ t ∷ s ∈ p (proved below).

∂ : ∀ {n} (p : P n) (t : Tok) → P (∂n p t)
∂ ∅                 t = ∅
∂ ε                 t = ∅
∂ (tok t′)          t with t′ ≟ t
∂ (tok t′)          t | yes t′≡t = ε
∂ (tok t′)          t | no  t′≢t = ∅
∂ (p₁ ∣ p₂)         t = ∂ p₁ t ∣ ∂ p₂ t
∂ (⟨ p₁ ⟩ · ⟨ p₂ ⟩) t = ⟨   ∂    p₁  t ⟩ · ♯?    p₂ ∣ ∂ p₂  t
∂ (⟪ p₁ ⟫ · ⟨ p₂ ⟩) t = ⟪ ♯ ∂ (♭ p₁) t ⟫ · ♯?    p₂ ∣ ∂ p₂  t
∂ (⟨ p₁ ⟩ · ⟪ p₂ ⟫) t = ⟨   ∂    p₁  t ⟩ · ♯? (♭ p₂)
∂ (⟪ p₁ ⟫ · ⟪ p₂ ⟫) t = ⟪ ♯ ∂ (♭ p₁) t ⟫ · ♯? (♭ p₂)
∂ (nonempty p)      t = ∂ p t

-- ∂ is correct.

∂-sound : ∀ {s n} {p : P n} {t} → s ∈ ∂ p t → t ∷ s ∈ p
∂-sound s∈ = ∂-sound′ _ _ s∈
  where
  ∂-sound′ : ∀ {s n} (p : P n) t → s ∈ ∂ p t → t ∷ s ∈ p
  ∂-sound′ ∅                 t ()
  ∂-sound′ ε                 t ()
  ∂-sound′ (tok t′)          t _              with t′ ≟ t
  ∂-sound′ (tok .t)          t ε              | yes refl = tok
  ∂-sound′ (tok t′)          t ()             | no  t′≢t
  ∂-sound′ (p₁ ∣ p₂)         t (∣ˡ ∈₁)        = ∣ˡ (∂-sound′ p₁ t ∈₁)
  ∂-sound′ (p₁ ∣ p₂)         t (∣ʳ ∈₂)        = ∣ʳ {p₁ = p₁} (∂-sound′ p₂ t ∈₂)
  ∂-sound′ (⟨ p₁ ⟩ · ⟨ p₂ ⟩) t (∣ˡ (∈₁ · ∈₂)) = ∂-sound ∈₁ · cast refl (♭?♯? (∂n p₁ t)) ∈₂
  ∂-sound′ (⟨ p₁ ⟩ · ⟨ p₂ ⟩) t (∣ʳ ∈₂)        = ⇐ p₁ refl · ∂-sound′ p₂ t ∈₂
  ∂-sound′ (⟨ p₁ ⟩ · ⟪ p₂ ⟫) t (∈₁ · ∈₂)      = ∂-sound ∈₁ · cast refl (♭?♯? (∂n p₁ t)) ∈₂
  ∂-sound′ (⟪ p₁ ⟫ · ⟨ p₂ ⟩) t (∣ˡ (∈₁ · ∈₂)) = ∂-sound ∈₁ · cast refl (♭?♯? (∂n (♭ p₁) t)) ∈₂
  ∂-sound′ (⟪ p₁ ⟫ · ⟨ p₂ ⟩) t (∣ʳ ∈₂)        = ⇐ (♭ p₁) refl · ∂-sound′ p₂ t ∈₂
  ∂-sound′ (⟪ p₁ ⟫ · ⟪ p₂ ⟫) t (∈₁ · ∈₂)      = ∂-sound ∈₁ · cast refl (♭?♯? (∂n (♭ p₁) t)) ∈₂
  ∂-sound′ (nonempty p)      t ∈              = nonempty (∂-sound ∈)

∂-complete : ∀ {s n} {p : P n} {t} → t ∷ s ∈ p → s ∈ ∂ p t
∂-complete {t = t} t∷s∈ = ∂-complete′ _ t∷s∈ refl
  where
  ∂-complete′ : ∀ {s s′ n} (p : P n) → s′ ∈ p → s′ ≡ t ∷ s → s ∈ ∂ p t
  ∂-complete′         ∅        ()  refl
  ∂-complete′         ε        ()  refl
  ∂-complete′         (tok t′) _   refl with t′ ≟ t
  ∂-complete′         (tok .t) tok refl | yes refl = ε
  ∂-complete′ {[]}    (tok .t) tok refl | no  t′≢t with t′≢t refl
  ∂-complete′ {[]}    (tok .t) tok refl | no  t′≢t | ()
  ∂-complete′ {_ ∷ _} (tok t′) ()  refl | no  t′≢t
  ∂-complete′ (p₁ ∣ p₂)         (∣ˡ ∈₁)              refl = ∣ˡ (∂-complete ∈₁)
  ∂-complete′ (p₁ ∣ p₂)         (∣ʳ ∈₂)              refl = ∣ʳ {p₁ = ∂ p₁ t} (∂-complete ∈₂)
  ∂-complete′ (⟨ p₁ ⟩ · ⟨ p₂ ⟩) (_·_ {[]}     ∈₁ ∈₂) refl = ∣ʳ {p₁ = ⟨ ∂ p₁ t ⟩ · _} (∂-complete ∈₂)
  ∂-complete′ (⟨ p₁ ⟩ · ⟨ p₂ ⟩) (_·_ {._ ∷ _} ∈₁ ∈₂) refl = ∣ˡ (∂-complete ∈₁ · cast refl (sym (♭?♯? (∂n p₁ t))) ∈₂)
  ∂-complete′ (⟨ p₁ ⟩ · ⟪ p₂ ⟫) (_·_ {[]}     ∈₁ ∈₂) refl with ⇒ ∈₁
  ∂-complete′ (⟨ p₁ ⟩ · ⟪ p₂ ⟫) (_·_ {[]}     ∈₁ ∈₂) refl | ()
  ∂-complete′ (⟨ p₁ ⟩ · ⟪ p₂ ⟫) (_·_ {._ ∷ _} ∈₁ ∈₂) refl = ∂-complete ∈₁ · cast refl (sym (♭?♯? (∂n p₁ t))) ∈₂
  ∂-complete′ (⟪ p₁ ⟫ · ⟨ p₂ ⟩) (_·_ {[]}     ∈₁ ∈₂) refl = ∣ʳ {p₁ = ⟪ _ ⟫ · _} (∂-complete ∈₂)
  ∂-complete′ (⟪ p₁ ⟫ · ⟨ p₂ ⟩) (_·_ {._ ∷ _} ∈₁ ∈₂) refl = ∣ˡ (∂-complete ∈₁ · cast refl (sym (♭?♯? (∂n (♭ p₁) t))) ∈₂)
  ∂-complete′ (⟪ p₁ ⟫ · ⟪ p₂ ⟫) (_·_ {[]}     ∈₁ ∈₂) refl with ⇒ ∈₁
  ∂-complete′ (⟪ p₁ ⟫ · ⟪ p₂ ⟫) (_·_ {[]}     ∈₁ ∈₂) refl | ()
  ∂-complete′ (⟪ p₁ ⟫ · ⟪ p₂ ⟫) (_·_ {._ ∷ _} ∈₁ ∈₂) refl = ∂-complete ∈₁ · cast refl (sym (♭?♯? (∂n (♭ p₁) t))) ∈₂
  ∂-complete′ (nonempty p)      (nonempty ∈)         refl = ∂-complete ∈

------------------------------------------------------------------------
-- _∈_ is decidable

-- _∈?_ runs a parser (recogniser). Note that the result is yes or no
-- plus a /proof/ verifying that the answer is correct.

infix 4 _∈?_

_∈?_ : ∀ {n} (s : List Tok) (p : P n) → Dec (s ∈ p)
[]    ∈? p = nullable? p
t ∷ s ∈? p with s ∈? ∂ p t
t ∷ s ∈? p | yes s∈∂pt = yes (∂-sound s∈∂pt)
t ∷ s ∈? p | no  s∉∂pt = no  (s∉∂pt ∘ ∂-complete)

------------------------------------------------------------------------
-- The combinator nonempty does not need to be primitive

-- The variant of nonempty which is defined below (nonempty′) makes
-- many parsers larger, though.

module AlternativeNonempty where

  nonempty′ : ∀ {n} → P n → P false
  nonempty′ ∅                 = ∅
  nonempty′ ε                 = ∅
  nonempty′ (tok t)           = tok t
  nonempty′ (p₁ ∣ p₂)         = nonempty′ p₁ ∣ nonempty′ p₂
  nonempty′ (⟪ p₁ ⟫ ·   p₂  ) = ⟪ p₁ ⟫ ·   p₂
  nonempty′ (⟨ p₁ ⟩ · ⟪ p₂ ⟫) = ⟨ p₁ ⟩ · ⟪ p₂ ⟫
  nonempty′ (⟨ p₁ ⟩ · ⟨ p₂ ⟩) = nonempty′ p₁ ∣ nonempty′ p₂
                              ∣ ♯? (nonempty′ p₁) · ♯? (nonempty′ p₂)
  nonempty′ (nonempty p)      = nonempty′ p

  sound : ∀ {n s} {p : P n} → s ∈ nonempty′ p → s ∈ nonempty p
  sound {s = []}    pr with ⇒ pr
  ... | ()
  sound {s = _ ∷ _} pr = nonempty (sound′ _ pr refl)
    where
    sound′ : ∀ {n t s s′} (p : P n) →
             s′ ∈ nonempty′ p → s′ ≡ t ∷ s → t ∷ s ∈ p
    sound′ ∅                 ()                         refl
    sound′ ε                 ()                         refl
    sound′ (tok t)           tok                        refl = tok
    sound′ (p₁ ∣ p₂)         (∣ˡ pr)                    refl = ∣ˡ           (sound′ p₁ pr refl)
    sound′ (p₁ ∣ p₂)         (∣ʳ pr)                    refl = ∣ʳ {p₁ = p₁} (sound′ p₂ pr refl)
    sound′ (⟪ p₁ ⟫ ·   p₂  ) pr                         refl = pr
    sound′ (⟨ p₁ ⟩ · ⟪ p₂ ⟫) pr                         refl = pr
    sound′ (⟨ p₁ ⟩ · ⟨ p₂ ⟩) (∣ˡ (∣ˡ pr))               refl = cast (proj₂ ListMonoid.identity _) refl $
                                                                 sound′ p₁ pr refl · ⇐ p₂ refl
    sound′ (⟨ p₁ ⟩ · ⟨ p₂ ⟩) (∣ˡ (∣ʳ pr))               refl = ⇐ p₁ refl · sound′ p₂ pr refl
    sound′ (⟨ p₁ ⟩ · ⟨ p₂ ⟩) (∣ʳ (_·_ {[]}    pr₁ pr₂)) refl with ⇒ pr₁
    ... | ()
    sound′ (⟨ p₁ ⟩ · ⟨ p₂ ⟩) (∣ʳ (_·_ {_ ∷ _} pr₁ pr₂)) refl with sound {p = p₂} pr₂
    ... | nonempty pr₂′ = sound′ p₁ pr₁ refl · pr₂′
    sound′ (nonempty p)      pr                         refl = nonempty (sound′ p pr refl)

  complete : ∀ {n s} {p : P n} → s ∈ nonempty p → s ∈ nonempty′ p
  complete (nonempty pr) = complete′ _ pr refl
    where
    complete′ : ∀ {n t s s′} (p : P n) →
                s ∈ p → s ≡ t ∷ s′ → t ∷ s′ ∈ nonempty′ p
    complete′ ∅                 ()                            refl
    complete′ ε                 ()                            refl
    complete′ (tok t)           tok                           refl = tok
    complete′ (p₁ ∣ p₂)         (∣ˡ pr)                       refl = ∣ˡ              (complete′ p₁ pr refl)
    complete′ (p₁ ∣ p₂)         (∣ʳ pr)                       refl = ∣ʳ {n₁ = false} (complete′ p₂ pr refl)
    complete′ (⟪ p₁ ⟫ ·   p₂  ) pr                            refl = pr
    complete′ (⟨ p₁ ⟩ · ⟪ p₂ ⟫) pr                            refl = pr
    complete′ (⟨ p₁ ⟩ · ⟨ p₂ ⟩) (_·_ {[]}            pr₁ pr₂) refl = ∣ˡ (∣ʳ {n₁ = false} (complete′ p₂ pr₂ refl))
    complete′ (⟨ p₁ ⟩ · ⟨ p₂ ⟩) (_·_ {_ ∷ _} {[]}    pr₁ pr₂) refl = cast (sym $ proj₂ ListMonoid.identity _) refl $
                                                                       ∣ˡ (∣ˡ {n₂ = false} (complete′ p₁ pr₁ refl))
    complete′ (⟨ p₁ ⟩ · ⟨ p₂ ⟩) (_·_ {_ ∷ _} {_ ∷ _} pr₁ pr₂) refl = ∣ʳ {n₁ = false} (complete′ p₁ pr₁ refl ·
                                                                                      complete′ p₂ pr₂ refl)
    complete′ (nonempty p)      (nonempty pr)                 refl = complete′ p pr refl