A framework for implementing deeply embedded languages

Emil Axelsson

AFP guest lecture, 2011–02–14

Example language

Task: Make a deep implementation of the following language:

int :: Int -> Lang Int
  -- Integer literal

add :: Lang Int -> Lang Int -> Lang Int
  -- Addition

sub :: Lang Int -> Lang Int -> Lang Int
  -- Subtraction

Example language

Task: Make a deep implementation of the following language:

int :: Int -> Lang Int
  -- Integer literal

add :: Lang Int -> Lang Int -> Lang Int
  -- Addition

sub :: Lang Int -> Lang Int -> Lang Int
  -- Subtraction

Easy:

data Lang a
  where
    Int :: Int -> Lang Int
    Add :: Lang Int -> Lang Int -> Lang Int
    Sub :: Lang Int -> Lang Int -> Lang Int

Adding higher-order constructs

int :: Int -> Lang Int
  -- Integer literal

add :: Lang Int -> Lang Int -> Lang Int
  -- Addition

sub :: Lang Int -> Lang Int -> Lang Int
  -- Subtraction

comp :: (Lang b -> Lang c) -> (Lang a -> Lang b) -> (Lang a -> Lang c)
  -- Composition

loop :: Lang Int -> (Lang a -> Lang a) -> (Lang a -> Lang a)
  -- Iteration

How can we extend the Lang type to include these?

Higher-order abstract syntax (HOAS)

A straightforward implementation is to simply copy the type of comp and loop:

data Lang a
  where
    Int  :: Int -> Lang Int
    Add  :: Lang Int -> Lang Int -> Lang Int
    Sub  :: Lang Int -> Lang Int -> Lang Int
    Comp :: (Lang b -> Lang c) -> (Lang a -> Lang b) -> (Lang a -> Lang c)
    Loop :: Lang Int -> (Lang a -> Lang a) -> (Lang a -> Lang a)

But something is missing… How can we e.g. print the syntax of Comp?

Higher-order abstract syntax (HOAS)

Need to add variables:

data Lang a
  where
    Int  :: Int -> Lang Int
    Add  :: Lang Int -> Lang Int -> Lang Int
    Sub  :: Lang Int -> Lang Int -> Lang Int
    Comp :: (Lang b -> Lang c) -> (Lang a -> Lang b) -> (Lang a -> Lang c)
    Loop :: Lang Int -> (Lang a -> Lang a) -> (Lang a -> Lang a)
    Var  :: VarId -> Lang a

type VarId = Integer

HOAS evaluation

Evaluation of HOAS is straightforward:

eval :: Lang a -> a
eval (Int a)            = a
eval (Add a b)          = eval a + eval b
eval (Sub a b)          = eval a - eval b
eval (Comp f g a)       = eval ((f . g) a)
eval (Loop n body init) = eval (iterate body init !! eval n)

Syntactic analysis

But other syntactic analyses not so simple:

eqLang :: Lang a -> Lang a -> Bool
eqLang a b = flip evalState 0 $ eqLangM a b
  where
    eqLangM :: Lang a -> Lang b -> State VarId Bool
    eqLangM (Int a)   (Int b)   = return (a==b)
    eqLangM (Add a b) (Add c d) = liftM2 (&&) (eqLangM a c) (eqLangM b d)
    eqLangM (Sub a b) (Sub c d) = liftM2 (&&) (eqLangM a c) (eqLangM b d)
    eqLangM (Comp f1 g1 a1) (Comp f2 g2 a2) = do
        v1  <- get; put (v1+1)
        v2  <- get; put (v2+1)
        fEq <- eqLangM (f1 $ Var v1) (f2 $ Var v1)
        gEq <- eqLangM (g1 $ Var v2) (g2 $ Var v2)
        aEq <- eqLangM a1 a2
        return (fEq && gEq && aEq)
    eqLangM (Loop n1 body1 init1) (Loop n2 body2 init2) = do
        v      <- get; put (v+1)
        nEq    <- eqLangM n1 n2
        bodyEq <- eqLangM (body1 $ Var v) (body2 $ Var v)
        initEq <- eqLangM init1 init2
        return (nEq && bodyEq && initEq)
    eqLangM _ _ = return False

Repeated handling of binding and application (needs to be repeated for other analyses as well)

HOAS transformation

Alternative: First-order implementation (FOAS)

data Lang a
  where
    Int  :: Int -> Lang Int
    Add  :: Lang Int -> Lang Int -> Lang Int
    Sub  :: Lang Int -> Lang Int -> Lang Int
    Comp :: (VarId {-b-}, Lang c) -> (VarId {-a-}, Lang b) -> Lang a -> Lang c
    Loop :: Lang Int -> (VarId {-a-}, Lang a) -> Lang a -> Lang a
    Var  :: VarId -> Lang a
comp n body init = do
    v <- get; put (v+1)
    return $ Loop n (v, body (Var v)) init

Problem summary

Language implementation using algebraic data types (both HOAS and FOAS) leads to repeated handling of binding and application

Solution: Explicit abstraction and application

data Lang a
  where
    -- Domain-independent:
    Var    :: VarId -> Lang a
    Lambda :: VarId -> Lang b -> Lang (a -> b)
    Apply  :: Lang (a -> b) -> Lang a -> Lang b

    -- Domain-specific:
    Int  :: Int -> Lang Int
    Add  :: Lang (Int -> Int -> Int)
    Sub  :: Lang (Int -> Int -> Int)
    Comp :: Lang ((b -> c) -> (a -> b) -> (a -> c))
    Loop :: Lang (Int -> (a -> a) -> (a -> a))

Syntactic analysis

Syntactic analysis of the domain-independent part still requires some work:

eval' :: Typeable a => Lang a -> Reader [(VarId,Dynamic)] a
eval' (Var v) = do
    env <- ask
    case lookup v env of
      Just a -> case fromDynamic a of
        Just a' -> return a'
eval' (Lambda v f) = do
    env <- ask
    return $ \a -> flip runReader ((v,toDyn a):env) $ eval' f
eval' (Apply f a) = liftM2 ($) (eval' f) (eval' a)
eval' a           = return (evalDom a)

Syntactic analysis

Syntactic analysis of the domain-independent part still requires some work:

eval' :: Typeable a => Lang a -> Reader [(VarId,Dynamic)] a
eval' (Var v) = do
    env <- ask
    case lookup v env of
      Just a -> case fromDynamic a of
        Just a' -> return a'
eval' (Lambda v f) = do
    env <- ask
    return $ \a -> flip runReader ((v,toDyn a):env) $ eval' f
eval' (Apply f a) = liftM2 ($) (eval' f) (eval' a)
eval' a           = return (evalDom a)

…but analysis of the domain-specific part is usually trivial:

evalDom :: Lang a -> a
evalDom (Int n) = n
evalDom Add     = (+)
evalDom Sub     = (-)
evalDom Comp    = (.)
evalDom Loop    = \n f a -> iterate f a !! n

The same holds for many other analyses.

Reusable embedding

Can we reuse evaluation (etc.) of the domain-independent part across several languages?

Reusable embedding

Can we reuse evaluation (etc.) of the domain-independent part across several languages?

Yes! Just parameterize on the domain-specific part:

data Lang expr a
  where
    Var    :: VarId -> Lang expr a
    Lambda :: VarId -> Lang expr b -> Lang expr (a -> b)
    Apply  :: Lang expr (a -> b) -> Lang expr a -> Lang expr b
    Inject :: expr a -> Lang expr a

The deep-embedding library

data Lambda expr a
  where
    Variable :: Typeable a => VarId -> Lambda expr a

    Lambda :: (Typeable a, Typeable b) =>
        VarId -> Lambda expr b -> Lambda expr (a -> b)

    (:$:) :: Typeable a =>
        Lambda expr (a -> b) -> Lambda expr a -> Lambda expr b

    Inject :: expr a -> Lambda expr a

Analyses and back ends

class Eval expr
  where
    eval :: expr a -> a

class ExprEq expr
  where
    exprEq :: expr a -> expr b -> Bool

class ExprShow expr
  where
    exprShow :: expr a -> String

instance Eval     expr => Eval     (Lambda expr)
instance ExprEq   expr => ExprEq   (Lambda expr)
instance ExprShow expr => ExprShow (Lambda expr)

printExpr :: ExprShow expr => expr a -> IO ()
  -- Print expression

drawLambda :: ExprShow expr => Lambda expr a -> IO ()
  -- Print syntax tree

Monadic front end

newtype Supply a = Supply { runSupply :: State VarId a }
  deriving (Monad, MonadState VarId)

newtype Lam expr a = Lam { runLam :: Supply (Lambda expr a) }

runLambda :: Lam expr a -> Lambda expr a
runLambda = flip evalState 0 . runSupply . runLam

lambda :: (Typeable a, Typeable b) =>
    (Lam expr a -> Lam expr b) -> Lam expr (a -> b)
lambda f = Lam $ do
    v <- get; put (v+1)
    let Lam body = f (Lam $ return $ Variable v)
    liftM (Lambda v) body

($$) :: Typeable a => Lam expr (a -> b) -> Lam expr a -> Lam expr b
Lam f $$ Lam a = Lam $ liftM2 (:$:) f a

inject :: expr a -> Lam expr a
inject = Lam . return . Inject

Reimplementation of Lang

data Domain a
  where
    Int  :: Int -> Domain Int
    Add  :: Domain (Int -> Int -> Int)
    Mul  :: Domain (Int -> Int -> Int)
    Comp :: Domain ((b -> c) -> (a -> b) -> (a -> c))
    Loop :: Domain (Int -> (a -> a) -> (a -> a))

type Lang a = Lam Domain a
int :: Int -> Lang Int
int = inject . Int

add :: Lang Int -> Lang Int -> Lang Int
add a b = inject Add $$ a $$ b

sub :: Lang Int -> Lang Int -> Lang Int
sub a b = inject Sub $$ a $$ b

comp :: (Typeable a, Typeable b, Typeable c) =>
    (Lang b -> Lang c) -> (Lang a -> Lang b) -> (Lang a -> Lang c)
comp f g a = inject Comp $$ lambda f $$ lambda g $$ a

loop :: Typeable a => Lang Int -> (Lang a -> Lang a) -> (Lang a -> Lang a)
loop n body init = inject Loop $$ n $$ lambda body $$ init

Reimplementation of Lang

instance Eval Domain where
    eval (Int n) = n
    eval Add     = (+)
    eval Sub     = (-)
    eval Comp    = (.)
    eval Loop    = \n f a -> iterate f a !! n

instance ExprEq Domain where
    Int n1 `exprEq` Int n2 = n1==n2
    Add    `exprEq` Add    = True
    Sub    `exprEq` Sub    = True
    Comp   `exprEq` Comp   = True
    Loop   `exprEq` Loop   = True
    _      `exprEq` _      = False

instance ExprShow Domain where
    exprShow (Int n) = "(int " ++ show n ++ ")"
    exprShow Add     = "add"
    exprShow Sub     = "sub"
    exprShow Comp    = "comp"
    exprShow Loop    = "loop"

printLang = printExpr . runLambda
drawLang  = drawLambda . runLambda

Live testing of implementation

deep-embedding library