-- | Converts GFCC to MCFG with variants. -- The resulting grammars may be parallel and erasing. import GFCC.Abs import GFCC.Lex import GFCC.Par import GFCC.Print import GFCC.ErrM import Control.Monad import Data.List import System.Environment -- -- * Testing -- main :: IO () main = do [file] <- getArgs cont <- readFile file case readGFCC cont of Ok gr -> putStrLn (printEMCFGs gr) Bad err -> putStrLn err readGFCC :: String -> Err Grammar readGFCC = pGrammar . myLexer printEMCFGs :: Grammar -> String printEMCFGs gfcc@(Grm _ _ cs) = unlines $ map (printEMCFG gfcc) cs printEMCFG :: Grammar -> Concrete -> String printEMCFG gfcc cnc@(Cnc n _) = "--- " ++ printTree n ++ " ---\n" ++ unlines (map rend $ concreteToEMCFG gfcc cnc) where rend x = (foldr (.) id $ prt 0 x []) "" -- -- * MCFG grammars -- type Fun = String type Cat = String type Label = String -- | EMCFG is like MCFG, but with variants. type EMCFG = [EMCFGRule] data EMCFGRule = EMCFGRule Fun Cat [Cat] [(Label, EMCFGTerm)] deriving Show data EMCFGTerm = Sequence [EMCFGTerm] | Union [EMCFGTerm] | Symbol MCFGSymbol deriving Show data MCFGSymbol = Token String | Projection Integer Label deriving Show instance Print EMCFGRule where prt _ (EMCFGRule f r args fs) = str f . str ". " . str r . str " --> " . joinD " " (map str args) . str " := { " . joinD ", " [str l . str " = " . prt 0 t | (l,t) <- fs] . str " }" instance Print EMCFGTerm where prt p (Sequence xs) = prPrec p 1 (joinD " " (map (prt 1) xs)) prt p (Union xs) = prPrec p 1 (joinD " | " (map (prt 1) xs)) prt _ (Symbol x) = prt 0 x instance Print MCFGSymbol where prt _ (Token t) = str (show t) prt _ (Projection n l) = str "$" . str (show n) . str "." . str l joinD :: String -> [Doc] -> Doc joinD glue = concatD . intersperse (doc (showString glue)) str :: String -> Doc str = doc . showString -- -- * GFCC to EMCFG conversion -- -- | The path to an atomic value in a term, i.e. -- a label for a field in a flattened term. type Path = [Integer] concreteToEMCFG :: Grammar -> Concrete -> EMCFG concreteToEMCFG gfcc cnc@(Cnc _ ds) = concatMap (linToRules gfcc cnc) ds' where ds' = [ l | l@(Lin (CId x) _) <- ds, head x /= '_'] -- | Converts a single lin judgement to a set of EMCFG rules. linToRules :: Grammar -> Concrete -> CncDef -> [EMCFGRule] linToRules gfcc cnc (Lin f rhs) = map argValsToRule $ allArgumentValues argLinTypes where Typ argTypes resType = lookType gfcc f argLinTypes = map (lookLinType cnc) argTypes resLinType = lookLinType cnc resType (argFinLabels, argStrLabels) = unzip $ map labels argLinTypes (resFinLabels, resStrLabels) = labels resLinType argValsToRule args = EMCFGRule (mkFun f) resCat argCats r where t = eval cnc args "" rhs argFinVals = zipWith (map . projectPath) args argFinLabels resFinVals = map (projectPath t) resFinLabels resCat = mkCat resType resFinVals argCats = zipWith mkCat argTypes argFinVals r = [(mkLabel l, mkTerm (projectPath t l)) | l <- resStrLabels] mkFun :: CId -> Fun mkFun (CId f) = f mkLabel :: Path -> Label mkLabel = concat . intersperse "_" . map show mkCat :: CId -> [Term] -> Cat mkCat x ts = printTree x ++ show (map fromC ts) where fromC (C n) = n mkTerm :: Term -> EMCFGTerm mkTerm t = case t of S xs -> Sequence $ map mkTerm xs FV xs -> Union $ map mkTerm xs P x (C n) -> case mkTerm x of Symbol (Projection n l) -> let l' = if null l then show n else l++"_"++show n in Symbol (Projection n l') V n -> Symbol (Projection n "") K (KS t) -> Symbol (Token t) _ -> error $ "printTerm (" ++ show t ++ ")" -- | Get all combinations of argument values given -- a list of argument linearization types. allArgumentValues :: [Term] -- ^ Argument types -> [[Term]] -- ^ A list of argument combinations. -- Each combination has the same length -- as the list of argument types. allArgumentValues = zipWithM (allValues . V) [0..] -- | Gets all possible values of a linearization type. allValues :: Term -- ^ Value to project strings from. -> Term -- ^ Linearization type. -> [Term] allValues arg t = case t of R ts -> map R $ sequence [allValues (P arg (C i)) x | (i,x) <- zip [0..] ts] S [] -> [arg] C i -> map C [0..i-1] _ -> error $ "bad lin type: " ++ show t -- | Computes away all CSE and prefix table terms. Replaces -- variable by the corresponding argument. eval :: Concrete -> [Term] -- ^ Arguments -> String -- ^ Token prefix -> Term -> Term eval cnc args pref t = case t of -- record / table R xs -> R (map f xs) -- projection P x y -> project (f x) (f y) -- sequence S xs -> S (map f xs) -- CSE F cid -> f (lookLin cnc cid) -- variants FV xs -> FV (map f xs) -- suffix table W s x -> eval cnc args (pref++s) (f x) -- argument V i -> idx args (fromInteger i) -- token K (KS t) -> K $ KS (pref ++ t) -- labels/parameter values, RP, TM, L, BV _ -> t where f = eval cnc args pref project :: Term -> Term -> Term project (R ts) (C i) = idx ts (fromInteger i) project (FV rs) p = FV (map (\r -> project r p) rs) project (W s r) p = W s (project r p) project r (FV ps) = FV (map (project r) ps) project r p = P r p -- defer projections on variables etc. -- | Expands free variation to the top-level. The -- terms in the result do not contain any FV terms. expand :: Term -> [Term] expand t = case t of R xs -> liftM R $ mapM expand xs P x y -> liftM2 P (expand x) (expand y) S xs -> liftM S $ mapM expand xs FV xs -> xs >>= expand W s x -> liftM (W s) $ expand x _ -> return t -- | Project a field from a term using a record path. projectPath :: Term -> Path -> Term projectPath = foldl (\t x -> project t (C x)) -- | Computes the labels for the finite and string fields -- of the flattened version of a term. labels :: Term -> ([Path],[Path]) labels t = case t of R xs -> both (concat . zipWith (map . (:)) [0..]) $ unzip $ map labels xs S [] -> ([],[[]]) C i -> ([[]],[]) _ -> error $ "bad lin type: " ++ show t -- -- * GFCC utilities -- lookType :: Grammar -> CId -> Type lookType (Grm _ (Abs ds) _) f = case [ t | Fun x t _ <- ds, x == f] of [] -> error $ "lookType " ++ show f [t] -> t lookLin :: Concrete -> CId -> Term lookLin (Cnc _ ds) f = case [t | Lin x t <- ds, x == f] of [] -> error $ "lookLin " ++ show f [t] -> t lookLinType :: Concrete -> CId -> Term lookLinType cnc (CId f) = lookLin cnc (CId ("__" ++ f)) -- -- * General utilities -- idx :: Show a => [a] -> Int -> a idx xs i | i < length xs = xs !! i idx xs i = error $ show xs ++ "!" ++ show i both :: (a -> b) -> (a, a) -> (b, b) both f (x,y) = (f x, f y)