module MkLexicon where import Char import qualified Data.Set as S -- apply function o to each line in file f allLines o f = do s <- readFile f mapM_ (putStrLn . o) (filter noComm (lines s)) -- take each line in file k as a key, and choose those lines in file f -- that match any of the keys (i.e. whose first word matches it) chooseKeys k f = do keys <- readFile k >>= return . (S.fromList . lines) let choose line = case words line of w:_ -> S.member w keys _ -> False old <- readFile f >>= return . lines mapM_ (putStrLn . drop 2) $ filter choose old -- to remove extra indent -- discard comments and empty lines noComm s = case s of '-':'-':_ -> False "" -> False _ -> True -- remove tailing comments remTail s = case s of '-':'-':_ -> [] c:cs -> c : remTail cs _ -> s -- postfix with category postfix p s = takeWhile (not . isSpace) s ++ "_" ++ p -- make fun rule mkFun s = let (w,p) = span (/='_') s in " " ++ s ++ " : " ++ tail p ++ " ;" -- make reused lin rule mkRLin s = let w = head (words s) in " " ++ w ++ " = " ++ w ++ " ;" -- make regular lin rule mkLin s = let (w,p) = span (/='_') s in " " ++ s ++ " = " ++ lin (tail p) w ++ " ;" where lin cat w = case cat of "V2" -> "dirV2 (regV" ++ " \"" ++ w ++ "\")" 'V':_ -> "mk" ++ cat ++ " (regV" ++ " \"" ++ w ++ "\")" _ -> "reg" ++ cat ++ " \"" ++ w ++ "\"" -- normalize identifiers in Structural mkIdent s = case words s of w:ws -> if obsolete w then "" else " " ++ (unwords $ mkId (update w) : ws) _ -> s where mkId name@(c:cs) = let (x,y) = span isCat cs in toLower c : clean x ++ "_" ++ new y isCat = flip notElem "PDNVCAIS" clean x = case span isLower x of (_,[]) -> x (u,v) -> u ++ "8" ++ map toLower v new y = case y of "NumDet" -> "NDet" _ -> y obsolete w = elem w $ words "TheseNumNP ThoseNumNP NobodyNP NeitherNor NoDet AnyDet" update w = case w of "EitherOr" -> "EitherOrConjD" "BothAnd" -> "BothAndConjD" "PhrYes" -> "YesPhr" "PhrNo" -> "NoPhr" "WeNumNP" -> "WeNP" "YeNumNP" -> "YeNP" "HowManyDet" -> "HowManyIDet" "MostsDet" -> "MostManyDet" "WhichDet" -> "WhichOneIDet" "WhichNDet" -> "WhichManyIDet" "EverywhereNP" -> "EverywhereAdv" "SomewhereNP" -> "SomewhereAdv" "AgentPrep" -> "By8agentPrep" _ -> w -- massage French verbs 9/2/2005 freVerb s = case words s of v:_ -> " " ++ v ++ " : " ++ cat v ++ " ;" _ -> [] where cat v = dropWhile (not . isUpper) v -- Swedish verbs 17/2 sweVerb s = case words s of ('v':a:u:[]):verb:_ -> "fun " ++ verb ++ " : V ;\n" ++ "lin " ++ verb ++ " = " ++ infl a u verb ++ " ;" _ -> [] where infl a u verb = let (dne,geb) = span isConsonant $ tail $ reverse verb (beg,voc,end) = (reverse (tail geb), head geb, reverse dne) (pret,sup) = (beg++ [toLower a] ++end, beg++ [toLower u] ++ end ++"it") in unwords ["irregV", prQuot verb, prQuot pret, prQuot sup] prQuot s = "\"" ++ s ++ "\"" isConsonant = not . isVowel isVowel = flip elem "aeiouyäöå" -- Norwegian 13/3 groupLines :: [String] -> [String] groupLines ss = [unwords [a, b, c] | [a,_,b,c,_] <- grps ss] where grps ls = let (g,rest) = splitAt 5 ls in g:grps rest lin2fun s = case words s of _:fun:_:_ -> " fun " ++ fun ++ " : " ++ cat fun ++ " ;" _ -> s where cat fun = reverse (takeWhile (/='_') (reverse fun)) -- filter from a given file those lines whose first word is in a sought-set allThose :: [String] -> [String] -> [String] allThose soughts givens = concatMap seek soughts where seek w = let s = [line | line <- givens, w':_ <- [words line], w == w'] in if null s then ["-- " ++ w] else s -- do this with files -- example: getAllThose "abstract/Mtmp" "english/BasicEng.gf" getAllThose :: FilePath -> FilePath -> IO () getAllThose sought given = do s <- readFile sought gi <- readFile given let so = [w | l <- lines s, w:_ <- [words l]] mapM_ putStrLn $ allThose so $ lines gi -- Swadesh 7/3/2006: replace string defs with f = f ; reuseLex line = case words line of w : "=" : _ | elem '"' line -> " " ++ w ++ " = " ++ w ++ " ;" _ -> line