module LexCore where
import ParserCoreUtils
import Ratio
import Char
import qualified Numeric( readFloat, readDec )
-- | True for characters that may appear inside a name: letters, decimal
-- digits, the underscore and the single quote (prime).
isNameChar :: Char -> Bool
isNameChar c = isAlpha c || isDigit c || c `elem` "_'"
-- | True for characters that may appear in a @%keyword@: letters and the
-- underscore only (no digits, no primes).
isKeywordChar :: Char -> Bool
isKeywordChar c = c == '_' || isAlpha c
-- | Continuation-passing lexer: recognise the next token at the front of
-- the input and hand it, together with the remaining input, to @cont@.
--
-- Whitespace is skipped.  A newline additionally increments the line
-- number that is threaded through as the extra argument of the result.
-- Names, numbers, keywords, character literals and string literals are
-- delegated to 'lexName', 'lexNum', 'lexKeyword', 'lexChar' and
-- 'lexString'.  Any character that starts no token is reported via 'failP'.
lexer :: (Token -> P a) -> P a
lexer cont input =
  case input of
    [] -> cont TKEOF []
    -- A newline yields no token; it only bumps the line counter.
    '\n' : rest -> \line -> lexer cont rest (line + 1)
    -- Two-character tokens are matched before any single-character handling,
    -- so "->" is never mistaken for the start of a negative number.
    '-' : '>' : rest -> cont TKrarrow rest
    ':' : ':' : rest -> cont TKcoloncolon rest
    c : rest
      | isSpace c -> lexer cont rest
      | isLower c || c == '_' -> lexName cont TKname input
      | isUpper c -> lexName cont TKcname input
      | isDigit c || c == '-' -> lexNum cont input
      | otherwise ->
          case c of
            '%' -> lexKeyword cont rest
            '\'' -> lexChar cont rest
            '"' -> lexString [] cont rest
            '#' -> cont TKhash rest
            '(' -> cont TKoparen rest
            ')' -> cont TKcparen rest
            '{' -> cont TKobrace rest
            '}' -> cont TKcbrace rest
            '=' -> cont TKeq rest
            '*' -> cont TKstar rest
            '.' -> cont TKdot rest
            '\\' -> cont TKlambda rest
            '@' -> cont TKat rest
            '?' -> cont TKquestion rest
            ';' -> cont TKsemicolon rest
            _ -> failP "invalid character" [c]
-- | Lex the remainder of a character literal; 'lexer' has already consumed
-- the opening quote.  Accepted forms are a single ordinary character
-- followed by a closing quote, or a @\\xHH@ escape (two hex digits accepted
-- by 'isHexEscape') followed by a closing quote.  On success the 'TKchar'
-- token and the input after the closing quote are passed to @cont@.
--
-- Every malformed literal is reported through 'failP'.
lexChar cont ('\\':'x':h1:h0:'\'':cs)
  | isHexEscape [h1,h0] = cont (TKchar (hexToChar h1 h0)) cs
-- Any other backslash sequence (including a hex escape that failed the guard
-- above) is invalid; show a little context after the backslash.
lexChar cont ('\\':cs) = failP "invalid char character" ('\\':(take 10 cs))
-- A bare quote or double quote may not be used as the character itself.
lexChar cont ('\'':cs) = failP "invalid char character" ['\'']
lexChar cont ('\"':cs) = failP "invalid char character" ['\"']
lexChar cont (c:'\'':cs) = cont (TKchar c) cs
-- Catch-all: end of input, a missing closing quote, or more than one
-- character between the quotes.  Without this clause such input would abort
-- with a pattern-match failure instead of producing a lexer error.
lexChar cont cs = failP "invalid char character" (take 10 cs)
-- | Lex the remainder of a string literal; 'lexer' has already consumed the
-- opening double quote.  @s@ is the text accumulated so far (callers start
-- with @[]@).  On reaching the closing double quote, the 'TKstring' token
-- and the input after the quote are passed to @cont@.
--
-- Only @\\xHH@ escapes (two hex digits accepted by 'isHexEscape') are
-- allowed after a backslash, and a single quote is rejected inside a string.
-- Both errors, and an unterminated string, are reported through 'failP'.
lexString s cont input = go (reverse s) input
  where
    -- The accumulator is kept reversed so that each character is added in
    -- constant time; it is reversed once when the string is complete.
    go acc ('\\':'x':h1:h0:cs)
      | isHexEscape [h1,h0] = go (hexToChar h1 h0 : acc) cs
    -- Any other backslash sequence (including a failed hex escape) is invalid.
    go _ ('\\':_) = failP "invalid string character" ['\\']
    go _ ('\'':_) = failP "invalid string character" ['\'']
    go acc ('\"':cs) = cont (TKstring (reverse acc)) cs
    go acc (c:cs) = go (c : acc) cs
    -- End of input before the closing quote: report an error instead of
    -- aborting with a pattern-match failure.
    go _ [] = failP "unterminated string" []
-- | True when every character is a hexadecimal digit written as a decimal
-- digit or a lower-case letter; upper-case hex digits are rejected.
-- The empty string is vacuously accepted.
isHexEscape :: String -> Bool
isHexEscape = all lowerHexDigit
  where
    lowerHexDigit c = isHexDigit c && (isDigit c || isLower c)
-- | Build the character whose code is the two-digit hexadecimal number
-- @h1 h0@ (@h1@ is the high nibble, @h0@ the low nibble).
hexToChar :: Char -> Char -> Char
hexToChar h1 h0 = chr (16 * high + low)
  where
    high = digitToInt h1
    low = digitToInt h0
-- | Lex a numeric literal with an optional leading @-@.  A run of digits
-- followed by @.@ and at least one more digit becomes a 'TKrational';
-- a plain run of digits becomes a 'TKinteger'.  The token and the input
-- following the literal are passed to @cont@.
--
-- A sign that is not followed by a digit is reported through 'failP'.
lexNum cont cs =
  case cs of
    ('-':rest) -> lexUnsigned (-1) rest
    _          -> lexUnsigned 1 cs
  where
    lexUnsigned sgn str =
      case span isDigit str of
        -- No digits at all (e.g. a lone '-' or "-.5"): previously this fell
        -- through to @read ""@ or a failed pattern binding and crashed.
        ([], _) -> failP "invalid number" (take 10 cs)
        (digits, '.':c:rest)
          | isDigit c ->
              -- When reading a floating-point number, which is
              -- a bit complicated, use the Haskell 98 library function
              case readFloat (digits ++ ('.':c:rest)) of
                ((r, rest'):_) -> cont (TKrational (fromInteger sgn * r)) rest'
                []             -> failP "invalid number" (take 10 cs)
        -- @digits@ is non-empty and all decimal here, so 'read' cannot fail.
        (digits, rest) -> cont (TKinteger (sgn * read digits)) rest
-- | Lex a name: take the longest prefix of name characters (see
-- 'isNameChar'), wrap it with the token constructor @cstr@, and pass the
-- token plus the remaining input to @cont@.
lexName cont cstr cs =
  let (ident, remaining) = span isNameChar cs
  in cont (cstr ident) remaining
-- | Lex a keyword; 'lexer' has already consumed the leading @%@.  The
-- longest prefix of keyword characters (see 'isKeywordChar') is looked up
-- in the keyword table.  A known keyword passes its token and the remaining
-- input to @cont@; anything else is reported through 'failP'.
lexKeyword cont cs =
  case lookup word keywordTokens of
    Just tok -> cont tok rest
    Nothing  -> failP "invalid keyword" ('%':cs)
  where
    (word, rest) = span isKeywordChar cs
    keywordTokens =
      [ ("module",   TKmodule)
      , ("data",     TKdata)
      , ("newtype",  TKnewtype)
      , ("forall",   TKforall)
      , ("rec",      TKrec)
      , ("let",      TKlet)
      , ("in",       TKin)
      , ("case",     TKcase)
      , ("of",       TKof)
      , ("coerce",   TKcoerce)
      , ("note",     TKnote)
      , ("external", TKexternal)
      , ("_",        TKwild)
      ]
-- | Unqualified local name for 'Numeric.readFloat', which is imported
-- qualified in this module.
readFloat :: (RealFrac a) => ReadS a
readFloat str = Numeric.readFloat str