/* JFlex code for MiniJava lexer specification */

import java_cup.runtime.*;

/**
 * Scanner for MiniJava source text, generated by JFlex from this
 * specification. Each call to next_token() returns one
 * java_cup.runtime.Symbol whose id is a constant from MJParse_sym.
 */
%%

%class MJLex
%unicode
%cup
%line
%column
// %implements java_cup.runtime.Scanner
%function next_token
%type java_cup.runtime.Symbol

%{
  /** Collects the characters of the string literal currently being scanned. */
  StringBuffer string = new StringBuffer();

  /**
   * Builds a value-less token positioned at the current match.
   *
   * @param type token id, one of the MJParse_sym constants
   * @return a symbol whose left/right fields hold yyline/yycolumn
   */
  public java_cup.runtime.Symbol symbol(int type) {
    return new java_cup.runtime.Symbol(type, yyline, yycolumn);
  }

  /**
   * Builds a token that carries a semantic value, positioned at the current match.
   *
   * @param type  token id, one of the MJParse_sym constants
   * @param value semantic value attached to the token (identifier text, number, string)
   * @return a symbol whose left/right fields hold yyline/yycolumn
   */
  public java_cup.runtime.Symbol symbol(int type, Object value) {
    return new java_cup.runtime.Symbol(type, yyline, yycolumn, value);
  }

  /**
   * Renders a token as one tab-separated debug line: numeric id, printable
   * name (see opName) and position. STRING, NUMBER and IDENTIFIER tokens
   * additionally show their value between '|' characters.
   *
   * NOTE(review): the printed line/column are whatever symbol() stored, i.e.
   * raw yyline/yycolumn; the JFlex manual documents those as starting at 0.
   *
   * @param s the token to describe
   * @return the formatted description
   */
  public static String toBString(java_cup.runtime.Symbol s) {
    String head = s.sym + "\t" + opName(s) + "\t[line=" + s.left + ", column=" + s.right;
    switch (s.sym) {
      case MJParse_sym.STRING:
      case MJParse_sym.NUMBER:
      case MJParse_sym.IDENTIFIER:
        // Plain concatenation prints "null" for a missing value instead of
        // throwing a NullPointerException.
        return head + ", value=|" + s.value + "|]";
      default:
        return head + "]";
    }
  }

  /**
   * Maps a token id to a printable name: the source spelling for punctuation,
   * operators and keywords, a category name ("ID", "NUM", "STRING") for
   * value-carrying tokens, and "???" for any id not listed here.
   *
   * @param s the token to name
   * @return the printable name
   */
  public static String opName(java_cup.runtime.Symbol s) {
    switch (s.sym) {
      case MJParse_sym.LBRACE:     return "{";
      case MJParse_sym.RBRACE:     return "}";
      case MJParse_sym.LBRACK:     return "[";
      case MJParse_sym.RBRACK:     return "]";
      case MJParse_sym.LPAR:       return "(";
      case MJParse_sym.RPAR:       return ")";
      case MJParse_sym.INT:        return "int";
      case MJParse_sym.BOOL:       return "boolean";
      case MJParse_sym.IF:         return "if";
      case MJParse_sym.ELSE:       return "else";
      case MJParse_sym.WHILE:      return "while";
      case MJParse_sym.PRINT:      return "System.out.println";
      case MJParse_sym.RETURN:     return "return";
      case MJParse_sym.OPCAND:     return "&&";
      case MJParse_sym.OPCLT:      return "<";
      case MJParse_sym.OPCPLUS:    return "+";
      case MJParse_sym.OPCMINUS:   return "-";
      case MJParse_sym.OPCTIMES:   return "*";
      case MJParse_sym.STATIC:     return "static";
      case MJParse_sym.VOID:       return "void";
      case MJParse_sym.MAIN:       return "main";
      case MJParse_sym.KSTRING:    return "String";
      case MJParse_sym.LEN:        return "length";
      case MJParse_sym.NEW:        return "new";
      case MJParse_sym.KTRUE:      return "true";
      case MJParse_sym.KFALSE:     return "false";
      case MJParse_sym.THIS:       return "this";
      case MJParse_sym.PUBLIC:     return "public";
      case MJParse_sym.CLASS:      return "class";
      case MJParse_sym.EXTENDS:    return "extends";
      case MJParse_sym.EQUALS:     return "=";
      case MJParse_sym.DOT:        return ".";
      case MJParse_sym.COMMA:      return ",";
      case MJParse_sym.SEMICOL:    return ";";
      case MJParse_sym.EXCL:       return "!";
      case MJParse_sym.IDENTIFIER: return "ID";
      case MJParse_sym.NUMBER:     return "NUM";
      case MJParse_sym.STRING:     return "STRING";
      default:                     return "???";
    }
  }
%}

LineTerminator = \r|\n|\r\n
InputCharacter = [^\r\n]
WhiteSpace     = {LineTerminator} | [ \t\f]

/* comments */
Comment = {TraditionalComment} | {EndOfLineComment}

/* NB: nested comments are NOT supported by the lexer! */
TraditionalComment = "/*" [^*] ~"*/" | "/*" "*"+ "/"
/* The line terminator is optional so that an end-of-line comment on the
   very last line of a file that lacks a final newline is still accepted. */
EndOfLineComment   = "//" {InputCharacter}* {LineTerminator}?

//Identifier = [:jletter:] [:jletterdigit:]*
Identifier = [a-zA-Z] [a-zA-Z0-9'_]*

DecIntegerLiteral = 0 | [1-9][0-9]*

%state INSTRING

%eofval{
  return symbol(MJParse_sym.EOF);
%eofval}

%%

<YYINITIAL> {
  /* symbols */
  "{"                   { return symbol(MJParse_sym.LBRACE); }
  "}"                   { return symbol(MJParse_sym.RBRACE); }
  "["                   { return symbol(MJParse_sym.LBRACK); }
  "]"                   { return symbol(MJParse_sym.RBRACK); }
  "("                   { return symbol(MJParse_sym.LPAR); }
  ")"                   { return symbol(MJParse_sym.RPAR); }

  /* operators */
  "&&"                  { return symbol(MJParse_sym.OPCAND); }
  "<"                   { return symbol(MJParse_sym.OPCLT); }
  "+"                   { return symbol(MJParse_sym.OPCPLUS); }
  "-"                   { return symbol(MJParse_sym.OPCMINUS); }
  "*"                   { return symbol(MJParse_sym.OPCTIMES); }
  "="                   { return symbol(MJParse_sym.EQUALS); }
  "."                   { return symbol(MJParse_sym.DOT); }
  ","                   { return symbol(MJParse_sym.COMMA); }
  ";"                   { return symbol(MJParse_sym.SEMICOL); }
  "!"                   { return symbol(MJParse_sym.EXCL); }

  /* keywords */
  "int"                 { return symbol(MJParse_sym.INT); }
  "boolean"             { return symbol(MJParse_sym.BOOL); }
  "if"                  { return symbol(MJParse_sym.IF); }
  "else"                { return symbol(MJParse_sym.ELSE); }
  "while"               { return symbol(MJParse_sym.WHILE); }
  "System.out.println"  { return symbol(MJParse_sym.PRINT); }
  "return"              { return symbol(MJParse_sym.RETURN); }
  "static"              { return symbol(MJParse_sym.STATIC); }
  "void"                { return symbol(MJParse_sym.VOID); }
  "main"                { return symbol(MJParse_sym.MAIN); }
  "String"              { return symbol(MJParse_sym.KSTRING); }
  "length"              { return symbol(MJParse_sym.LEN); }
  "new"                 { return symbol(MJParse_sym.NEW); }
  "true"                { return symbol(MJParse_sym.KTRUE); }
  "false"               { return symbol(MJParse_sym.KFALSE); }
  "this"                { return symbol(MJParse_sym.THIS); }
  "public"              { return symbol(MJParse_sym.PUBLIC); }
  "class"               { return symbol(MJParse_sym.CLASS); }
  "extends"             { return symbol(MJParse_sym.EXTENDS); }

  /* comments */
  {Comment}             { /* ignore */ }

  /* whitespace */
  {WhiteSpace}          { /* ignore */ }

  /* identifiers */
  {Identifier}          { return symbol(MJParse_sym.IDENTIFIER, yytext()); }

  /* literals */
  {DecIntegerLiteral}   { return symbol(MJParse_sym.NUMBER, Integer.valueOf(yytext())); }

  /* opening quote: start collecting a string literal */
  \"                    { string.setLength(0); yybegin(INSTRING); }
}

<INSTRING> {
  /* closing quote: emit the collected text and resume normal scanning */
  \"                    { yybegin(YYINITIAL);
                          return symbol(MJParse_sym.STRING, string.toString()); }

  /* run of ordinary characters */
  [^\n\r\"\\]+          { string.append( yytext() ); }

  /* escape sequences */
  \\t                   { string.append('\t'); }
  \\n                   { string.append('\n'); }
  \\r                   { string.append('\r'); }
  \\\"                  { string.append('\"'); }
  \\                    { string.append('\\'); }
}

/* error fallback */
.|\n                    { throw new Error("Illegal character <"+ yytext()+">"); }