package se.chalmers.cs.pgf.raw; import se.chalmers.cs.pgf.PGFException; class Lexer { public static enum TokenType { OPEN_PAREN, CLOSE_PAREN, IDENT, STRING, INTEGER, DOUBLE, META; } public static class Token { public final TokenType type; public final String str; public Token(TokenType type, String str) { this.type = type; this.str = str; } public String toString() { return "[" + type + ":" + str + "]"; } } private CharSequence input; private int pos; public Lexer(CharSequence input) { this.input = input; this.pos = 0; } public boolean isEmpty() { return pos >= input.length(); } private char nextChar() { return input.charAt(pos++); } private char lookAhead() { return input.charAt(pos); } private void skipWhitespace() { while (!isEmpty() && Character.isSpaceChar(lookAhead())) nextChar(); } public int getPos() { return pos; } public boolean isDone() { skipWhitespace(); return isEmpty(); } // optimization: avoid allocating multiple copies of these private static final Token openParen = new Token(TokenType.OPEN_PAREN, "("); private static final Token closeParen = new Token(TokenType.CLOSE_PAREN, ")"); private static final Token meta = new Token(TokenType.META, "?"); public Token nextToken() { skipWhitespace(); char c = nextChar(); switch (c) { case '(': return openParen; case ')': return closeParen; case '?': return meta; case '"': return munchString(); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return munchNumber(c); default: if (isIdentFirst(c)) { return munchIdent(c); } else { CharSequence excerpt = input.subSequence(pos-1, Math.min(pos+9, input.length())); throw new PGFException("Lexical error at character "+ pos + ": " + excerpt); } } } private Token munchString() { int start = pos-1; // string started at the opening double quote StringBuilder sb = new StringBuilder(); loop: while (true) { char c = nextStringChar(start); switch (c) { case '"': break loop; case '\\': c = nextStringChar(start); // fall through to append default: sb.append(c); } } return new Token(TokenType.STRING, sb.toString()); } private char nextStringChar(int start) { if (isEmpty()) throw new PGFException("Unterminated string literal, started at character: " + start); return nextChar(); } private Token munchNumber(char firstChar) { StringBuilder sb = new StringBuilder(); sb.append(firstChar); // first char is already known to be a digit munchDigits(sb); if (lookAhead() == '.') { sb.append('.'); munchDigits(sb); return new Token(TokenType.DOUBLE, sb.toString()); } else { return new Token(TokenType.INTEGER, sb.toString()); } } private void munchDigits(StringBuilder sb) { while (!isEmpty() && isDigit(lookAhead())) { sb.append(nextChar()); } } private boolean isDigit(char c) { return c >= '0' && c <= '9'; } private boolean isIdentFirst(char c) { return c == '_' || Character.isLetter(c); } private boolean isIdentRest(char c) { return c == '_' || c == '\'' || Character.isLetterOrDigit(c); } private Token munchIdent(char firstChar) { StringBuilder sb = new StringBuilder(); sb.append(firstChar); // we already know that isIdentFirst(firstChar) while (!isEmpty() && isIdentRest(lookAhead())) { sb.append(nextChar()); } return new Token(TokenType.IDENT, sb.toString()); } }