/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Copyright (C) 1998-2001 Gerwin Klein <lsf@jflex.de> * * All rights reserved. * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License. See the file * * COPYRIGHT for more information. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License along * * with this program; if not, write to the Free Software Foundation, Inc., * * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ /* Java 1.2 language lexer specification */ /* Use together with unicode.flex for Unicode preprocessing */ /* and java12.grammar a Java 1.2 parser */ /* Note that this lexer specification is not tuned for speed. It is in fact quite slow on integer and floating point literals. For a production quality application (e.g. a Java compiler) this could be optimized */ /* 2003-12: Modified to work with Page parser generator */ /* 2004-07: ENUM, ASSERT keyword, ELLIPSIS token added to support java 1.5. 
/ TR, MP */

/*
 * JFlex specification for the scanner class parser.JavaScanner.
 *
 * The generated class extends beaver.Scanner; its scanning method is
 * nextToken(), which returns beaver.Symbol instances whose ids come from
 * parser.JavaParser.Terminals.  Three lexical states are used: YYINITIAL for
 * ordinary tokens, STRING for the body of a string literal and CHARLITERAL
 * for the body of a character literal.
 */

package parser;

import beaver.Symbol;
import beaver.Scanner;

import parser.JavaParser.Terminals;

%%

%public
%final
%class JavaScanner
%extends Scanner
%unicode
%function nextToken
%type Symbol
%yylexthrow Scanner.Exception
%eofval{
  // NOTE(review): an <<EOF>> rule is also declared at the very end of the
  // rules section; confirm which of the two end-of-file actions is in effect.
  return newSymbol(Terminals.EOF, "end-of-file");
%eofval}
%line
%column

%{
  /** Accumulates the decoded contents of the string literal currently being scanned. */
  StringBuffer string = new StringBuffer(128);

  /**
   * Creates a symbol for the current match whose value is the matched text.
   *
   * @param id terminal id of the token
   * @return a symbol positioned at the current match; yyline and yycolumn are
   *         shifted by one so that reported positions start at 1
   */
  private Symbol newSymbol(short id)
  {
    return new Symbol(id, yyline + 1, yycolumn + 1, yylength(), yytext());
  }

  /**
   * Creates a symbol for the current match with an explicit value.
   *
   * @param id    terminal id of the token
   * @param value value to attach to the symbol
   * @return a symbol positioned at the current match; yyline and yycolumn are
   *         shifted by one so that reported positions start at 1
   */
  private Symbol newSymbol(short id, Object value)
  {
    return new Symbol(id, yyline + 1, yycolumn + 1, yylength(), value);
  }
%}

/* main character classes */
LineTerminator = \r|\n|\r\n
InputCharacter = [^\r\n]

WhiteSpace = {LineTerminator} | [ \t\f]

/* comments */
/* Only the XML-style comment is active; the Java comment macros are kept
   below in disabled form. */
// Comment = {TraditionalComment} | {EndOfLineComment} |
//           {DocumentationComment}

/* The braces are required: without them the right-hand side is the literal
   text XMLComment rather than a use of the macro defined below. */
Comment = {XMLComment}

// TraditionalComment = "/*" [^*] ~"*/" | "/*" "*"+ "/"
// EndOfLineComment = "//" {InputCharacter}* {LineTerminator}?
// DocumentationComment = "/*" "*"+ [^/*] ~"*/"

/* NOTE(review): this pattern cannot span lines, and since the longest match
   wins it runs to the last closing marker on the line when a line holds
   several comments - confirm that this is intended. */
XMLComment = "<!--" {InputCharacter}* "-->"

/* identifiers */
Identifier = [:jletter:][:jletterdigit:]*

/* integer literals */
DecIntegerLiteral = 0 | [1-9][0-9]*
DecLongLiteral    = {DecIntegerLiteral} [lL]

HexIntegerLiteral = 0 [xX] 0* {HexDigit} {1,8}
HexLongLiteral    = 0 [xX] 0* {HexDigit} {1,16} [lL]
HexDigit          = [0-9a-fA-F]

OctIntegerLiteral = 0+ [1-3]? {OctDigit} {1,15}
OctLongLiteral    = 0+ 1? {OctDigit} {1,21} [lL]
OctDigit          = [0-7]

/* floating point literals */
FloatLiteral  = ({FLit1}|{FLit2}|{FLit3}) {Exponent}? [fF]
DoubleLiteral = ({FLit1}|{FLit2}|{FLit3}) {Exponent}?

FLit1    = [0-9]+ \. [0-9]*
FLit2    = \. [0-9]+
FLit3    = [0-9]+
Exponent = [eE] [+-]? [0-9]+

/* string and character literals */
StringCharacter = [^\r\n\"\\]
SingleCharacter = [^\r\n\'\\]

%state STRING, CHARLITERAL

%%

<YYINITIAL> {

  /* keywords */
  "assert"                       { return newSymbol(Terminals.ASSERT); }
  "abstract"                     { return newSymbol(Terminals.ABSTRACT); }
  "boolean"                      { return newSymbol(Terminals.BOOLEAN); }
  "break"                        { return newSymbol(Terminals.BREAK); }
  "byte"                         { return newSymbol(Terminals.BYTE); }
  "case"                         { return newSymbol(Terminals.CASE); }
  "catch"                        { return newSymbol(Terminals.CATCH); }
  "char"                         { return newSymbol(Terminals.CHAR); }
  "class"                        { return newSymbol(Terminals.CLASS); }
/*  "const"                        { return newSymbol(Terminals.CONST); }*/
  "continue"                     { return newSymbol(Terminals.CONTINUE); }
  "do"                           { return newSymbol(Terminals.DO); }
  "double"                       { return newSymbol(Terminals.DOUBLE); }
  "else"                         { return newSymbol(Terminals.ELSE); }
/*  "enum"                         { return newSymbol(Terminals.ENUM); }*/
  "extends"                      { return newSymbol(Terminals.EXTENDS); }
  "final"                        { return newSymbol(Terminals.FINAL); }
  "finally"                      { return newSymbol(Terminals.FINALLY); }
  "float"                        { return newSymbol(Terminals.FLOAT); }
  "for"                          { return newSymbol(Terminals.FOR); }
  "default"                      { return newSymbol(Terminals.DEFAULT); }
  "implements"                   { return newSymbol(Terminals.IMPLEMENTS); }
  "import"                       { return newSymbol(Terminals.IMPORT); }
  "instanceof"                   { return newSymbol(Terminals.INSTANCEOF); }
  "int"                          { return newSymbol(Terminals.INT); }
  "interface"                    { return newSymbol(Terminals.INTERFACE); }
  "long"                         { return newSymbol(Terminals.LONG); }
  "native"                       { return newSymbol(Terminals.NATIVE); }
  "new"                          { return newSymbol(Terminals.NEW); }
/*  "goto"                         { return newSymbol(Terminals.GOTO); }*/
  "if"                           { return newSymbol(Terminals.IF); }
  "public"                       { return newSymbol(Terminals.PUBLIC); }
  "short"                        { return newSymbol(Terminals.SHORT); }
  "super"                        { return newSymbol(Terminals.SUPER); }
  "switch"                       { return newSymbol(Terminals.SWITCH); }
  "synchronized"                 { return newSymbol(Terminals.SYNCHRONIZED); }
  "package"                      { return newSymbol(Terminals.PACKAGE); }
  "private"                      { return newSymbol(Terminals.PRIVATE); }
  "protected"                    { return newSymbol(Terminals.PROTECTED); }
  "transient"                    { return newSymbol(Terminals.TRANSIENT); }
  "return"                       { return newSymbol(Terminals.RETURN); }
  "void"                         { return newSymbol(Terminals.VOID); }
  "static"                       { return newSymbol(Terminals.STATIC); }
  "while"                        { return newSymbol(Terminals.WHILE); }
  "this"                         { return newSymbol(Terminals.THIS); }
  "throw"                        { return newSymbol(Terminals.THROW); }
  "throws"                       { return newSymbol(Terminals.THROWS); }
  "try"                          { return newSymbol(Terminals.TRY); }
  "volatile"                     { return newSymbol(Terminals.VOLATILE); }
  "strictfp"                     { return newSymbol(Terminals.STRICTFP); }

  /* boolean literals */
  "true"                         { return newSymbol(Terminals.BOOLEAN_LITERAL, "true"); }
  "false"                        { return newSymbol(Terminals.BOOLEAN_LITERAL, "false"); }

  /* null literal */
  "null"                         { return newSymbol(Terminals.NULL_LITERAL); }

  /* separators */
  "("                            { return newSymbol(Terminals.LPAREN); }
  ")"                            { return newSymbol(Terminals.RPAREN); }
  "{"                            { return newSymbol(Terminals.LBRACE); }
  "}"                            { return newSymbol(Terminals.RBRACE); }
  "["                            { return newSymbol(Terminals.LBRACK); }
  "]"                            { return newSymbol(Terminals.RBRACK); }
  ";"                            { return newSymbol(Terminals.SEMICOLON); }
  ","                            { return newSymbol(Terminals.COMMA); }
  "."                            { return newSymbol(Terminals.DOT); }

  /* operators */
  "="                            { return newSymbol(Terminals.EQ); }
  ">"                            { return newSymbol(Terminals.GT); }
  "<"                            { return newSymbol(Terminals.LT); }
  "!"                            { return newSymbol(Terminals.NOT); }
  "~"                            { return newSymbol(Terminals.COMP); }
  "?"                            { return newSymbol(Terminals.QUESTION); }
  ":"                            { return newSymbol(Terminals.COLON); }
  "=="                           { return newSymbol(Terminals.EQEQ); }
  "<="                           { return newSymbol(Terminals.LTEQ); }
  ">="                           { return newSymbol(Terminals.GTEQ); }
  "!="                           { return newSymbol(Terminals.NOTEQ); }
  "&&"                           { return newSymbol(Terminals.ANDAND); }
  "||"                           { return newSymbol(Terminals.OROR); }
  "++"                           { return newSymbol(Terminals.PLUSPLUS); }
  "--"                           { return newSymbol(Terminals.MINUSMINUS); }
  "+"                            { return newSymbol(Terminals.PLUS); }
  "-"                            { return newSymbol(Terminals.MINUS); }
  "*"                            { return newSymbol(Terminals.MULT); }
  "/"                            { return newSymbol(Terminals.DIV); }
  "&"                            { return newSymbol(Terminals.AND); }
  "|"                            { return newSymbol(Terminals.OR); }
  "^"                            { return newSymbol(Terminals.XOR); }
  "%"                            { return newSymbol(Terminals.MOD); }
  "<<"                           { return newSymbol(Terminals.LSHIFT); }
  ">>"                           { return newSymbol(Terminals.RSHIFT); }
  ">>>"                          { return newSymbol(Terminals.URSHIFT); }
  "+="                           { return newSymbol(Terminals.PLUSEQ); }
  "-="                           { return newSymbol(Terminals.MINUSEQ); }
  "*="                           { return newSymbol(Terminals.MULTEQ); }
  "/="                           { return newSymbol(Terminals.DIVEQ); }
  "&="                           { return newSymbol(Terminals.ANDEQ); }
  "|="                           { return newSymbol(Terminals.OREQ); }
  "^="                           { return newSymbol(Terminals.XOREQ); }
  "%="                           { return newSymbol(Terminals.MODEQ); }
  "<<="                          { return newSymbol(Terminals.LSHIFTEQ); }
  ">>="                          { return newSymbol(Terminals.RSHIFTEQ); }
  ">>>="                         { return newSymbol(Terminals.URSHIFTEQ); }
/*  "..."                          { return newSymbol(Terminals.ELLIPSIS); }*/

  /* string literal: switch to STRING and start with an empty buffer */
  \"                             { yybegin(STRING); string.setLength(0); }

  /* character literal: the body is matched in CHARLITERAL */
  \'                             { yybegin(CHARLITERAL); }

  /* numeric literals; the l/L, f/F and d/D suffixes are stripped from the symbol value */

  {DecIntegerLiteral}            { return newSymbol(Terminals.INTEGER_LITERAL, yytext()); }
  {DecLongLiteral}               { return newSymbol(Terminals.LONG_LITERAL, yytext().substring(0,yylength()-1)); }

  {HexIntegerLiteral}            { return newSymbol(Terminals.INTEGER_LITERAL, yytext()); }
  {HexLongLiteral}               { return newSymbol(Terminals.LONG_LITERAL, yytext().substring(0, yylength()-1)); }

  {OctIntegerLiteral}            { return newSymbol(Terminals.INTEGER_LITERAL, yytext()); }
  {OctLongLiteral}               { return newSymbol(Terminals.LONG_LITERAL, yytext().substring(0, yylength()-1)); }

  {FloatLiteral}                 { return newSymbol(Terminals.FLOATING_POINT_LITERAL, yytext().substring(0,yylength()-1)); }
  {DoubleLiteral}                { return newSymbol(Terminals.DOUBLE_LITERAL, yytext()); }
  {DoubleLiteral}[dD]            { return newSymbol(Terminals.DOUBLE_LITERAL, yytext().substring(0,yylength()-1)); }

  /* comments */
  {Comment}                      { /* ignore */ }

  /* whitespace */
  {WhiteSpace}                   { /* ignore */ }

  /* identifiers */
  {Identifier}                   { return newSymbol(Terminals.IDENTIFIER, yytext()); }
}

<STRING> {
  /* NOTE(review): the symbol is created while the closing quote is the
     current match, so its position and length are those of the closing
     quote - confirm that consumers do not need the start of the literal. */
  \"                             { yybegin(YYINITIAL); return newSymbol(Terminals.STRING_LITERAL, string.toString()); }

  {StringCharacter}+             { string.append( yytext() ); }

  /* escape sequences */
  "\\b"                          { string.append( '\b' ); }
  "\\t"                          { string.append( '\t' ); }
  "\\n"                          { string.append( '\n' ); }
  "\\f"                          { string.append( '\f' ); }
  "\\r"                          { string.append( '\r' ); }
  "\\\""                         { string.append( '\"' ); }
  "\\'"                          { string.append( '\'' ); }
  "\\\\"                         { string.append( '\\' ); }
  \\[0-3]?{OctDigit}?{OctDigit}  { char val = (char) Integer.parseInt(yytext().substring(1),8);
                                   string.append( val ); }

  /* error cases */
  \\.                            { throw new RuntimeException("Illegal escape sequence \""+yytext()+"\""); }
  {LineTerminator}               { throw new RuntimeException("Unterminated string at end of line"); }
}

<CHARLITERAL> {
  /* Each rule also consumes the closing quote and returns to YYINITIAL.
     The symbol value is a one-character String. */
  {SingleCharacter}\'            { yybegin(YYINITIAL); return newSymbol(Terminals.CHARACTER_LITERAL, String.valueOf(yytext().charAt(0))); }

  /* escape sequences */
  "\\b"\'                        { yybegin(YYINITIAL); return newSymbol(Terminals.CHARACTER_LITERAL, String.valueOf('\b')); }
  "\\t"\'                        { yybegin(YYINITIAL); return newSymbol(Terminals.CHARACTER_LITERAL, String.valueOf('\t')); }
  "\\n"\'                        { yybegin(YYINITIAL); return newSymbol(Terminals.CHARACTER_LITERAL, String.valueOf('\n')); }
  "\\f"\'                        { yybegin(YYINITIAL); return newSymbol(Terminals.CHARACTER_LITERAL, String.valueOf('\f')); }
  "\\r"\'                        { yybegin(YYINITIAL); return newSymbol(Terminals.CHARACTER_LITERAL, String.valueOf('\r')); }
  "\\\""\'                       { yybegin(YYINITIAL); return newSymbol(Terminals.CHARACTER_LITERAL, String.valueOf('\"')); }
  "\\'"\'                        { yybegin(YYINITIAL); return newSymbol(Terminals.CHARACTER_LITERAL, String.valueOf('\'')); }
  "\\\\"\'                       { yybegin(YYINITIAL); return newSymbol(Terminals.CHARACTER_LITERAL, String.valueOf('\\')); }
  \\[0-3]?{OctDigit}?{OctDigit}\' { yybegin(YYINITIAL);
                                    int val = Integer.parseInt(yytext().substring(1,yylength()-1),8);
                                    return newSymbol(Terminals.CHARACTER_LITERAL, String.valueOf((char) val)); }

  /* error cases */
  \\.                            { throw new RuntimeException("Illegal escape sequence \""+yytext()+"\""); }
  {LineTerminator}               { throw new RuntimeException("Unterminated character literal at end of line"); }
}

/* error fallback */
/* yyline and yycolumn are shifted by one here so that the reported position
   uses the same numbering as the symbols built by newSymbol. */
.|\n                             { throw new RuntimeException("Illegal character \""+yytext()+
                                                              "\" at line "+(yyline+1)+", column "+(yycolumn+1)); }
<<EOF>>                          { return newSymbol(Terminals.EOF); }