From 61505fb5919b72f2b300508e3e64896946fa24ab Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 6 Jun 2014 15:37:50 -0400 Subject: [PATCH 3/3] Use JLex to bootstrap the lexer. --- build.xml | 32 ++++++++++------ jflex2jlex.patch | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ jflex2jlex.sed | 97 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 228 insertions(+), 11 deletions(-) create mode 100644 jflex2jlex.patch create mode 100644 jflex2jlex.sed diff --git a/build.xml b/build.xml index 334d439..dea8a54 100644 --- a/build.xml +++ b/build.xml @@ -2,7 +2,6 @@ - @@ -14,14 +13,13 @@ - + + - @@ -50,7 +48,6 @@ - @@ -114,13 +111,26 @@ - - + + + + + + + + + + diff --git a/jflex2jlex.patch b/jflex2jlex.patch new file mode 100644 index 0000000..5b2113b --- /dev/null +++ b/jflex2jlex.patch @@ -0,0 +1,110 @@ +--- LexScan.lex 2014-06-06 13:46:02.242828126 -0400 ++++ src/main/jflex/LexScan.flex 2014-06-06 13:07:58.611333045 -0400 +@@ -203,28 +203,57 @@ + throw new ScannerException(file,ErrorMessages.NOT_READABLE, yyline); + } + } ++ ++ int yycolumn = -1; ++ ++ private static final String ZZ_PUSHBACK_2BIG = "Error: pushback value was too large"; ++ private static final String ZZ_UNIMPLEMENTED = "Error: unimplemented feature"; ++ private void zzScanError(String message) { ++ System.out.println(message); ++ System.out.flush(); ++ throw new Error("Fatal Error.\n"); ++ } ++ ++ private boolean yymoreStreams() { return false; } ++ private void yypushStream(java.io.Reader reader) { zzScanError(ZZ_UNIMPLEMENTED); } ++ private void yypopStream() { zzScanError(ZZ_UNIMPLEMENTED); } ++ ++ private void yypushback(int number) { ++ if ( number > yylength() ) ++ zzScanError(ZZ_PUSHBACK_2BIG); ++ yy_buffer_end -= number; ++ yy_buffer_index = yy_buffer_end; ++ } + %} + + %init{ + states.insert("YYINITIAL", true); + %init} + ++%eofval{ ++ if ( yymoreStreams() ) { ++ file = (File) files.pop(); ++ yypopStream(); ++ } ++ else ++ return symbol(EOF); ++%eofval} + + Digit = ([0-9]) + HexDigit = ([0-9a-fA-F]) + OctDigit = ([0-7]) + + Number = ({Digit}+) +-HexNumber = (\\x{HexDigit}{2}) +-OctNumber = (\\[0-3]?{OctDigit}{1,2}) ++HexNumber = (\\x{HexDigit}{HexDigit}) ++OctNumber = (\\[0-3]?{OctDigit}{OctDigit}?) + +-Unicode4 = (\\u{HexDigit}{1,4}) ++Unicode4 = (\\u{HexDigit}({HexDigit}({HexDigit}{HexDigit}?)?)?) + +-Unicode6 = (\\U{HexDigit}{1,6}) ++Unicode6 = (\\U{HexDigit}({HexDigit}({HexDigit}({HexDigit}({HexDigit}{HexDigit}?)?)?)?)?) + + WSP = ([ \t\b]) + WSPNL = ([\u2028\u2029\u000A\u000B\u000C\u000D\u0085\t\b\ ]) +-NWSPNL = ([^\u2028\u2029\u000A\u000B\u000C\u000D\u0085\t\b\ ]) ++NWSPNL = ([^\u2028\u2029\u000A\u000B\u000C\u000D\u0085\t\b\ <}]) + NL = ([\u2028\u2029\u000A\u000B\u000C\u000D\u0085]|\u000D\u000A) + NNL = ([^\u2028\u2029\u000A\u000B\u000C\u000D\u0085]) + +@@ -255,13 +284,13 @@ + CharLiteral = (\'([^\u2028\u2029\u000A\u000B\u000C\u000D\u0085\'\\]|{EscapeSequence})\') + StringLiteral = (\"({StringCharacter}|{EscapeSequence})*\") + +-EscapeSequence = (\\[^\u2028\u2029\u000A\u000B\u000C\u000D\u0085]|\\+u{HexDigit}{4}|\\[0-3]?{OctDigit}{1,2}) ++EscapeSequence = (\\[^\u2028\u2029\u000A\u000B\u000C\u000D\u0085]|\\+u{HexDigit}{HexDigit}{HexDigit}{HexDigit}|\\[0-3]?{OctDigit}{OctDigit}?) + + + JavaRest = ([^\{\}\"\'/]|"/"[^*/]) + JavaCode = (({JavaRest}|{StringLiteral}|{CharLiteral}|{JavaComment})+) + +-DottedVersion = ([1-9][0-9]*(\.[0-9]+){0,2}) ++DottedVersion = ([1-9][0-9]*(\.[0-9]+(\.[0-9]+)?)?) + + %% + +@@ -441,7 +470,7 @@ + return s; + } + ^{WSP}*{NWSPNL} { yypushback(yylength()); yybegin(REGEXP); } +- {WSP}|{NL} { } ++ ({WSP}|{NL}) { } + + {Ident} { return symbol(IDENT, yytext()); } + "," { return symbol(COMMA); } +@@ -491,7 +520,7 @@ + } + + {WSPNL}+"{" { actionText.setLength(0); yybegin(JAVA_CODE); action_line = yyline+1; return symbol(REGEXPEND); } +- {NL}{if(macroDefinition) { yybegin(MACROS); } return symbol(REGEXPEND); } ++ {NL} { if (macroDefinition) { yybegin(MACROS); } return symbol(REGEXPEND); } + + {WSPNL}*"/*" { nextState = REGEXP; yybegin(COMMENT); } + +@@ -627,10 +656,3 @@ + + . { throw new ScannerException(file,ErrorMessages.UNEXPECTED_CHAR, yyline, yycolumn); } + \n { throw new ScannerException(file,ErrorMessages.UNEXPECTED_NL, yyline, yycolumn); } +- +-<> { if ( yymoreStreams() ) { +- file = (File) files.pop(); +- yypopStream(); +- } +- else +- return symbol(EOF); } diff --git a/jflex2jlex.sed b/jflex2jlex.sed new file mode 100644 index 0000000..064452a --- /dev/null +++ b/jflex2jlex.sed @@ -0,0 +1,97 @@ +#!/usr/bin/sed -rnf +# (Mostly) transforms a JFlex file into a JLex file + +:usercode + # Don't make any changes + /^%%$/ { p; n; b macros; } + p; n; b usercode + +:macros + /^%%$/ { p; n; b regex; } + + # Preserve code blocks verbatim + /^%(|init)\{\s*$/ { p; n; b macros_codeblock; } + + # Get rid of %-symbols not supported + /^%(final|column|eofclose|inputstreamctor)/ { n; b macros; } + + # Get rid of comments + /^\s*\/\// { n; b macros; } + /^\s*\/*.*\*\/$/ { n; b macros; } + + /^[A-Z].*=/ { + # Parenthesize macros + s:^([^=]*=\s*)(\S.*):\1(\2): + # Remove whitespace + :macros_again + s:(= .*[^\[]) :\1: + t macros_again + s:(= .*\\\\) :\1: + t macros_again + } + + # (Rough) character class conversion + s/\[:jletter:\]/[A-Za-z$_]/ + s/\[:jletterdigit:\]/[A-Za-z0-9$_]/ + + p; n; b macros +:macros_codeblock + /^%(|init)\}\s*$/ { p; n; b macros; } + p; n; b macros_codeblock + +# De-shorthand nested state-prefixed patterns +# Only supports nesting 2 levels deep +:regex + s: <> : ,,EOF,, :g + /^\s*\/\// { n; b regex; } + /^<[^>]*>\s*\{\s*$/ { + s:\s*\{.*:: + h + n; b regex_stateblock + } + :regex_again + s:^([^>]+>\s+.*[^ \[]) (.*\S\s+\{ ):\1\2: ; t regex_again + p; n; b regex +:regex_stateblock + s: <> : ,,EOF,, :g + /^\s*\/\// { n; b regex_stateblock; } + /^ [^ <]/ { + x + G + s:\n : : + :regex_stateblock_again + s:^([^>]+>\s+.*[^ \[]) (.*\S\s+\{ ):\1\2: ; t regex_stateblock_again + p + s:>.*:>: + x + n; b regex_stateblock + } + /^ <[^>]*>\s*\{\s*$/ { + x + G + s:>\s*\n\s*<:, : + s:\s*\{.*:: + h + n; b regex_stateblock2 + } + /^\}/ { n; b regex; } + p; n; b regex_stateblock +:regex_stateblock2 + s: <> : ,,EOF,, :g + /^\s*\/\// { n; b regex_stateblock2; } + /^ [^ <]/ { + x + G + s:\n : : + p + s:>.*:>: + x + n; b regex_stateblock2 + } + /^ \}/ { + x + s:,.*:>: + h + n; b regex_stateblock + } + p; n; b regex_stateblock2 -- 2.0.0