Computer Science 301 - 2007

Tutorial for week 23 - Recursive Descent Parsers

Develop a recursive descent parser and scanner for the EBNF grammar like those described in chapter 8:

    EBNF       = { Production } EOF .
    Production = nonterminal "=" Expression "." .
    Expression = Term { "|" Term } .
    Term       = [ Factor { Factor } ] .
    Factor     =   nonterminal
                 | terminal
                 | "[" Expression "]"
                 | "(" Expression ")"
                 | "{" Expression "}" .


// Parse a set of EBNF productions // P.D. Terry, Rhodes University, 2007 import java.util.*; import Library.*; class Token { public int kind; public String val; public Token(int kind, String val) { this.kind = kind; this.val = val; } } class EBNFParser { // +++++++++++++++++++++++++ File Handling and Error handlers ++++++++++++++++++++ static InFile input; static OutFile output; static String newFileName(String oldFileName, String ext) { // Creates new file name by changing extension of oldFileName to ext int i = oldFileName.lastIndexOf('.'); if (i < 0) return oldFileName + ext; else return oldFileName.substring(0, i) + ext; } static void reportError(String errorMessage) { // Displays errorMessage on standard output and on reflected output System.out.println(errorMessage); output.writeLine(errorMessage); } static void abort(String errorMessage) { // Abandons parsing after issuing error message reportError(errorMessage); output.close(); System.exit(1); } // +++++++++++++++++++++++ token kinds enumeration +++++++++++++++++++++++++ static final int noSym = 0, nonTermSym = 1, termSym = 2, eqlSym = 3, barSym = 4, periodSym = 5, lParenSym = 6, rParenSym = 7, lBrackSym = 8, rBrackSym = 9, lBraceSym = 10, rBraceSym = 11, EOFSym = 12; // +++++++++++++++++++++++++++++ Character Handler ++++++++++++++++++++++++++ static final char EOF = '\0'; static boolean atEndOfFile = false; // Declaring ch as a global variable is done for expediency - global variables // are not always a good thing static char ch; // look ahead character for scanner static void getChar() { // Obtains next character ch from input, or CHR(0) if EOF reached // Reflect ch to output if (atEndOfFile) ch = EOF; else { ch = input.readChar(); atEndOfFile = ch == EOF; if (!atEndOfFile) output.write(ch); } } // +++++++++++++++++++++++++++++++ Scanner ++++++++++++++++++++++++++++++++++ // Declaring sym as a global variable is done for expediency - global variables // are not always a good thing static Token sym; static void getSym() { // Scans for next sym from input while (ch > EOF && ch <= ' ') getChar(); StringBuffer symLex = new StringBuffer(); int symKind = noSym; if (Character.isLetter(ch)) { do { symLex.append(ch); getChar(); } while (Character.isLetterOrDigit(ch)); symKind = nonTermSym; } else if (ch == '\"') { do { symLex.append(ch); getChar(); } while (ch != '\"' && ch != EOF); if (ch != EOF) { symKind = termSym; getChar(); } // else symKind = noSym already } else { symLex.append(ch); switch (ch) { case EOF: symLex = new StringBuffer("EOF"); // special representation symKind = EOFSym; break; // no need to getChar here, of course case '=': symKind = eqlSym; getChar(); break; case '.': symKind = periodSym; getChar(); break; case '|': symKind = barSym; getChar(); break; case '(': symKind = lParenSym; getChar(); break; case '[': symKind = lBrackSym; getChar(); break; case '{': symKind = lBraceSym; getChar(); break; case ')': symKind = rParenSym; getChar(); break; case ']': symKind = rBrackSym; getChar(); break; case '}': symKind = rBraceSym; getChar(); break; default : symKind = noSym; getChar(); break; } } sym = new Token(symKind, symLex.toString()); } // +++++++++++++++++++++++++++++++ Parser +++++++++++++++++++++++++++++++++++ static SymSet FirstFactor = new SymSet(new int[] {lParenSym, lBrackSym, lBraceSym, nonTermSym, termSym}); static void accept(int wantedSym, String errorMessage) { // Checks that lookahead token is wantedSym if (sym.kind == wantedSym) getSym(); else abort(errorMessage); } static void EBNF() { // EBNF = { Production } EOF . while (sym.kind == nonTermSym) Production(); accept(EOFSym, "EOF expected"); } static void Production() { // Production = nonterminal "=" Expression "." . accept(nonTermSym, "non-terminal expected"); accept(eqlSym, "= expected"); Expression(); accept(periodSym, ". expected"); } static void Expression() { // Expression = Term { "|" Term } . Term(); while (sym.kind == barSym) { getSym(); Term(); } } static void Term() { // Term = [ Factor { Factor } ] . if (FirstFactor.contains(sym.kind)) { Factor(); while (FirstFactor.contains(sym.kind)) Factor(); } } static void Factor() { // Factor = nonterminal // | terminal // | "(" Expression ")" // | "[" Expression "]" // | "{" Expression "}" . switch(sym.kind) { case nonTermSym: case termSym: getSym(); break; case lParenSym: getSym(); Expression(); accept(rParenSym, ") expected"); break; case lBrackSym: getSym(); Expression(); accept(rBrackSym, "] expected"); break; case lBraceSym: getSym(); Expression(); accept(rBraceSym, "} expected"); break; default: abort("invalid start to Factor"); break; } } // +++++++++++++++++++++ Main driver function +++++++++++++++++++++++++++++++ public static void main(String[] args) { // Open input and output files from command line arguments if (args.length == 0) { System.out.println("Usage: Declarations FileName"); System.exit(1); } input = new InFile(args[0]); output = new OutFile(newFileName(args[0], ".out")); getChar(); // Lookahead character getSym(); // Lookahead symbol EBNF(); // Start to parse from the goal symbol // if we get back here everything must have been satisfactory System.out.println("Parsed correctly"); output.close(); } }


Home  © P.D. Terry