Develop a recursive descent parser and scanner for the EBNF grammar like those described in chapter 8:
EBNF = { Production } EOF . Production = nonterminal "=" Expression "." . Expression = Term { "|" Term } . Term = [ Factor { Factor } ] . Factor = nonterminal | terminal | "[" Expression "]" | "(" Expression ")" | "{" Expression "}" .
// Parse a set of EBNF productions // P.D. Terry, Rhodes University, 2015 using Library; using System; using System.Text; class Token { public int kind; public string val; public Token(int kind, string val) { this.kind = kind; this.val = val; } // constructor } // Token class EBNFParser { // +++++++++++++++++++++++++ File Handling and Error handlers ++++++++++++++++++++ static InFile input; static OutFile output; static string NewFileName(string oldFileName, string ext) { // Creates new file name by changing extension of oldFileName to ext int i = oldFileName.lastIndexOf('.'); if (i < 0) return oldFileName + ext; else return oldFileName.substring(0, i) + ext; } // NewFileName static void ReportError(string errorMessage) { // Displays errorMessage on standard output and on reflected output System.out.Println(errorMessage); output.WriteLine(errorMessage); } // ReportError static void Abort(string errorMessage) { // Abandons parsing after issuing error message ReportError(errorMessage); output.Close(); System.Environment.Exit(1); } // Abort // +++++++++++++++++++++++ token kinds enumeration +++++++++++++++++++++++++ const int noSym = 0, nonTermSym = 1, termSym = 2, eqlSym = 3, barSym = 4, periodSym = 5, lParenSym = 6, rParenSym = 7, lBrackSym = 8, rBrackSym = 9, lBraceSym = 10, rBraceSym = 11, EOFSym = 12; // +++++++++++++++++++++++++++++ Character Handler ++++++++++++++++++++++++++ const char EOF = '\0'; static bool atEndOfFile = false; // Declaring ch as a global variable is done for expediency - global variables // are not always a good thing static char ch; // look ahead character for scanner static void GetChar() { // Obtains next character ch from input, or CHR(0) if EOF reached // Reflect ch to output if (atEndOfFile) ch = EOF; else { ch = input.ReadChar(); atEndOfFile = ch == EOF; if (!atEndOfFile) output.Write(ch); } } // GetChar // +++++++++++++++++++++++++++++++ Scanner ++++++++++++++++++++++++++++++++++ // Declaring sym as a global variable is done for expediency - global variables // are not always a good thing static Token sym; static void GetSym() { // Scans for next sym from input while (ch > EOF && ch <= ' ') GetChar(); stringBuffer symLex = new stringBuffer(); int symKind = noSym; if (Character.isLetter(ch)) { do { symLex.append(ch); GetChar(); } while (Character.isLetterOrDigit(ch)); symKind = nonTermSym; } else if (ch == '\"') { do { symLex.Append(ch); GetChar(); } while (ch != '\"' && ch != EOF); if (ch != EOF) { symKind = termSym; GetChar(); } // else symKind = noSym already } else { symLex.append(ch); switch (ch) { case EOF: symLex = new stringBuffer("EOF"); // special representation symKind = EOFSym; break; // no need to GetChar here, of course case '=': symKind = eqlSym; GetChar(); break; case '.': symKind = periodSym; GetChar(); break; case '|': symKind = barSym; GetChar(); break; case '(': symKind = lParenSym; GetChar(); break; case '[': symKind = lBrackSym; GetChar(); break; case '{': symKind = lBraceSym; GetChar(); break; case ')': symKind = rParenSym; GetChar(); break; case ']': symKind = rBrackSym; GetChar(); break; case '}': symKind = rBraceSym; GetChar(); break; default : symKind = noSym; GetChar(); break; } } sym = new Token(symKind, symLex.tostring()); } // +++++++++++++++++++++++++++++++ Parser +++++++++++++++++++++++++++++++++++ static SymSet FirstFactor = new SymSet(new int[] {lParenSym, lBrackSym, lBraceSym, nonTermSym, termSym}); static void Accept(int wantedSym, string errorMessage) { // Checks that lookahead token is wantedSym if (sym.kind == wantedSym) GetSym(); else Abort(errorMessage); } static void EBNF() { // EBNF = { Production } EOF . while (sym.kind == nonTermSym) Production(); Accept(EOFSym, "EOF expected"); } static void Production() { // Production = nonterminal "=" Expression "." . Accept(nonTermSym, "non-terminal expected"); Accept(eqlSym, "= expected"); Expression(); Accept(periodSym, ". expected"); } // EBNF static void Expression() { // Expression = Term { "|" Term } . Term(); while (sym.kind == barSym) { GetSym(); Term(); } } // Expression static void Term() { // Term = [ Factor { Factor } ] . if (FirstFactor.contains(sym.kind)) { Factor(); while (FirstFactor.Contains(sym.kind)) Factor(); } } // term static void Factor() { // Factor = nonterminal // | terminal // | "(" Expression ")" // | "[" Expression "]" // | "{" Expression "}" . switch(sym.kind) { case nonTermSym: case termSym: GetSym(); break; case lParenSym: GetSym(); Expression(); Accept(rParenSym, ") expected"); break; case lBrackSym: GetSym(); Expression(); Accept(rBrackSym, "] expected"); break; case lBraceSym: GetSym(); Expression(); Accept(rBraceSym, "} expected"); break; default: abort("invalid start to Factor"); break; } } // Factor // +++++++++++++++++++++ Main driver function +++++++++++++++++++++++++++++++ public static void Main(string[] args) { // Open input and output files from command line arguments if (args.Length == 0) { Console.WriteLine("Usage: EBNFParser FileName"); System.Environment.Exit(1); } input = new InFile(args[0]); output = new OutFile(NewFileName(args[0], ".out")); GetChar(); // Lookahead character GetSym(); // Lookahead symbol EBNF(); // Start to parse from the goal symbol // if we get back here everything must have been satisfactory Console.WriteLine("Parsed correctly"); output.Close(); } // Main } // class EBNFParser
Home © P.D. Terry