Computer Science 301 - 2017


Tutorial 7 - Recursive Descent Parsers - Solutions

Develop a recursive descent parser and scanner for the EBNF grammar like those described in chapter 8:

    EBNF       = { Production } EOF .
    Production = nonterminal "=" Expression "." .
    Expression = Term { "|" Term }  .
    Term       = [ Factor { Factor } ] .
    Factor     =   nonterminal
                 | terminal
                 | "[" Expression "]"
                 | "(" Expression ")"
                 | "{" Expression "}" .

  // Parse a set of EBNF productions
  // P.D. Terry, Rhodes University, 2015
  using Library;
  using System;
  using System.Text;

  class Token {
    public int kind;
    public string val;

    public Token(int kind, string val) {
      this.kind = kind;
      this.val = val;
    } // constructor

  } // Token

  class EBNFParser {

    // +++++++++++++++++++++++++ File Handling and Error handlers ++++++++++++++++++++

    static InFile input;
    static OutFile output;

    static string NewFileName(string oldFileName, string ext) {
    // Creates new file name by changing extension of oldFileName to ext
      int i = oldFileName.lastIndexOf('.');
      if (i < 0) return oldFileName + ext; else return oldFileName.substring(0, i) + ext;
    } // NewFileName

    static void ReportError(string errorMessage) {
    // Displays errorMessage on standard output and on reflected output
      System.out.Println(errorMessage);
      output.WriteLine(errorMessage);
    } // ReportError

    static void Abort(string errorMessage) {
    // Abandons parsing after issuing error message
      ReportError(errorMessage);
      output.Close();
      System.Environment.Exit(1);
    } // Abort

    // +++++++++++++++++++++++  token kinds enumeration +++++++++++++++++++++++++

    const int
      noSym        =  0,
      nonTermSym   =  1,
      termSym      =  2,
      eqlSym       =  3,
      barSym       =  4,
      periodSym    =  5,
      lParenSym    =  6,
      rParenSym    =  7,
      lBrackSym    =  8,
      rBrackSym    =  9,
      lBraceSym    = 10,
      rBraceSym    = 11,
      EOFSym       = 12;

    // +++++++++++++++++++++++++++++ Character Handler ++++++++++++++++++++++++++

    const char EOF = '\0';
    static bool atEndOfFile = false;

    // Declaring ch as a global variable is done for expediency - global variables
    // are not always a good thing

    static char ch;    // look ahead character for scanner

    static void GetChar() {
    // Obtains next character ch from input, or CHR(0) if EOF reached
    // Reflect ch to output
      if (atEndOfFile) ch = EOF;
      else {
        ch = input.ReadChar();
        atEndOfFile = ch == EOF;
        if (!atEndOfFile) output.Write(ch);
      }
    } // GetChar

    // +++++++++++++++++++++++++++++++ Scanner ++++++++++++++++++++++++++++++++++

    // Declaring sym as a global variable is done for expediency - global variables
    // are not always a good thing

    static Token sym;

    static void
    GetSym() {
    // Scans for next sym from input
      while (ch > EOF && ch <= ' ') GetChar();
      stringBuffer symLex = new stringBuffer();
      int symKind = noSym;
      if (Character.isLetter(ch)) {
        do {
          symLex.append(ch); GetChar();
        } while (Character.isLetterOrDigit(ch));
        symKind = nonTermSym;
      }
      else if (ch == '\"') {
        do {
          symLex.Append(ch); GetChar();
        } while (ch != '\"' && ch != EOF);
        if (ch != EOF) {
          symKind = termSym; GetChar();
        } // else symKind = noSym already
      }
      else {
        symLex.append(ch);
        switch (ch) {
          case EOF:
            symLex = new stringBuffer("EOF");   // special representation
            symKind = EOFSym; break;            // no need to GetChar here, of course
          case '=':
            symKind = eqlSym; GetChar(); break;
          case '.':
            symKind = periodSym; GetChar(); break;
          case '|':
            symKind = barSym; GetChar(); break;
          case '(':
            symKind = lParenSym; GetChar(); break;
          case '[':
            symKind = lBrackSym; GetChar(); break;
          case '{':
            symKind = lBraceSym; GetChar(); break;
          case ')':
            symKind = rParenSym; GetChar(); break;
          case ']':
            symKind = rBrackSym; GetChar(); break;
          case '}':
            symKind = rBraceSym; GetChar(); break;
          default :
            symKind = noSym; GetChar(); break;
        }
      }
      sym = new Token(symKind, symLex.tostring());
    }

    // +++++++++++++++++++++++++++++++ Parser +++++++++++++++++++++++++++++++++++

    static SymSet
      FirstFactor = new SymSet(new int[] {lParenSym, lBrackSym, lBraceSym, nonTermSym, termSym});

    static void Accept(int wantedSym, string errorMessage) {
    // Checks that lookahead token is wantedSym
      if (sym.kind == wantedSym) GetSym(); else Abort(errorMessage);
    }

    static void EBNF() {
    // EBNF = { Production } EOF .
      while (sym.kind == nonTermSym) Production();
      Accept(EOFSym, "EOF expected");
    }

    static void Production() {
    // Production = nonterminal "=" Expression "." .
      Accept(nonTermSym, "non-terminal expected");
      Accept(eqlSym, "= expected");
      Expression();
      Accept(periodSym, ". expected");
    } // EBNF

    static void Expression() {
    // Expression = Term { "|" Term } .
      Term();
      while (sym.kind == barSym) {
        GetSym();
        Term();
      }
    } // Expression

    static void Term() {
    // Term = [ Factor { Factor } ] .
      if (FirstFactor.contains(sym.kind)) {
        Factor();
        while (FirstFactor.Contains(sym.kind)) Factor();
      }
    } // term

    static void Factor() {
    // Factor =    nonterminal
    //           | terminal
    //           | "(" Expression ")"
    //           | "[" Expression "]"
    //           | "{" Expression "}" .
      switch(sym.kind) {
        case nonTermSym:
        case termSym:
          GetSym(); break;
        case lParenSym:
          GetSym(); Expression(); Accept(rParenSym, ") expected"); break;
        case lBrackSym:
          GetSym(); Expression(); Accept(rBrackSym, "] expected"); break;
        case lBraceSym:
          GetSym(); Expression(); Accept(rBraceSym, "} expected"); break;
        default: abort("invalid start to Factor"); break;
      }
    } // Factor

    // +++++++++++++++++++++ Main driver function +++++++++++++++++++++++++++++++

    public static void Main(string[] args) {
      // Open input and output files from command line arguments
      if (args.Length == 0) {
        Console.WriteLine("Usage: EBNFParser FileName");
        System.Environment.Exit(1);
      }
      input = new InFile(args[0]);
      output = new OutFile(NewFileName(args[0], ".out"));

      GetChar();                                  // Lookahead character
      GetSym();                                   // Lookahead symbol
      EBNF();                                     // Start to parse from the goal symbol
      // if we get back here everything must have been satisfactory
      Console.WriteLine("Parsed correctly");
      output.Close();
    } // Main

  } // class EBNFParser


Home  © P.D. Terry