Computer Science 3 - 2006

Programming Language Translation


Practical for Week 23, beginning 2 October 2006 - Solutions

This practical was done well by all but a few groups, One point that I noticed was that some people were driving their parsers from the back, so to speak. Given a construction like

A = { "start" Something } "follow" .

it is far better to produce a parser routine like

while (sym.kind == startSym) { getSym(); Something(); } Accept(followSym);

than one like

while (sym.kind != followSym) { getSym(); Something(); } Accept(followSym);

for the simple reason that there might be something wrong with Something.

Complete source code for solutions to the prac - including the error recovery additions that you were asked to think about - are available on the WWW pages in the file PRAC23A.ZIP or PRAC23AC.ZIP (C# version).

The scanner was reasonably done by most people. Comments are trickier than they look, so if you did not get that section right study my solution below carefully and see how they should be handled. The trick is to be able to handle comments that are never closed, comments that follow one another with no tokens in between them, and comments that look as they are about to finish, but then do not (like { comment with wrong parenthesis)).

    static int literalKind(StringBuffer lex) {
      String s = lex.toString();
      if (s.equals("music"))        return musicSym;
      if (s.equals("advert"))       return advertSym;
      if (s.equals("zuma"))         return zumaSym;
      if (s.equals("shaik"))        return shaikSym;
      if (s.equals("mbeki"))        return mbekiSym;
      if (s.equals("randfalls"))    return randSym;
      if (s.equals("host"))         return hostSym;
      if (s.equals("listener"))     return listenerSym;
      if (s.equals("maxmin"))       return maxminSym;
      if (s.equals("temperatures")) return tempSym;
      return wordSym;
    }

    static void getSym() {
    // Scans for next sym from input
      while (ch > EOF && ch <= ' ') getChar();
      if (ch == '{') {  // must be a comment
        getChar();
        int level = 1;
        do {
          getChar();
          if (ch == '}') level--; else if (ch == '{') level++;
        } while (level > 0 && ch != EOF);
        if (ch == EOF)
          reportError("unclosed comment"); // sym will be EOFSym
        getChar(); getSym(); return;
      }
      else {
        StringBuffer symLex = new StringBuffer();
        int symKind;
        if (Character.isLetter(ch)) {
          do {
            symLex.append(ch); getChar();
          } while (Character.isLetter(ch));
          symKind = literalKind(symLex);
        }
        else if (Character.isDigit(ch)) {
          do {
            symLex.append(ch); getChar();
          } while (Character.isDigit(ch));
          symKind = numSym;
        }
        else {
          symLex.append(ch);
          switch (ch) {
            case EOF:
              symLex = new StringBuffer("EOF");   // special representation
              symKind = EOFSym; break;            // no need to getChar here, of course
            case ',':
              symKind = commaSym; getChar(); break;
            case '.':
              symKind = periodSym; getChar(); break;
            default :
              symKind = noSym; getChar(); break;
          }
        }
        sym = new Token(symKind, symLex.toString());
      }
    }

Here is most of a simple "sudden death" parser, devoid of error recovery:

     static SymSet
       FirstRadio    = new SymSet(new int[] {hostSym, musicSym, advertSym}),
       FirstNewsItem = new SymSet(new int[] {shaikSym, zumaSym, mbekiSym, randSym, wordSym}),
       FirstStory    = new SymSet(new int[] {shaikSym, zumaSym, mbekiSym, wordSym}),
       FirstFiller   = new SymSet(new int[] {musicSym, advertSym});

     static void accept(int wantedSym, String errorMessage) {
     // Checks that lookahead token is wantedSym
       if (sym.kind == wantedSym) getSym(); else abort(errorMessage);
     }

     static void accept(SymSet acceptable, String errorMessage) {
     // Checks that lookahead sym.kind is in acceptable set of tokens
       if (acceptable.contains(sym.kind)) getSym(); else abort(errorMessage);
     }

     static void Radio() {
     // Radio = { TalkShow | "music" | "advert" [ NewsBulletin ] } EOF .
       while (FirstRadio.contains(sym.kind)) {
         switch (sym.kind) {
           case hostSym :
             TalkShow(); break;
           case musicSym :
             getSym(); break;
           case advertSym :
             getSym();
             if (FirstNewsItem.contains(sym.kind)) NewsBulletin();
             break;
         }
       }
       accept(EOFSym, "EOF expected");
     }

     static void NewsItem() {
     // NewsItem = "zuma" [ "shaik" ] | "shaik" "zuma" | "mbeki" | "randFalls" | Story .
       switch (sym.kind) {
         case zumaSym :
           getSym(); if (sym.kind == shaikSym) getSym();
           break;
         case shaikSym :
           getSym(); accept(zumaSym, "Zuma expected");
           break;
         case mbekiSym :
         case randSym :
           getSym();
           break;
         case wordSym :
           Story();
           break;
         default :
           abort("Unacceptable start to news item");
           break;
       }
     }

     static void NewsBulletin() {
     // NewsBulletin = NewsItem { NewsItem } [ Weather ] Filler .
       NewsItem();
       while (FirstNewsItem.contains(sym.kind)) NewsItem();
       if (sym.kind == maxminSym) Weather();
       Filler();
     }

     static void Story() {
     // Story = word { word | "mbeki" | "zuma" | "shaik" } "." .
       accept(wordSym, "word expected");
       while (FirstStory.contains(sym.kind)) getSym();
       accept(periodSym, ". expected");
     }

     static void TalkShow() {
     // TalkShow = Host { Listener Host } .
       Host();
       while (sym.kind == listenerSym) {
         Listener(); Host();
       }
     }

     static void Host() {
     // Host = "host" Opinion .
       accept(hostSym, "host expected");
       Opinion();
     }

     static void Listener() {
     // Listener = "listener" Opinion .
       accept(listenerSym, "listener expected");
       Opinion();
     }

     static void Opinion() {
     // Opinion = Story .
       Story();
     }

     static void Filler() {
     // Filler = "music" | "advert"  .
       accept(FirstFiller, "unacceptable filler item");
     }

     static void Weather() {
     // Weather = "maxmin" "temperatures" OneTemp { "," OneTemp }.
       accept(maxminSym, "max/min expected");
       accept(tempSym, "temperarures expected");
       OneTemp();
       while (sym.kind == commaSym) {
         getSym();
         OneTemp();
       }
     }

     static void OneTemp() {
     // OneTemp = word number number .
       accept(wordSym, "area expected");
       accept(numSym, "min temperature expected");
       accept(numSym, "max temperature expected");
     }

To incorporate error recovery one can go to the whole extent of introducing calls to a test routine at the start and end of every subparser. But one can probably get away with something a lot simpler:

     static SymSet
       FirstRadio    = new SymSet(new int[] {hostSym, musicSym, advertSym}),
       FirstSync     = new SymSet(new int[] {hostSym, musicSym, advertSym, EOFSym}),
       FirstNewsItem = new SymSet(new int[] {shaikSym, zumaSym, mbekiSym, randSym, wordSym}),
       FollowNews    = new SymSet(new int[] {shaikSym, zumaSym, mbekiSym, randSym, wordSym, musicSym, advertSym, maxminSym}),
       FirstStory    = new SymSet(new int[] {shaikSym, zumaSym, mbekiSym, wordSym}),
       FirstFiller   = new SymSet(new int[] {musicSym, advertSym});

     static void accept(int wantedSym, String errorMessage) {
     // Checks that lookahead token is wantedSym
       if (sym.kind == wantedSym) getSym(); else reportError(errorMessage);
     }

     static void accept(SymSet acceptable, String errorMessage) {
     // Checks that lookahead sym.kind is in acceptable set of tokens
       if (acceptable.contains(sym.kind)) getSym(); else reportError(errorMessage);
     }

     static void test(SymSet allowed, SymSet beacons, String errorMessage) {
     // Test whether current Sym is in Allowed, and recover if not
       if (allowed.contains(sym.kind)) return;
       reportError(errorMessage);
       SymSet stopSet = allowed.union(beacons);
       while (!stopSet.contains(sym.kind)) getSym();
       output.writeLine();   // for demonstration purposes only
       output.writeLine("-------------- sync ---------- ");
     }

     static void Radio() {
     // Radio = { TalkShow | "music" | "advert" [ NewsBulletin ] } EOF .
       while (FirstRadio.contains(sym.kind)) {
         switch (sym.kind) {
           case hostSym :
             TalkShow(); break;
           case musicSym :
             getSym(); break;
           case advertSym :
             getSym();
             if (FirstNewsItem.contains(sym.kind)) NewsBulletin();
             break;
         }
         test(FirstSync, new SymSet(), "Invalid sequence of programmes");  // --------------------
       }
       accept(EOFSym, "EOF expected");
     }

     static void NewsItem() {
     // NewsItem = "zuma" [ "shaik" ] | "shaik" "zuma" | "mbeki" | "randFalls" | Story .
       switch (sym.kind) {
         case zumaSym :
           getSym(); if (sym.kind == shaikSym) getSym();
           break;
         case shaikSym :
           getSym(); accept(zumaSym, "Zuma expected");
           break;
         case mbekiSym :
         case randSym :
           getSym();
           break;
         case wordSym :
           Story();
           break;
         default :
           reportError("Unacceptable start to news item");                 // --------------------
           break;
       }
       test(FollowNews, new SymSet(), "invalid item after NewsItem");      // --------------------
     }


Home  © P.D. Terry