Computer Science 301 - 2002


Tutorials for week 14 - (b) Recursive descent parsing and error recovery (solution)

The sources for these programs can be found in the file TUT15.ZIP.

1. Is the following grammar LL(1)?

     COMPILER Goal $TF

     IGNORE CHR(1) .. CHR(31)

     PRODUCTIONS
       Goal =  A { B } "." .
       A    =  C  |  "(" [ B ] ")"  |  "[" C "]" .
       B    =  "x"  |  "y" { C } "z" .
       C    =  "p"  |  "q" .
     END Goal.

Yes it is. It helps to rewrite the grammar without the "Wirth meta brackets":

     COMPILER Goal $TF

     IGNORE CHR(1) .. CHR(31)

     PRODUCTIONS
       Goal =  A BSeq "." .                          /*  1  */
       BSeq =  B BSeq | e .                          /*  2  */
       A    =  C  |  "(" BOpt ")"  |  "[" C "]" .    /*  3  */
       BOpt =  B | e .                               /*  4  */
       B    =  "x"  |  "y" CSeq "z" .                /*  5  */
       CSEQ =  C CSeq | e .                          /*  6  */
       C    =  "p"  |  "q" .                         /*  7  */
     END Goal.

We need to check LL(1) "Rule 1" for productions 2, 3, 4, 5, 6, 7. Pretty obviously it is satisfied for all of them.

We need to check LL(1) "Rule 2" for productions 2, 4 and 6 which are nullable

FIRST(BSeq) = FIRST(B) = { "x", "y" } (from production 5)
FOLLOW(BSeq) = { "." } (from production 1)

FIRST(BOpt) = FIRST(B) = { "x", "y" } (from production 5)
FOLLOW(BOpt) = { ")" } (from production 3)

FIRST(CSeq) = FIRST(C) = { "p", "q" } (from production 7)
FOLLOW(CSeq) = { "Z" } (from production 5)

None of which give any trouble.

2. Write a recursive descent parser for the grammar using a brutal form of error handling (kill the parsing process when an error is discovered with a suitable error message

     // Simple Recursive Descent Parser with brutal error checking
     // P.D. Terry, Rhodes University, 2002

     #include "misc.h"
     #include "set.h"

     // +++++++++++++++++++++++++ Character Handler +++++++++++++++++++++++++

     FILE *inFile;
     char ch;                        // Lookahead character for scanner

     void GetChar (void) {
     // Obtains next character ch from inFile, or CHR(0) if EOF reached
       ch = fgetc(inFile);
       putchar(ch);                  // Reflect on output
       if (ch == EOF) ch = '\0';
     }

     // +++++++++++++++++++++++++ Scanner +++++++++++++++++++++++++++++++++++

     enum SYMBOLS { EOFSym, unknownSym, lParenSym, rParenSym, lBrackSym,
                    rBrackSym, periodSym, pSym, qSym, xSym, ySym, zSym };

     SYMBOLS sym;                    // Lookahead token for parser

     void GetSym (void) {
     // Scans for next sym from inFile
       while (ch > 0 && ch <= ' ') GetChar(); // skip whitespace
       switch (tolower(ch)) {
         case '\0' : sym = EOFSym; break;
         case 'p'  : sym = pSym; GetChar(); break;
         case 'q'  : sym = qSym; GetChar(); break;
         case 'x'  : sym = xSym; GetChar(); break;
         case 'y'  : sym = ySym; GetChar(); break;
         case 'z'  : sym = zSym; GetChar(); break;
         case '('  : sym = lParenSym; GetChar(); break;
         case ')'  : sym = rParenSym; GetChar(); break;
         case '['  : sym = lBrackSym; GetChar(); break;
         case ']'  : sym = rBrackSym; GetChar(); break;
         case '.'  : sym = periodSym; GetChar(); break;
         default   : sym = unknownSym; GetChar(); break;
       }
     }

     // +++++++++++++++++++++++++ Parser ++++++++++++++++++++++++++++++++++++

     void abort (char *S) {
     // Abandon parsing with error message S
       fprintf(stderr, "%s", S); exit(1);
     }

     void accept (SYMBOLS WantedSym, char *S) {
     // Check that lookahead token is WantedSym
       if (sym == WantedSym) GetSym(); else abort(S);
     }

     void Goal (void);    // prototypes
     void A (void);
     void B (void);
     void C (void);

     typedef Set<zSym> SymSet;

     SymSet FirstGoal(lParenSym, lBrackSym, pSym, qSym);
     SymSet FirstA(lParenSym, lBrackSym, pSym, qSym);
     SymSet FirstB(xSym, ySym);
     SymSet FirstC(pSym, qSym);

     void Goal (void) {
     // Goal = A { B } "." .
       A();
       while (FirstB.memb(sym)) B();
       accept(periodSym, " '.' expected");
     }

     void A (void) {
     // A = C | "(" [ B ] ")" | "[" C "]" .
       if (FirstC.memb(sym))
         C();
       else switch (sym) {
         case lParenSym :
           GetSym();
           if (FirstB.memb(sym))
             B();
           accept(rParenSym, " ')' expected");
           break;
         case lBrackSym :
           GetSym();
           C();
           accept(rBrackSym, " ']' expected");
           break;
         default :
           abort(" Unexpected symbol in A");
           break;
       }
     }

     void B (void) {
     //B = "x" | "y" { C } "z" .
       switch (sym) {
         case xSym :
           GetSym();
           break;
         case ySym :
           GetSym();
           while (FirstC.memb(sym))
             C();
           accept(zSym, " 'z' expected");
           break;
         default :
           abort(" Unexpected symbol in B");
           break;
       }
     }

     void C (void) {
     // C = "p" | "q" .
       switch (sym) {
         case pSym :
           GetSym();
           break;
         case qSym :
           GetSym();
           break;
         default :
           abort(" Unexpected symbol in C");
           break;
       }
     }

     void main (int argc, char *argv[])  {
       if ((inFile = fopen(argv[1], "r")) == NULL) {
         fprintf(stderr, "Could not open %s", argv[1]); exit(1);
       }
       GetChar();               // Initialise character handler
       GetSym();                // Initialise scanner
       Goal();                  // Start parser
       fprintf(stderr, "Parsed correctly");
     }


3. Improve your parsing routines to incorporate error recovery and synchronization using the techniques discussed in section 10.3, using the routines below:

     void accept (SYMBOLS WantedSym, char *S) {
     // Check that lookahead token is WantedSym
       if (sym == WantedSym) GetSym();
       else {
         fprintf(stderr, "%s\n", S);
         errors = true;
       }
     }

     void test (SymSet allowed, SymSet beacons, char *S) {
     // Test whether current Sym is in Allowed, and recover if not
       if (allowed.memb(sym)) return;
       fprintf(stderr, "%s\n", S);
       errors = true;
       SymSet stopset = allowed + beacons;
       while (!stopset.memb(sym)) GetSym();
     }


     // Better Recursive Descent Parser with First/Follow set error recovery
     // P.D. Terry, Rhodes University, 2002

     #include "misc.h"
     #include "set.h"

     // +++++++++++++++++++++++++ Character Handler +++++++++++++++++++++++++

     FILE *inFile;
     char ch;                        // Lookahead character for scanner

     void GetChar (void) {
     // Obtains next character ch from inFile, or CHR(0) if EOF reached
       ch = fgetc(inFile);
       putchar(ch);                  // Reflect on output
       if (ch == EOF) ch = '\0';
     }

     // +++++++++++++++++++++++++ Scanner +++++++++++++++++++++++++++++++++++

     enum SYMBOLS { EOFSym, unknownSym, lParenSym, rParenSym, lBrackSym,
                    rBrackSym, periodSym, pSym, qSym, xSym, ySym, zSym };

     SYMBOLS sym;                    // Lookahead token for parser

     void GetSym (void) {
     // Scans for next sym from inFile
       while (ch > 0 && ch <= ' ') GetChar(); // skip whitespace
       switch (tolower(ch)) {
         case '\0' : sym = EOFSym; break;
         case 'p'  : sym = pSym; GetChar(); break;
         case 'q'  : sym = qSym; GetChar(); break;
         case 'x'  : sym = xSym; GetChar(); break;
         case 'y'  : sym = ySym; GetChar(); break;
         case 'z'  : sym = zSym; GetChar(); break;
         case '('  : sym = lParenSym; GetChar(); break;
         case ')'  : sym = rParenSym; GetChar(); break;
         case '['  : sym = lBrackSym; GetChar(); break;
         case ']'  : sym = rBrackSym; GetChar(); break;
         case '.'  : sym = periodSym; GetChar(); break;
         default   : sym = unknownSym; GetChar(); break;
       }
     }

     // +++++++++++++++++++++++++ Parser ++++++++++++++++++++++++++++++++++++

     bool errors = false;

     void abort (char *S) {
     // Abandon parsing with error message S
       fprintf(stderr, "%s", S); exit(1);
     }

     typedef Set<zSym> SymSet;

     SymSet FirstGoal(lParenSym, lBrackSym, pSym, qSym);
     SymSet FirstA(lParenSym, lBrackSym, pSym, qSym);
     SymSet FirstB(xSym, ySym);
     SymSet FirstC(pSym, qSym);

     void accept (SYMBOLS WantedSym, char *S) {
     // Check that lookahead token is WantedSym
     // Note that the parameter is of SYMBOLS type - not char or int
       if (sym == WantedSym) GetSym();
       else { fprintf(stderr, "%s\n", S); errors = true; }
     }

     void test (SymSet allowed, SymSet beacons, char *S) {
     // Test whether current Sym is in Allowed, and recover if not
       if (allowed.memb(sym)) return;
       fprintf(stderr, "%s\n", S); errors = true;
       SymSet stopset = allowed + beacons;
       while (!stopset.memb(sym)) GetSym();
     }

     void Goal (SymSet followers);    // prototypes
     void A (SymSet followers);
     void B (SymSet followers);
     void C (SymSet followers);

     void Goal (SymSet followers) {
     // Goal = A { B } "." .
       test(FirstGoal, followers, "invalid start to Goal");
       if (FirstGoal.memb(sym)) {
         A(FirstB + SymSet(periodSym) + followers);
         while (FirstB.memb(sym))
           B(SymSet(periodSym) + followers);
         accept(periodSym, " '.' expected");
       }
       test(followers, SymSet(), "unexpected symbol");
     }

     void A (SymSet followers) {
     // A = C | "(" [ B ] ")" | "[" C "]" .
       test(FirstA, followers, "invalid start to A");
       if (FirstA.memb(sym)) {
         if (FirstC.memb(sym)) C(followers);
         else switch (sym) {
           case lParenSym :
             GetSym();
             if (FirstB.memb(sym))
               B(SymSet(rParenSym) + followers);
             accept(rParenSym, " ')' expected");
             break;
           case lBrackSym :
             GetSym();
             C(SymSet(rBrackSym) + followers);
             accept(rBrackSym, " ']' expected");
             break;
         }
       }
       test(followers, SymSet(), "unexpected symbol");
     }

     void B (SymSet followers) {
     //B = "x" | "y" { C } "z" .
       test(FirstB, followers, "invalid start to B");
       if (FirstB.memb(sym)) {
         switch (sym) {
           case xSym :
             GetSym();
             break;
           case ySym :
             GetSym();
             while (FirstC.memb(sym))
               C(SymSet(zSym) + followers);
             accept(zSym, " 'z' expected");
             break;
         }
       }
       test(followers, SymSet(), "unexpected symbol");
     }

     void C (SymSet followers) {
     // C = "p" | "q" .
       test(FirstC, followers, "invalid start to C");
       if (FirstC.memb(sym)) {
         switch (sym) {
           case pSym :
             GetSym();
             break;
           case qSym :
             GetSym();
             break;
         }
       }
       test(followers, SymSet(), "unexpected symbol");
     }

     void main (int argc, char *argv[])  {
       if ((inFile = fopen(argv[1], "r")) == NULL) {
         fprintf(stderr, "Could not open %s", argv[1]); exit(1);
       }
       GetChar();               // Initialise character handler
       GetSym();                // Initialise scanner
       Goal(SymSet(EOFSym));    // Start parser
       if (!errors) fprintf(stderr, "Parsed correctly");
     }


Home  © P.D. Terry