Computer Science 3 - 2007

Programming Language Translation


Practical for Week 23, beginning 1 October 2007 - Solutions

This practical was done fairly well by all but a few groups, These parsers and scanners are not hard to write - but, alas, they are also easy to get wrong (putting getSym() calls in the wrong places!). One point that I noticed was that some people were driving their parsers from the back, so to speak. Given a construction like

A = { "start" Something } "follow" .

it is far better to produce a parser routine like

while (sym.kind == startSym) { getSym(); Something(); } Accept(followSym);

than one like

while (sym.kind != followSym) { getSym(); Something(); } Accept(followSym);

for the simple reason that there might be something wrong with Something.

Complete source code for solutions to the prac are available on the WWW pages in the file PRAC23A.ZIP or PRAC23AC.ZIP (C# version).

The scanner was reasonably done by most people. Comments are trickier than they look, so if you did not get that section right study my solution below carefully and see how they should be handled. The trick is to be able to handle comments that are never closed, comments that follow one another with no tokens in between them, and comments that look as they are about to finish, but then do not (like (* comment with wrong parenthesis * ) )). Did your scanner handle comments like (**) or (***) or (* **) and so on?

    static int literalKind(StringBuffer lex) {
      String s = lex.toString();
      if (s.equals("ARRAY"))    return arraySym;
      if (s.equals("END"))      return endSym;
      if (s.equals("OF"))       return ofSym;
      if (s.equals("POINTER"))  return pointerSym;
      if (s.equals("RECORD"))   return recordSym;
      if (s.equals("SET"))      return setSym;
      if (s.equals("TO"))       return toSym;
      if (s.equals("TYPE"))     return typeSym;
      if (s.equals("VAR"))      return varSym;
      return identSym;
    }

    static void getSym() {
    // Scans for next sym from input
      while (ch > EOF && ch <= ' ') getChar();
      StringBuffer symLex = new StringBuffer();
      int symKind = noSym;
      if (Character.isLetter(ch)) {
        do {
          symLex.append(ch); getChar();
        } while (Character.isLetter(ch));
        symKind = literalKind(symLex);
      }
      else if (Character.isDigit(ch)) {
        do {
          symLex.append(ch); getChar();
        } while (Character.isDigit(ch));
        symKind = numSym;
      }
      else {
        symLex.append(ch);
        switch (ch) {
          case EOF:
            symLex = new StringBuffer("EOF");    // special representation
            symKind = EOFSym; break;             // no need to getChar here, of course
          case ',':
            symKind = commaSym; getChar(); break;
          case ';':
            symKind = semicolonSym; getChar(); break;

          case ':':
            symKind = colonSym; getChar(); break;

          case '(':  // might be a comment
            getChar();
            if (ch == '*') {  // (* style comments *)
              getChar();
              char lastCh;
              do {
                lastCh = ch;
                getChar();
              } while (!(lastCh == '*' && ch == ')') && ch != EOF);
              if (ch == EOF)
                reportError("unclosed comment"); // sym will be EOFSym
              getChar(); getSym(); return;
            }
            else symKind = lparenSym; break;
          case ')':
            symKind = rparenSym; getChar(); break;
          case '[':
            symKind = lbrackSym; getChar(); break;
          case ']':
            symKind = rbrackSym; getChar(); break;
          case '=':
            symKind = equalSym; getChar(); break;
          case '.':
            getChar();
            if (ch == '.') {
              symLex.append(ch);
              symKind = dotdotSym; getChar();
            }
            else symKind = periodSym;
            break;
          default :
            symKind = noSym; getChar(); break;
        }
      }
      sym = new Token(symKind, symLex.toString());
    }

Here is most of a simple "sudden death" parser, devoid of error recovery:

    static SymSet
      FirstDeclaration = new SymSet(new int[] {typeSym, varSym}),
      EndDeclSyms      = new SymSet(new int[] {EOFSym, semicolonSym});

    static void Mod2Decl() {
    // Mod2Decl    = { Declaration } EOF .
      while (FirstDeclaration.contains(sym.kind))
        Declaration();
      accept(EOFSym, "EOF expected");
    }

    static void Declaration() {
    // Declaration =   "TYPE"  { TypeDecl  SYNC ";" }
    //               | "VAR"   { VarDecl   SYNC ";" } .
      switch(sym.kind) {
        case typeSym :
          getSym();
          while (sym.kind == identSym) {
            TypeDecl();
            accept(semicolonSym, "; expected");
          }
          break;
        case varSym :
          getSym();
          while (sym.kind == identSym) {
            VarDecl();
            accept(semicolonSym, "; expected");
          }
          break;
        default:
          abort("unrecognizable declaration - TYPE or VAR expected");
          break;
      }
    }

    static void TypeDecl() {
    // TypeDecl = identifier "=" Type .
      accept(identSym, "identifier expected");  // getSym() would suffice
      accept(equalSym, "= expected");
      Type();
    }

    static void VarDecl() {
    // VarDecl = IdentList ":" Type .
      IdentList();
      accept(colonSym, ": expected");
      Type();
    }

    static void Type() {
    // Type = SimpleType | ArrayType | RecordType | SetType | PointerType .
      switch (sym.kind) {
        case identSym:
        case lparenSym:
        case lbrackSym:
          SimpleType(); break;
        case arraySym:
          ArrayType(); break;
        case recordSym:
          RecordType(); break;
        case setSym:
          SetType(); break;
        case pointerSym:
          PointerType(); break;
        default:
          abort("invalid start to Type");
          break;
      }
    }

    static void SimpleType() {
    // SimpleType = QualIdent [ Subrange ] | Enumeration | Subrange .
      switch (sym.kind) {
        case identSym:
          QualIdent();
          if (sym.kind == lbrackSym) Subrange();
          break;
        case lparenSym:
          Enumeration();
          break;
        case lbrackSym:
          Subrange();
          break;
        default:
          abort("invalid start to SimpleType");
          break;
      }
    }

    static void QualIdent() {
    // QualIdent = identifier { "." identifier } .
      accept(identSym, "identifier expected");
      while (sym.kind == periodSym) {
        getSym();
        accept(identSym, "identifier expected");
      }
    }

    static void Subrange() {
    // Subrange = "[" Constant ".." Constant "]"  .
      accept(lbrackSym, "[ expected");
      Constant();
      accept(dotdotSym, "] expected");
      Constant();
      accept(rbrackSym, "] expected");
    }

    static void Enumeration() {
    // Enumeration = "(" IdentList ")" .
      accept(lparenSym, "( expected");  // getSym() would suffice
      IdentList();
      accept(rparenSym, ") expected");
    }

    static void Constant() {
    // Constant = number | identifier .
      switch (sym.kind) {
        case numSym   :
          getSym(); break;
        case identSym :
          getSym(); break;
        default :
          abort("Illegal constant"); break;
      }
    }

    static void IdentList() {
    // IdentList = identifier { "," identifier } .
      accept(identSym, "identifier expected");
      while (sym.kind == commaSym) {
        getSym();
        accept(identSym, "identifier expected");
      }
    }

    static void ArrayType() {
    // ArrayType = "ARRAY" SimpleType { "," SimpleType } "OF" Type.
      accept(arraySym, "ARRAY expected");  // getSym() would suffice
      SimpleType();
      while (sym.kind == commaSym) {
        getSym();
        SimpleType();
      }
      accept(ofSym, "OF expected");
      Type();
    }

    static void RecordType() {
    // RecordType = "RECORD" FieldLists "END" .
      accept(recordSym, "RECORD expected");  // getSym() would suffice
      FieldLists();
      accept(endSym, "END expected");
    }

    static void FieldLists() {
    // FieldLists = FieldList { ";" FieldList } .
      FieldList();
      while (sym.kind == semicolonSym) {
        getSym();
        FieldList();
      }
    }

    static void FieldList() {
    // FieldList = [ IdentList ":" Type ] .
      if (sym.kind == identSym) {
        IdentList();
        accept(colonSym, ": expected");
        Type();
      }
    }

    static void SetType() {
    // SetType = "SET" "OF" SimpleType .
      accept(setSym, "SET expected");  // getSym() would suffice
      accept(ofSym, "OF expected");
      SimpleType();
    }

    static void PointerType() {
    // PointerType = "POINTER" "TO" Type .
      accept(pointerSym, "POINTER expected");   // getSym() would suffice
      accept(toSym, "TO expected");
      Type();
    }

A very comon mistake is to overlook and condone (not detect) some errors. Compare Constant above with this (dangerous) version. If you can't see why I don't like the code below, you had better come to ask!

    static void Constant() {
    // Constant = number | identifier .
      if (sym.kind == numSym || sym.kind == identSym) getSym();
    }


Home  © P.D. Terry