Computer Science 3 - 2004

Programming Language Translation


Practical for Week 24, beginning 11 October 2004 - Solutions

Once again, there were some outstandingly good solutions handed in, but a few groups seemed completely at a loss here and there. Full sources for the solutions can be found in the file PRAC24A.ZIP or PRAC24AC.ZIP for those who wish to experiment further.

The modifications needed to the grammar for Task 3 were as below. Using parameters:

  PRODUCTIONS
    Check3                        (. ArrayList
                                       IPList = new ArrayList(),
                                       NameList = new ArrayList(); .)
    =
    { Entry<IPList, NameList> }
    EOF                           (. if (Successful()) IO.writeLine("all correct"); .) .

    Entry<ArrayList IPL, ArrayList NL>
    = IPNumber<IPL> Name<NL> { Name<NL> } .

    Name<ArrayList NL>
    = name                        (. String s = token.val;
                                     if (NL.contains(s)) SemError("duplicate computer name");
                                     else NL.add(s); .) .

Alternatively we could dispense with parameter passing:

  static ArrayList
    IPList = new ArrayList(),
    NameList = new ArrayList();

  ...

  PRODUCTIONS
    Check4
    = { Entry } EOF               (. if (Successful()) IO.writeLine("all correct"); .) .

    Entry
    = IPNumber Name { Name } .

    Name
    = name                       (. String s = token.val;
                                     if (NameList.contains(s)) SemError("duplicate computer name");
                                     else NameList.add(s); .) .

Tasks 4 - 6 provided more of a challenge. There were some excellent submissions, but a great many that showed that not nearly enough effort had been expended on the exercise. Pity!

Here is a complete grammar itself, heavily commented to make various points that may have been missed (these comments are not in the solution sources):

      import Library.*;
      import java.util.*;

      COMPILER Calc3 $NC
      // Integer/Boolean calculator
      // P.D. Terry, Rhodes University, 2004

      static int toInt(boolean b) {
      // return 0 or 1 according as b is false or true
        return b ? 1 : 0;
      }

      static boolean toBool(int i) {
      // return false or true according as i is 0 or 1
        return i == 0 ? false : true;
      }

/* We introduce a wrapper class to allow the various "nodes" in an expression to pass back both
   the value and type of an operand.  Note how the default constructor assigns a value of 1 -
   this will prevent problems if one tries to divide by an undefined variable */

      static class ValNode {
        public int type = noType;
        public int value = 1;

        public ValNode() {                                   // constructor
          this.type = noType;
          this.value = 1;
        }

        public ValNode(int type, int value) {               // constructor
          this.type = type;
          this.value = value;
        }

      } // ValNode

/* We introduce a wrapper class for recording the name and associated value of a variable
   entry in the symbol table */

      static class Entry {
        public String name;
        public ValNode value;                               // other fields could be added

        public Entry(String name, ValNode value) {          // constructor
          this.name = name;
          this.value = value;
        }

      } // Entry

/* It suffices to declare the list of user defined "variables" local to the parser class,
   rather than pass it a parameter between almost every production */

      static ArrayList symTable = new ArrayList();          // global for simplicity here!

/* A simple look up method for locating the position of an entry in the variable table */

      public static int position(String name) {
      // Finds position of entry with search key name in list, or -1 if not found
        int i = symTable.size() - 1;                        // index of last entry
        while (i >= 0 &&                                    // short-circuit protection
               !name.equals(((Entry)symTable.get(i)).name)) // must cast before extracting field
          i--;                                              // will reach -1 if no match
        return i;
      }

/* It makes for enhanced readability to assign names to the maagic numbers useful for mapping
   the operators and types */

      static final int // enumerations
        noType   =  0,
        intType  =  1,
        boolType =  2,

        opnop    =  1,
        opadd    =  2,
        opsub    =  3,
        opor     =  4,
        opmul    =  5,
        opdvd    =  6,
        opmod    =  7,
        opand    =  8,
        opeql    =  9,
        opneq    = 10,
        oplss    = 11,
        opgeq    = 12,
        opgtr    = 13,
        opleq    = 14;

      CHARACTERS
        digit      = "0123456789" .
        letter     = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" .

      TOKENS
        number     = digit { digit } .
        identifier = letter { letter | digit } .

      IGNORE CHR(0) .. CHR(31)

      PRODUCTIONS
        Calc3
        = {   Print
            | Assignment
          } "quit" .

        Print
        = "print"
          OneExpr { WEAK "," OneExpr }
          SYNC ";"                     (. IO.writeLine(); .) .

/* OneExpr is introduced as a simple way of cutting down on actions that might otherwise
   clutter up the Print production.  It becomes responsible for evaluating a single
   expression that is to be printed rather than stored.  Note that some care is taken to
   ensure that Boolean values are printed in a user friendly form.  */

        OneExpr                        (. ValNode expValue; .)
        = Expression<out expValue>     (. switch(expValue.type) {
                                            case intType:
                                              IO.write(expValue.value); break;
                                            case boolType:
                                              IO.write(toBool(expValue.value)); break;
                                            default:
                                              IO.write(" bad value"); break;
                                          } .) .

/* Note that in the Assignment production we do not meddle with the list of variables until
   the expression has been evaluated.  In particular we do not want to enter the incomplete
   entry into the table after parsing the Variable to find its name.  In this way we avoid
   treating an initial definition of, say ABC = ABC + 1, as being correct when it is not (as
   the ABC on the right side has no defined value yet).  Some people missed this. */

        Assignment                     (. ValNode value;
                                          String name; .)
        = Variable<out name>
          "=" Expression<out value>    (. int pos = position(name);
                                          if (pos == -1)  // first definition
                                            symTable.add(new Entry(name, value));
                                          else // simply update an existing entry
                                            ((Entry)symTable.get(pos)).value = value; .)
        SYNC ";" .

/* Note the form of the type check - several people omitted it or had erroneous variations
   on the lines of    if (value.type != boolType && val2.type != boolType) ...
   Note how the toInt and toBool methods are used to provide "type casting" of a sort
   between integer and Boolean values.  Notice how the return type is set to the "best guess"
   (even in the case of an error) - this can have beneficial effects later on */

        Expression<out ValNode value>  (. ValNode val2; .)
        = AndExp<out value>
          { "||"
            AndExp<out val2>           (. if (value.type != boolType || val2.type != boolType)
                                            SemError("boolean operands required");
                                          value.type = boolType;
                                          value.value = toInt(toBool(value.value) || toBool(val2.value)); .)
          } .

/* The rest of the "binary" operations follow in much the same way */

        AndExp<out ValNode value>      (. ValNode val2; .)
        = EqlExp<out value>
          { "&&"
            EqlExp<out val2>           (. if (value.type != boolType || val2.type != boolType)
                                            SemError("boolean operands required");
                                          value.type = boolType;
                                          value.value = toInt(toBool(value.value) && toBool(val2.value)); .)
          } .

/* But the EqlExp production needs further comment.  Note that the presence of an EqlOp
   between two integer operands yields a type "change" to Boolean - some people did not
   appreciate this.  The == and != operators have meaning only between two operands of the
   same type.  */

        EqlExp<out ValNode value>      (. ValNode val2;
                                          int op; .)
        = RelExp<out value>
          { EqlOp<out op>
            RelExp<out val2>           (. if (value.type != val2.type)
                                            SemError("incomparable operands ");
                                          value.type = boolType;
                                          switch(op) {
                                            case opeql:
                                              value.value = toInt(value.value == val2.value); break;
                                            case opneq:
                                              value.value = toInt(value.value != val2.value); break;
                                            default:
                                              break;
                                          } .)
          } .

/* The RelExp production needs further comment.  Note that the presence of an RelOp
   between two integer operands yields a type "change" to Boolean - some people did not
   appreciate this.  The < <= > and >= operators have meaning only between two operands of
   the integer type.  */

        RelExp<out ValNode value>      (. ValNode val2;
                                          int op; .)
        = AddExp<out value>
          [ RelOp<out op>
            AddExp<out val2>           (. if (value.type != intType || val2.type != intType)
                                            SemError("integer operands required");
                                          value.type = boolType;
                                          switch(op) {
                                            case oplss:
                                              value.value = toInt(value.value < val2.value); break;
                                            case opleq:
                                              value.value = toInt(value.value <= val2.value); break;
                                            case opgtr:
                                              value.value = toInt(value.value > val2.value); break;
                                            case opgeq:
                                              value.value = toInt(value.value >= val2.value); break;
                                            default:
                                              break;
                                          } .)
          ] .

/* The AddExp parser is easy and needs no further comment */

        AddExp<out ValNode value>      (. ValNode val2;
                                          int op; .)
        = MultExp<out value>
          { AddOp<out op>
            MultExp<out val2>          (. if (value.type != intType || val2.type != intType)
                                            SemError("integer operands required");
                                          value.type = intType;
                                          switch(op) {
                                            case opadd:
                                              value.value = value.value + val2.value; break;
                                            case opsub:
                                              value.value = value.value - val2.value; break;
                                            default:
                                              break;
                                          } .)
          } .

/* The MultExp parser needs to check for division by zero for both the / and the % operator
   - most people checked only the obvious one involving / */

        MultExp<out ValNode value>     (. ValNode val2;
                                          int op; .)
        = UnaryExp<out value>
          { MulOp<out op>
            UnaryExp<out val2>         (. if (value.type != intType || val2.type != intType)
                                            SemError("integer operands required");
                                          value.type = intType;
                                          switch(op) {
                                            case opmul:
                                              value.value = value.value * val2.value; break;
                                            case opdvd:
                                              if (val2.value == 0) SemError("division by zero");
                                              else value.value = value.value / val2.value; break;
                                            case opmod:
                                              if (val2.value == 0) SemError("division by zero");
                                              else value.value = value.value % val2.value; break;
                                            default:
                                              break;
                                          } .)
          } .

/* The unary operators need careful type checking */

        UnaryExp<out ValNode value>    (. value = new ValNode(); .)
        =   Factor<out value>
          | "+" UnaryExp<out value>    (. if (value.type != intType)
                                            SemError("integer operand required");
                                          value.type = intType; .)
          | "-" UnaryExp<out value>    (. if (value.type != intType)
                                            SemError("integer operand required");
                                          value.type = intType;
                                          value.value = -value.value; .)
          | "!" UnaryExp<out value>    (. if (value.type != boolType)
                                            SemError("boolean operand required");
                                          value.type = intType;
                                          value.value = toInt(!toBool(value.value)); .) .

/* The Factor parser is the one ultimately responsible for starting to pass value and type
   information "up the tree".  Note that we regard an undeclared identifier at this stage to be
   an error - but we will still have assigned a value and type to the return argument to make
   sure that something "sensible" propagates up from here (using the default ValNode
   constructor).  Several people added to the table if an undeclared identifier was
   encountered, but this may not be a wise thing to do. */

        Factor<out ValNode value>      (. String name;
                                          int n;
                                          value = new ValNode(); .)
        =   Variable<out name>         (. int pos = position(name);
                                          if (pos == -1) {
                                            SemError("undeclared variable");
                                            // maybe
                                            // symTable.add(new Entry(name, value));
                                          }
                                          else {
                                            Entry e = (Entry)symTable.get(pos);
                                            // why can we NOT simple assign value = e.value?
                                            value = new ValNode(e.value.type, e.value.value);
                                          } .)
          | Number<out n>              (. value = new ValNode(intType, n); .)
          | "true"                     (. value = new ValNode(boolType, 1); .)
          | "false"                    (. value = new ValNode(boolType, 0); .)
          | "(" Expression<out value> ")" .

/* The production for Variable does nothing more than extract the name for us */

        Variable<out String name>
        = identifier                   (. name = token.val; .) .

        Number<out int n>
        =  number                     (. try {
                                           n = Integer.parseInt(token.val);
                                         } catch (NumberFormatException e) {
                                           n = 0; SemError("number too large");
                                         } .) .

/* It is convenient for readability to define the operators by their own non-terminals and for
   them simply to return an appropriate value from an enumeration type.  Note the use of a
   default opnop to mop up errors that might otherwise creep in from undefined values */

        MulOp<out int op>             (. op = opnop; .)
        =   "*"                       (. op = opmul; .)
          | "%"                       (. op = opmod; .)
          | "/"                       (. op = opdvd; .) .

        AddOp<out int op>             (. op = opnop; .)
        =   "+"                       (. op = opadd; .)
          | "-"                       (. op = opsub; .) .

        RelOp<out int op>             (. op = opnop; .)
        =   "<"                       (. op = oplss; .)
          | "<="                      (. op = opleq; .)
          | ">"                       (. op = opgtr; .)
          | ">="                      (. op = opgeq; .) .

        EqlOp<out int op>             (. op = opnop; .)
        =   "=="                      (. op = opeql; .)
          | "!="                      (. op = opneq; .) .

      END Calc3.


Home  © P.D. Terry