Prac 11 Solution

Computer Science 3 - 2002

Programming Language Translation

Practical for Week 11, beginning 29 April 2002 - Solutions

Once again, there were some outstandingly good solutions handed in, but a few groups seemed completely at a loss here and there. Full sources for the solutions can be found in the file PRAC11A.ZIP for those who wish to experiment further.

For Task 3 the modifications needed to CHECK.FRM were simply to add an extra error message and #include directive:

  #include "check.h"

  #include -->ScanHeader
  #include -->ParserHeader

  char *MyError::GetUserErrorMsg(int n)
  { switch (n) {
      // Put your customized messages here
      case 1000:
        return "Invalid IP number";
      case 1001:
        return "Duplicate IP number";
      case 1002:
        return "Duplicate computer name";
      default:
        return "Unknown error or conflicting error numbers used";
    }
  }

A modification was needed to CHECK.H to add the declaration of a StringList type:

  #include "list.h"

  struct String {
    char str[1000];   // the string itself
    friend int operator == (myString x, myString y)
      { return strcmp(x.str, y.str) == 0; }
    friend int operator != (myString x, myString y)
      { return strcmp(x.str, y.str) != 0; }
  };

  typedef List<String> StringList;

The modifications needed to the grammar were as below:

  PRODUCTIONS
    Check
    =                             (. IPList L;
                                     StringList SL; .)
    { Entry<L, SL> }
    EOF                           (. if (Successful()) printf("\n all correct"); .) .
    .

    Entry<IPList &L, StringList &SL>
    = IPNumber<L> Name<SL> { Name<SL> } .

    Name<StringList &SL>
    =                             (. String S; .)
    name                          (. LexString(S.str, 1000);
                                     if (SL.isMember(S)) SemError(1002);
                                     else SL.add(S); .) .

Tasks 4 - 6 provided more of a challenge. The modifications needed to create CALC.FRM were to add suitable error messages and a #include directive:

  #include "calc.h"

  char *MyError::GetUserErrorMsg(int n)
  { switch (n) {
      // Put your customized messages here
      case 1000:
        return "Mismatched types";
      case 1001:
        return "Undefined variable";
      case 1002:
        return "Division by zero";
      case 1003:
        return "Uncomparable types";
      default:
        return "Unknown error or conflicting error numbers used";
    }
  }

Modifications were needed to CALC.H. I chose to add the declaration of various enumerations and structured types as shown:

  #include "list.h"

  enum TYPES { none, ints, bools };

  enum OPERATORS {
    opnop, opadd, opsub, opor, opmul, opdvd, opmod, opand, opeql, opneq, oplss,
    opgeq, opgtr, opleq
  };

  struct ENTRY {       // an entry in the "symbol table"
    char name[100];    // the variable name
    int value;         // its value
    TYPES type;        // its type
    friend int operator == (ENTRY x, ENTRY y)
      { return strcmp(x.name, y.name) == 0; }
    friend int operator != (ENTRY x, ENTRY y)
      { return strcmp(x.name, y.name) != 0; }
  };

  typedef List<ENTRY> EntryList;

Here is a complete grammar itself, heavily commented to make various points that may have been missed (these comments are not in the solution sources):

      COMPILER Calc $CNX
      // Simple calculator for Boolean or Integer results and user defined
      // variable names
      // P.D. Terry, Rhodes University,  2002

      #include "calc.h"

/* It suffices to declare the list of user defined "variables" local to the parser class,
   rather than pass it a parameter between almost every production */

      EntryList VarList; // the list of variables and associated values

      IGNORE CASE
      IGNORE CHR(1) .. CHR(31)

      CHARACTERS
        letter     = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" .
        digit      = "0123456789" .

      TOKENS
        identifier = letter { letter | digit } .
        number     = digit { digit } .

      PRODUCTIONS

        Calc
        = { Print | Assignment } "quit" .

        Print
        = "print"
            OneExpr
          { WEAK ","
            OneExpr
          } SYNC ";"                  (. printf("\n"); .)
          .

/* OneExpr is introduced as a simple way of cutting down on actions that might otherwise
   clutter up the Print production.  It becomes responsible for evaluating a single
   expression that is to be printed rather than stored.  Note that some care is taken to
   ensure that Boolean values are printed in a user friendly form.  Rather than take "no
   action" on the default we could print out some suitable warning, of course. */

        OneExpr
        =                             (. TYPES exptype;
                                         int value; .)
          Expression<exptype, value>  (. switch (exptype) {
                                           case ints :
                                             printf(" %d", value);
                                             break;
                                           case bools :
                                             if (value) printf(" true");
                                             else printf(" false");
                                             break;
                                           default: /* error - no action */
                                             break;
                                         }
                                      .) .

/* Note that in the Assignment production we do not meddle with the list of variables until
   the expression has been evaluated.  In particular we do not want to enter the incomplete
   entry into the table after parsing the Variable to find its name. In this way we avoid
   treating an initial definition of, say ABC = ABC + 1, as being correct when it is not (as
   the ABC on the right side has no defined value yet).  Many people missed this. */

        Assignment
        =                             (. ENTRY e; .)
          Variable<e.name>
          "="
          Expression<e.type, e.value> (. int i = VarList.position(e);
                                         if (i == -1) { // first definition
                                           VarList.add(e);
                                         }
                                         else {         // updated definition
                                           if (e.type != VarList[i].type) SemError(1000);
                                           VarList[i].value = e.value;
                                         }
                                      .)
          SYNC ";" .

/* The production for Variable does nothing more than extract the name for us */

        Variable<char * name>
        = Identifier<name> .

/* I chose to give Expression two parameters - one for the type to be returned, the other to
   yield the value itself.  It is, of course, equally viable to use a single parameter of
   the ENTRY type, as this encapsulates both type and value in one "package".  Several
   people did this.

   Note the form of the type check - several people omitted it or had erroneous variations
   like if (t != bools && t2 != bools) ... Setting the return type to none in the case of an
   error can have beneficial effects later on */

        Expression<TYPES &t, int &value>
        =                             (. TYPES t2;
                                         int val2; .)
          AndExp<t, value>
          { "||"
            AndExp<t2, val2>          (. if (t != bools || t2 != bools)
                                           { SemError(1000); t = none; }
                                         value = value || val2; .)
          } .

/* The rest of the "binary" operations follow in much the same way */

        AndExp<TYPES &t, int &value>
        =                             (. TYPES t2;
                                         int val2; .)
          OrExp<t, value>
          { "&&"
            OrExp<t2, val2>           (. if (t != bools || t2 != bools)
                                           { SemError(1000); t = none; }
                                         value = value && val2; .)
          } .

/* But the OrExp production needs further comment.  Note that the presence of a RelOp
   between two integer operands yields a type "change" to Boolean - some people did not
   appreciate this.  Strictly we should argue that only the == and != operators have meaning
   between two operands of Boolean type.  This yields a rather complex set of switch
   statements, as you can see. */

        OrExp<TYPES &t, int &value>
        =                             (. TYPES t2;
                                         int val2;
                                         OPERATORS op; .)
          AddExp<t, value>
          [ RelOp<op>
            AddExp<t2, val2>          (. if (t != t2) SemError(1003);
                                         switch(op) {
                                           case opeql :
                                             value = value == val2; break;
                                           case opneq :
                                             value = value != val2; break;
                                           default :
                                             if (t == bools || t2 == bools) SemError(1003);
                                             switch (op) {
                                               case oplss :
                                                 value = value <  val2; break;
                                               case opleq :
                                                 value = value <= val2; break;
                                               case opgtr :
                                                 value = value >  val2; break;
                                               case opgeq :
                                                 value = value >= val2; break;
                                               default : break;
                                             }
                                         }
                                         t = bools;
                                      .)
          ] .

/* The AddExp parser is easy and needs no further comment */

        AddExp<TYPES &t, int &value>
        =                             (. TYPES t2;
                                         int val2;
                                         OPERATORS op; .)
          MultExp<t, value>
          { AddOp<op>
            MultExp<t2, val2>         (. if (t != ints || t2 != ints)
                                           { SemError(1000); t = none; }
                                         else switch (op) {
                                           case opadd :
                                             value += val2; break;
                                           case opsub :
                                             value -= val2; break;
                                           default : break;
                                         } .)
          } .

/* The MultExp parser needs to check for division by zero for both the / and the % operator
   - many people checked only the obvious division one! */

        MultExp<TYPES &t, int &value>
        =                             (. TYPES t2;
                                         int val2;
                                         OPERATORS op; .)
          UnaryExp<t, value>
          { MulOp<op>
            UnaryExp<t2, val2>        (. if (t != ints || t2 != ints)
                                           { SemError(1000); t = none; }
                                         else switch (op) {
                                           case opmul :
                                             value *= val2; break;
                                           case opmod :
                                             if (val2 != 0) value %= val2; else SemError(1002);
                                             break;
                                           case opdvd :
                                             if (val2 != 0) value /= val2; else SemError(1002);
                                             break;
                                           default : break;
                                         } .)
          } .

/* The unary operators need careful type checking */

        UnaryExp<TYPES &t, int &value>
        =   Factor<t, value>
          | "+" UnaryExp<t, value>    (. if (t != ints) {
                                           SemError(1000); t = none; } .)
          | "-" UnaryExp<t, value>    (. if (t != ints) {
                                           SemError(1000); t = none; }
                                           value = - value; .)
          | "!" UnaryExp<t, value>    (. if (t != bools) {
                                           SemError(1000); t = none; }
                                           value = ! value; .)
        .

/* The Factor parser is the one ultimately responsible for starting to pass value and type
   information "up the tree".  Note that we regard an undeclared identifier at this stage to
   be an error - but still assign a value and type to the arguments to make sure that
   something "sensible" propagates up from here.  Several people added to the table if an
   undeclared identifier was encountered, but this may not be a wise thing to do.  However,
   if it is done, care should be taken to set the fields in the entry, as hinted here. */

        Factor<TYPES &t, int &value>
        =                             (. ENTRY e;
                                         int i; .)
            Identifier<e.name>        (. i = VarList.position(e);
                                         if (i == -1) {
                                           SemError(1001);
                                           value = 1; t = none;
                                           /* maybe
                                              e.value = 1; e.type = none;
                                              VarList.Add(e)
                                           */
                                         } else {
                                           value = VarList[i].value;
                                           t = VarList[i].type;
                                         }
                                      .)
          | Number<value>             (. t = ints; .)
          | "true"                    (. t = bools; value = 1; .)
          | "false"                   (. t = bools; value = 0; .)
          | "(" Expression<t, value> ")" .

/* It is convenient for readability to define the operators by their own non-terminals and
   for them simply to return an appropriate value from an enumeration type.  Note the use of
   a default opnop to mop up errors that might otherwise creep in from undefined values */

        AddOp<OPERATORS &op>
        =                             (. op = opnop; .)
          (   "+"                     (. op = opadd; .)
            | "-"                     (. op = opsub; .)
          ) .

        MulOp<OPERATORS &op>
        =                             (. op = opnop; .)
          (   "*"                     (. op = opmul; .)
            | "%"                     (. op = opmod; .)
            | "/"                     (. op = opdvd; .)
          ) .

        RelOp<OPERATORS &op>
        =                             (. op = opnop; .)
          (   "=="                    (. op = opeql; .)
            | "!="                    (. op = opneq; .)
            | "<"                     (. op = oplss; .)
            | "<="                    (. op = opleq; .)
            | ">"                     (. op = opgtr; .)
            | ">="                    (. op = opgeq; .)
          ) .

/* Note that we have used LexName here as we are ignoring case in our identifiers.  Note
   also that we have used 100 (the size of the name of a variable) directly.  Why would it
   not work if we used LexName(name, sizeof(name); in this context? */

        Identifier<char *name>
        = identifier                  (. LexName(name, 100); .) .

        Number <int &num>
        = number                      (. char str[100];
                                         LexString(str, sizeof(str) - 1);
                                         num = atoi(str); .) .

      END Calc.