Computer Science 3 - 2001

Programming Language Translation


Practical for Week 19, beginning 27 August 2001 - Solutions

Complete solutions (sources) can be found on the course WWW pages in the file PRAC19A.ZIP.

The grammar for the train caused few problems. A few submissions used strings instead of the simpler enumeration. Here is my solution:

  COMPILER Train $CNX
  /* Grammar for railway trains with simple safety regulations
     P.D. Terry, Rhodes University, 2001 */

  #include "misc.h"

  enum KINDS {SafeTruck, FuelTruck, Loco};
  bool Safe;
  int Trucks, Locos, Coaches;
  KINDS LastKind;

  IGNORE CASE
  IGNORE CHR(9) .. CHR(13)
  COMMENTS FROM "(*" TO "*)" NESTED

  PRODUCTIONS
    Train = { OneTrain } EOF .

    OneTrain
    =                                 (. Trucks = Locos = Coaches = 0;
                                         Safe = TRUE; .)
      LocoPart GoodsPart HumanPart    (. printf("%d locos %d trucks %d coaches -", Locos, Trucks, Coaches);
                                         printf(" safety regulations ");
                                         if (Safe) printf("obeyed\n");
                                         else printf("contravened\n"); .)
      SYNC "." .

    LocoPart
    = "loco"                          (. Locos++; LastKind = Loco; .)
      { "loco"                        (. Locos++; .)
      } .

    GoodsPart = Truck { Truck } .

    HumanPart
    = "guard"                         (. Trucks++; .)
      |                               (. if (LastKind == FuelTruck) {
                                           Safe = FALSE;
                                           SemError(1001);
                                         } .)
        { "coach"                     (. Coaches++; .)
        } "brake"                     (. Coaches++; .) .

    Truck
    =                                 (. Trucks++ .)
       ( "coal" | "open" | "cattle" ) (. LastKind = SafeTruck; .)
     | ( "fuel" | "petrol" )          (. if (LastKind == Loco) {
                                           Safe = FALSE;
                                           SemError(1000);
                                         }
                                         LastKind = FuelTruck; .) .
  END Train.

A feature of solutions submitted this week that I found odd was a tendency towards code like

     int a = 0, b = 0, c = 0;
     /* a whole lot of intervening junk that does not affect a, b, c */
     while (somecondition) {
       ManipulateAfresh(a, b, c);
       a = 0; b = 0; c = 0;
     }

which I would have thought was much better expressed thus:

     while (somecondition) {
       int a = 0, b = 0, c = 0;
       ManipulateAfresh(a, b, c);
     }

The expression parsers could have been a lot better. Here is a simple solution, incorporating a simple memory and some error checking. Most people did not get the "power" operation correct. x0 produces the value 1, not x, and x to a negative power cannot be evaluated by repeated multiplication, so must be guarded against. Note that the basic grammar has been altered slightly to allow for blank lines in the input.

  COMPILER ExpLst $XNC

  #include "misc.h"

  int mem[26]; // 26 cell memory, named 'A' .. 'Z'

  IGNORE CASE
  IGNORE CHR(1) .. CHR(12)

  CHARACTERS
    digit      = "0123456789" .
    letter     = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" .
    eol        = CHR(13) .

  TOKENS
    number     = digit { digit } .
    identifier = letter .
    EOL        = eol .

  PRODUCTIONS
    ExpLst
    =                                   (. int Value; char str[100];
                                           for (int i = 0; i < 36; i++)
                                             mem[i] = 0;
                                        .)
      { [ identifier                    (. LexString(str, sizeof(str) - 1); .)
          "=" Expression<Value>         (. printf("%d\n", Value);
                                           mem[toupper(str[0]) - 'A'] = Value; .)
        ] SYNC EOL
      } EOF .

    Expression<int &expValue>
    =                                   (. int tVal; .)
       Term<expValue>
       {   "+" Term<tVal>               (. expValue += tVal; .)
         | "-" Term<tVal>               (. expValue -= tVal; .)
       } .

    Term<int &termValue>
    =                                   (. int fVal; .)
       Factor<termValue>
       {   "*" Factor<fVal>             (. termValue *= fVal; .)
         | "/" Factor<fVal>             (. if (fVal != 0) termValue /= fVal;
                                            else SemError(1000); .)
       } .

    Factor<int &factValue>
    =                                   (. int exponentValue, i, power; .)
       Primary<factValue>
       [ "^" Factor<exponentValue>      (. power = 1;
                                           if (exponentValue > 0)
                                             for (i = 1; i <= exponentValue; i++)
                                               power *= factValue;
                                           else SemError(1001);
                                           factValue = power; .)
       ] .


    Primary<int &primaryValue>
    =                                   (. char str[100] .)
        identifier                      (. LexString(str, sizeof(str) - 1);
                                           primaryValue = mem[toupper(str[0]) - 'A']; .)
      | number                          (. LexString(str, sizeof(str) - 1);
                                           primaryValue = atoi(str); .)
      | "(" Expression<primaryValue> ")"
    .

  END ExpLst.

Some submissions tried to use the pow function from math.h to do the power operation. Now this function is unpredictable for xy if x <= 0, and is strictly defined for real (float, double) parameters not integers. Not as clever an idea as it might have seemed, without further error checking.

At first it might appear that all this is not a problem, since the productions

          Factor  = Primary [ "^" Factor ] .
          Primary = number | "(" Expression")" .

do not seem to allow a factor to be raised to an element that begins with a minus sign. However, the value of a bracketed expression might be negative, as for example:

                3 ^ (4 - 123)

Notwithstanding all this, here is how one might have used pow:

    Factor<int &factValue>
    =                                   (. int exponentValue, i, power; .)
       Primary<factValue>
       [ "^" Factor<exponentValue>      (. float a = factValue, b = exponentValue;
                                           if (a != 0.0) a = pow(a, b);
                                           else SemError(1001);
                                           factValue = a; .)
       ] .

Finally, some people incorporated checks that memory cells were "defined" before their values were used in subsequent expressions. Not all got it correct. Here is one possibility:

  int mem[26];       // 26 cell memory, named 'A' .. 'Z'
  bool defined[26];  // state of memory

  PRODUCTIONS
    ExpLst
    =                                   (. int Value; char str[100];
                                           for (int i = 0; i < 36; i++) {
                                             mem[i] = 0; defined[i] = false;
                                           }
                                        .)
      { [ identifier                    (. LexString(str, sizeof(str) - 1); .)
          "=" Expression<Value>         (. printf("%d\n", Value);
                                           mem[toupper(str[0]) - 'A'] = Value;
                                           defined[toupper(str[0]) - 'A'] = true; .)
        ] SYNC EOL
      } EOF .

...

    Primary<int &primaryValue>
    =                                   (. char str[100] .)
        identifier                      (. LexString(str, sizeof(str) - 1);
                                           primaryValue = mem[toupper(str[0]) - 'A'];
                                           if (!defined[toupper(str[0]) - 'A'])
                                             SemError(1002); .)
      | number                          (. LexString(str, sizeof(str) - 1);
                                           primaryValue = atoi(str); .)
      | "(" Expression<primaryValue> ")"
    .

The CaseStatement parsing was fairly well done. There were several subtleties that some people missed. It is important to introduce a new LabelList for each case statement, as they may be nested - one cannot simply use one global list. Secondly, the list belongs to the CaseStatement parser, not to the Range parser, and must be passed down as a parameter. Thirdly, many people missed the significance of the optional negative sign in IntConst (or had the most unbelievably complicated way of handling it!).

    CaseStatement
    =                                       (. LabelList L; .)
      "CASE" Expression "OF"
      OneCase<L> { "|" OneCase<L> }
      [ "DEFAULT" ":" StatementSequence ]
      "END" .

    OneCase<LabelList &L>
    = [ Range<L> { "," Range<L> }
        ":" StatementSequence ] .

    Range<LabelList &L>
    =                                       (. int low; .)
      IntConst<low>                         (. int high = low; .)
      [ ".." IntConst<high> ]               (. if (high < low) SemError(1000);
                                               else for (int x = low; x <= high; x++) {
                                                 if (L.isMember(x)) SemError(1001);
                                                 else L.add(x);
                                               }
                                            .) .
    IntConst<int &i>
    =                                       (. int sign = 1;
                                               char str[100]; .)
    [ "+" | "-"                             (. sign = -1 .)
    ] number                                (. LexString(str, 100);
                                               i = sign * atoi(str);
                                            .) .

The solution above treats ranges exemplified by 10 .. 1 (ie running "backwards") as erroneous. It is perfectly easy to permit them, simply by coding the Range actions as

    Range<LabelList &L>
    =                                       (. int low; .)
      IntConst<low>                         (. int high = low; .)
      [ ".." IntConst<high> ]               (. if (high > low)
                                                 for (int x = high; x >= low; x--) {
                                                   if (L.isMember(x)) SemError(1001);
                                                   else L.add(x);
                                                 }
                                                 else for (int x = low; x <= high; x++) {
                                                   if (L.isMember(x)) SemError(1001);
                                                   else L.add(x);
                                                 }
                                            .) .


Home  © P.D. Terry