Prac 24 Solution

Computer Science 3 - 2001

Programming Language Translation

Practical for Week 24, beginning 6 October 2003 - solutions

As usual, the sources of full solutions for these problems may be found on the course web page as the file PRAC24A.ZIP.

Task 3 - The extended calculator.

Most people had little difficulty with this, and used two "parallel" arrays, one of double values and one of boolean values, to solve the problem of not using variables before they had been given values. An alternative solution just to show how it might be done using an array of objects representing the encapsulated properties of each variable would be as follows:

  import Library.*;
  import java.util.*;

  COMPILER Calc  $CN
  /* Simple four function calculator with 26 memory cells - extended
     P.D. Terry, Rhodes University, 2003 */

    static class MemCell {
      public double value = 0.0;
      public boolean defined = false;
    }

    static MemCell[] mem = new MemCell[26];

  CHARACTERS
    digit      = "0123456789" .
    letter     = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" .

  IGNORE CHR(1) .. CHR(31)

  TOKENS
    Number     = digit { digit } [ "." { digit } ] .
    Variable   = letter .

  PRODUCTIONS
    Calc                               (. int index = 0; double value = 0.0;
                                          for (int i = 0; i < 26; i++) mem[i] = new MemCell(); .)
    = { (
            Variable                   (. index = token.str.charAt(0) - 'A'; .)
            "=" Expression<^value>     (. mem[index].value = value;
                                          mem[index].defined = true; .)
          |
            "print" Expression<^value> (. IO.writeLine(value); .)
         ) SYNC ";"
      } EOF .

    Expression<^double expVal>         (. double expVal1 = 0.0; .)
    = Term<^expVal>
      {   "+" Term<^expVal1>           (. expVal += expVal1; .)
        | "-" Term<^expVal1>           (. expVal -= expVal1; .)
      } .

    Term<^double termVal>              (. double termVal1 = 0.0; .)
    = Factor<^termVal>
      {   "*" Factor<^termVal1>        (. termVal *= termVal1; .)
        | "/" Factor<^termVal1>        (. if (termVal1 == 0.0) SemError("division by zero");
                                          else termVal /= termVal1; .)
      } .

    Factor<^double factVal>            (. factVal = 0.0; .)
    =   Number                         (. try {
                                            factVal = Float.parseFloat(token.str);
                                          } catch (NumberFormatException e) {
                                            factVal = 0; SemError("number out of range");
                                          } .)
      | Variable                       (. int index = token.str.charAt(0) - 'A';
                                          if (!mem[index].defined) SemError("undefined value");
                                          else factVal = mem[index].value; .)
      | "(" Expression<^factVal>
        ")" .

  END Calc.

This has altered the grammar to demand that a semicolon follow each statement so that it can be used as a synchronization point.

Task 4 - Playing with trains again

Some dreadfully complicated solutions were submitted. Try always to find an elegant solution. Here is one, using a single static Boolean field to handle the safety problem:

  import Library.*;

  COMPILER Trains $CN
  /* Grammar for railway trains with simple safety regulations
     P.D. Terry, Rhodes University, 2003 */

    static int trucks, locos, coaches;
    static boolean danger;

  IGNORE CASE
  IGNORE CHR(9) .. CHR(13)
  COMMENTS FROM "(*" TO "*)" NESTED

  PRODUCTIONS
    Trains = { OneTrain } EOF .

    OneTrain
    =                                    (. trucks = locos = coaches = 0;
                                            danger = false; .)
      LocoPart [ GoodsPart ] HumanPart
      SYNC "."                           (. IO.writeLine(locos + " locos " + trucks + " trucks "
                                             + coaches + " coaches"); .)
      .

    LocoPart
    = "loco"                             (. locos++; .)
      { "loco"                           (. locos++; .)
      } .

    GoodsPart
    = Truck                              (. if (danger) SemError("fuel truck may not follow loco"); .)
      { Truck } .

    HumanPart
    = "guard"                            (. trucks++; .)
      |                                  (. if (danger) SemError("fuel truck may not precede coach"); .)
        { "coach"                        (. coaches++; .)
        } "brake"                        (. coaches++; .) .

    Truck
    =                                    (. trucks++; .)
     (   ( "coal" | "open" | "cattle" )  (. danger = false; .)
       | "fuel"                          (. danger = true; .)
     ) .

  END Trains.

Several people were guided into using a set of state variables remembering the last kind of rolling stock parsed. Here is a solution on those lines, which also shows how we could use the Warning method to include the summary of the rolling stock in the listing file, together with a final comment on safety:

  import Library.*;

  COMPILER Trains $CN
  /* Grammar for railway trains with simple safety regulations
     P.D. Terry, Rhodes University, 2003 */

    static final int  // not that these as constants!
      safeTruck = 1,
      fuelTruck = 2,
      humans    = 3,
      loco      = 3;

    static boolean safe;
    static int trucks, locos, coaches, lastKind;

  IGNORE CASE
  IGNORE CHR(9) .. CHR(13)
  COMMENTS FROM "(*" TO "*)" NESTED

  PRODUCTIONS
    Trains = { OneTrain } EOF .

    OneTrain
    =                                    (. trucks = locos = coaches = 0;
                                            safe = true; .)
      LocoPart [ GoodsPart ] HumanPart
      SYNC "."                           (. IO.write(locos + " locos " + trucks + " trucks "
                                             + coaches + " coaches");
                                            Warning(locos + " locos " + trucks + " trucks "
                                             + coaches + " coaches");
                                            IO.write(" - safety regulations ");
                                            if (safe) IO.writeLine("obeyed");
                                            else IO.writeLine("contravened"); .)
      .

    LocoPart
    = "loco"                             (. locos++; lastKind = loco; .)
      { "loco"                           (. locos++; .)
      } .

    GoodsPart = Truck { Truck } .

    HumanPart
    = "guard"                            (. trucks++; .)
      |                                  (. if (lastKind == fuelTruck) {
                                              safe = false;
                                              SemError("fuel truck may not precede coach");
                                            }
                                            lastKind = humans; .)
        { "coach"                        (. coaches++; .)
        } "brake"                        (. coaches++; .) .

    Truck
    =                                    (. trucks++; .)
     (   ( "coal" | "open" | "cattle" )  (. lastKind = safeTruck; .)
       | "fuel"                          (. if (lastKind == loco) {
                                              safe = false;
                                              SemError("fuel truck may not follow loco");
                                            }
                                            lastKind = fuelTruck; .)
     ) .

  END Trains.

Finally, several people applied tests like

    GoodsPart                           (. if (t.kind == SYM.FUELSym)
                                             SemError("fuel truck may not follow loco"); .)
    = Truck { Truck } .

This "works", but I would not recommend it as a strategy. It relies on having used the $N directive (which is not mandatory) and on knowing how Coco/R generates symbolic names for tokens, and is thus a little opaque and dangerous.

Task 5 - The extended assembler

It will suffice merely to detail the alterations to the Assem.ATG file here. The full solution is in the solution kit if you need it.

We need a list of variable names, conveniently constructed as an ArrayList of String objects, and a way to add to and search this list. If we run the search function "from the bottom" it all fits together very easily - the location in which we find the name (or into which we deposit a new name) gives us exactly the offset we need with almost no effort:

  COMPILER Assem $NC
  /* Simple assembler for the PVM - Java version
     P.D. Terry, Rhodes University, 2003 */

    static ArrayList list = new ArrayList();

    public static int offset(String name) {
    // Enters name of variable into list if not already there, retrieves allocated offset
      int i = 0;
      while (i < list.size() && !name.equals((String)list.get(i))) i++;
      if (i >= list.size()) list.add(name);
      return i;
    }

We also need a way of replacing the escape characters in a string by their internal "magic" (ie ASCII) numbers. There were some strange ideas on how to do this, and some exceedingly complicated ones too. It is probably best handled with a simple loop, copying characters or special values into a string buffer one-by-one, as follows:

    static String unescape(String s) {
    /* replaces escape sequences in s by their ASCII values. */
      StringBuffer buf = new StringBuffer();
      int i = 0;
      while (i < s.length()) {
        if (s.charAt(i) == '\\')
          switch (s.charAt(i+1)) {
            case '\\': buf.append('\\');  i += 2; break;
            case '\'': buf.append('\'');  i += 2; break;
            case '\"': buf.append('\"');  i += 2; break;
            case 'r': buf.append('\r');   i += 2; break;
            case 'n': buf.append('\n');   i += 2; break;
            case 't': buf.append('\t');   i += 2; break;
            case 'b': buf.append('\b');   i += 2; break;
            case 'f': buf.append('\f');   i += 2; break;
            default:  buf.append(s.charAt(i+1)); i += 2; break;
          }
        else {
          buf.append(s.charAt(i));
          i++;
        }
      }
      return buf.toString();
    }

To get all this to work we also need to redefine the form of a stringLit token:

  CHARACTERS
    backSlash  = CHR(92) .
    control    = CHR(0) .. CHR(31) .
    printable  = ANY - control .
    stringCh   = printable - '"' - backSlash .

  TOKENS
    stringLit  = '"' { stringCh | backSlash printable } '"' .

What many people had overlooked was that the rules for escape sequences in C like languages stipulate that a sequence like \Z where Z is not one of the "specials" just stands for Z itself (yes, escape sequences are really kludgy things).

Once we have those support methods defined the rest is pretty trivial. Note that we cannot allow LDC and DSP to take identifier arguments - some people overlooked this.

  PRODUCTIONS

    OneWord
    = (   "ADD"    | "AND"   | "ANEW"  | "CEQ"   | "CGE"   | "CGT"   | "CLE"   | "CLT"
        | "CNE"    | "DIV"   | "HALT"  | "INPB"  | "INPI"  | "LDV"   | "LDXA"  | "MUL"
        | "NEG"    | "NOP"   | "NOT"   | "OR"    | "PRNB"  | "PRNI"  | "PRNL"  | "REM"
        | "LDL_0"  | "LDL_1" | "LDL_2" | "LDL_3" | "STL_0" | "STL_1" | "STL_2" | "STL_3"
        | "LDA_0"  | "LDA_1" | "LDA_2" | "LDA_3" | "LDC_0" | "LDC_1" | "LDC_2" | "LDC_3"
        | "LDC_M1" | "STO"   | "SUB"
      )                            (. CodeGen.oneWord(token.val); .) .

    TwoWord                        (. int value = 0;
                                      String name; .)
    =   ( "DSP" | "LDC" )          (. String mnemonic = token.val; .)
        SignedNumber<^value>       (. CodeGen.twoWord(mnemonic, value); .)
      |
        ( "LDA" | "LDL" | "STL" )  (. String mnemonic = token.val; .)
        (   SignedNumber<^value>
          | Ident<^name>           (. value = offset(name); .)
        )                          (. CodeGen.twoWord(mnemonic, value); .)  .

    StringConst<^String str>
    =  stringLit                   (. str = unescape(token.str.substring(1, token.str.length()-1)); .) .

Task 6 - The Parva Cross reference generator.

Once again, this is capable of a very simple elegant solution (hint: most Pat Terry problems admit to a simple elegant solution; the trick is to find it, so learn from watching the Expert in Action, and pick up the tricks for future reference).

All the references to identifiers in the Parva grammar are channelled through the Ident non-terminal, so all we have to do is add the actions here to add the identifier to the table, suitably tagged according to how it has been encountered,

This should suggest attributing the Ident production with a value parameter. The details follow of the other productions that refer to it. Note the careful way in which we atribute to ForStatement production, which has the option of incorporating a highly local control identifier.

  COMPILER Parva $CN
  /* Parva level 1.5 grammar (EBNF)
     P.D. Terry, Rhodes University, 2003
     Cross reference generator */

    static final boolean  // useful to give more meaningful names to these!
      declared = true,
      used     = false;

  PRODUCTIONS
    Parva                             (. Table.clearTable(); .)
    = "void" Ident<declared>
      "(" ")" Block                   (. Table.printTable(); .)
      .

    OneConst
    = Ident<declared> "=" Constant .

    OneVar
    = Ident<declared> [ "=" Expression ] .

    Designator
    = Ident<used> [ "[" Expression "]" ] .

    ForStatement                      (. boolean how = used; .)
    = "for" "("
      [ BasicType                     (. how = declared; .)
      ] Ident<how>
      "=" Expression WEAK ";"
      Condition ";" Ident<used>
      "=" Expression ")" Statement .

    Ident<boolean declared>
    = identifier                      (. Table.addRef(token.str, declared, token.line); .) .

  END Parva.

Of course we need to have a table handler. I slipped up a bit, suggesting that you might use a LineRef class with a single field. One of the tutors pointed out that in Java the word String denotes a "standard" class (unlike the usage in C#, with which I am more familiar. So we don't need the LineRef class, we can just write very simple code like the following. As in Task 5, running the search loop from the bottom makes for a very simple addRef method:

  // Handle cross reference table for Parva
  // P.D. Terry, Rhodes University, 2003

  package Parva;

  import java.util.*;
  import Library.*;

    class Entry {                      // Cross reference table entries
      public String name;              // The identifier itself
      public ArrayList refs;           // Line numbers where it appears
      public Entry(String name) {
        this.name = name;
        this.refs = new ArrayList();
      }
    }

    class Table {
      static ArrayList list = new ArrayList();

      public static void clearTable() {
      // Clears cross-reference table
        list = new ArrayList();
      }

      public static void addRef(String name, boolean declared, int lineRef) {
      // Enters name if not already there, adds another line reference (negative if at
      // a declaration point in the original source program
        int i = 0;
        while (i < list.size() && !name.equals(((Entry)list.get(i)).name)) i++;
        if (i >= list.size()) list.add(new Entry(name));
        ((Entry)list.get(i)).refs.add(new Integer(declared ? -lineRef : lineRef));
      }

      public static void printTable() {
      // Prints out all references in the table
        for (int i = 0; i < list.size(); i++) {
          Entry e = (Entry)list.get(i);
          IO.write(e.name, -16);                     // left justify in 16 spaces
          for (int j = 0; j < e.refs.size(); j++)
            IO.write((Integer)e.refs.get(j), 4);     // left justify in 4 spaces
          IO.writeLine();
        }
      }

    }

The printTable method above suffers from a possible disadvantage in that multiple occurrences of an identifier on one line, as in

i = i + (i - list[i] * i);

create unnecessary duplicate entries. These could be eliminated in various ways; the simplest might be to do so at the output stage, rather than when they are added by the addRef method. Please yourself; here is my suggestion.

      public static void printTable() {
      // Prints out all references in the table (eliminate duplicates line numbers)
        for (int i = 0; i < list.size(); i++) {
          Entry e = (Entry)list.get(i);
          IO.write(e.name, -16);                     // left justify in 16 spaces
          int last = 0;                              // impossible line number
          for (int j = 0; j < e.refs.size(); j++) {
            int line = ((Integer)e.refs.get(j)).intValue();
            if (line != last) {
              IO.write(line, 4);                     // left justify in 4 spaces
              last = line;                           // remember we have printed this line
            }
          }
          IO.writeLine();
        }
      }

Several solutions revealed that people either had not thought that far or were confused about the point of the declared argument to the addRef method. In particular, they might not have realized that an identifier may legally be declared several times, in different scopes, as in the following example:

  void main () {
    int i = 1;
    while (i < 10) {
      int k = 1;              // first declaration
      while (k < i) {
        write(k); k = k - 1;
       }
      i = i + 1;
    }
    int k = 123;              // second declaration
    write(k);
  }