Computer Science 3 - 2000

Programming Language Translation


Practical for Week 24, beginning 9 October 2000 - Solutions

As usual, complete source code for these solutions is available in the files PRAC24CA.ZIP (C++ version) or PRAC24PA.ZIP (Pascal version).


Task 3 - Build a Boolean Calculator

Most people got most of this correct. There were some hideous hacks to the table handler, however, and some people had clearly not bothered to come to terms with it at all. Some people added the names to the table before they had evaluated the corresponding values, and another point frequently missed was that factors of the form x'''' needed to have the Boolean negation operation applied for every quote encountered.

  COMPILER Bool $XCN
  /* Boolean calculator
     P.D. Terry, Rhodes University, 2000 */

  #include "miscbool.h"

  IGNORE CHR(1) .. CHR(31)

  CHARACTERS
    letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" .
  TOKENS
    identifier = letter { letter } .
  PRODUCTIONS
    Bool
    =                                     (. TABLE_Last = 0; .)
    { Print | Assignment } "QUIT" .

    Print = "PRINT" OneExpr { "," OneExpr } SYNC ";" .

    OneExpr
    =                                     (. bool Value; .)
      Expression<Value>                   (. if (Value) printf("true\n");
                                             else printf("false\n"); .) .

    Assignment
    =                                     (. bool Value; char Name[25]; .)
      identifier                          (. LexString(Name, sizeof(Name) - 1); .)
      "=" Expression<Value>               (. TABLE_Store(Name, Value); .)
      SYNC ";" .

    Expression<bool &E>
    =                                     (. bool T; .)
      Term<E>
      { Or Term<T>                        (. E = E || T; .)
      } .

    Term<bool &T>
    =                                     (. bool F; .)
      Factor<T>
      { [ And ] Factor<F>                 (. T = T && F; .)
      } .

    Factor<bool &F>
    =   Not Factor<F>                     (. F = !F; .)
      | Primary<F>
        { "'"                             (. F = !F; .)  } .

    Primary<bool &P>
    =                                     (. char Name[25]; bool defined; .)
      identifier                          (. LexString(Name, sizeof(Name) - 1);
                                             TABLE_Retrieve(Name, P, defined);
                                             if (!defined) SemError(200); .)
      | True                              (. P = true; .)
      | False                             (. P = false; .)
      | "(" Expression<P> ")" .

    And   = "AND" | "&" | "." .
    Or    = "OR" | "|" | "+" .
    Not   = "NOT" | "!" .
    True  = "TRUE" | "1" .
    False = "FALSE" | "0" .
  END Bool.

The relevant parts of a simple table handler are as follows:

  typedef struct {
    char Name[25];
    bool Value;
  } TABLE_ENTRIES;

  const int TABLE_MAX = 100;
  static TABLE_ENTRIES TABLE_Memory[TABLE_MAX + 1];
  static int TABLE_Last = 0;

  static void TABLE_Retrieve(char *N, bool &V, bool &defined) {
  // Look up N in Memory and retrieve corresponding Boolean value V if defined
    strcpy(TABLE_Memory[0].Name, N);             // sentinel
    int i = TABLE_Last;                          // start at top end
    while (strcmp(N, TABLE_Memory[i].Name)) i--; // loop must terminate
    if (i > 0) {                                 // we really found it
      V = TABLE_Memory[i].Value; defined = true; // so can retrieve V properly
    }
    else { V = false; defined = false; }         // return error flags
  }

  static void TABLE_Store(char *N, bool V) {
  // Store N in memory with new value V (may be upating old value)
    strcpy(TABLE_Memory[TABLE_Last+1].Name, N);  // sentinel
    int i = 1;                                   // start at bottom end
    while (strcmp(N, TABLE_Memory[i].Name)) i++; // loop must terminate
    TABLE_Memory[i].Value = V;                   // store the corresponding V
    if (i > TABLE_Last) {                        // then it's a new entry
      TABLE_Last++;                              // one more entry
      if (TABLE_Last == TABLE_MAX) {             // that's disastrous
        printf("Memory Overflow"); exit(1);
      }
    }
  }


Task 5 - More power to the people

The extensions to Clang may be summarized as below. There were some peculiar ideas about the FOR loop. Many solutions would have allowed some very odd loops indeed, such as

        FOR I++ TO 10++ DO Write(I)

  PRODUCTIONS
    Statement         = SYNC [   CompoundStatement | Assignment
                               | IfStatement       | WhileStatement
                               | RepeatStatement   | ForStatement
                               | ReadStatement     | WriteStatement ] .
    Assignment        = Variable ( ":=" Expression | "++" | "--" ) SYNC .
    IfStatement       = "IF" Condition "THEN" Statement [ "ELSE" Statement ] .
    RepeatStatement   = "REPEAT" Statement { WEAK ";" Statement } "UNTIL" Condition .
    ForStatement      = "FOR" identifier ":=" Expression
                        ( "TO" | "DOWNTO" ) Expression "DO" Statement .
    MulOp             = "*" | "/" | "&&" | "MOD" .


Task 7 - A million lemmings are never satisfied

The corresponding changes to the Topsy grammar are:

  PRODUCTIONS
    Statement         =    Block            | Assignment
                         | IfStatement      | WhileStatement
                         | DoWhileStatement | ForStatement
                         | ReadStatement    | WriteStatement
                         | ReturnStatement  | EmptyStatement .
    Assignment        = Variable ( "=" Expression | "++" | "--" ) ";" .
    IfStatement       = "if" "(" Condition ")" Statement [ "else" Statement ] .
    DoWhileStatement  = "do" Statement "while" "(" Condition ")" ";" .
    ForStatement      = "for" "(" identifier "=" Expression ";" Condition ";"
                        identifier ( "++" | "--" ) ")" Statement .
    MulOp             = "*" | "/" | "&&" | "%" .


Task 8 - Feed the lemmings

A mixed bag of solutions was received. Several people had obviously worked very hard at this one, but inevitably some of the finer points were missed. You had been warned that

but notwithstanding this many people's solutions were badly incorrect for all of those situations. Another point worth mentioning is that several solutions did not balance the Indent and Exdent operations, so that the output code wandered around the page!

A complete detailed solution follows, which is worth studying carefully.

  COMPILER ClTop $XCN
  /* CLANG level 1 to Topsy Level 1 converter
     P.D. Terry, Rhodes University, 2000 */

  #include "miscpret.h"

  IGNORE CASE
  IGNORE CHR(9) .. CHR(13)

  CHARACTERS
    cr         = CHR(13) .
    lf         = CHR(10) .
    letter     = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" .
    digit      = "0123456789" .
    instring   = ANY - "'" - cr - lf .

  COMMENTS FROM "(*" TO "*)"

  TOKENS
    identifier = letter { letter | digit } .
    number     = digit { digit } .
    string     = "'" (instring | "''") { instring | "''" } "'" .

  PRODUCTIONS

  /* Note that the program name is copied to the .TOP file as a comment only */

    ClTop
    =  "PROGRAM"                   (. Append("void main(void) // "); .)
       Identifier
       WEAK ";"
       Block
       "."                         (. NewLine(); .) .

  /* Rather than Indent() here you might prefer IndentNextLine() */

    Block
    =                              (. NewLine(); Append("{ "); Indent(); .)
       SYNC { ( ConstDeclarations | VarDeclarations ) SYNC }
       CompoundStatement           (. ExdentNextLine(); Append("}"); .) .

    ConstDeclarations
    =  "CONST" OneConst { OneConst } .

  /* each constant declaration requires its own copy of the const modifier in Topsy */

    OneConst
    =                              (. Append("const "); .)
       Identifier WEAK "="         (. Append(" = ") .)
       Number ";"                  (. Append(";"); NewLine(); .) .

  /* variables are all of "int" type in Clang */

    VarDeclarations
    =  "VAR"                       (. Append("int "); .)
       OneVar
       { WEAK ","                  (. Append(", ") .)
         OneVar
       }
       ";"                         (. Append(";"); .) .

    OneVar
    =  Identifier [ UpperBound ] .

  /* Strictly we should change the value of the Number to "Number - 1".  This
     is left as an exercise for the keen reader */

    UpperBound
    =  "["                         (. Append("[") .)
       Number
       "]"                         (. Append("]") .) .

  /* Note that we suppress the interstatement semicolons and add them to the end of
     the relevant statements instead.
     Rather than Indent() here you might prefer IndentNextLine() */

    CompoundStatement
    =  "BEGIN"                     (. NewLine(); Append("{ "); Indent(); .)
          Statement
          { WEAK ";"               (. NewLine(); .)
            Statement }
       "END"                       (. ExdentNextLine(); Append("}") .) .

    Statement
    =  SYNC [   CompoundStatement | Assignment
              | IfStatement       | WhileStatement
              | RepeatStatement   | ForStatement
              | ReadStatement     | WriteStatement ] .

  /* Note the change of assignment operator */

    Assignment
    =  Variable
       (
         ":="                      (. Append(" = ") .)
         Expression SYNC
        | "++"                     (. Append("++") .)
        | "--"                     (. Append("--") .)
       )                           (. Append(";"); .)
       .

    Variable
    =  Designator .

    Designator
    =  Identifier
       [  "["                      (. Append("[") .)
          Expression
          "]"                      (. Append("]") .)
       ] .

  /* Note the addition of parentheses around the condition */

    IfStatement
    =  "IF"                        (. Append("if (") .)
          Condition                (. Append(") "); .)
       "THEN"
          Statement
       [ "ELSE"                    (. NewLine(); Append("else ") .)
           Statement
       ]
       .

  /* Note the addition of parentheses around the condition */

    WhileStatement
    =  "WHILE"                     (. Append("while (") .)
          Condition
       "DO"                        (. Append(") ") .)
          Statement .

  /* Note the addition of parentheses around the condition, and that it has to
     be negated. Another way of doing this is to parameterize the Condition and
     RelOp production and get them to invert the relational operator itself when
     parsing RepeatStatements.  Doing so is left as another exercise */

    RepeatStatement
    =  "REPEAT"                    (. Append("do {"); IndentNextLine(); .)
          Statement
          { WEAK ";"               (. NewLine(); .)
            Statement }
       "UNTIL"                     (. ExdentNextLine(); Append("} while ( !(") .)
       Condition                   (. Append(") );") .)
       .

  /* For statements are tricky.  We have to retrieve the spelling of the control
     variable and use it three times.  Note also that the synthesized condition
     has a sign thet depends on whether the loop is counting up or down, and that
     the synthesized increment/decrement clause also depends on this.  Most people
     seemed to overlook this */

    ForStatement
    =                              (. char str[100]; .)
       "FOR"                       (. Append("for ("); .)
       identifier                  (. LexString(str, 100); Append(str); .)
       ":="                        (. Append(" = "); .)
       Expression                  (. Append("; "); .)
       (   "TO"                    (. Append(str); Append(" <= "); .)
           Expression "DO"         (. Append("; ");
                                      Append(str); Append("++ ) "); .)
         | "DOWNTO"                (. Append(str); Append(" >= "); .)
           Expression "DO"         (. Append("; ");
                                      Append(str); Append("-- ) "); .)
       )
       Statement .

    ReadStatement
    =  "READ" "("                  (. Append("cin >> ") .)
        Variable
        { ","                      (. Append(", ") .)
          Variable }
       ")"                         (. Append(";") .) .

  /* Write statements are a little tricky.  There are two possible ways of doing
     them reliably.  A Topsy statement like     cout << ;
     must be avoided.  So either we append the \n, or we omit the cout
     altogether if the original Clang statement was simply "Write" with no list.
     Very few people noticed this, in spite of trying to draw attention to it
  */

    WriteStatement
    =  "WRITE"
       [ "("                       (. Append("cout << ") .)
         WriteElement
         { WEAK ","                (. Append(" << ") .)
         WriteElement }
         ")"                       (. Append(";") .)
       ] .

  /* Here is the other way.  The kinky calls to append are because of the way the
     \ meta character is used in Cocol itself

    WriteStatement
    =  "WRITE"                     (. Append("cout << ") .)
       [ "("
         WriteElement
         { WEAK ","                (. Append(" << ") .)
         WriteElement }
         ")"
       ]                           (. Append(" << \"\\"); Append("n\"; "); .)
       .
  */

    WriteElement
    =  String | Expression .

    Condition
    =  Expression RelOp Expression .

    Expression
    =  (   "+"                     (. Append("+") .)
          Term
        | "-"                      (. Append("-") .)
          Term
        | Term
       ) { AddOp Term } .

    Term
    =  Factor { MulOp Factor } .

    Factor
    =   Designator
      | Number
      | "("                        (. Append("(") .)
        Expression
        ")"                        (. Append(")") .) .

    AddOp
    =  "+"                         (. Append(" + ") .)
     | "-"                         (. Append(" - ") .) .

    MulOp
    =  "*"                         (. Append(" * ") .)
     | "/"                         (. Append(" / ") .)
     | "MOD"                       (. Append(" % ") .) .

  /* Note the change of equality comparison operator */

    RelOp
    =  "="                         (. Append(" == ") .)
     | "<>"                        (. Append(" <> ") .)
     | "<"                         (. Append(" < ") .)
     | "<="                        (. Append(" <= ") .)
     | ">"                         (. Append(" > ") .)
     | ">="                        (. Append(" >= ") .) .

    Identifier
    =                              (. char str[100]; .)
       identifier                  (. LexString(str, 100); Append(str); .) .

    Number
    =                              (. char str[100]; .)
       number                      (. LexString(str, 100); Append(str); .) .

  /* String conversion is actually very tricky, and nobody got this correct.
     Clang strings are demarcated with 'single quotes' and may contain internal
     escaped '' sequences.  The Topsy strings are demarcated by "double quotes"
     and the \ ' and " are represented internally by \\ \' and \" sequences.  Any
     Clang string with internal \ characters might just cause trouble for a real
     Topsy compiler, of course.

     A few people converted the initial and final quotes, but I don't recall
     anyone doing the full transformation, or even realising that it was needed.

     The code below manipulated the Clang string character by character to form
     the corresponding Topsy string as best it can.
  */

    String
    =                              (. char s[200], str[200]; int i, j; .)
       string                      (. LexString(str, 100);
                                      int l = strlen(str) - 1;
                                      s[0] = '\"'; i = 1; j = 1; str[l] = '\"';
                                      while (i < l) {
                                        if (str[i] == '\'' && str[i+1] == '\'') str[i] = 92;
                                        else if (str[i] == '"') { s[j] = 92; j++; }
                                        else if (str[i] == 92) { s[j] = 92; j++; }
                                        s[j] = str[i]; i++; j++;
                                      }
                                      s[j] = '\"';
                                      s[j+1] = '\0';
                                      Append(s);
                                   .) .

  END ClTop.

The string handling is (of course) different in the Pascal version, in several respects:

    String
                                   (. VAR Str, S : STRING; I, J, L : INTEGER; .)
    =  string                      (. LexString(Str);
                                      L := Length(Str);
                                      S[1] := '"'; I := 2; J := 2; Str[L] := '"';
                                      WHILE (I < L) DO BEGIN
                                        IF (Str[I] = '''') AND (Str[I + 1] = '''')
                                          THEN Str[I] := '\'
                                          ELSE IF (Str[I] = '"') OR (Str[I] = '\') THEN
                                            BEGIN S[J] := '\'; J := J + 1 END;
                                        S[J] := Str[I]; I := I + 1; J := J + 1;
                                      END;
                                      S[J] := '"';
                                      S[0] := CHR(J);
                                      Append(s);
                                   .) .