As usual, complete source code for these solutions is available in the files PRAC24CA.ZIP (C++ version) or PRAC24PA.ZIP (Pascal version).
Most people got most of this correct. There were some hideous hacks to the table handler, however, and some people had clearly not bothered to come to terms with it at all. Some people added the names to the table before they had evaluated the corresponding values, and another point frequently missed was that factors of the form x'''' needed to have the Boolean negation operation applied for every quote encountered.
COMPILER Bool $XCN /* Boolean calculator P.D. Terry, Rhodes University, 2000 */ #include "miscbool.h" IGNORE CHR(1) .. CHR(31) CHARACTERS letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" . TOKENS identifier = letter { letter } . PRODUCTIONS Bool = (. TABLE_Last = 0; .) { Print | Assignment } "QUIT" . Print = "PRINT" OneExpr { "," OneExpr } SYNC ";" . OneExpr = (. bool Value; .) Expression<Value> (. if (Value) printf("true\n"); else printf("false\n"); .) . Assignment = (. bool Value; char Name[25]; .) identifier (. LexString(Name, sizeof(Name) - 1); .) "=" Expression<Value> (. TABLE_Store(Name, Value); .) SYNC ";" . Expression<bool &E> = (. bool T; .) Term<E> { Or Term<T> (. E = E || T; .) } . Term<bool &T> = (. bool F; .) Factor<T> { [ And ] Factor<F> (. T = T && F; .) } . Factor<bool &F> = Not Factor<F> (. F = !F; .) | Primary<F> { "'" (. F = !F; .) } . Primary<bool &P> = (. char Name[25]; bool defined; .) identifier (. LexString(Name, sizeof(Name) - 1); TABLE_Retrieve(Name, P, defined); if (!defined) SemError(200); .) | True (. P = true; .) | False (. P = false; .) | "(" Expression<P> ")" . And = "AND" | "&" | "." . Or = "OR" | "|" | "+" . Not = "NOT" | "!" . True = "TRUE" | "1" . False = "FALSE" | "0" . END Bool.
The relevant parts of a simple table handler are as follows:
typedef struct { char Name[25]; bool Value; } TABLE_ENTRIES; const int TABLE_MAX = 100; static TABLE_ENTRIES TABLE_Memory[TABLE_MAX + 1]; static int TABLE_Last = 0; static void TABLE_Retrieve(char *N, bool &V, bool &defined) { // Look up N in Memory and retrieve corresponding Boolean value V if defined strcpy(TABLE_Memory[0].Name, N); // sentinel int i = TABLE_Last; // start at top end while (strcmp(N, TABLE_Memory[i].Name)) i--; // loop must terminate if (i > 0) { // we really found it V = TABLE_Memory[i].Value; defined = true; // so can retrieve V properly } else { V = false; defined = false; } // return error flags } static void TABLE_Store(char *N, bool V) { // Store N in memory with new value V (may be upating old value) strcpy(TABLE_Memory[TABLE_Last+1].Name, N); // sentinel int i = 1; // start at bottom end while (strcmp(N, TABLE_Memory[i].Name)) i++; // loop must terminate TABLE_Memory[i].Value = V; // store the corresponding V if (i > TABLE_Last) { // then it's a new entry TABLE_Last++; // one more entry if (TABLE_Last == TABLE_MAX) { // that's disastrous printf("Memory Overflow"); exit(1); } } }
The extensions to Clang may be summarized as below. There were some peculiar ideas about the FOR loop. Many solutions would have allowed some very odd loops indeed, such as
FOR I++ TO 10++ DO Write(I) PRODUCTIONS Statement = SYNC [ CompoundStatement | Assignment | IfStatement | WhileStatement | RepeatStatement | ForStatement | ReadStatement | WriteStatement ] . Assignment = Variable ( ":=" Expression | "++" | "--" ) SYNC . IfStatement = "IF" Condition "THEN" Statement [ "ELSE" Statement ] . RepeatStatement = "REPEAT" Statement { WEAK ";" Statement } "UNTIL" Condition . ForStatement = "FOR" identifier ":=" Expression ( "TO" | "DOWNTO" ) Expression "DO" Statement . MulOp = "*" | "/" | "&&" | "MOD" .
The corresponding changes to the Topsy grammar are:
PRODUCTIONS Statement = Block | Assignment | IfStatement | WhileStatement | DoWhileStatement | ForStatement | ReadStatement | WriteStatement | ReturnStatement | EmptyStatement . Assignment = Variable ( "=" Expression | "++" | "--" ) ";" . IfStatement = "if" "(" Condition ")" Statement [ "else" Statement ] . DoWhileStatement = "do" Statement "while" "(" Condition ")" ";" . ForStatement = "for" "(" identifier "=" Expression ";" Condition ";" identifier ( "++" | "--" ) ")" Statement . MulOp = "*" | "/" | "&&" | "%" .
A mixed bag of solutions was received. Several people had obviously worked very hard at this one, but inevitably some of the finer points were missed. You had been warned that
but notwithstanding this many people's solutions were badly incorrect for all of those situations. Another point worth mentioning is that several solutions did not balance the Indent and Exdent operations, so that the output code wandered around the page!
A complete detailed solution follows, which is worth studying carefully.
COMPILER ClTop $XCN /* CLANG level 1 to Topsy Level 1 converter P.D. Terry, Rhodes University, 2000 */ #include "miscpret.h" IGNORE CASE IGNORE CHR(9) .. CHR(13) CHARACTERS cr = CHR(13) . lf = CHR(10) . letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" . digit = "0123456789" . instring = ANY - "'" - cr - lf . COMMENTS FROM "(*" TO "*)" TOKENS identifier = letter { letter | digit } . number = digit { digit } . string = "'" (instring | "''") { instring | "''" } "'" . PRODUCTIONS /* Note that the program name is copied to the .TOP file as a comment only */ ClTop = "PROGRAM" (. Append("void main(void) // "); .) Identifier WEAK ";" Block "." (. NewLine(); .) . /* Rather than Indent() here you might prefer IndentNextLine() */ Block = (. NewLine(); Append("{ "); Indent(); .) SYNC { ( ConstDeclarations | VarDeclarations ) SYNC } CompoundStatement (. ExdentNextLine(); Append("}"); .) . ConstDeclarations = "CONST" OneConst { OneConst } . /* each constant declaration requires its own copy of the const modifier in Topsy */ OneConst = (. Append("const "); .) Identifier WEAK "=" (. Append(" = ") .) Number ";" (. Append(";"); NewLine(); .) . /* variables are all of "int" type in Clang */ VarDeclarations = "VAR" (. Append("int "); .) OneVar { WEAK "," (. Append(", ") .) OneVar } ";" (. Append(";"); .) . OneVar = Identifier [ UpperBound ] . /* Strictly we should change the value of the Number to "Number - 1". This is left as an exercise for the keen reader */ UpperBound = "[" (. Append("[") .) Number "]" (. Append("]") .) . /* Note that we suppress the interstatement semicolons and add them to the end of the relevant statements instead. Rather than Indent() here you might prefer IndentNextLine() */ CompoundStatement = "BEGIN" (. NewLine(); Append("{ "); Indent(); .) Statement { WEAK ";" (. NewLine(); .) Statement } "END" (. ExdentNextLine(); Append("}") .) . Statement = SYNC [ CompoundStatement | Assignment | IfStatement | WhileStatement | RepeatStatement | ForStatement | ReadStatement | WriteStatement ] . /* Note the change of assignment operator */ Assignment = Variable ( ":=" (. Append(" = ") .) Expression SYNC | "++" (. Append("++") .) | "--" (. Append("--") .) ) (. Append(";"); .) . Variable = Designator . Designator = Identifier [ "[" (. Append("[") .) Expression "]" (. Append("]") .) ] . /* Note the addition of parentheses around the condition */ IfStatement = "IF" (. Append("if (") .) Condition (. Append(") "); .) "THEN" Statement [ "ELSE" (. NewLine(); Append("else ") .) Statement ] . /* Note the addition of parentheses around the condition */ WhileStatement = "WHILE" (. Append("while (") .) Condition "DO" (. Append(") ") .) Statement . /* Note the addition of parentheses around the condition, and that it has to be negated. Another way of doing this is to parameterize the Condition and RelOp production and get them to invert the relational operator itself when parsing RepeatStatements. Doing so is left as another exercise */ RepeatStatement = "REPEAT" (. Append("do {"); IndentNextLine(); .) Statement { WEAK ";" (. NewLine(); .) Statement } "UNTIL" (. ExdentNextLine(); Append("} while ( !(") .) Condition (. Append(") );") .) . /* For statements are tricky. We have to retrieve the spelling of the control variable and use it three times. Note also that the synthesized condition has a sign thet depends on whether the loop is counting up or down, and that the synthesized increment/decrement clause also depends on this. Most people seemed to overlook this */ ForStatement = (. char str[100]; .) "FOR" (. Append("for ("); .) identifier (. LexString(str, 100); Append(str); .) ":=" (. Append(" = "); .) Expression (. Append("; "); .) ( "TO" (. Append(str); Append(" <= "); .) Expression "DO" (. Append("; "); Append(str); Append("++ ) "); .) | "DOWNTO" (. Append(str); Append(" >= "); .) Expression "DO" (. Append("; "); Append(str); Append("-- ) "); .) ) Statement . ReadStatement = "READ" "(" (. Append("cin >> ") .) Variable { "," (. Append(", ") .) Variable } ")" (. Append(";") .) . /* Write statements are a little tricky. There are two possible ways of doing them reliably. A Topsy statement like cout << ; must be avoided. So either we append the \n, or we omit the cout altogether if the original Clang statement was simply "Write" with no list. Very few people noticed this, in spite of trying to draw attention to it */ WriteStatement = "WRITE" [ "(" (. Append("cout << ") .) WriteElement { WEAK "," (. Append(" << ") .) WriteElement } ")" (. Append(";") .) ] . /* Here is the other way. The kinky calls to append are because of the way the \ meta character is used in Cocol itself WriteStatement = "WRITE" (. Append("cout << ") .) [ "(" WriteElement { WEAK "," (. Append(" << ") .) WriteElement } ")" ] (. Append(" << \"\\"); Append("n\"; "); .) . */ WriteElement = String | Expression . Condition = Expression RelOp Expression . Expression = ( "+" (. Append("+") .) Term | "-" (. Append("-") .) Term | Term ) { AddOp Term } . Term = Factor { MulOp Factor } . Factor = Designator | Number | "(" (. Append("(") .) Expression ")" (. Append(")") .) . AddOp = "+" (. Append(" + ") .) | "-" (. Append(" - ") .) . MulOp = "*" (. Append(" * ") .) | "/" (. Append(" / ") .) | "MOD" (. Append(" % ") .) . /* Note the change of equality comparison operator */ RelOp = "=" (. Append(" == ") .) | "<>" (. Append(" <> ") .) | "<" (. Append(" < ") .) | "<=" (. Append(" <= ") .) | ">" (. Append(" > ") .) | ">=" (. Append(" >= ") .) . Identifier = (. char str[100]; .) identifier (. LexString(str, 100); Append(str); .) . Number = (. char str[100]; .) number (. LexString(str, 100); Append(str); .) . /* String conversion is actually very tricky, and nobody got this correct. Clang strings are demarcated with 'single quotes' and may contain internal escaped '' sequences. The Topsy strings are demarcated by "double quotes" and the \ ' and " are represented internally by \\ \' and \" sequences. Any Clang string with internal \ characters might just cause trouble for a real Topsy compiler, of course. A few people converted the initial and final quotes, but I don't recall anyone doing the full transformation, or even realising that it was needed. The code below manipulated the Clang string character by character to form the corresponding Topsy string as best it can. */ String = (. char s[200], str[200]; int i, j; .) string (. LexString(str, 100); int l = strlen(str) - 1; s[0] = '\"'; i = 1; j = 1; str[l] = '\"'; while (i < l) { if (str[i] == '\'' && str[i+1] == '\'') str[i] = 92; else if (str[i] == '"') { s[j] = 92; j++; } else if (str[i] == 92) { s[j] = 92; j++; } s[j] = str[i]; i++; j++; } s[j] = '\"'; s[j+1] = '\0'; Append(s); .) . END ClTop.
The string handling is (of course) different in the Pascal version, in several respects:
String (. VAR Str, S : STRING; I, J, L : INTEGER; .) = string (. LexString(Str); L := Length(Str); S[1] := '"'; I := 2; J := 2; Str[L] := '"'; WHILE (I < L) DO BEGIN IF (Str[I] = '''') AND (Str[I + 1] = '''') THEN Str[I] := '\' ELSE IF (Str[I] = '"') OR (Str[I] = '\') THEN BEGIN S[J] := '\'; J := J + 1 END; S[J] := Str[I]; I := I + 1; J := J + 1; END; S[J] := '"'; S[0] := CHR(J); Append(s); .) .