Once again, there were some outstandingly good solutions handed in, but a few groups seemed completely at a loss here and there. Full sources for the solutions can be found in the file PRAC11A.ZIP for those who wish to experiment further.
For Task 3 the modifications needed to CHECK.FRM were simply to add an extra error message and #include directive:
#include "check.h" #include -->ScanHeader #include -->ParserHeader char *MyError::GetUserErrorMsg(int n) { switch (n) { // Put your customized messages here case 1000: return "Invalid IP number"; case 1001: return "Duplicate IP number"; case 1002: return "Duplicate computer name"; default: return "Unknown error or conflicting error numbers used"; } }
A modification was needed to CHECK.H to add the declaration of a StringList type:
#include "list.h" struct String { char str[1000]; // the string itself friend int operator == (myString x, myString y) { return strcmp(x.str, y.str) == 0; } friend int operator != (myString x, myString y) { return strcmp(x.str, y.str) != 0; } }; typedef List<String> StringList;
The modifications needed to the grammar were as below:
PRODUCTIONS Check = (. IPList L; StringList SL; .) { Entry<L, SL> } EOF (. if (Successful()) printf("\n all correct"); .) . . Entry<IPList &L, StringList &SL> = IPNumber<L> Name<SL> { Name<SL> } . Name<StringList &SL> = (. String S; .) name (. LexString(S.str, 1000); if (SL.isMember(S)) SemError(1002); else SL.add(S); .) .
Tasks 4 - 6 provided more of a challenge. The modifications needed to create CALC.FRM were to add suitable error messages and a #include directive:
#include "calc.h" char *MyError::GetUserErrorMsg(int n) { switch (n) { // Put your customized messages here case 1000: return "Mismatched types"; case 1001: return "Undefined variable"; case 1002: return "Division by zero"; case 1003: return "Uncomparable types"; default: return "Unknown error or conflicting error numbers used"; } }
Modifications were needed to CALC.H. I chose to add the declaration of various enumerations and structured types as shown:
#include "list.h" enum TYPES { none, ints, bools }; enum OPERATORS { opnop, opadd, opsub, opor, opmul, opdvd, opmod, opand, opeql, opneq, oplss, opgeq, opgtr, opleq }; struct ENTRY { // an entry in the "symbol table" char name[100]; // the variable name int value; // its value TYPES type; // its type friend int operator == (ENTRY x, ENTRY y) { return strcmp(x.name, y.name) == 0; } friend int operator != (ENTRY x, ENTRY y) { return strcmp(x.name, y.name) != 0; } }; typedef List<ENTRY> EntryList;
Here is a complete grammar itself, heavily commented to make various points that may have been missed (these comments are not in the solution sources):
COMPILER Calc $CNX // Simple calculator for Boolean or Integer results and user defined // variable names // P.D. Terry, Rhodes University, 2002 #include "calc.h" /* It suffices to declare the list of user defined "variables" local to the parser class, rather than pass it a parameter between almost every production */ EntryList VarList; // the list of variables and associated values IGNORE CASE IGNORE CHR(1) .. CHR(31) CHARACTERS letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" . digit = "0123456789" . TOKENS identifier = letter { letter | digit } . number = digit { digit } . PRODUCTIONS Calc = { Print | Assignment } "quit" . Print = "print" OneExpr { WEAK "," OneExpr } SYNC ";" (. printf("\n"); .) . /* OneExpr is introduced as a simple way of cutting down on actions that might otherwise clutter up the Print production. It becomes responsible for evaluating a single expression that is to be printed rather than stored. Note that some care is taken to ensure that Boolean values are printed in a user friendly form. Rather than take "no action" on the default we could print out some suitable warning, of course. */ OneExpr = (. TYPES exptype; int value; .) Expression<exptype, value> (. switch (exptype) { case ints : printf(" %d", value); break; case bools : if (value) printf(" true"); else printf(" false"); break; default: /* error - no action */ break; } .) . /* Note that in the Assignment production we do not meddle with the list of variables until the expression has been evaluated. In particular we do not want to enter the incomplete entry into the table after parsing the Variable to find its name. In this way we avoid treating an initial definition of, say ABC = ABC + 1, as being correct when it is not (as the ABC on the right side has no defined value yet). Many people missed this. */ Assignment = (. ENTRY e; .) Variable<e.name> "=" Expression<e.type, e.value> (. int i = VarList.position(e); if (i == -1) { // first definition VarList.add(e); } else { // updated definition if (e.type != VarList[i].type) SemError(1000); VarList[i].value = e.value; } .) SYNC ";" . /* The production for Variable does nothing more than extract the name for us */ Variable<char * name> = Identifier<name> . /* I chose to give Expression two parameters - one for the type to be returned, the other to yield the value itself. It is, of course, equally viable to use a single parameter of the ENTRY type, as this encapsulates both type and value in one "package". Several people did this. Note the form of the type check - several people omitted it or had erroneous variations like if (t != bools && t2 != bools) ... Setting the return type to none in the case of an error can have beneficial effects later on */ Expression<TYPES &t, int &value> = (. TYPES t2; int val2; .) AndExp<t, value> { "||" AndExp<t2, val2> (. if (t != bools || t2 != bools) { SemError(1000); t = none; } value = value || val2; .) } . /* The rest of the "binary" operations follow in much the same way */ AndExp<TYPES &t, int &value> = (. TYPES t2; int val2; .) OrExp<t, value> { "&&" OrExp<t2, val2> (. if (t != bools || t2 != bools) { SemError(1000); t = none; } value = value && val2; .) } . /* But the OrExp production needs further comment. Note that the presence of a RelOp between two integer operands yields a type "change" to Boolean - some people did not appreciate this. Strictly we should argue that only the == and != operators have meaning between two operands of Boolean type. This yields a rather complex set of switch statements, as you can see. */ OrExp<TYPES &t, int &value> = (. TYPES t2; int val2; OPERATORS op; .) AddExp<t, value> [ RelOp<op> AddExp<t2, val2> (. if (t != t2) SemError(1003); switch(op) { case opeql : value = value == val2; break; case opneq : value = value != val2; break; default : if (t == bools || t2 == bools) SemError(1003); switch (op) { case oplss : value = value < val2; break; case opleq : value = value <= val2; break; case opgtr : value = value > val2; break; case opgeq : value = value >= val2; break; default : break; } } t = bools; .) ] . /* The AddExp parser is easy and needs no further comment */ AddExp<TYPES &t, int &value> = (. TYPES t2; int val2; OPERATORS op; .) MultExp<t, value> { AddOp<op> MultExp<t2, val2> (. if (t != ints || t2 != ints) { SemError(1000); t = none; } else switch (op) { case opadd : value += val2; break; case opsub : value -= val2; break; default : break; } .) } . /* The MultExp parser needs to check for division by zero for both the / and the % operator - many people checked only the obvious division one! */ MultExp<TYPES &t, int &value> = (. TYPES t2; int val2; OPERATORS op; .) UnaryExp<t, value> { MulOp<op> UnaryExp<t2, val2> (. if (t != ints || t2 != ints) { SemError(1000); t = none; } else switch (op) { case opmul : value *= val2; break; case opmod : if (val2 != 0) value %= val2; else SemError(1002); break; case opdvd : if (val2 != 0) value /= val2; else SemError(1002); break; default : break; } .) } . /* The unary operators need careful type checking */ UnaryExp<TYPES &t, int &value> = Factor<t, value> | "+" UnaryExp<t, value> (. if (t != ints) { SemError(1000); t = none; } .) | "-" UnaryExp<t, value> (. if (t != ints) { SemError(1000); t = none; } value = - value; .) | "!" UnaryExp<t, value> (. if (t != bools) { SemError(1000); t = none; } value = ! value; .) . /* The Factor parser is the one ultimately responsible for starting to pass value and type information "up the tree". Note that we regard an undeclared identifier at this stage to be an error - but still assign a value and type to the arguments to make sure that something "sensible" propagates up from here. Several people added to the table if an undeclared identifier was encountered, but this may not be a wise thing to do. However, if it is done, care should be taken to set the fields in the entry, as hinted here. */ Factor<TYPES &t, int &value> = (. ENTRY e; int i; .) Identifier<e.name> (. i = VarList.position(e); if (i == -1) { SemError(1001); value = 1; t = none; /* maybe e.value = 1; e.type = none; VarList.Add(e) */ } else { value = VarList[i].value; t = VarList[i].type; } .) | Number<value> (. t = ints; .) | "true" (. t = bools; value = 1; .) | "false" (. t = bools; value = 0; .) | "(" Expression<t, value> ")" . /* It is convenient for readability to define the operators by their own non-terminals and for them simply to return an appropriate value from an enumeration type. Note the use of a default opnop to mop up errors that might otherwise creep in from undefined values */ AddOp<OPERATORS &op> = (. op = opnop; .) ( "+" (. op = opadd; .) | "-" (. op = opsub; .) ) . MulOp<OPERATORS &op> = (. op = opnop; .) ( "*" (. op = opmul; .) | "%" (. op = opmod; .) | "/" (. op = opdvd; .) ) . RelOp<OPERATORS &op> = (. op = opnop; .) ( "==" (. op = opeql; .) | "!=" (. op = opneq; .) | "<" (. op = oplss; .) | "<=" (. op = opleq; .) | ">" (. op = opgtr; .) | ">=" (. op = opgeq; .) ) . /* Note that we have used LexName here as we are ignoring case in our identifiers. Note also that we have used 100 (the size of the name of a variable) directly. Why would it not work if we used LexName(name, sizeof(name); in this context? */ Identifier<char *name> = identifier (. LexName(name, 100); .) . Number <int &num> = number (. char str[100]; LexString(str, sizeof(str) - 1); num = atoi(str); .) . END Calc.