Computer Science 301 - 2001


Tutorial for week 20 - Constraint Analysis and Symbol Tables

In class we have discussed the use of a symbol table for storing properties gleaned from the declaration of named constants and variables in Clang programs, and for checking that these have been correctly declared and used if they are referred to within statements and expressions. The relevant parts of the code (which appears in full in the source files for the book rather than the book itself) are quoted below for convenience:

     OneConst
     =                             (. TABLE_entries entry;
                                      int value; .)
        Ident<entry.name>          (. entry.idclass = TABLE_consts; .)
        "=" number ";"             (. Table->enter(entry); .) .

     OneVar
     =                             (. TABLE_entries entry; .)
                                   (. entry.idclass = TABLE_vars;
                                      entry.scalar = true; .)
        Ident<entry.name>
        [ UpperBound               (. entry.scalar = false; .)
        ]                          (. Table->enter(entry); .) .

     Assignment =  Variable ":=" Expression .

     Variable
     =                             (. TABLE_entries entry; .)
        Designator<classset(TABLE_vars), entry>.

     Designator<classset allowed, TABLE_entries &entry>
     =                             (. TABLE_alfa name;
                                      bool found; .)
        Ident<name>                (. Table->search(name, entry, found);
                                      if (!found) SemError(202);
                                      if (!allowed.memb(entry.idclass)) SemError(206);
                                      if (entry.idclass != TABLE_vars) return; .)
        ( "["                      (. if (entry.scalar) SemError(204); .)
          Expression "]"
          |                        (. if (!entry.scalar) SemError(205); .)
        ) .

     Expression =  ( "+" Term | "-" Term | Term ) { AddOp Term } .

     Term =  Factor {  MulOp Factor } .

     Factor
     =                             (. TABLE_entries entry; .)
          Designator<classset(TABLE_consts, TABLE_vars), entry>
        | number
        | "(" Expression ")" .

---------------------------------------------------------------------------------------------

            // Interface to symbol table for Clang level 1 parser (no code generation)

            const int TABLE_alfalength = 15;   // maximum length of identifiers
            typedef char TABLE_alfa[TABLE_alfalength + 1];

            enum TABLE_idclasses { TABLE_consts, TABLE_vars, TABLE_progs };

            struct TABLE_entries {
              TABLE_alfa      name;           // identifier
              TABLE_idclasses idclass;        // class
              bool            scalar;
            };

            class TABLE {
              public:
                void enter(TABLE_entries &entry);
                // Adds entry to symbol table

                void search(char *name, TABLE_entries &entry, bool &found);
                // Searches table for presence of name.  If found then returns entry

                void printtable(FILE *lst);
                // Prints symbol table for diagnostic purposes

                TABLE(REPORT *R);
                // Initializes symbol table

            };

In the similar language CS301-1, which forms the focus of the current practical, constants can be declared slightly differently, variables can be of integer or Boolean type, and the grammar for Expressions is different. How would you add attributes/actions to the corresponding parts of the grammar below to be able to exercise the same sorts of semantic constraints? Hint: This introduces the notion of "type checking" which is not present in Clang since Clang only has one type to worry about!

  ConstDeclarations = "CONST" OneConst { OneConst } .
  OneConst          = Ident "=" ( number  | "TRUE" | "FALSE" ) ";" .
  VarDeclarations   = ( "INT" | "BOOL" ) OneVar { "," OneVar } ";" .
  OneVar            = Ident [ UpperBound ] .
  Assignment        = Variable ":=" Expression .
  Variable          = Designator .
  Designator        = Ident [ "[" Expression "]" ] .
  Expression        = AndExp   { "OR" AndExp } .
  AndExp            = RelExp   { "AND" RelExp } .
  RelExp            = AddExp   [ RelOp AddExp ] .
  AddExp            = MultExp  { AddOp MultExp } .
  MultExp           = UnaryExp { MulOp UnaryExp } .
  UnaryExp          = Factor | AddOp UnaryExp | "NOT" UnaryExp .
  Factor            = Designator | number | "TRUE" | "FALSE" | "(" Expression ")" .

We extend the table handler to allow entries to record the type of each identifier. One would think there were only two types; the TABLE_none type is used for error handling (it's the type allocated in those situations where types are not properly determined):

  const int TABLE_alfalength = 15; // maximum length of identifiers
  typedef char TABLE_alfa[TABLE_alfalength + 1];

  enum TABLE_idclasses { TABLE_consts, TABLE_vars, TABLE_progs };
  enum TABLE_types {TABLE_none, TABLE_ints, TABLE_bools };

  struct TABLE_entries {
    TABLE_alfa name;             // identifier
    TABLE_idclasses idclass;     // class
    TABLE_types type;
    bool scalar;
  };

It is convenient to introduce the notion of a set of acceptable types, similar to the idea of introducing a set of acceptable classes of identifier:

  typedef Set<7> classset;
  typedef Set<4> typeset;

  typeset arithtypes = typeset(TABLE_none, TABLE_ints);
  typeset booltypes  = typeset(TABLE_none, TABLE_bools);

Note that we add the TABLE_none type to each set. This means that effectively badly declared variables or mangled expressions are going to be compatible with either Boolean or Integer entities. This device will help cut down on a plethora of "mismatched type" semantic error messages - only the first type abuse will usually trigger an error message. Quite clever - study the code in detail to see how it works!

Constant declarations are relatively easy - we have to set the entry.type field in the symbol table entry:

    ConstDeclarations
    =  "CONST" OneConst { OneConst } .

    OneConst
    =                             (. TABLE_entries entry; .)
       Ident<entry.name>          (. entry.idclass = TABLE_consts; .)
       "="
       ( number                   (. entry.type = TABLE_ints; .)
         | "TRUE"                 (. entry.type = TABLE_bools; .)
         | "FALSE"                (. entry.type = TABLE_bools; .)
       ) ";"                      (. Table->enter(entry); .) .

Variable declarations are a little more complicated. From the key word INT or BOOL we can determine the type of each of the variables in the ensuing list. This type has to be passed on (by value) to the OneVar sub parser so that the entry.type field can be correctly set up:

    VarDeclarations
    =                             (. TABLE_types type; .)
      (   "INT"                   (. type = TABLE_ints; .)
        | "BOOL"                  (. type = TABLE_bools; .)
      )  OneVar<type> { WEAK "," OneVar<type> } ";" .

    OneVar<TABLE_types type>
    =                             (. TABLE_entries entry; .)
                                  (. entry.idclass = TABLE_vars;
                                     entry.scalar = true;
                                     entry.type = type; .)
       Ident<entry.name>
       [ UpperBound               (. entry.scalar = false; .)
       ]                          (. Table->enter(entry); .)

Within the Statement part of the parser we have to make sure that assignments are only done to targets of the same type as the expression on the right of the := operator. This means that the Variable and Expression parsers need to be parameterised to obtain the type of the Variable or Expression:

    Assignment
    =                             (. TABLE_types exptype, vartype; .)
       Variable<vartype> ":="
       Expression<exptype>        (. if (exptype != vartype) SemError(218); .) .

    Variable<TABLE_types &vartype>
    =                             (. TABLE_entries entry; .)
       Designator<classset(TABLE_vars), entry>
                                  (. vartype = entry.type; .) .

The Designator parser is much as before, save that it has to check that subscripts are of integer type.

    Designator<classset allowed, TABLE_entries &entry>
    =                             (. TABLE_alfa name;
                                     TABLE_types exptype;
                                     bool found; .)
       Ident<name>                (. Table->search(name, entry, found);
                                     if (!found) SemError(202);
                                     if (!allowed.memb(entry.idclass)) SemError(206);
                                     if (entry.idclass != TABLE_vars) return; .)
       ( "["                      (. if (entry.scalar) SemError(204); .)
         Expression<exptype>      (. if (!arithtypes.memb(exptype)) SemError(227); .)
         "]"
         |                        (. if (!entry.scalar) SemError(205); .)
       ) .

The Expression parser (and its related sub-parsers) have to return (as a reference parameter) the type of the entity they parse, and perform various checks that operators only appear between operands of the appropriate types:

    Expression<TABLE_types &e>
    =                             (. TABLE_types a; .)
       AndExp<e>
       { "OR" AndExp<a>           (. if (!(booltypes.memb(e) && booltypes.memb(a)))
                                       { SemError(218); e = TABLE_none; }
                                     else e = TABLE_bools; .)
       } .

    AndExp<TABLE_types &a>
    =                             (. TABLE_types e; .)
       RelExp<a>
       { "AND" RelExp<e>          (. if (!(booltypes.memb(a) && booltypes.memb(e)))
                                       { SemError(218); a = TABLE_none; }
                                     else a = TABLE_bools; .)
       } .

    RelExp<TABLE_types &r>
    =                             (. TABLE_types a; .)
       AddExp<r>
       [ RelOp AddExp<a>          (. if (r == TABLE_bools || a == TABLE_bools) SemError(218);
                                     r = TABLE_bools; .)
       ] .

    AddExp<TABLE_types &a>
    =                             (. TABLE_types m; .)
       MultExp<a>
       { AddOp MultExp<m>         (. if (!(arithtypes.memb(a) && arithtypes.memb(m)))
                                       { SemError(218); a = TABLE_none; } .)
       } .

    MultExp<TABLE_types &m>
    =                             (. TABLE_types u; .)
       UnaryExp<m>
       { MulOp UnaryExp<u>        (. if (!(arithtypes.memb(m) && arithtypes.memb(u)))
                                       { SemError(218); m = TABLE_none; } .)
       } .

    UnaryExp<TABLE_types &u>
    =    Factor<u>
       | AddOp UnaryExp<u>        (. if (!arithtypes.memb(u)) {
                                       SemError(218); u = TABLE_none; }
       | "NOT" UnaryExp<u>        (. if (!booltypes.memb(u)) SemError(218);
                                     u = TABLE_bools; .) .

    Factor<TABLE_types &f>
    =                             (. TABLE_entries entry; .)
         Designator<classset(TABLE_consts, TABLE_vars), entry>
                                  (. f = entry.type; .)
       | number                   (. f = TABLE_ints; .)
       | "TRUE"                   (. f = TABLE_bools; .)
       | "FALSE"                  (. f = TABLE_bools; .)
       | "(" Expression<f> ")" .


Home  © P.D. Terry