Computer Science 301 - Translators


Tutorial 8 - Practice in attribute grammars - Solutions

1. Consider the by now rather hackneyed grammar for expressions which has been written out again below. How would you add attributes so that a parser, after recognising an Expression, would be able to determine whether that Expression was a "constant expression" (meaning that in principle it could be evaluated at compile time) or was a "variable expression" (meaning that it could not be evaluated until run time)? Assume that the Designator production can be attributed so that it returns the result of searching the symbol table to determine whether the associated identifier denotes a variable or a constant.

  Expression<out bool isConst>           (. bool alsoConst; .)
  = (   "+" Term<out isConst>
      | "-" Term<out isConst>
      | Term<out isConst>
    )
    { AddOp Term<out alsoConst>          (. isConst = isConst && alsoConst; .)
    } .

  Term<out bool isConst>                 (. bool alsoConst; .)
  = Factor<out isConst>
    { MulOp Factor<out alsoConst>        (. isConst = isConst && alsoConst; .)
    } .

  Factor<out bool isConst>
  =   Designator<out isConst>
    | number                             (. isConst = true; .)
    | "(" Expression<out isConst> ")"
    .

2. Consider the familiar grammar below for describing a set of EBNF productions. How could you add attributes to this grammar so that it could determine which non-terminal had the longest right hand side (in terms of the number of terminals and non terminals in any one of its alternatives)?

Two solutions follow. In the first one we simply use some static fields in the Parser class to record the quantities of interest:

  using Library;

  COMPILER EBNF $CN
  /* Describe simple EBNF productions and find which has the longest right side
     P.D. Terry, Rhodes University, 2015 */

  static string thisName, maxName;
  static int maxLength = 0;

  CHARACTERS
    letter   = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" .
    digit    = "0123456789" .
    noquote1 = ANY - "'" .
    noquote2 = ANY - '"' .

  TOKENS
    nonterminal = letter {letter | digit } .
    terminal    = "'" noquote1 {noquote1} "'" | '"' noquote2 {noquote2} '"' .

  IGNORE  CHR(9) .. CHR(13)

  PRODUCTIONS
     EBNF                                       (. int maxRHS; .)
     = { Production<out maxRHS>                 (. if (maxRHS > maxLength) maxName = thisName; .)
       }                                        (. IO.WriteLine("Longest production was for "
                                                                + maxName); .)
       EOF .

     Production<out int maxRHS>
     = SYNC nonterminal                         (. thisName = token.val; .)
       WEAK "="
       Expression<out maxRHS>
       SYNC "." .

     Expression<out int expLength>              (. int altLength; .)
     = Term<out expLength>
       { WEAK "|" Term<out altLength>           (. if (altLength > expLength)
                                                     expLength = altLength; .)
       } .

     Term<out int termLength>                   (. int N; termLength = 0; .)
     = [ Factor<out termLength>
         { Factor<out N>                        (. termLength += N; .)
         }
       ] .

     Factor<out int factorLength>               (. factorLength = 0; .)
     =   nonterminal                            (. factorLength = 1; .)
       | terminal                               (. factorLength = 1; .)
       | "[" Expression<out factorLength> "]"
       | "(" Expression<out factorLength> ")"
       | "{" Expression<out factorLength> "}"
       .

  END EBNF.

In the second solution we show how one can arrange for parsing methods to return objects that encapsulate the quantities of interest:

  import Library.*;

  COMPILER EBNF $CN
  /* Describe simple EBNF productions and find which has the longest right side
     P.D. Terry, Rhodes University, 2015 */

  class Prod {
    public string name = "";
    public int maxRHS = 0;
  }

  CHARACTERS
    letter   = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" .
    digit    = "0123456789" .
    noquote1 = ANY - "'" .
    noquote2 = ANY - '"' .

  IGNORE  CHR(9) .. CHR(13)

  TOKENS
    nonterminal = letter {letter | digit } .
    terminal    = "'" noquote1 {noquote1} "'" | '"' noquote2 {noquote2} '"' .

  IGNORE  CHR(9) .. CHR(13)

  PRODUCTIONS
     EBNF                                       (. Prod bigProd = new Prod(), nextProd; .)
     = { Production<out nextProd>               (. if (nextProd.maxRHS > bigProd.maxRHS)
                                                     bigProd = nextProd; .)
       }                                        (. IO.WriteLine("Longest production was for "
                                                                + bigProd.name); .)
       EOF .

     Production<out Prod p>                     (. p = new Prod(); .)
     = SYNC nonterminal                         (. p.name = token.val; .)
       WEAK "="
       Expression<out p.maxRHS>
       SYNC "." .

     Expression<out int expLength>              (. int altLength; .)
     = Term<out expLength>
       { WEAK "|" Term<out altLength>           (. if (altLength > expLength)
                                                     expLength = altLength; .)
       } .

     Term<out int termLength>                   (. int N; termLength = 0; .)
     = [ Factor<out termLength>
         { Factor<out N>                        (. termLength += N; .)
         }
       ] .

     Factor<out int factorLength>               (. factorLength = 0; .)
     =   nonterminal                            (. factorLength = 1; .)
       | terminal                               (. factorLength = 1; .)
       | "[" Expression<out factorLength> "]"
       | "(" Expression<out factorLength> ")"
       | "{" Expression<out factorLength> "}"
       .

  END EBNF.

3. Consider the familiar grammar below for describing a set of EBNF productions. How could you add attributes to this grammar so that it could check a set of productions to determine whether any non-terminals had not been defined, and whether any non-terminals could not possibly be reachable?

Here is a C# version, including the code for a very simple table handler

  COMPILER EBNFCheck $CN
  /* Check a set of EBNF productions to make sure all apparent non-terminals are both defined
     (appear on the left of a production rule) and referenced (appear on the right of a rule)
     P.D. Terry, Rhodes University, 2015 */

    const bool defined = true;        // Name them, name them!
    const bool referenced = false;

  CHARACTERS
    letter   = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" .
    digit    = "0123456789" .
    noquote1 = ANY - "'" .
    noquote2 = ANY - '"' .

  TOKENS
    nonterminal = letter {letter | digit | "_" } .
    terminal    = "'" noquote1 {noquote1} "'" | '"' noquote2 {noquote2} '"' .

  IGNORE  CHR(9) .. CHR(13)

  PRODUCTIONS
     EBNFCheck
     =                              (. Table.Clear(); .)
       { Production }
       EOF                          (. Table.CheckNonTerminals(); .)
     .

     Production
     =
       SYNC nonterminal             (. Table.Add(token.val, defined); .)
       WEAK "="
       Expression
       SYNC "." .

     Expression
     =
     Term
     { WEAK "|" Term
     } .

     Term
     =
     [ Factor
       { Factor
       }
     ] .

     Factor
     =   nonterminal                (. Table.Add(token.val, referenced); .)
       | terminal
       | "[" Expression "]"
       | "(" Expression ")"
       | "{" Expression "}" .

  END EBNFCheck.

  =======================  Table Handler stored in .\EBNFCheck\Table.cs

  // Table handler for EBNF checker
  // P.D. Terry, Rhodes University, 2015 (C# version)

  using Library;
  using System;
  using System.Collections.Generic;
  using System.Text;

  namespace EBNFCheck {

    class Entry {
      public bool defined, referenced;
      public string name;
      public Entry(string name) {
        this.defined = false;
        this.referenced = false;
        this.name   = name;
      }
    } // Entry

    class Table {

      static List<Entry> list = new List<Entry>();

      public static void Clear() {
      // Clear all entries in table
        list = new List<Entry>();
      } // Clear

      public static int Add(string name, bool defining) {
      // Search for, and possibly add a nonterminal "name" to the table, according as
      // to "where" in a production it was found, and return the position as the
      // index where the name was either added previously or added by this call
      // debug: IO.WriteLine("add " + name + " " + where);
        int position = 0;
        while (position < list.Count && !name.Equals(list[position].name)) position++;
        if (position >= list.Count) list.Add(new Entry(name));
        if (defining)
          list[position].defined = true;
        else
          list[position].referenced = true;
        return position;
      } // Add

      public static void CheckNonTerminals() {
      // Check to see whether any non-terminals are not defined or are not referenced
      // This could be made rather more sophisticated!
        IO.WriteLine("The following non-terminals were never defined");
        for (int i = 0; i < list.Count; i++)
          if (! list[i].defined) IO.WriteLine(list[i].name);
        IO.WriteLine("The following non-terminals were never referenced");
        for (int i = 1; i < list.Count; i++)  // no need to check goal symbol
          if (! list[i].referenced) IO.WriteLine(list[i].name);
      } // CheckNonTerminals

    } // Table

  } //namespace

4. Extend the TokenTests system to check for various other tokens

   using Library;

   COMPILER TokenTests $CN
   /* Test scanner construction and token definitions - C# version
      The generated program will read a file of words, numbers, strings etc
      and report on what characters have been scanned to give a token,
      and what that token is (as a magic number).  Useful for experimenting
      when faced with the problem of defining awkward tokens!

      P.D. Terry, Rhodes University, 2015 */

   /* Some code to be added to the parser class */

     static void Display(Token token) {
     // Simply reflect the fields of token to the standard outpul
       IO.Write("Line ");
       IO.Write(token.line, 4);
       IO.Write(" Column");
       IO.Write(token.col, 4);
       IO.Write(":  Kind");
       IO.Write(token.kind, 3);
       IO.WriteLine("  Val |" + token.val.Trim() + "|");
     } // Display

   CHARACTERS  /* You may like to introduce others */

     sp         = CHR(32) .
     backslash  = CHR(92) .
     control    = CHR(0) .. CHR(31) .
     letter     = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" .
 *   digit09    = "0123456789" .
 *   digit19    = "123456789" .
 *   stringCh   = ANY - '"' - control - backslash .
 *   charCh     = ANY - "'" - control - backslash .
     printable  = ANY - control .

   TOKENS      /* You may like to introduce others */

 *   ident      = letter { {"_"} (letter | digit09) } .

 *   integer    =   digit19 { digit09 }
 *                | digit19 { digit09 } CONTEXT(".")
 *                | "0"
 *                | "0" CONTEXT(".") .

 *   double     = ( digit19 { digit09 } "." | "0." ) digit09 { digit09 }
 *                [ "E" [ "+" | "-" ] digit09 { digit09 } ] .

 *   string     = '"' { stringCh | backslash printable } '"' .

 *   char       = "'" ( charCh   | backslash printable ) "'" .

   IGNORE control

   PRODUCTIONS

      TokenTests
      = { (    ANY
            |  "."
            |  ".."
            |  "ANY"
          )                     (. Display(token); .)
        } EOF                   (. Display(token); .)
        .

   END TokenTests.


Home  © P.D. Terry