Computer Science 301 - 2000

Programming Language Translation

Practical for Week 20, beginning 4 September 2000 - Solutions

Some very innovative solutions were received. Several were more complicated than necessary, I felt, and some of you have still to learn to set your code out neatly and add sensible commentary. A definite improvement, for the most part, over last week. I was pleased to see several students had made use of material developed in the data structures course last year. My own solutions don't do this, depending instead on the simplest possible data structures I deemed adequate for the task at hand. You can get machine readable copies of the programs from the web site in the file PRAC20A.ZIP.

Task 1 (source code listing must be submitted)

Write a very short program TASK1.CPP that simply reads a text file character by character from stdin and copies it exactly to stdout.

This was supposed to be very simple. There were a few wrong solutions - the EOF character should not be appended to the file, of course. EOF is a fudge really - it is a fictitious character returned when no real character could be read.

    #include "misc.h"

    int main (void) {
    // copy stdin to stdout, one character at a time
    // Normal usage:  TASK1 <infile >outfile
    // P.D. Terry, Rhodes University, 2000
    //
    // ch is declared as an int, not a char.  getchar returns every possible
    // character value and, in addition, the out-of-band value EOF.  Storing the
    // result in a char loses that distinction: where char is signed a genuine
    // 0xFF byte compares equal to EOF and cuts the copy short, and where char
    // is unsigned the test against EOF can never succeed.
      int ch;
      ch = getchar();
      while (ch != EOF) {       // EOF itself is never written to the output
        putchar(ch);
        ch = getchar();
      }
      return 0;
    }

Task 2 (source code listing must be submitted)

Write a program TASK2.CPP that adds the numbers provided as command line arguments to give a very simple calculator. For example

TASK2 1 4 6 7 9

should display the result 27.

I suspect virtually every one eventually got this one to work!

    #include "misc.h"

    int main (int argc, char *argv[]) {
    // Calculate sum of (integer) command line arguments and display it on stdout
    // Usage:  TASK2 n1  n2  n3 ...
    // P.D. Terry, Rhodes University, 2000
      int total = 0;
    // check correct command line usage - very wise to do this
      if (argc < 2) {
        fprintf(stderr, "Usage: TASK2 n1 n2 ... \n"); exit(EXIT_FAILURE);
      }
    // argv[0] is the program name, so the numbers start at argv[1].
    // atoi yields 0 for an argument that does not start with a number, so such
    // arguments contribute nothing to the total rather than being reported.
      for (int i = 1; i < argc; i++) total += atoi(argv[i]);
      printf("Total is %d", total);
      return 0;
    }

Task 3 (source code listing must be submitted)

Rewrite the program in Task 1 so that it can still work as before, but if the name of the input file appears as a parameter on the command line, input will be taken from that file, and output written to a file with the same primary name, but the extension NEW.

Most solutions did not go for the "neat" solution - simply use one loop, after first opening the files appropriately. This was the point taken up in the prac quiz, of course.

It is really only necessary to list the solution for Task 4 (see below).

The solution to Task 3, as do some of the solutions that follow, calls on functions openfiles and closeoutput which are conveniently packaged into misc.h using the following extra code:

   // - extensions to misc.h
   // Various common items (extended for Prac 20, 2000)

   void openfiles (FILE *&infile, FILE *&outfile, int argc, char *argv[],
                   char *ext, char *outName) {
   // attempt to open infile and outfile - rules
   //   - if no command parameters are given (argc = 1), use stdin and stdout
   //   - if one command line parameter is given (argc = 2), open infile from
   //     argv[1] and outfile from the name that appendextension derives from
   //     argv[1] and ext
   //   - if two command line parameters are given (argc = 3), open infile from
   //     argv[1] and outfile from argv[2]
   // return outName as the name of the output file in the file system
   //   ("stdout" when no file was opened)
   // issue appropriate usage prompt from argv[0] if wrong number of parameters given
   //
   // Any failure (too many parameters, either file cannot be opened) is
   // reported on stderr and terminates the program with EXIT_FAILURE, so on
   // return both streams are usable.
   //
   // outName must be supplied by the caller with room for the complete output
   // file name; its capacity is not passed in and so cannot be checked here.
   // NOTE(review): appendextension is defined outside this excerpt - presumably
   // it builds argv[1] with its extension replaced by ext; confirm in misc.h.

   // check correct command line usage - very wise to do this
     if (argc > 3) {
       fprintf(stderr, "Usage: %s [infile [outfile]]\n", argv[0]); exit(EXIT_FAILURE);
     }
   // check simplest use (no parameters)
     if (argc == 1) {
       infile = stdin; outfile = stdout; strcpy(outName, "stdout"); return;
     }
   // attempt to open input file and abort program if we cannot do so
     infile = fopen(argv[1], "r");
     if (infile == NULL) {
       fprintf(stderr, "could not open %s\n", argv[1]); exit(EXIT_FAILURE);
     }
   // attempt to open output file - first derive name
     if (argc == 2) {
       appendextension(argv[1], ext, outName);   // name derived from the input name
     } else {
       strcpy(outName, argv[2]);                 // name given explicitly
     }
     outfile = fopen(outName, "w");
     if (outfile == NULL) {
       fprintf(stderr, "could not open %s\n", outName); exit(EXIT_FAILURE);
     }
   }

   void closeoutput (FILE *outfile, char *outName) {
   // Finish with the output stream.
   //   - stdout is never closed here; the call simply returns
   //   - any other stream is closed; if the close fails, a message naming
   //     outName is written to stderr and the program exits with EXIT_FAILURE
     if (outfile == stdout) return;          // nothing to close
     if (fclose(outfile) != EOF) return;     // closed cleanly
     fprintf(stderr, "could not close %s\n", outName);
     exit(EXIT_FAILURE);
   }

Task 4 (source code listing must be submitted)

Modify the program in Task 3 so that it also counts the number of lines in the file as it copies them, and displays this count on the stderr output file (recollect that stderr is opened automagically to the screen).

The addition was trivial. Note that '\n' is not necessarily a single real character. In DOS/Windoze systems there are actually two characters separating lines in text files (CR + LF). In Unix there is only one (LF), and on the Apple Mac there is a different single one (CR). But the I/O libraries for any particular language should take care of this for you.

    #include "misc1.h"

    int main (int argc, char *argv[]) {
    // copy input file to input.NEW  and count lines
    // Usage: TASK4 input   or  TASK4 <infile >outfile
    // P.D. Terry, Rhodes University, 2000
    //
    // The line count and (when a file name was given) a summary of the copy
    // are reported on stderr, so they never get mixed into the copied text.
      int linecount = 0;
      char outName[256];
      FILE *infile, *outfile;

    // check parameters and open files
      openfiles(infile, outfile, argc, argv, "NEW", outName);

    // copy file character by character
    // ch must be an int: getc returns either a character value or the distinct
    // value EOF, and a char cannot represent both without ambiguity
      int ch = getc(infile);
      while (ch != EOF) {
        putc(ch, outfile);
        if (ch == '\n') linecount++;   // one '\n' is seen per line of text
        ch = getc(infile);
      }

    // release the input file (but leave stdin alone)
      if (infile != stdin) fclose(infile);

    // close output file and report statistics
      closeoutput(outfile, outName);
      if (argc > 1) fprintf(stderr, "File %s copied to %s\n", argv[1], outName);
      fprintf(stderr, "%d lines", linecount);
      return 0;
    }

Task 5 (source code listing must be submitted)

Write a program that

after compilation will be called CLANG.EXE;
will take two command line parameters, the first being the name of the file it is to process, the second the name of the file in which it is to store its output;
will open the two files and behave sensibly if this cannot be done for any reason;
will then read the source text, assuming it is a Clang program, and produce as output a list of the locations where the PROCEDURE or FUNCTION blocks are to be found, followed by a list of the locations where each identifier was declared;
will assume that each identifier is declared at most once, although it may be "used" or referenced several times thereafter.

Get this program to work in conjunction with QEdit so that you can edit a Clang program and then after "compilation" step through the procedures and functions, and through the variable declarations.

The quality of solutions submitted varied enormously. There were a few groups who had tried far more detailed parsing of the source than was needed, and there were several whose table of keywords did not include all 18 of the words reserved in Clang (see below for them all). There were several who issued error messages if files did not open correctly, but many of these did not terminate execution - which must have led to interesting results. Predictably there were several groups who had not allocated space for the strings they tried to manipulate, and there were some fascinating instances of the use of library routines with very odd names, not the standard ones that are usually advertised (be careful of this - the "standard" names like strcpy and strcat should be available on all systems, but not funny ones like _strcpy which are probably meant to be hidden from view in the depths of the actual implementation). Several people had missed the point that a non-empty string could be terminated by EOF - many solutions returned noToken (in my notation) whenever they read EOF. There were some solutions which tried to read a whole line at a time, or to use fgets to get a piece of string from the source, but I suspect that leads to complications that are best avoided by using the strategy shown below of reading one character at a time.

Relatively few people made use of ungetc, but I think this is really useful in this problem. Once one has scanned a string one has, of course, read one character too far - into the start of the next string.

    // Process a Clang program to produce interface message file for use with
    // QEdit IDE, highlighting the start of each PROCEDURE or FUNCTION and the
    // initial point of declaration of each unique identifier
    //   Usage: CLANG input ERRORS.LST    or  CLANG <infile >outfile
    //
    // This does not handle comments.  That would require a more sophisticated
    // scanner than seemed reasonable for this exercise.  As originally conceived
    // the problem did not demand proper treatment of strings either, but the
    // string handling is easily achieved, as can be seen below.
    //
    // P.D. Terry, Rhodes University, 2000

    #include "misc1.h"

    // One row of the symbol table: the spelling of a word and the position
    // recorded for it.
    struct entry {              // symbol table entries
      char name[25];            // spelling; room for 24 characters plus the nul
      int line, col;            // position stored with the entry (0, 0 for reserved words)
    };

    const int MAX   = 500;      // Limit on size of table
    const int RES   = 18;       // Number of reserved words
    static int Size = RES;      // Dynamic size of table (entries actually used )

    // The table is initialized positionally: each group of three values below
    // fills one entry (name, line, col).  The reserved words must occupy slots
    // 0 .. RES-1, because Size starts at RES and the identifier listing in main
    // starts at index RES.
    entry Table[MAX] = {        // First 18 entries are for the reserved words
      "BEGIN",     0, 0,     "COBEGIN",   0, 0,      "COEND",     0, 0,
      "CONST",     0, 0,     "DO",        0, 0,      "END",       0, 0,
      "FUNCTION",  0, 0,     "IF",        0, 0,      "PROCEDURE", 0, 0,
      "PROGRAM",   0, 0,     "READ",      0, 0,      "RETURN",    0, 0,
      "SIGNAL",    0, 0,     "THEN",      0, 0,      "VAR",       0, 0,
      "WAIT",      0, 0,     "WHILE",     0, 0,      "WRITE",     0, 0 };

    // Scanner position, shared across calls to fgetnextstr; both start at 1
    static int currentLine = 1, currentCol = 1;

    static void Update (char *Name, bool &success, int line, int col) {
    // Search for Name among the entries in Table by performing a case-insensitive
    // search.  If the Name cannot be found, attempt to add it to the Table,
    // recording (line, col) as its position.
    // If it is already present, return Name with the spelling found in Table;
    // the position stored for the existing entry is left untouched.
    // success is returned as an indication of whether the action succeeded
    // (it is false only when the Table is already full).
    //
    // Table entries hold a limited number of characters, so only that many
    // leading characters of Name are significant: a longer Name is stored and
    // compared in truncated form instead of being copied past the end of the
    // entry.
      if (Size >= MAX) {                        // Take care not to exceed the Table
        success = false; return;
      }
      success = true;
      const int limit = sizeof(Table[0].name) - 1;  // characters that fit before the nul
      strncpy(Table[Size].name, Name, limit);   // copy the name as a sentinel
      Table[Size].name[limit] = '\0';           // strncpy does not terminate on truncation
      Table[Size].line = line;                  // and record token position
      Table[Size].col  = col;
      int I = 0;                                // start at the bottom
      while (stricmp(Table[I].name, Table[Size].name) != 0)
        I++;                                    // - the sentinel guarantees a match
      if (I == Size)                            // it was not in the original table
        Size++;                                 // so confirm the addition
      else strcpy(Name, Table[I].name);         // it was there already, so retrieve
                                                // the extant spelling (never longer
                                                // than Name, so it always fits)
    }

    // always a very good idea to give names to enumerations, rather than simply to
    // use "magic numbers"
    //
    // Classification returned by fgetnextstr for each string it extracts:
    //   noToken    - the stream is exhausted; nothing was extracted
    //   wordToken  - a letter followed by letters and digits (identifier or keyword)
    //   otherToken - anything else: a run of non-word characters, or a quoted string

    typedef enum { noToken, wordToken, otherToken } tokens;

    tokens fgetnextstr (char *str, FILE *stream, int &line, int &column) {
    // Reads next string from stream into str and returns the line and column
    // on which it appeared in the stream.
    // Strings are of two kinds; the return value of the function distinguishes
    //                           them from one another:
    // Returns wordToken if the string is a valid identifier or keyword (consists
    //         of an initial letter followed by other letters and digits only)
    // Returns otherToken if the string does not start with a letter,
    //         and contains no letters (or is a 'string')
    // Returns noToken if the stream is exhausted
    //         (no further string could be extracted)
    //
    // Updates global (static) variables currentCol and currentLine as the scan proceeds
    //
      char ch = fgetc(stream);                  // first character of token
      line = currentLine; column = currentCol;  // record position of token
      if (ch == EOF) return(noToken);           // check for presence of real token
      int i = 0;                                // array index
      if (isalpha(ch)) {                        // we must have a keyword or identifier
        do {                                    // repeat
          str[i++] = ch;                        //   copy this character to the buffer
          ch = fgetc(stream);                   //   get the next character
          currentCol++;                         //   update position information
        } while (isalnum(ch));                  // until we get past the end of the word
        str[i] = '\0';                          // add trailing nul character as usual
        ungetc(ch, stream);                     // the last character read is the first
                                                // one of the next token, so poke it back
        if (ch == '\n')                         // compensate position information
          { currentLine++; currentCol = 0; }
        return wordToken;                       // return indication of word
      }
      else if (ch == '\'') {                    // we must have a string
        do {                                    // repeat
          str[i++] = ch;                        //   copy this character to the buffer
          ch = fgetc(stream);                   //   get the next character
          currentCol++;                         //   update position information
        } while (ch != EOF && ch != '\'');      // until we get to the end of the string
        str[i] = '\'';                          // add trailing quote character
        str[++i] = '\0';                        // add trailing nul character as usual
        currentCol++;                           // compensate for end of string
        return otherToken;                      // return indication of non word
      }
      else {                                    // we must have a string of non-word stuff
        do {                                    // repeat
          str[i++] = ch;                        //   copy character of the non-word to buffer
          ch = fgetc(stream);                   //   get the next character
          if (ch == '\n')                       //   update position information
            { currentLine++; currentCol = 0; }
          else currentCol++;
        } while (ch != EOF && !isalpha(ch) && ch != '\'');
                                                // until we get past the end of the non-word
        str[i] = '\0';                          // add trailing nul character as usual
        ungetc(ch, stream);                     // the last character read is the first
                                                // one of the next token, so poke it back
        return otherToken;                      // return indication of non-word
      }
    }

    int main (int argc, char *argv[]) {
    // Scan the Clang source token by token and write the message file:
    //   - one line for each PROCEDURE or FUNCTION keyword, giving its position
    //   - then one line for each identifier, giving the position at which it
    //     was first seen, followed by its spelling
    // Each message starts with the name of the source file.
      FILE *infile, *outfile;
      char outName[256], str[2000];
      bool okay;
      int lineNo, colNo;

      openfiles(infile, outfile, argc, argv, "LST", outName);

    // name of the source for use in messages - when input comes from stdin
    // (argc == 1) there is no argv[1] to print, so use a fixed name instead
      const char *srcName = (argc > 1) ? argv[1] : "stdin";

    // read the Clang file token by token
      tokens nextToken = fgetnextstr(str, infile, lineNo, colNo);
      while (nextToken != noToken) {   // noToken denotes end of scanning
        switch (nextToken) {
          case wordToken:              // identifier or key word
            if (stricmp(str, "PROCEDURE") == 0 || stricmp(str, "FUNCTION") == 0 )
              fprintf(outfile, "%s ( %d , %d )\n", srcName, lineNo, colNo);
            Update(str, okay, lineNo, colNo);
            if (!okay) {               // can only happen if the table overflows
              fprintf(stderr, "compiler error\n"); exit(EXIT_FAILURE);
            }
            break;
          case otherToken:             // no need to do anything in this case
            break;
          default:                     // should never happen, of course!
            fprintf(stderr, "compiler error\n"); exit(EXIT_FAILURE);
        }
        nextToken = fgetnextstr(str, infile, lineNo, colNo);
      }
      if (infile != stdin) fclose(infile);   // finished with the source
      for (int i = RES; i < Size; i++) // display table of identifier locations
        fprintf(outfile, "%s ( %d , %d ) %s\n",
                srcName, Table[i].line, Table[i].col, Table[i].name);
      closeoutput(outfile, outName);
      return EXIT_SUCCESS;
    }