PreviousUpNext

15.4.44  src/app/lex/lexgen.pkg

##  Lexical analyzer generator for Standard ML.
##      Version 1.7.0, June 1998

# Compiled by:
#     src/app/lex/mythryl-lex.lib

#  This software comes with ABSOLUTELY NO WARRANTY.
#  This software is subject only to the PRINCETON STANDARD ML SOFTWARE LIBRARY
#  COPYRIGHT NOTICE, LICENSE AND DISCLAIMER, (in the file "COPYRIGHT",
#  distributed with this software). You may copy and distribute this software;
#  see the COPYRIGHT NOTICE for details and restrictions.
#
#       Changes:
#           07/25/89 (drt): added %header declaration, code to place
#                   user declarations at same level as make_lexer, etc.
#                   This is needed for the parser generator.
#             /10/89 (appel): added %arg declaration (see lexgen.doc).
#             /04/90 (drt): fixed following bug: couldn't use the lexer after an
#                   error occurred -- NextTok and inquote weren't being reset
#           10/22/91 (drt): disabled use of lookahead
#           10/23/92 (drt): disabled use of $ operator (which involves lookahead),
#                   added handlers for dictionary lookup routine
#           11/02/92 (drt): changed handler for exception Reject in generated lexer
#                   to internal::Reject
#           02/01/94 (appel): Moved the exception handler for Reject in such
#                   a way as to allow tail-recursion (improves performance
#                   wonderfully!).
#           02/01/94 (appel): Fixed a bug in parsing of state names.
#           05/19/94 (Mikael Pettersson, mpe@ida.liu.se):
#                   Transition tables are usually represented as strings, but
#                   when the range is too large, int vectors constructed by
#                   code like "vector::Vector[1, 2, 3, ...]" are used instead.
#                   The problem with this isn't that the vector itself takes
#                   a lot of space, but that the code generated by Lib7 to
#                   construct the intermediate list at run-time is *HUGE*. My
#                   fix is to encode an int vector as a string literal (using
#                   two bytes per int) and emit code to decode the string to
#                   a vector at run-time. Lib7 compiles string literals into
#                   substrings in the code, so this uses much less space.
#           06/02/94 (jhr): Modified export-lex.pkg to conform to new installation
#                   scheme.  Also removed tab characters from string literals.
#           10/05/94 (jhr): Changed generator to produce code that uses the new
#                   basis style strings and characters.
#           10/06/94 (jhr) Modified code to compile under new basis style strings
#                   and characters.
#           02/08/95 (jhr) Modified to use new List module interface.
#           05/18/95 (jhr) changed vector::Vector to vector::from_list

#  Revision 1.9  1998/01/06 19:23:53  appel
#    added %posarg feature to permit position-within-file to be passed
#    as a parameter to make_lexer

# Revision 1.8  1998/01/06  19:01:48  appel
#   repaired error messages like "cannot have both %package and %header"
#
# Revision 1.7  1998/01/06  18:55:49  appel
#   permit %% to be unescaped within regular expressions
#
# Revision 1.6  1998/01/06  18:46:13  appel
#   removed undocumented feature that permitted extra %% at end of rules
#
# Revision 1.5  1998/01/06  18:29:23  appel
#   put yylineno variable inside make_lexer function
#
# Revision 1.4  1998/01/06  18:19:59  appel
#   Check for newline inside quoted string
#
# Revision 1.3  1997/10/04  03:52:13  dbm
#   Fix to remove output file if mythryl-lex fails.
#
#        10/17/02 (jhr) changed bad character error message to properly
#               print the bad character.
#        10/17/02 (jhr) fixed skipws to use char::is_space test.
#       07/27/05 (jhr) add \r as a recognized escape sequence.


#    Subject: lookahead in mythryl-lex
#    Reply-to: david.tarditi@CS.CMU.EDU
#    Date: Mon, 21 Oct 91 14:13:26 -0400

# There is a serious bug in the implementation of lookahead,
# as done in mythryl-lex, and described in Aho, Sethi, and Ullman,
# p. 134 "Implementing the Lookahead Operator"

# We have disallowed the use of lookahead for now because
# of this bug.

# As a counter-example to the implementation described in
# ASU, consider the following specification with the
# input string "aba" (this example is taken from
# a comp.compilers message from Dec. 1989, I think):

# Lex_Result=Void
# linenum = REF 1
# fun error x = file::write (file::stderr, x + "\n")
# eof = fn () => ()
# %%
# %package lex
# %%
# (a|ab)/ba => (print yytext; print "\n"; ());

# The ASU proposal works as follows. Suppose that we are
# using NFA's to represent our regular expressions.  Then to
# build an NFA for e1 / e2, we build an NFA n1 for e1 
# and an NFA n2 for e2, and add an epsilon transition
# from e1 to e2.

# When lexing, when we encounter the end state of e1e2,
# we take as the end of the string the position in
# the string that was the last occurrence of the state of
# the NFA having a transition on the epsilon introduced
# for /.

# Using the example we have above, we'll have an NFA
# with the following states:


#    1 -- a --> 2 -- b --> 3
#               |          |
#               | epsilon  | epsilon
#               |          |
#               |------------> 4 -- b --> 5 -- a --> 6

# On our example, we get the following list of transitions:

# a:      2, 4      (make an epsilon transition from 2 to 4)
# ab:     3, 4, 5   (make an epsilon transition from 3 to 4)
# aba:    6

# If we chose the last state in which we made an epsilon transition,
# we'll chose the transition from 3 to 4, and end up with "ab"
# as our token, when we should have "a" as our token.



###              "Men have become the tools of their tools."
###
###                            -- Henry David Thoreau



# Is there any reason to use this instead of standard library red-black trees?
# (Probably dates from era before standard library had them?)  XXX BUGGO FIXME

generic package red_black_g (  b:  api {    Key;
                                   > : (Key, Key) -> Bool;
                              }
                       )
: (weak)
api {  Tree;
     Key;
     empty:  Tree;
     insert:  (Key, Tree) -> Tree;
     lookup:  (Key, Tree) -> Key;
    exception NOT_FOUND  Key;
}

{
    include b;

    Color = RED | BLACK;

    Tree = EMPTY | TREE  (Key, Color, Tree, Tree);     empty = EMPTY;

    exception NOT_FOUND  Key;

    fun insert (key, t)
        =
        {   fun f EMPTY
                    =>
                    TREE (key, RED, EMPTY, EMPTY);

                f (TREE (k, BLACK, l, r))
                    =>
                    if (key > k)

                        case (f r)

                            r as TREE (rk, RED, rl as TREE (rlk, RED, rll, rlr), rr)
                                =>
                                case l
                                    TREE (lk, RED, ll, lr)
                                        =>
                                        TREE (k, RED, TREE (lk, BLACK, ll, lr),
                                                      TREE (rk, BLACK, rl, rr));

                                   _ => TREE (rlk, BLACK, TREE (k, RED, l, rll),
                                                          TREE (rk, RED, rlr, rr));
                                esac;

                            r as TREE (rk, RED, rl, rr as TREE (rrk, RED, rrl, rrr))
                                =>
                                case l
                                    TREE (lk, RED, ll, lr)
                                        =>
                                        TREE (k, RED, TREE (lk, BLACK, ll, lr),
                                                      TREE (rk, BLACK, rl, rr));

                                    _   => TREE (rk, BLACK, TREE (k, RED, l, rl), rr);
                                esac;

                            r => TREE (k, BLACK, l, r);
                        esac;

                    elif (k > key)

                        case (f l)

                            l as TREE (lk, RED, ll, lr as TREE (lrk, RED, lrl, lrr))
                                =>
                                case r
                                    TREE (rk, RED, rl, rr)
                                        =>
                                        TREE (k, RED, TREE (lk, BLACK, ll, lr),
                                                      TREE (rk, BLACK, rl, rr));

                                    _   =>
                                        TREE (lrk, BLACK, TREE (lk, RED, ll, lrl),
                                                          TREE (k, RED, lrr, r));
                                esac;

                            l as TREE (lk, RED, ll as TREE (llk, RED, lll, llr), lr)
                                =>
                                case r
                                    TREE (rk, RED, rl, rr)
                                        =>
                                        TREE (k, RED, TREE (lk, BLACK, ll, lr),
                                                   TREE (rk, BLACK, rl, rr));
                                   _    =>
                                        TREE (lk, BLACK, ll, TREE (k, RED, lr, r));
                                esac;

                            l => TREE (k, BLACK, l, r);
                        esac;
                    else
                        TREE (key, BLACK, l, r);
                    fi;

                f (TREE (k, RED, l, r))
                    =>
                    if   (key > k) TREE (k, RED, l, f r);
                    elif (k > key) TREE (k, RED, f l, r);
                    else           TREE (key, RED, l, r);
                    fi;
            end;

            case (f t)
                TREE (k, RED, l as TREE(_, RED, _, _), r) => TREE (k, BLACK, l, r);
                TREE (k, RED, l, r as TREE(_, RED, _, _)) => TREE (k, BLACK, l, r);
                t => t;
            esac;
        };


    fun lookup (key, t)
        =
        get t
        where
            fun get EMPTY
                    =>
                    raise exception (NOT_FOUND key);

                get (TREE (k, _, l, r))
                    =>
                    if   (k>key) get l;
                    elif (key>k) get r;
                    else         k;
                    fi;
            end;
        end;

};

api Lexgen {

    lex_fn: String -> Void;
};

package lex_fn: (weak) Lexgen  {

    include rw_vector;
    include list;

    infix val 9  sub ;

    Token = CHARS   Rw_Vector (Bool) | QMARK | STAR | PLUS | BAR
          | LP | RP | CARAT | DOLLAR | SLASH | STATE  List( String )
          | REPS  (Int, Int) | ID  String | ACTION  String
          | BOF | EOF | ASSIGN | SEMI | ARROW | LEXMARK | LEXSTATES 
          | COUNT | REJECT | FULLCHARSET | STRUCT | HEADER | ARG | POSARG
          ;

    Expression
        = EPS | ILK  (Rw_Vector( Bool ), Int) | CLOSURE  Expression
        | ALT  (Expression, Expression) | CAT  (Expression, Expression) | TRAIL  Int
        | END  Int
        ;

    # Flags describing input Lex spec.
    # - unnecessary code is omitted 
    # if possible 

    char_format           =  REF FALSE; 
    uses_trailing_context =  REF FALSE;
    uses_previous_newline =  REF FALSE;

    # Flags for various bells & whistles that Lex has.
    # These slow the lexer down and should be omitted
    # from production lexers (if you really want speed)

    count_newlines = REF FALSE;
    pos_arg        = REF FALSE;
    have_reject    = REF FALSE;

    #  Can increase size of character set 

    char_set_size = REF 129;

    #  Can name package or declare header code 

    package_name = REF "Mlex";
    header_code  = REF "";
    header_decl  = REF FALSE;
    arg_code     = REF (NULL: Null_Or( String ));

    package_declaration
        =
        REF FALSE;

    reset_flags
        =
        fn ()
            =
            {   count_newlines := FALSE;
                have_reject    := FALSE;
                pos_arg        := FALSE;

                uses_trailing_context := FALSE;

                char_set_size  := 129;
                package_name   := "Mlex";
                header_code    := "";
                header_decl    := FALSE;
                arg_code       := NULL; 
                package_declaration := FALSE;
            };

    lex_out = REF (file::stdout);

    fun say x
        =
        file::write(*lex_out, x);

    # Union: merge two sorted lists of integers 
    #
    fun union (a, b)
        =
        merge ( reverse a,
                reverse b,
                NIL
              )
        where
            recursive val merge
                =
                fn (NIL, NIL, z) => z;
                   (NIL, el ! more, z) => merge (NIL, more, el ! z);
                   (el ! more, NIL, z) => merge (more, NIL, el ! z);

                   (x ! morex, y ! morey, z)
                       =>
                       if   ((x: Int)==(y: Int))   merge (morex, morey, x ! z);
                       elif (x > y)                merge (morex, y ! morey, x ! z);
                       else                        merge (x ! morex, morey, y ! z);
                       fi;
                end;
        end;

    # Nullable: compute if a important expression
    # parse tree node is nullable 
    #
    recursive val nullable
        =
        fn
            EPS          =>  TRUE;
            ILK(_)       =>  FALSE;
            CLOSURE(_)   =>  TRUE;
            ALT (n1, n2) =>  nullable n1 or nullable n2;
            CAT (n1, n2) =>  nullable n1 and nullable n2;
            TRAIL (_)    =>  TRUE;
            END (_)      =>  FALSE;
        end 


    # FIRSTPOS: firstpos function for parse tree expressions 
    #
    also
    firstpos
        =
        fn
            EPS          =>  NIL;
            ILK(_, i)    =>  [i];
            CLOSURE (n)  =>  firstpos n;
            ALT (n1, n2) =>  union (firstpos n1, firstpos n2);
            CAT (n1, n2) =>  if (nullable n1 ) union (firstpos n1, firstpos n2);
                                              else firstpos n1; fi;
            TRAIL i    =>  [i];
            END i      =>  [i];
        end 


    # LASTPOS: Lastpos function for parse tree expressions 
    #
    also
    lastpos
        =
        fn  EPS          => NIL;
            ILK(_, i)    => [i];
            CLOSURE n  => lastpos n;
            ALT (n1, n2) => union (lastpos n1, lastpos n2);
            CAT (n1, n2) => if  (nullable n2  )  union (lastpos n1, lastpos n2);
                                               else  lastpos n2;                   fi;
            TRAIL i    => [i];
            END i      => [i];
        end 
            ;

    #  +++: Increment an integer reference 

    fun +++(x)  : Int
        =
        {   x := *x + 1;
            *x;
        };

    package dictionary {

            Relation(X)
                =
                (X, X) -> Bool;

            abstype Dictionary (Y, X)
                =
                DATA  { table:  List( (Y, X) ),
                        leq:  (Y, Y) -> Bool
                      }
            with
                exception LOOKUP;

                fun create leqfunc
                    =
                    DATA { table => NIL, leq => leqfunc };

                fun lookup (DATA { table => entrylist, leq } ) key
                    =
                    search entrylist
                    where
                        fun search []
                                =>
                                raise exception LOOKUP;

                            search((k, item) ! entries)
                                =>
                                if  (leq (key, k))

                                    if (leq (k, key))   item;
                                    else                raise exception LOOKUP;
                                    fi;
                                else
                                    search entries;
                                fi;
                        end;
                    end;

                 fun enter (DATA { table => entrylist, leq } )
                           (newentry as (key: Y, item: X)) :     Dictionary (Y, X)
                     =
                     {   gt =   fn a =  fn b  =  not (leq (a, b));
                         eq =   fn k =  fn k' =  (leq (k, k')) and (leq (k', k));

                         fun update NIL
                                 =>
                                 [ newentry ];

                             update ((entry as (k, _)) ! entries)
                                 =>
                                 if   (eq  key k  )  newentry ! entries;
                                 elif (gt  k key  )  newentry ! (entry ! entries);
                                 else                entry    ! (update entries);
                                 fi;
                         end;

                         DATA { table => update entrylist, leq };
                     };

                 fun listofdict (DATA { table => entrylist, leq } )
                     =
                     f (entrylist, NIL)
                     where
                         fun f   (NIL, r) =>  reverse r;
                             f (a ! b, r) =>  f (b, a ! r);
                         end;
                     end;
          end;
    };

    include dictionary; 

    #  INPUT.ML:  Input w/ one character push back capability 

    line_num =  REF 1;

    abstype Ibuf
        =
        BUF (
          file::Input_Stream,

          { b:  Ref( String ),
            p:  Ref( Int )
          }
        )


    with
        fun make_ibuf s
            =
            BUF (s, { b=>REF"", p => REF 0 } );

        fun close_ibuf (BUF (s, _))
            =
            file::close_input s;

        exception EOF_EXCEPTION;

        fun getch (a as (BUF (s,{ b, p } )))
            = 
            if (*p == size *b)

                b := file::read_n (s, 1024);
                p := 0;

                if  (size *b == 0)   raise exception EOF_EXCEPTION; 
                else                 getch a;
                fi;

            else
                ch =   string::get(*b, *p);

                if (ch == '\n')
                    line_num := *line_num + 1;
                fi;

                p := *p + 1;
                ch;
            fi;


        fun ungetch (BUF (s,{ b, p } ))
            =
            {   p := *p - 1;

                if (string::get(*b,*p) == '\n')
                    line_num := *line_num - 1;
                fi;
            };
    end;

    exception ERROR;

    fun pr_err x
        =
        {   file::write (
                file::stderr,
                string::cat [
                    "mythryl-lex: error, line ",
                    (int::to_string *line_num),
                    ": ",
                    x,
                    "\n"
                ]
            );

            raise exception ERROR;
        };

    fun pr_syn_err x
        =
        {   file::write (
                file::stderr,
                string::cat [
                    "mythryl-lex: syntax error, line ", # <-- Only line differing from above fn.
                    (int::to_string *line_num),
                    ": ",
                    x,
                    "\n"
                ]
            );

            raise exception ERROR;
        };

    exception SYNTAX_ERROR;             # Error in user's input file.

    exception LEX_ERROR;                # Unexpected error in lexer.

    lex_buf   =  REF (make_ibuf (file::stdin));
    lex_state =  REF 0;
    next_tok  =  REF BOF;
    inquote   =  REF FALSE;

    fun advance_tok () : Void
        =
        {   fun is_letter c
                =
                (c >= 'a'  and  c <= 'z') or
                (c >= 'A'  and  c <= 'Z');

            fun is_digit c
                =
                (c >= '0') and (c <= '9');

            #  Check for valid (non-leading) identifier character (added by John H Reppy) 

            fun is_ident_chr c
                =
                (   is_letter c
                or  is_digit  c
                or  c == '_'
                or  c == '\''
                );

            fun atoi s
                =
                num (explode s, 0)
                where
                    fun num (c ! r, n)
                            =>
                            if (is_digit c)   num (r, 10*n + (char::to_int c - char::to_int '0'));
                            else              n;
                            fi;

                        num ([], n)
                            =>
                            n;
                    end;
                end;

            fun skipws ()
                =
                {   ch = nextch();

                    if  (char::is_space ch)   skipws();
                    else                      ch;
                    fi;
                }

            also
            fun nextch ()
                =
                getch *lex_buf

            also
            fun escaped ()
                =
                case (nextch ())

                    'b' => '\008';
                    'n' => '\n';
                    'r' => '\r';
                    't' => '\t';
                    'h' => '\128';

                     x  =>
                        {   fun err t
                                =
                                pr_err("illegal ascii escape '" + (implode (reverse t)) + "'");

                            fun convert c
                                =
                                char::to_int c - char::to_int '0';

                            fun f (n, c, t)
                                =
                                if  (c == 3)
                                    if (n >= *char_set_size)   err t;
                                    else                       char::from_int n;
                                    fi;
                                else
                                    ch = nextch ();

                                    if  (is_digit ch)   f (n*10+(convert ch), c+1, ch ! t);
                                    else                err t;
                                    fi;
                                fi;

                            if (is_digit x)   f (convert x, 1, [x]);
                            else              x;
                            fi;
                       };
                 esac

            also
            fun onechar x
                =
                {   c = make_rw_vector (*char_set_size, FALSE);

                    set (c, char::to_int x, TRUE);

                    CHARS c;
                };

            case *lex_state

                0 =>
                    next_tok := make_tok ()
                    where
                        make_tok
                            =
                            fn ()
                                =
                                case (skipws ())

                                    #  Lex % operators 
                                    #
                                    '%' =>  case (nextch ())    
                                                '%' => LEXMARK;
                                                a   =>
                                                    {   fun f s
                                                            =
                                                            {   a = nextch();
                                                                if (is_letter a)
                                                                    f (a ! s);
                                                                else
                                                                    ungetch *lex_buf;
                                                                    implode (reverse s);
                                                                fi;
                                                            };

                                                        case (f [a])
                                                            "reject" => REJECT;
                                                            "count"  => COUNT;
                                                            "full"   => FULLCHARSET;
                                                            "s"      => LEXSTATES;
                                                            "S"      => LEXSTATES;
                                                            "package" => STRUCT;
                                                            "header" => HEADER;
                                                            "arg"    => ARG;
                                                            "posarg" => POSARG;
                                                            _ => pr_err "unknown % operator ";
                                                        esac;
                                                    };
                                            esac;

                                    # Semicolon (for end of LEXSTATES):
                                    #
                                    ';' => SEMI;

                                    # Anything else:
                                    #
                                    ch =>   if (is_letter ch)

                                                fun get_id matched
                                                    =
                                                    {   x = nextch();
                      /**** fix by John H Reppy
                                                        if is_letter x or is_digit x or
                                                           x == "_" or x == "'"
                      ****/
                                                        if (is_ident_chr  x)
                                                            get_id (x ! matched);
                                                        else
                                                            ungetch *lex_buf;
                                                            implode (reverse matched);
                                                        fi;
                                                    };
                                                ID (get_id [ch]);

                                            else
                                                pr_syn_err (string::cat [
                                                   "bad character: \"", char::to_string ch, "\""
                                                ]);
                                            fi;
                                esac;


                    end;

                1 =>
                    {   recursive val make_tok
                            =
                            fn ()
                                =
                                if *inquote

                                    case (nextch ())   

                                        # Inside quoted string 
                                        #
                                        '\\' => onechar (escaped());

                                        '"'  => {   inquote := FALSE;
                                                    make_tok();
                                                };

                                        '\n' => {   pr_syn_err "end-of-line inside quoted string";
                                                    inquote := FALSE;
                                                    make_tok();
                                                };

                                        x    => onechar x;
                                    esac;
                                else
                                    case (skipws ())

                                        # Single character operators:
                                        #
                                        '?' => QMARK;
                                        '*' => STAR;
                                        '+' => PLUS;
                                        '|' => BAR;
                                        '(' => LP;
                                        ')' => RP;
                                        '^' => CARAT;
                                        '$' => DOLLAR;
                                        '/' => SLASH;
                                        ';' => SEMI;

                                        '.' =>  {   c = make_rw_vector (*char_set_size, TRUE); 
                                                    set (c, 10, FALSE);
                                                    CHARS c;
                                                };

                                                # Assign and arrow 
                                        '=' =>  {   c = nextch(); 

                                                    if (c == '>')
                                                        ARROW;
                                                    else
                                                        ungetch *lex_buf;
                                                        ASSIGN;
                                                    fi;
                                                };

                                                # Character set:
                                        '[' =>  {   recursive val ilkch
                                                       =
                                                       fn () = {   x = skipws();
                                                                   if (x == '\\')  escaped ();
                                                                   else            x;
                                                                   fi;
                                                               };
                                                    first = ilkch();
                                                    flag = (first != '^');
                                                    c = make_rw_vector(*char_set_size, not flag);

                                                    fun add NULL    =>  ();
                                                        add (THE x) =>  set (c, char::to_int x, flag);
                                                    end 

                                                    also
                                                    fun range (x, y)
                                                        =
                                                        if (x > y)
                                                            pr_err "bad char. range";
                                                        else
                                                            i = REF (char::to_int x);
                                                            j = char::to_int y;

                                                            for (*i <= j) {
                                                                add (THE (char::from_int *i));
                                                                i := *i + 1;
                                                            };
                                                        fi

                                                    also
                                                    fun get_ilk last
                                                        =
                                                        case (ilkch ())

                                                            ']' =>  {   add last;
                                                                        c;
                                                                    };

                                                            '-' =>  case last

                                                                        NULL
                                                                            =>
                                                                            get_ilk (THE '-');

                                                                        THE last'
                                                                            =>
                                                                            {   x = ilkch ();

                                                                                if (x == ']')
                                                                                    add last;
                                                                                    add (THE '-'); c;
                                                                                else
                                                                                    range (last', x);
                                                                                    get_ilk NULL;
                                                                                fi;
                                                                            };
                                                                    esac;

                                                            x   =>  {   add last;
                                                                        get_ilk (THE x);
                                                                    };
                                                        esac;

                                                    CHARS (get_ilk (first == '^'  ??  NULL  :: THE first));
                                                };

                                        # Start States specification:
                                        # 
                                        '<' =>  {   recursive val get_state
                                                        =
                                                        fn (prev, matched)
                                                            =
                                                            case (nextch ())
                                                                '>' =>  matched ! prev;
                                                                ',' =>  get_state (matched ! prev, "");
                                                                 x  =>  if  (is_ident_chr  x)   get_state (prev, matched + string::from_char  x);
                                                                        else                    pr_syn_err "bad start state list";
                                                                        fi;
                                                            esac;

                                                    STATE (get_state (NIL, ""));
                                                };
                                                #  { id } or repetitions 

                                        '{' =>  {   ch = nextch();

                                                    if (is_letter ch)

                                                        fun get_id matched
                                                            =
                                                            case (nextch ())

                                                                '}' => matched;

                                                                 x => if (is_ident_chr x)

                                                                          get_id (matched + string::from_char x);
                                                                      else
                                                                          pr_err "invalid char. class name";
                                                                      fi;
                                                            esac;

                                                        ID (get_id (string::from_char ch));

                                                    elif (is_digit ch)

                                                        fun get_r (matched, r1)
                                                            =
                                                            case (nextch ())

                                                                '}' =>  {   n = atoi matched; 

                                                                            if (r1 == -1)  (n, n);
                                                                            else          (r1, n);
                                                                            fi;
                                                                        };

                                                                ',' =>  if (r1 == -1)    get_r("", atoi matched);
                                                                        else             pr_err "invalid repetitions spec.";
                                                                        fi;

                                                                x   =>  if (is_digit x)  get_r (matched + string::from_char x, r1);
                                                                        else             pr_err "invalid char in repetitions spec";
                                                                        fi;
                                                            esac;

                                                        REPS (get_r (string::from_char ch, -1));

                                                    else
                                                        pr_err "bad repetitions spec";
                                                    fi;
                                                };

                                                # Lex % operators: 
                                        '\\' => onechar (escaped());

                                                # Start quoted string: 
                                                #
                                        '"' =>  {   inquote := TRUE;
                                                    make_tok ();
                                                };

                                                # Anything else: 
                                                #
                                        ch  =>  onechar ch;
                                    esac;
                                fi;

                        next_tok := make_tok();
                    };

                2   =>  next_tok
                            :=
                            case (skipws ())

                                '(' =>
                                    {   fun loop_to_end (backslash, x)
                                            =
                                            {   c    = getch *lex_buf;
                                                notb = not backslash;
                                                nstr = c ! x;

                                                case c
                                                    '"' =>  if notb  nstr;
                                                            else     loop_to_end (FALSE, nstr);
                                                            fi;

                                                    _   =>  loop_to_end (c == '\\' and notb, nstr);
                                                esac;
                                            };

                                        fun get_act (lpct, x)
                                            =
                                            {   c    = getch *lex_buf;
                                                nstr = c ! x;

                                                case c
                                                     '"' => get_act (lpct, loop_to_end (FALSE, nstr));
                                                     '(' => get_act (lpct + 1, nstr);

                                                     ')' => if (lpct == 0 ) implode (reverse x);
                                                            else get_act (lpct - 1, nstr);
                                                            fi;

                                                     _   => get_act (lpct, nstr);
                                                esac;
                                            };

                                        ACTION (get_act (0, NIL));
                                    };

                                ';' => SEMI;

                                c   => (pr_syn_err ("invalid character " + string::from_char c));

                            esac;

                _   =>  raise exception LEX_ERROR;

            esac;
        }
        except
            EOF_EXCEPTION
                =
                next_tok := EOF;

    fun get_tok (_: Void) : Token
        = 
        {   t = *next_tok;
            advance_tok();
            t;
        };

    sym_tab
        =
        REF (create string::(<=)) : Ref( Dictionary( String, Expression ) );

    fun get_expression () : Expression
        =
        expression0 ()
        where
            recursive val optional
                =
                fn e =  ALT (EPS, e)

            also
            lookup'
                =
                fn name
                    =
                    lookup *sym_tab name 
                    except
                        LOOKUP
                            =
                            pr_err ("bad regular expression name: " + name)

            also
            newline
                =
                fn ()
                    =
                    {   c = make_rw_vector (*char_set_size, FALSE); 
                        set (c, 10, TRUE);
                        c;
                    }

            also
            endline
                =
                fn e =  trail (e, ILK (newline(), 0))

            also
            trail
                =
                fn (e1, e2)
                    =
                    CAT (CAT (e1, TRAIL 0), e2)

            also
            closure1
                =
                fn e
                    =
                    CAT (e, CLOSURE e)

            also
            repeat
                =
                fn (min, max, e)
                    =
                    rep (min, max)
                    where 
                        recursive val rep
                            =
                            fn (0, 0) =>  EPS;
                               (0, 1) =>  ALT (e, EPS);
                               (0, i) =>  CAT (rep (0, 1), rep (0, i - 1));
                               (i, j) =>  CAT (e, rep (i - 1, j - 1));
                            end;
                    end

            also
            expression0
                =
                fn ()
                    =
                    case (get_tok ())

                        CHARS c => expression1 (ILK (c, 0));

                        LP => {   e = expression0(); 

                                  if  (*next_tok == RP)
                                       advance_tok ();
                                       expression1 e;
                                  else
                                       pr_syn_err "missing '('";
                                  fi;
                              };

                        ID name => expression1 (lookup' name);

                        _ => raise exception SYNTAX_ERROR;
                    esac

            also
            expression1
                =
                fn e
                    =
                    case *next_tok

                        SEMI => e;
                        ARROW => e;
                        EOF => e;
                        LP => expression2 (e, expression0());
                        RP => e;

                        t   =>  {   advance_tok();

                                    case t
                                        QMARK   => expression1 (optional e);
                                        STAR    => expression1 (CLOSURE e);

                                        PLUS    => expression1 (closure1 e);
                                        CHARS c => expression2 (e, ILK (c, 0));

                                        BAR     => ALT (e, expression0());

                                        DOLLAR  =>  {   uses_trailing_context := TRUE;
                                                        endline e;
                                                    };

                                        SLASH   =>  {   uses_trailing_context := TRUE;
                                                        trail (e, expression0());
                                                    };

                                        REPS (i, j)
                                            =>
                                            expression1 (repeat (i, j, e));

                                        ID name
                                            =>
                                            expression2 (e, lookup' name);

                                        _ => raise exception SYNTAX_ERROR;
                                    esac;
                                };
                    esac

            also
            expression2
                =
                fn (e1, e2)
                    =
                    case *next_tok

                        SEMI  => CAT (e1, e2);
                        ARROW => CAT (e1, e2);
                        EOF   => CAT (e1, e2);
                        LP    => expression2 (CAT (e1, e2), expression0());
                        RP    => CAT (e1, e2);

                        t   =>  {   advance_tok();

                                    case t
                                        QMARK => expression1 (CAT (e1, optional e2));
                                        STAR  => expression1 (CAT (e1, CLOSURE e2));
                                        PLUS  => expression1 (CAT (e1, closure1 e2));

                                        CHARS c => expression2 (CAT (e1, e2), ILK (c, 0));
                                        BAR     => ALT (CAT (e1, e2), expression0());

                                        DOLLAR  =>  {   uses_trailing_context := TRUE;
                                                        endline (CAT (e1, e2));
                                                    };
                                        SLASH   =>  {   uses_trailing_context := TRUE;
                                                        trail (CAT (e1, e2), expression0());
                                                    };

                                        REPS (i, j)
                                            =>
                                            expression1 (CAT (e1, repeat (i, j, e2)));

                                        ID name
                                            =>
                                            expression2 (CAT (e1, e2), lookup' name);

                                        _   => raise exception SYNTAX_ERROR;
                                    esac;
                                };
                    esac;
        end;                                    # fun get_expression

    state_tab
        =
        REF (create (string::(<=))) : Ref( Dictionary( String, Int ) );

    state_num = REF 0;

    fun get_states () : List( Int )
        =
        {   fun add NIL sl
                    =>
                    sl;

                add (x ! y) sl
                    =>
                    add y (union ( [ lookup *state_tab x
                                                   except
                                                       LOOKUP = pr_err ("bad state name: " + x)
                                                 ],
                                                 sl));
            end;

            fun addall i sl
                = 
                if  (i <= *state_num)   addall (i+2) (union ([i], sl));
                else                    sl;
                fi;

            fun incall (x ! y) =>  (x+1) ! incall y;
                incall NIL     =>  NIL;
            end;

            fun addincs (x ! y) =>  x ! (x+1) ! addincs y;
                addincs NIL     =>  NIL;
            end;

            state_list
                =
                case *next_tok

                    STATE s =>  {   advance_tok(); 
                                    lex_state := 1;
                                    add s NIL;
                                };

                    _       =>  addall 1 NIL;
                esac;

            case *next_tok

                CARAT
                    =>
                    {   lex_state := 1;
                        advance_tok ();
                        uses_previous_newline := TRUE;
                        incall state_list;
                    };

                _   =>
                    addincs state_list;
            esac;
        };                      # fun get_states


    leaf_num = REF -1;


    fun renum (e:  Expression) : Expression
        =
        label e
        where
            recursive val label
                =
                fn EPS          =>  EPS;
                   ILK (x, _)   =>  ILK (x,+++leaf_num);
                   CLOSURE e  =>  CLOSURE (label e);
                   ALT (e1, e2) =>  ALT (label e1, label e2);
                   CAT (e1, e2) =>  CAT (label e1, label e2);
                   TRAIL i    =>  TRAIL(+++leaf_num);
                   END i      =>  END(+++leaf_num);
                end;
        end;

    exception PARSE_ERROR;


    fun parse () : ((String,  List( (List( Int ), Expression)),  Dictionary (String, String)))
        =
        {   accept
                =
                REF (create string::(<=)) : Ref( Dictionary( String, String ) );

            recursive val parse_rtns
                =
                fn l =  case (getch *lex_buf)

                            '%' => {   c = getch *lex_buf; 

                                       if (c == '%')   implode (reverse l);
                                       else            parse_rtns (c ! '%' ! l);
                                       fi;
                                   };

                            c   => parse_rtns (c ! l);
                        esac

            also
            parse_defs
                =
                fn ()
                    =
                    {   lex_state := 0;
                        advance_tok ();

                        case *next_tok

                            LEXMARK
                                =>
                                ();

                            LEXSTATES
                                =>
                                {   fun f ()
                                        =
                                        case *next_tok

                                            ID i
                                                =>
                                                {   state_tab := enter *state_tab (i, +++state_num);
                                                    +++state_num;
                                                    advance_tok ();
                                                    f ();
                                                };

                                            _ => ();
                                        esac;

                                    advance_tok();

                                    f ();

                                    if  (*next_tok == SEMI)   parse_defs ();
                                    else                      pr_syn_err "expected ';'";
                                    fi;
                                };

                            ID x
                                =>
                                {   lex_state := 1;

                                    advance_tok ();

                                    if (get_tok() == ASSIGN)

                                        sym_tab := enter *sym_tab (x, get_expression());

                                        if  (*next_tok == SEMI)   parse_defs();
                                        else                      pr_syn_err "expected ';'";
                                        fi;
                                    else
                                        raise exception SYNTAX_ERROR;
                                    fi;
                                };

                            REJECT      =>  {   have_reject    := TRUE;   parse_defs(); };
                            COUNT       =>  {   count_newlines := TRUE;   parse_defs(); };
                            FULLCHARSET =>  {   char_set_size  := 256;    parse_defs(); };

                            HEADER =>   {   lex_state := 2; advance_tok();

                                            case (get_tok ())

                                                ACTION s
                                                    => 
                                                    if  *package_declaration
                                                        (pr_err "cannot have both %package and %header \
                                                        \declarations");
                                                    elif *header_decl 
                                                        pr_err "duplicate %header declarations";
                                                    else 
                                                        header_code :=  s;
                                                        lex_state   :=  0;
                                                        header_decl :=  TRUE;
                                                        parse_defs();
                                                    fi;

                                                _ => raise exception SYNTAX_ERROR;
                                            esac;
                                        };

                            POSARG =>   {   pos_arg := TRUE;
                                            parse_defs ();
                                        };

                            ARG =>  {   lex_state := 2;
                                        advance_tok();

                                        case (get_tok ())

                                            ACTION s
                                                => 
                                                {   case *arg_code
                                                        THE _ =>  pr_err "duplicate %arg declarations";
                                                        NULL  =>  arg_code := THE s;
                                                    esac;

                                                    lex_state := 0;

                                                    parse_defs ();
                                                };

                                             _ => raise exception SYNTAX_ERROR;
                                         esac;
                                    };

                            STRUCT  =>  {   advance_tok();

                                            case *next_tok

                                                ID i =>
                                                    if *header_decl

                                                        pr_err "cannot have both %package and %header \
                                                               \declarations";
                                                    elif *package_declaration

                                                        pr_err "duplicate %package declarations";

                                                    else
                                                        package_name        := i;
                                                        package_declaration := TRUE; 
                                                    fi;

                                                  _  => (pr_err "expected ID");
                                            esac;

                                            parse_defs ();
                                        };

                            _ => raise exception SYNTAX_ERROR; 
                        esac;
                   }                    # fun parse_defs

            also
            parse_rules
                =
                fn rules
                    =
                    {   lex_state := 1;

                        advance_tok ();

                        case *next_tok

                            EOF => rules;

                            _   =>
                                {   s =  get_states();

                                    e =  renum (CAT (get_expression(), END 0));

                                    if  (*next_tok == ARROW)

                                        lex_state := 2;
                                        advance_tok ();

                                        case (get_tok ())

                                            ACTION act
                                                =>
                                                if (*next_tok == SEMI)
                                                    accept := enter *accept (int::to_string *leaf_num, act);
                                                    parse_rules((s, e) ! rules);
                                                else 
                                                    pr_syn_err "expected ';'";
                                                fi;

                                            _   =>
                                                raise exception SYNTAX_ERROR;
                                        esac;
                                    else
                                        pr_syn_err "expected '=>'";
                                    fi;
                                };
                        esac;
                    };

            usercode =  parse_rtns  NIL;
            parse_defs ();

            ( usercode,
              parse_rules NIL,
              *accept
            );
        }
        except
            SYNTAX_ERROR
                =
                pr_syn_err "";

    fun makebegin () : Void
        =
        {   fun make ((x, n: Int) ! y)
                    =>
                    {   say "my ";
                        say x;
                        say " = " ;
                        say "STARTSTATE ";
                        say (int::to_string n);
                        say ";\n";
                        make y;
                    };

                make NIL
                    =>
                    ();
            end;

            say "\n#  start state definitions \n\n";

            make (listofdict *state_tab);
        };

    package l
        = 
        package {

            nonfix val  > ;

            Key =  (List (Int), String);

            fun > ((key, item: String), (key', item'))
                =
                f key key'
                where
                    fun f ((a: Int) ! a') (b ! b')
                            =>
                            if   (int::(>) (a, b))  TRUE;
                            elif (a == b)           f a' b';
                            else                    FALSE;
                            fi;
                        f _ _
                            =>
                            FALSE;
                    end;
                end;
        };

    package rb
        =
        red_black_g( l );

    fun maketable (fins: List( (Int, (List( Int )))),
                 tcs:  List ((Int, (List( Int )))),
                 tcpairs:  List ((Int, Int)),
                 trans:   List ((Int,(List( Int ))))) : Void
        =
        {   # Fins = List (state #, list of final leaves for the state)
            #  tcs = List (state #, list of trailing context leaves which begin in this state)
            #    
            #   tcpairs = List (trailing context leaf, end leaf)
            #   trans   = List (state #, list of transitions for state)

            Element = NN  Int | TT  Int | DD  Int;

            count = REF 0;

            char_format :=   length trans < 256;

            if *uses_trailing_context   say "\nYyfinstate = NN Int | TT Int | DD Int;\n";
            else                        say "\nYyfinstate = NN Int;";
            fi;

            say "\nStatedata = { fin:  List( Yyfinstate ), trans: ";

            case *char_format
                TRUE  =>  say "String };";
                FALSE =>  say "vector::Vector( Int ) };";
            esac;

            say "\n\
                 \#  transition & final state table \n\
                 \tab = {\n";

            case *char_format

                TRUE => ();

                FALSE =>
                    {   say "fun decode s k =\n";
                        say "  {   k' = k + k;\n";
                        say "      hi = char::to_int (string::get (s, k'));\n";
                        say "      lo = char::to_int (string::get (s, k' + 1));\n";
                        say "\n";
                        say "      hi * 256 + lo;\n";
                        say "  };\n";
                    };
            esac;

            newfins
                =
                {   fun is_end_leaf t
                        =
                        f tcpairs
                        where 
                            fun f ((l, e) ! r) =>  if (e==t)   TRUE;
                                                   else        f r;
                                                   fi;

                                f NIL          =>  FALSE;
                            end;
                        end;

                    fun get_end_leaf t
                        =
                        f tcpairs
                        where
                            fun f ((tl, el) ! r)
                                    =>
                                    tl == t   ??   el
                                              ::   f r;

                                f _ =>
                                    raise exception MATCH;
                            end;
                        end;

                    fun get_tr_con_leaves s
                        =
                        f tcs
                        where
                            fun f ((s', l) ! r)
                                    =>
                                    s == s'   ??   l
                                              ::   f r;

                                f NIL => NIL;
                            end;
                        end;

                    fun sort_leaves s
                        =
                        {   fun insert (x: Int) (a ! b)
                                    =>
                                    if (x <= a)  x ! (a ! b);
                                    else         a ! (insert x b);
                                    fi;

                                insert x NIL
                                    =>
                                    [x];
                            end;

                            list::fold_right
                                (fn (x, r) = insert x r)
                                [] s;
                        };

                    fun conv a
                        =
                        is_end_leaf a   ??   DD a
                                        ::   NN a;

                    fun merge (a ! a', b ! b')
                            =>
                            if   (a <= b)   (conv a) !  merge (a', b ! b');
                            else            (TT   b) !  merge (a ! a', b');
                            fi;

                        merge (a ! a', NIL) => (conv a) ! (merge (a', NIL));
                        merge (NIL, b ! b') => (TT b) ! (merge (b', NIL));
                        merge (NIL, NIL) => NIL;
                    end;

                    map
                        (fn (x, l)
                            =
                            reverse (
                                merge (
                                    l,
                                    sort_leaves (
                                        map
                                        (fn x =  get_end_leaf x)
                                        (get_tr_con_leaves x)
                                    )
                                )
                            )
                        )
                        fins;
                };

            rs  =   result
                    where 

                        include rb;

                        fun make_items x
                            =
                            {   fun emit8 (x, pos)
                                    =
                                    {   s =   number_string::pad_left '0' 3 (int::to_string x);

                                        case pos
                                            16  => { say "\\\n\\\\";   say s;      1; };
                                            _   => { say "\\";         say s;  pos+1; };
                                        esac;
                                    };

                                fun emit16 (x, pos)
                                    =
                                    {   hi8 = x / 256;
                                        lo8 = x - hi8 * 256;    #  x rem 256 

                                        emit8 (lo8, emit8 (hi8, pos));
                                    };


                                fun make_string ([], _, _)
                                        =>
                                        ();

                                    make_string (x ! xs, emitter, pos)
                                        =>
                                        make_string (xs, emitter, emitter (x, pos));
                                end;

                                case *char_format

                                    TRUE    =>  {   say " \n\"";
                                                    make_string (x, emit8, 0);
                                                    say "\"\n";
                                                };

                                    FALSE   =>  {   say (int::to_string (length x));
                                                    say ", \n\"";
                                                    make_string (x, emit16, 0);
                                                    say "\"\n";
                                                };
                                esac;
                            };


                        fun make_entry (NIL, rs, t)
                                =>
                                reverse rs;

                            make_entry(((l: Int, x) ! y), rs, t)
                                =>
                                {   name = (int::to_string l);

                                    {   my (r, n)
                                            =
                                            lookup ((x, name), t);

                                        make_entry (y, (n ! rs), t);
                                    }
                                    except
                                        NOT_FOUND _
                                            =
                                            {   count := *count+1;
                                                say " (";
                                                say name;
                                                say ", ";
                                                make_items x;
                                                say "),\n";
                                                make_entry (y, (name ! rs), (insert ((x, name), t)));
                                            };
                                };
                        end;

                        say "    s = [ \n"; 

                        result =  make_entry (trans, NIL, empty);

                        case *char_format 

                             TRUE
                                 =>
                                 {   say "    (0, \"\")];\n";
                                     say "    fun f x = x;\n";
                                 };

                             FALSE
                                 =>
                                 {   say "    (0, 0, \"\")];\n";
                                     say "    fun f (n, i, x) = (n, vector::tabulate (i, decode x));\n";
                                 };
                        esac;

                        say "    s = map f (reverse (tail (reverse s)));\n";
                        say "    exception LEX_HACKING_ERROR;\n";
                        say "    fun get ((j, x) ! r, i: Int)\n";
                        say "            =>\n";
                        say "            if (i == j)  x;   else get (r, i); fi;\n\n";
                        say "        get ([], i)\n";
                        say "            =>\n";
                        say "            raise exception LEX_HACKING_ERROR;\n";
                        say "    end;\n";

                        say "fun g {   fin => x,   trans => i   }\n";
                        say "    =\n";
                        say "    {   fin => x,   trans => get (s, i)   };\n";
                    end;

            fun make_table args
                =
                maketable args
                where  

                    fun make_one (a, b)
                        =
                        {   fun item (NN i) => ("NN", i);
                                item (TT i) => ("TT", i);
                                item (DD i) => ("DD", i);
                            end;

                            fun make_item x
                                =
                                {   my (t, n)
                                        =
                                        item x;

                                    apply say ["(", t, " ", int::to_string n, ")"];
                                };

                            fun make_items []  =>  ();
                                make_items [x] =>  make_item x;

                                make_items (hd ! tl)
                                    =>
                                    {   make_item hd;
                                        say ", ";
                                        make_items tl;
                                    };
                            end;

                            say "{ fin => [";
                            make_items b;
                            apply say ["], trans => ", a, "}"];
                        };

                    fun maketable ([], []) => ();
                        maketable ([a], [b]) => make_one (a, b);

                        maketable (a ! a', b ! b')
                             =>
                             {   make_one (a, b);
                                 say ",\n";
                                 maketable (a', b');
                             };

                        maketable _ => raise exception MATCH;
                    end;
                end;


        #       fun make_table (NIL, NIL) => ();
        #          make_table (a ! a', b ! b') =>
        #            {   funx make_items NIL = ()
        #                  | make_items (hd ! tl) =
        #                    { my (t, n) =
        #                        case hd of
        #                          (NN i) => ("(NN ", i)
        #                        | (TT i) => ("(TT ", i)
        #                        | (DD i) => ("(DD ", i);
        #                        say t; say (int::to_string n); say ")";
        #                        if (null tl)
        #                             ();
        #                        else (say ", "; make_items tl); fi;
        #                    };
        #                 say "{ fin = ["; make_items b;
        #                 say "], trans = "; say a; say "}";
        #                 if (null a')
        #                    ();
        #                 else (say ",\n"; make_table (a', b')); fi;
        #             };
        #        end;


            fun msg x
                =
                file::say .{ x; };

            say " vector::from_list (map g \n[";
            make_table (rs, newfins); 
            say "]);\n};\n";

            msg ("\nNumber of states = " + (int::to_string (length trans)));
            msg ("\nNumber of distinct rows = " + (int::to_string *count));

            msg ("\nApproximate memory size of translation table = " +
                 (int::to_string (*count * *char_set_size * (*char_format ?? 1 :: 8))));

            msg " bytes\n\n";
        };

    #   makeaccept: Takes a (String, String) dictionary, prints case statement for
    #   accepting leaf actions.  The key strings are the leaf #'s, the data strings
    #   are the actions

    fun makeaccept ends
        =
        make (listofdict ends, TRUE)
        where 

            fun startline f
                =
                say "  ";

            fun make (NIL, f)
                => 
                {   startline f;
                    say "_ => raise exception internal::LEXER_ERROR;\n";
                };

                make ((x, a) ! y, f)
                    =>
                    {   startline f;
                        say x;
                        say " => ";

                        if  (substring::size(#2 (substring::position "yytext" (substring::from_string a)))  ==  0)

                            say "{ ";
                            say a;
                            say "; };";
                        else
                            say "{   yytext=yymktext();\n";
                            say a;
                            say "; };";
                        fi;

                        say "\n";

                        make (y, FALSE);
                    };
            end;
        end;

    fun leafdata (e: List( (List( Int ), Expression)))
        =
        {   fp   =  make_rw_vector (*leaf_num + 1, NIL);
            leaf =  make_rw_vector (*leaf_num + 1, EPS);

            tcpairs   = REF NIL;
            trailmark = REF -1;

            recursive val add
                =
                fn (NIL,     x) => ();
                   (hd ! tl, x) => {   set (fp, hd, union (fp[ hd ], x));
                                       add (tl, x);
                                   };
                end 

            also
            moredata
                =
                fn  CLOSURE e1 =>
                            { moredata e1;    add (lastpos e1, firstpos e1); };

                    ALT (e1, e2) => { moredata e1;
                                      moredata e2;
                                    };

                    CAT (e1, e2) => { moredata e1;
                                      moredata e2;
                                      add (lastpos e1, firstpos e2);
                                    };

                    ILK (x, i) => set (leaf, i, ILK (x, i));

                    TRAIL i => { set (leaf, i, TRAIL i);

                                   if (*trailmark == -1)
                                        trailmark :=  i;
                                   fi;
                                 };

                    END i => {  set (leaf, i, END i);

                                  if (*trailmark != -1)
                                       trailmark := -1;
                                       tcpairs   :=  (*trailmark, i) ! *tcpairs;
                                  fi;
                               };
                    _ => ();
                end 

            also
            makedata
                =
                fn
                    NIL => ();

                    (_, x) ! tl
                        =>
                        {   moredata x;
                            makedata tl;
                        };
                end;

            trailmark := -1;
            makedata e;

            (fp, leaf, *tcpairs);
        };

    fun makedfa rules
        =
        {   visitstarts( startstates() );

            ( listofdict *fintab,
              listofdict *transtab,
              listofdict *tctab,
              tcpairs
            );
        }
        where

            state_tab = REF (create (string::(<=))):   Ref( Dictionary (String, Int        ));
            fintab    = REF (create    (int::(<=))):   Ref( Dictionary (Int,   (List( Int))));
            transtab  = REF (create    (int::(<=))):   Ref( Dictionary (Int,    List( Int)) );
            tctab     = REF (create    (int::(<=))):   Ref( Dictionary (Int,   (List( Int))));

            my (fp, leaf, tcpairs)
                =
                leafdata rules;

            fun visit (state, statenum)
                =
                {  transitions = gettrans state; 

                   fintab   := enter *fintab   (statenum, getfin state);
                   tctab    := enter *tctab    (statenum, gettc state);
                   transtab := enter *transtab (statenum, transitions);
                }

            also
            fun visitstarts states
                =
                vs states 0
                where
                    fun vs NIL i => ();
                        vs (hd ! tl) i => { visit (hd, i);   vs tl (i+1); };
                    end;
                end

            also
            fun hashstate (s: List( Int ))
                =
                hs (s, "")
                where
                    recursive val hs
                        =
                        fn ((x: Int) ! y, z)
                               =>
                               hs (y, z + " " + (int::to_string x));

                           (NIL, z)
                               =>
                               z;
                        end;
                end

            also
            fun find s
                =
                lookup *state_tab (hashstate s)

            also
            fun add (s, n)
                =
                state_tab := enter *state_tab (hashstate s, n)

            also
            fun getstate state
                =
                find state
                except
                    LOOKUP
                        =
                        {   n = +++state_num; 
                            add (state, n);
                            visit (state, n);
                            n;
                        }

            also
            fun getfin state
                =
                f state NIL
                where
                    fun f (hd ! tl) fins
                            =>
                            case (leaf[ hd ])
                                END _ => f tl (hd ! fins);
                                _     => f tl fins;
                            esac;

                        f NIL fins
                            =>
                            fins;
                    end;
                end

            also
            fun gettc state
                =
                f state NIL
                where
                    fun f (hd ! tl) fins
                            =>
                            case (leaf[ hd ])
                                TRAIL _ =>  f tl (hd ! fins);
                                _       =>  f tl fins;
                            esac;

                        f NIL fins
                            =>
                            fins;
                    end;
                end

            also
            fun gettrans state
                =
                loop (*char_set_size - 1) NIL
                where
                    fun loop c tlist
                        =
                        {   fun cktrans NIL r
                                    =>
                                    r;

                                cktrans (hd ! tl) r
                                    =>
                                    case (leaf[ hd ])

                                        ILK (i, _)
                                            =>
                                            if (i[ c ])
                                                cktrans tl (union (r, fp[ hd ]));
                                            else
                                                cktrans tl r
                                                except
                                                    (SUBSCRIPT | INDEX_OUT_OF_BOUNDS)
                                                        =
                                                        cktrans tl r;
                                            fi;

                                        _ => cktrans tl r;
                                    esac;
                            end;

                            if (c >= 0)
                                v=cktrans state NIL;
                                loop (c - 1) if (v==NIL ) 0 ! tlist; else (getstate v) ! tlist; fi;
                            else
                                tlist;
                           fi;
                        };
                end

            also
            fun startstates ()
                =
                {   makess rules;
                    listofarray (startarray, *state_num + 1);
                }
                where
                    startarray
                        =
                        make_rw_vector (*state_num + 1, NIL);

                    fun listofarray (a, n)
                        =
                        f (n - 1) NIL
                        where
                            fun f i l
                                =
                                i >= 0
                                 ?? f (i - 1) (a[i] ! l)
                                 ::                   l;
                        end;

                    recursive val makess
                        =
                        fn
                           NIL => ();

                           (startlist, e) ! tl
                               =>
                               {   fix (startlist, firstpos e);
                                   makess tl;
                               };
                        end 

                    also
                    fix = fn
                            (NIL, _) => ();

                            (s ! tl, firsts)
                                =>
                                {   set (startarray,
                                         s,
                                         union (firsts, startarray[ s ])
                                    );

                                    fix (tl, firsts);
                                };
                          end ;
                end;


        end;                            # fun makedfa

    skel_hd
        = 
        "   \n\
        \    package user_declarations {\n\
        \      \n\
        \";

    skel_mid2
        =
        "                       | internal::DD k => action (i, (acts ! l), k ! rs)\n\
        \                       | internal::TT k =>\n\
        \                         {   fun f (a ! b, r)\n\
        \                                     =>\n\
        \                                     if (a == k)\n\
        \                                         action (i, (((internal::NN a) ! acts) ! l), (b@r));\n\
        \                                     else\n\
        \                                         f (b, a ! r);\n\
        \                                     fi;\n\
        \                                        \n\
        \                                 f (NIL, r)\n\
        \                                     =>\n\
        \                                     action (i, (acts ! l), rs);\n\
        \                             end;\n\
        \                             \n\
        \                             f (rs, NIL);\n\
        \                          }\n\
        \";


    fun lex_fn  infile
        =
        {   outfile = infile + ".pkg";

            fun print_lexer ends
                =
                {   sayln
                        =
                        fn x = { say x;   say "\n"; };

                    case *arg_code 

                        NULL  => {   sayln "fun lex () : internal::Result =";
                                     sayln "{ fun continue () = lex(); ";
                                 };

                        THE s => {   say "fun lex ";
                                     say "(yyarg as (";
                                     say s;
                                     sayln ")) =";
                                     sayln " { fun continue () : internal::Result = ";
                                  };
                    esac;

                    say "  { fun scan (s, accepting_leaves:  List( List( internal::Yyfinstate";
                    sayln " ) ), l, i0) =";

                    if *uses_trailing_context   say "\t { fun action (i, NIL, rs)";
                    else                        say "\t { fun action (i, NIL)";
                    fi;

                    sayln " => raise exception LEX_ERROR;";

                    if *uses_trailing_context   sayln "\t action (i, NIL ! l, rs) => action (i - 1, l, rs);";
                    else                        sayln "\t action (i, NIL ! l)     => action (i - 1, l);";
                    fi;

                    if *uses_trailing_context   sayln "\t action (i, (node ! acts) ! l, rs) => ";
                    else                        sayln "\t action (i, (node ! acts) ! l) => ";
                    fi;

                    sayln "\t\t case node";
                    sayln "\t\t ";
                    sayln "\t\t    internal::NN yyk => ";
                    sayln "\t\t\t ( { fun yymktext () = substring(*yyb, i0, i-i0);\n\
                           \\t\t\t     yypos = i0 + *yygone;";

                    if *count_newlines 
                        sayln "\t\t\t yylineno := vector_slice_of_chars::keyed_fold_left";
                        sayln "\t\t\t\t (fn (_, '\\n', n) => n+1; (_, _, n) => n; end) *yylineno (vector_slice_of_chars::slice (*yyb, i0, THE (i-i0)));";
                    fi;

                    if *have_reject

                        say "\t\t\t fun REJECT() = action (i, acts ! l";

                        if *uses_trailing_context    sayln ", rs);";
                        else                         sayln     ");";
                        fi;
                    fi;  

                    sayln "\t\t\t include user_declarations;";
                    sayln "\t\t\t include internal::start_states;";
                    sayln "  {   yybufpos := i;";
                    sayln "      case yyk";
                    sayln " ";

                    sayln "";
                    sayln "\t\t\t#  Application actions \n";
                    makeaccept ends;
                    say "\n\t\t esac; }; } ";
                    say "); esac; end;    # fun action\n\n";

                    if *uses_trailing_context
                         say skel_mid2;
                    fi;

                    sayln "\t my { fin, trans } = unsafe::vector::get (internal::tab, s);";
                    sayln "\t new_accepting_leaves = fin ! accepting_leaves;";
                    sayln "\t if (l == *yybl)";
                    sayln "\t     if (trans == .trans (vector::get (internal::tab, 0)))";
                    say   "\t       action (l, new_accepting_leaves";

                    if *uses_trailing_context
                        say ", NIL";
                    fi;

                    say ");\n\t else";

                    sayln "\t     newchars= if *yydone \"\"; else yyinput 1024; fi;";
                    sayln "\t     if ((size newchars) == 0)";
                    sayln "\t\t        yydone := TRUE;";
                    say   "\t\t        if (l == i0)  user_declarations::eof ";

                    sayln
                        case *arg_code
                            NULL  => "();";
                            THE _ => "yyarg;";
                        esac;

                    say   "\t\t                  else action (l, new_accepting_leaves";

                    if *uses_trailing_context   sayln ", NIL); fi;";
                    else                        sayln      "); fi;";
                    fi;

                    sayln "\t\t  else if (l == i0)  yyb := newchars;";
                    sayln "\t\t\t     else yyb := substring(*yyb, i0, l-i0) + newchars; fi;";
                    sayln "\t\t       yygone := *yygone+i0;";
                    sayln "\t\t       yybl := size *yyb;";
                    sayln "\t\t       scan (s, accepting_leaves, l-i0, 0);";

                    sayln "\t     fi;   # (size newchars) == 0";
                    sayln "\t     fi;   # trans == $trans ...";

                    sayln "\t  else new_char = char::to_int (unsafe::vector_of_chars::get(*yyb, l));";

                    if (*char_set_size == 129)
                        sayln "\t\t new_char = if (new_char < 128) new_char; else 128; fi;"; 
                    fi;

                    say "\t\t new_state = ";

                    sayln (   if   *char_format 
                                   "char::to_int (unsafe::vector_of_chars::get (trans, new_char));";
                              else
                                   "unsafe::vector::get (trans, new_char);";
                              fi
                          );

                    say "\t\t if (new_state == 0) action (l, new_accepting_leaves";

                    if *uses_trailing_context   sayln ", NIL);";
                    else                        sayln      ");";
                    fi;

                    sayln "\t\t else scan (new_state, new_accepting_leaves, l+1, i0); fi;";
                    sayln "\t fi;";
                    sayln "  };    # fun scan";

                    if (not *uses_previous_newline)
                        sayln "/*";
                    fi;

                    say   "\t start= if (substring(*yyb,*yybufpos - 1, 1)==\"\\n\")";
                    sayln " *yybegin_i+1; else *yybegin_i; fi;";

                    if (not *uses_previous_newline)
                        sayln "*/";
                    fi;

                    say "\t scan(";

                    if *uses_previous_newline   say "start"; 
                    else                        say "*yybegin_i /* start */ ";
                    fi;

                    sayln ", NIL, *yybufpos, *yybufpos);   # fun continue";
                    sayln "    };   # fun continue";

                    sayln
                        case *arg_code
                            NULL  =>           " };    # fun lex";
                            THE _ => " continue; };    # fun lex";
                        esac;


                    sayln "  lex; ";
                    sayln "  };   # fun make_lexer";
                    sayln "};";
                };                                      # fun print_lexer


            uses_previous_newline := FALSE;
            reset_flags();

            lex_buf   := make_ibuf (file::open_for_read infile);
            next_tok  := BOF;
            inquote   := FALSE;

            lex_out   := file::open_for_write  outfile;
            state_num := 2;
            line_num  := 1;

            state_tab := enter (create (string::(<=)))("initial", 1);
            leaf_num  := -1;

            my  (user_code, rules, ends)
                =
                parse()
                except
                    x =  {   close_ibuf *lex_buf;
                             file::close_output *lex_out;
                             winix::file::remove_file  outfile;
                             raise exception x;
                         };

            my (fins, trans, tctab, tcpairs)
                =
                makedfa rules;

            if *uses_trailing_context
                close_ibuf *lex_buf;
                file::close_output *lex_out;
                winix::file::remove_file  outfile;
                pr_err "lookahead is unimplemented";
            fi;

            if *header_decl     say *header_code;
            else                    say ("package " + *package_name);
            fi;

            say "{\n";
            say skel_hd;
            say user_code;
            say "}; #  end of user routines \n";
            say "exception LEX_ERROR; # Raised if illegal leaf action tried.\n";
            say "package internal {\n\t \n";

            maketable (fins, tctab, tcpairs, trans);

            say "package start_states {\n\t \n";
            say "\t Yystartstate = STARTSTATE Int;\n";

            makebegin();

            say "\n };\n";
            say "Result = user_declarations::Lex_Result;\n";
            say "\t exception LEXER_ERROR; # Raised if illegal leaf action tried */\n";
            say "};\n\n";

            say     if *pos_arg   "fun make_lexer (yyinput, yygone0: Int) =\n { \n";
                    else          "fun make_lexer yyinput =\n{\t my yygone0=1;\n";
                    fi;


            if *count_newlines
                say "\t my yylineno = REF 0;\n\n";
            fi;

            say "\t yyb = REF \"\\n\"; \t\t#  Buffer \n\
                 \\t yybl = REF 1;\t\t# Buffer length \n\
                 \\t yybufpos = REF 1;\t\t#  location of next character to use \n\
                 \\t yygone = REF yygone0;\t#  position in file of beginning of buffer \n\
                 \\t yydone = REF FALSE;\t\t#  eof found yet? \n\
                 \\t yybegin_i = REF 1;\t\t# Current 'start state' for lexer \n\
                 \\n\t yybegin = fn (internal::start_states::STARTSTATE x) =\n\
                 \\t\t yybegin_i := x;\n\n";

            print_lexer ends;

            close_ibuf *lex_buf;

            file::close_output *lex_out;
        };                                      # fun lex_fn
};








Comments and suggestions to: bugs@mythryl.org

PreviousUpNext