PreviousUpNext

15.4.34  src/app/future-lex/src/backends/sml/sml-fun-output.pkg

## sml-fun-output.pkg

# Compiled by:
#     src/app/future-lex/src/lexgen.lib



# Code generation for SML, using control-flow

###                          "Chaos theory is not nearly as exciting as it sounds. How could it be?"
###
###                                                           -- Stephen Kellert


stipulate
    package lms =  list_mergesort;                              # list_mergesort        is from   src/lib/src/list-mergesort.pkg
    package lo  =  lex_output_spec;                             # lex_output_spec       is from   src/app/future-lex/src/backends/lex-output-spec.pkg
    package re  =  regular_expression;                          # regular_expression    is from   src/app/future-lex/src/regular-expression.pkg
    package sis =  regular_expression::symbol_set;
    package sym =  re::sym;
herein

    package smlfun_output

    : (weak) Output                                             # Output                is from   src/app/future-lex/src/backends/output.api

    {


        Ml_Exp == ml::Ml_Exp;
        Ml_Pat == ml::Ml_Pat;

        inp = "inp";
        inp_variable = ML_VAR inp;

        fun id_of (lo::STATE { id, ... } )
            =
            id;

        fun name_of' i =  "yyQ" + (int::to_string i);
        fun name_of  s =  name_of' (id_of s);
        fun act_name i =  "yyAction" + (int::to_string i);

        # Simple heuristic to avoid computing unused values 
        stipulate 
            has = string::is_substring;
        herein
            hasyytext   = has "yytext";
            has_reject   = has "REJECT";
            hasyylineno = has "yylineno";
        end;

        # Map over the intervals of a symbol set 
        fun map_int f syms
            =
            sis::foldl_int
                (fn (i, ls) =  (f i) ! ls)
                []
                syms;

        # Transition interval representation:
        # 
        Transition_Interval
            =
            TI  (sis::Interval, Int, Ml_Exp);

        fun interval_of (TI (i, t, e)) = i;
        fun tag_of      (TI (i, t, e)) = t;
        fun action_of   (TI (i, t, e)) = e;
        fun same_tag    (TI (_, t1, _), TI (_, t2, _))   =   t1 == t2;
        fun singleton  (TI ((i, j), _, _))              =   i  == j;

        # Generate code for transitions: generate a hard-coded binary
        # search on accepting characters

        fun mk_trans ([ ], _) =>  raise exception FAIL "(BUG) SMLFunOutput: alphabet not covered";
            mk_trans ([t], _) =>  action_of t;

            mk_trans ([t1, t2], _)
                => 
                if (same_tag (t1, t2) )

                     action_of t1;
                else
                     my (_, t1end)   =  interval_of t1;
                     my (t2start, _) =  interval_of t2;

                     if (singleton t1)

                       ML_IF (ML_CMP (ml::EQ, inp_variable, ML_SYM t1end),
                              action_of t1,
                              action_of t2);

                     elif (singleton t2)

                       ML_IF (ML_CMP (ml::EQ, inp_variable, ML_SYM t2start),
                              action_of t2,
                              action_of t1);
                     else

                       ML_IF (ML_CMP (ml::LEQ, inp_variable, ML_SYM t1end),
                              action_of t1,
                              action_of t2);
                     fi;
                fi;

           mk_trans (ts, len)
               =>
               {   lh = len / 2;

                   fun split (    ls,  0,  l1) =>  (list::reverse l1, ls);
                       split (l ! ls, count, l1) =>  split (ls, count - 1, l ! l1);
                       split _                 =>  raise exception FAIL "(BUG) SMLFunOutput: split failed";
                   end;

                   my (ts1, ts2)
                       =
                       split (ts, lh, []);

                   my (ts2start, ts2end)
                       =
                       interval_of (list::head ts2);

                   my (ts2', ts2len)
                       =
                       ts2start == ts2end
                        ??   (list::tail ts2, len - lh - 1)
                        ::   (           ts2, len - lh    );

                   # we want to take advantage of the special case when 
                   # len = 3 and hd ts2 is a singleton.  this case often
                   # occurs when we have an arrow for a single character.
                   #    
                   else_clause
                       = 
                       if (lh == 1 and ts2len == 1)

                            mk_trans ([list::head ts1, list::head ts2'], 2);
                       else
                            ML_IF (ML_CMP (ml::LT, inp_variable, ML_SYM ts2start),
                                   mk_trans (ts1, lh),
                                   mk_trans (ts2', ts2len));
                       fi;

                   ML_IF (ML_CMP (ml::EQ, inp_variable, ML_SYM ts2start),
                          action_of (list::head ts2),
                          else_clause);
              };
        end;

        fun mk_state action_vec (s, k)
            =
            {   s ->   lo::STATE { id, start_state, label, final, next };

                fun add_match (i, last_match)
                    =
                    {   last_match' =   has_reject (vector::get (action_vec, i))
                                         ?? last_match
                                         :: ML_VAR "yyNO_MATCH";

                        ML_APP ("yyMATCH",
                                [ML_VAR "stream",
                                 ML_VAR (act_name i),
                                 last_match']);
                    };

                my (cur_match, next_matches)
                    =
                    case final
                         []     => (NULL, []);
                         f ! fs => (THE f, fs);
                    esac;

                last_match
                    =
                    list::fold_right add_match (ML_VAR "lastMatch") next_matches;

                #  Collect all valid transition symbols 

                labels
                    =
                    list::fold_left sis::union sis::empty (list::map #1 *next);

                #  pair transition intervals with associated actions/transitions 

                new_final
                    =
                    case cur_match
                         THE j =>  add_match (j, last_match);
                         NULL  =>  last_match;
                    esac;

                fun arrows (syms, s)
                    = 
                    map_int 
                      (fn i => TI (i, id_of s, 
                         ML_APP (name_of s, [ML_VAR "stream'", new_final])); end )
                      syms;

                tis = list::map arrows *next;

                err_act'
                    = 
                    case cur_match

                         THE j
                             => 
                             ML_APP ( act_name j, 

                                      [ ML_VAR "stream", 

                                        has_reject (vector::get (action_vec, j))
                                         ?? last_match
                                         :: ML_VAR "yyNO_MATCH"
                                      ]
                                    );

                         NULL =>  ML_APP ("yystuck", [last_match]);
                     esac;

                # If start state, check for eof:
                # 
                err_act
                    =
                    if start_state

                         ML_IF (ML_APP("yyInput::eof", [ML_VAR "stream"]),
                                         ML_APP("user_declarations::eof", [ML_VAR "yyarg"]),
                                         err_act');
                    else
                         err_act';
                    fi;

                # Error transitions = complement (valid transitions) 
                #
                error   =  sis::complement labels;
                err_tis =  map_int (fn i =  TI (i, -1, err_act)) error;

                # The arrows represent intervals that partition the entire
                # alphabet, with each interval mapped to some transition or
                # action.  We sort the intervals by their smallest member:
                #
                fun gt (a, b)
                    =
                    (#1 (interval_of a)) > (#1 (interval_of b));

                sorted =   lms::sort_list  gt  (list::cat (err_tis ! tis));

                # Now we want to find adjacent partitions with the same 
                # action, and merge their intervals:
                #
                fun merge [ ] => [ ];
                    merge [t] => [t];

                    merge (t1 ! t2 ! ts)
                        => 
                        if (same_tag (t1, t2) )

                            my TI ((i, _), tag, act) = t1;
                            my TI ((_, j), _,   _  ) = t2;

                            t = TI ((i, j), tag, act);

                            merge (t ! ts);

                        else
                            t1 ! (merge (t2 ! ts));
                        fi;
                end;

                merged = merge sorted;

                # Create the transition code 
                #
                trans = mk_trans (merged, list::length merged);

                # Create the input code 
                #
                get_inp
                    = 
                    # trans has at least the error action.  if length (merged)
                    # is 1 then we can avoid getting any input and simply
                    # take the error transition in all cases.  note that
                    # the "error" transition may actually be a match
                    #
                    case merged

                         [_] => err_act;

                         _   => ML_CASE (ML_APP ("yygetc", [ML_VAR "stream"]),
                                  [(ML_CON_PATTERN ("NULL", []), err_act),
                                   (ML_CON_PATTERN ("THE", [ML_VAR_PATTERN (inp + ", stream'")]), 
                                      trans)]);
                    esac;

                  ML_FUN (name_of s, ["stream", "lastMatch"], get_inp, k);
              };

        fun mk_action (i, action, k)
            =
            {   upd_strm =  ML_REF_PUT (ML_VAR "yystrm", ML_VAR "stream");
                act      =  ML_RAW [ml::TOK action];
                seq      =  ML_SEQ [upd_strm, act];

                lett = if (hasyytext action )

                           ML_LET ( "yytext", 
                                    ML_APP("yymktext", [ML_VAR "stream"]), 
                                    seq
                                  );
                       else
                           seq;
                       fi;

                letl = if (hasyylineno action)

                           ML_LET ( "yylineno", 
                                    ML_APP ( "REF", 
                                             [ML_APP ("yyInput::getlineNo", 
                                                [ML_REF_GET (ML_VAR "yystrm")])]), 
                                   lett
                                  );
                       else
                           lett;
                       fi;

                letr = if (has_reject action)

                           ML_LET ("oldStrm", ML_REF_GET (ML_VAR "yystrm"),
                                   ML_FUN
                                     ("REJECT", [],
                                      ML_SEQ 
                                        [ML_REF_PUT (ML_VAR "yystrm", 
                                                    ML_VAR "oldStrm"),
                                         ML_APP("yystuck", [ML_VAR "lastMatch"])],
                                      letl));
                       else
                           letl;
                       fi;

                ML_NEW_GROUP (ML_FUN (act_name i, ["stream", "lastMatch"], letr, k));
            };

        package scc
            =
            digraph_strongly_connected_components_g (
                package {
                   Key = lo::Dfa_State;

                   fun compare (lo::STATE { id => id1, ... }, lo::STATE { id => id2, ... } )
                       =
                       int::compare (id1, id2);
                }
            );

        fun mk_states (actions, dfa, start_states, k)
            =
            {   fun follow (lo::STATE { next, ... } )
                    = 
                    #2 (paired_lists::unzip *next);

                scc = scc::topological_order' { roots => start_states, follow };

                mk_state' = mk_state actions;

                fun mk_grp (scc::SIMPLE    state,  k)   =>   ML_NEW_GROUP (mk_state' (state, k));
                    mk_grp (scc::RECURSIVE states, k)   =>   ML_NEW_GROUP (list::fold_right mk_state' k states);
                end;

                list::fold_left mk_grp k scc;
            };

        fun lexer_hook spec stream
            =
            {
                my lo::SPEC { actions, dfa, start_states, ... }
                    =
                    spec;

                fun match_ss (label, state)
                    =
                    (  ML_CON_PATTERN (label, []), 
                         ML_APP (name_of state, 
                                      [ML_REF_GET (ML_VAR "yystrm"), 
                                       ML_VAR "yyNO_MATCH"])
                    );

                inner_expression = ML_CASE (ML_REF_GET (ML_VAR "yyss"),
                                        list::map match_ss start_states);

                states_expression = mk_states 
                                  (actions, dfa, 
                                   #2 (paired_lists::unzip start_states), inner_expression);

                lexer_expression =   vector::keyed_fold_right mk_action states_expression actions;

                prettyprint_stream =   text_iopp::open { dst => stream, wid => 80 };

                ml::prettyprint_ml (prettyprint_stream, lexer_expression);
            };

        fun start_states_hook spec stream
            =
            {   my lo::SPEC { start_states, ... }
                    =
                    spec;

                mach_names = #1 (paired_lists::unzip start_states);

                file::write (stream, string::join " | " mach_names);
            };

        fun user_decls_hook spec stream
            =
            { my lo::SPEC { decls, ... } = spec;

                file::write (stream, decls);
            };

        fun header_hook spec stream
            =
            { my lo::SPEC { header, ... } = spec;

                file::write (stream, header);
            };

        fun args_hook spec stream
            =
            {   my lo::SPEC { arg, ... }
                    =
                    spec;

                arg' = if   (string::length arg == 0 )

                            "(yyarg as ())";
                       else
                            "(yyarg as " + arg + ") ()";
                       fi;

                file::write (stream, arg');
            };

        package tio= file;      # file  is from   src/lib/std/src/posix/file.pkg

        template
            =
            {
                file = tio::open_for_read "backends/sml/template-sml-fun.pkg";

                fun done () = tio::close_input file;

                fun read ()
                    =
                    case (tio::read_line file)

                         NULL     =>  [];
                         THE line =>  line ! read();
                    esac;


                (read()
                 except
                    ex = { done(); raise exception ex;}
                )
                before
                    done();
            };

        fun output (spec, fname)
            = 
            expand_file::expand {
                  src   => template,
                  dst   => fname + ".pkg",
                  hooks => [  ("lexer",       lexer_hook       spec),
                             ("startstates", start_states_hook spec),
                             ("userdecls",   user_decls_hook   spec),
                             ("header",      header_hook      spec),
                             ("args",        args_hook        spec)
                          ]
            };

    };
end;


## John Reppy (http://www.cs.uchicago.edu/~jhr)
## Aaron Turon (adrassi@gmail.com)
## All rights reserved.
## COPYRIGHT (c) 2005 
## Subsequent changes by Jeff Prothero Copyright (c) 2010-2012,
## released under Gnu Public Licence version 3.


Comments and suggestions to: bugs@mythryl.org

PreviousUpNext