PreviousUpNext

15.4.1050  src/lib/std/src/char.pkg

## char.pkg

# Compiled by:
#     src/lib/std/src/standard-core.sublib

###                          "Almost everything that distinguishes the modern world
###                           from earlier centuries is attributable to science,
###                           which achieved its most spectacular triumphs
###                           in the seventeenth century."
###
###                                                          -- Bertrand Russell



stipulate
    package rt  =  runtime;                                     # runtime       is from   src/lib/core/init/built-in.pkg.
herein

    package char: (weak)
                            api {
                                include Char;                   # Char          is from   src/lib/std/src/char.api

                                scan_c:   number_string::Reader( Char, X ) -> number_string::Reader( Char, X );
                                    #
                                    # Internal scanning function for C-style escape sequences 
                            }
    {
        package c = inline_t::char;                             # inline_t      is from   src/lib/core/init/built-in.pkg

        my (+) = inline_t::default_int::(+);
        my (-) = inline_t::default_int::(-);
        my (*) = inline_t::default_int::(*);

        my itoc:  Int -> Char = inline_t::cast;
        my ctoi:  Char -> Int = inline_t::cast;

        Char = Char;
        String = String;

        my min_char:  Char      = c::chr 0;
        my max_char:  Char      = c::chr c::max_ord;
        max_ord         = c::max_ord;


        fun prior (c:  Char) : Char
            =
            {   c' = (ctoi c - 1);

                if (inline_t::default_int::(<) (c', 0))
                    #   
                    raise exception exceptions_guts::BAD_CHAR;  # exceptions_guts       is from   src/lib/std/src/exceptions-guts.pkg
                else
                    (itoc c');
                fi;
            };


        fun next (c:  Char) : Char
            =
            {   c' = (ctoi c + 1);

                if (inline_t::default_int::(<) (max_ord, c'))
                    #
                    raise exception exceptions_guts::BAD_CHAR;
                else
                    (itoc c');
                fi;
            };


        from_int = c::chr;
        to_int   = c::ord;

        my (<)  = c::(<);
        my (<=) = c::(<=);
        my (>)  = c::(>);
        my (>=) = c::(>=);

        fun compare (c1:  Char, c2:  Char)
            =
            if   (c1 == c2)  EQUAL;
            elif (c1 <  c2)  LESS;
            else             GREATER;
            fi;

        # Testing character membership:
        #
        stipulate
            #
            fun make_array (s, s_len)
                =
                {   cv =   rt::asm::make_string (max_ord+1);                            # "rt" == "runtime" -- from   src/lib/core/init/built-in.pkg

                    fun init i
                        =
                        if (inline_t::default_int::(<=) (i, max_ord))
                            #                   
                            inline_t::vector_of_chars::set (cv, i, '\000');
                            init (i+1);
                        fi;

                    fun ins i
                        =
                        if (inline_t::default_int::(<) (i, s_len))
                            #                   
                            inline_t::vector_of_chars::set (
                                cv,
                                to_int (inline_t::vector_of_chars::get (s, i)),
                                '\001'
                            );

                            ins (i+1);
                        fi;

                    init 0;
                    ins 0;
                    cv;
                };
        herein

            fun contains ""
                    =>
                    (fn c = FALSE);

                contains s
                    =>
                    {   s_len = inline_t::vector_of_chars::length s;

                        if (s_len == 1)

                            c' = inline_t::vector_of_chars::get (s, 0);
                            fn c = (c == c');
                        else
                            cv = make_array (s, s_len);
                            fn c = (inline_t::vector_of_chars::get (cv, to_int c) != '\000');
                        fi;
                    };
            end;

            fun not_contains ""
                    =>
                    fn c = TRUE;

                not_contains s
                    =>
                    {   s_len = inline_t::vector_of_chars::length s;

                        if (s_len == 1)
                            c' = inline_t::vector_of_chars::get (s, 0);
                            fn c = (c != c');
                        else
                            cv = make_array (s, s_len);
                            fn c = (inline_t::vector_of_chars::get (cv, to_int c) == '\000');
                        fi;
                    };
            end;
        end;             #  stipulate

        # For each character code we have an 8-bit vector, which is interpreted
        # as follows:
        #   0x01  ==  set for upper-case letters
        #   0x02  ==  set for lower-case letters
        #   0x04  ==  set for digits
        #   0x08  ==  set for white space characters
        #   0x10  ==  set for punctuation characters
        #   0x20  ==  set for control characters
        #   0x40  ==  set for hexadecimal characters
        #   0x80  ==  set for SPACE

        ctype_table = "\
                \\032\032\032\032\032\032\032\032\032\040\040\040\040\040\032\032\
                \\032\032\032\032\032\032\032\032\032\032\032\032\032\032\032\032\
                \\136\016\016\016\016\016\016\016\016\016\016\016\016\016\016\016\
                \\068\068\068\068\068\068\068\068\068\068\016\016\016\016\016\016\
                \\016\065\065\065\065\065\065\001\001\001\001\001\001\001\001\001\
                \\001\001\001\001\001\001\001\001\001\001\001\016\016\016\016\016\
                \\016\066\066\066\066\066\066\002\002\002\002\002\002\002\002\002\
                \\002\002\002\002\002\002\002\002\002\002\002\016\016\016\016\032\
                \\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
                \\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
                \\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
                \\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
                \\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
                \\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
                \\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
                \\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
              \";


        fun in_set (c, s)
            =
            {   m = to_int (inline_t::vector_of_chars::get (ctype_table, to_int c));

                (inline_t::default_int::bitwise_and (m, s) != 0);
            };

        # Predicates on integer coding of Ascii values:
        #
        fun is_alpha c    = in_set (c, 0x03);
        fun is_upper c    = in_set (c, 0x01);
        fun is_lower c    = in_set (c, 0x02);

        fun is_digit c    = in_set (c, 0x04);
        fun is_hex_digit c        = in_set (c, 0x40);
        fun is_alphanumeric c = in_set (c, 0x07);

        fun is_space c    = in_set (c, 0x08);
        fun is_punct c    = in_set (c, 0x10);

        fun is_graph c    = in_set (c, 0x17);
        fun is_print c    = in_set (c, 0x97);
        fun is_cntrl c    = in_set (c, 0x20);

        fun is_ascii c            = inline_t::default_int::(<) (to_int c, 128);

        offset = ctoi 'a' - ctoi 'A';

        fun to_upper c = if (is_lower c)  itoc (ctoi c - offset); else c;fi;
        fun to_lower c = if (is_upper c)  itoc (ctoi c + offset); else c;fi;

        fun scan_digits is_digit getc n stream
            =
            scan (stream, n, [])
            where
                fun scan (stream, 0, l)
                        =>
                        (list::reverse l, stream);

                    scan (stream, i, l)
                        =>
                        case (getc stream)

                            NULL
                                =>
                                (list::reverse l, stream);

                            THE (c, stream')
                                =>
                                is_digit c
                                  ??  scan (stream', i - 1, c ! l)
                                  ::  (list::reverse l, stream);
                      esac;
                end;
            end;

        fun check_digits radix (l, stream)
            =
            {   fun next (x ! r) => THE (x, r);
                    next [] => NULL;
                end;

                case ( num_scan::scan_int  radix  next  l)
                    #
                    THE (i, _)
                        =>
                        inline_t::i1::(<) (i, 256)
                          ??  THE (from_int (inline_t::i1::to_int i), stream)
                          ::  NULL;

                    NULL => NULL;
                esac;
            };


        # Conversions between characters
        # and printable representations:

        fun scan getc
            =
            scan'
            where
                fun scan' rep
                    = 
                    {   fun get2 rep
                            =
                            case (getc rep)

                                THE (c1, rep')
                                    =>
                                    case (getc rep')
                                        THE (c2, rep'') =>  THE (c1, c2, rep'');
                                        _               =>  NULL;
                                    esac;

                                _   => NULL;

                            esac;


                        case (getc rep)

                            NULL => NULL;

                            THE('\\', rep')
                                =>
                                case (getc rep')
                                    NULL => NULL;
                                    THE('\\',rep'') => THE('\\', rep'');
                                    THE('"', rep'') => THE('"',  rep'');
                                    THE('a', rep'') => THE('\a', rep'');
                                    THE('b', rep'') => THE('\b', rep'');
                                    THE('t', rep'') => THE('\t', rep'');
                                    THE('n', rep'') => THE('\n', rep'');
                                    THE('v', rep'') => THE('\v', rep'');
                                    THE('f', rep'') => THE('\f', rep'');
                                    THE('r', rep'') => THE('\r', rep'');
                                    THE('^', rep'')
                                        =>
                                        case (getc rep'')

                                            THE (c, rep''')
                                                =>
                                                if (('@' <= c) and (c <= '_'))    THE (from_int (to_int c - to_int '@'), rep''');
                                                else                              NULL;
                                                fi;

                                            NULL => NULL;

                                        esac;

                                    THE (d1, rep'')
                                        =>
                                        if (is_digit d1)

                                            case (get2 rep'')

                                                THE (d2, d3, rep''')
                                                    =>
                                                    {   fun convert d
                                                            =
                                                            (to_int d - to_int '0');

                                                        if (is_digit d2 and is_digit d3)

                                                            n = 100*(convert d1) + 10*(convert d2) + (convert d3);

                                                            if (inline_t::default_int::(<) (n, 256))
                                                                 THE (from_int n, rep''');
                                                            else NULL;
                                                            fi;
                                                        else
                                                            NULL;
                                                        fi;
                                                    };

                                                NULL => NULL;
                                            esac;

                                        elif (is_space d1)

                                            # Skip over \<ws>+\ 
                                            #
                                            fun skip_ws stream
                                                =
                                                case (getc stream)

                                                    NULL => NULL;

                                                    THE('\\', stream')
                                                        =>
                                                        scan' stream';

                                                    THE (c, stream')
                                                        =>
                                                        if (is_space c)  skip_ws stream';
                                                        else             NULL;
                                                        fi;
                                                esac;


                                            skip_ws rep'';
                                        else
                                            NULL;
                                        fi;
                                esac;

                            THE ('"', rep')
                                =>
                                NULL;

                            THE (c, rep')
                                =>
                                if (is_print c)   THE (c, rep');
                                else              NULL;
                                fi;
                        esac;

                    };                          # fun scan'
            end;                                        # fun scan

        from_string
            =
            number_string::scan_string scan;

        itoa =
            (num_format::format_int number_string::DECIMAL)
            o
            inline_t::i1::from_int;

        fun to_string '\a' => "\\a";
            to_string '\b' => "\\b";
            to_string '\t' => "\\t";
            to_string '\n' => "\\n";
            to_string '\v' => "\\v";
            to_string '\f' => "\\f";
            to_string '\r' => "\\r";
            to_string '"' => "\\\"";
            to_string '\\' => "\\\\";

            to_string c
                =>
                if (is_print c)
                    inline_t::poly_vector::get (prestring::chars, to_int c);
                    #
                    # NOTE: we should probably recognize the control characters  XXX BUGGO FIXME
                else
                    c' = to_int c;

                    if (inline_t::default_int::(>) (c', 32))
                         prestring::meld2 ("\\", itoa c');
                    else prestring::meld2 ("\\^", inline_t::poly_vector::get (prestring::chars, c'+64));
                    fi;
                fi;
        end;



        # Scanning function for C escape sequences 

        fun scan_c getc
            =
            scan
            where

                fun is_oct_digit d
                    =
                   '0' <=  d     and
                    d  <= '7';

                fun scan stream
                    =
                    case (getc stream)

                        NULL => NULL;

                        THE ('\\', stream')
                            =>
                            case (getc stream')

                                NULL => NULL;

                                THE ('a',  stream'') =>  THE ('\a', stream'');
                                THE ('b',  stream'') =>  THE ('\b', stream'');
                                THE ('t',  stream'') =>  THE ('\t', stream'');
                                THE ('n',  stream'') =>  THE ('\n', stream'');
                                THE ('v',  stream'') =>  THE ('\v', stream'');
                                THE ('f',  stream'') =>  THE ('\f', stream'');
                                THE ('r',  stream'') =>  THE ('\r', stream'');
                                THE ('\\', stream'') =>  THE ('\\', stream'');
                                THE ('"',  stream'') =>  THE ('"',  stream'');
                                THE ('\'', stream'') =>  THE ('\'', stream'');
                                THE ('?',  stream'') =>  THE ('?',  stream'');

                                THE ('x', stream'')
                                    =>
                                    # Hex escape code 
                                    #
                                    check_digits number_string::HEX
                                        (scan_digits is_hex_digit getc -1 stream'');
                                _   =>
                                    # Should be octal escape code 
                                   check_digits number_string::OCTAL
                                       (scan_digits is_oct_digit getc 3 stream');
                            esac;


    # NOT SURE ABOUT THE FOLLOWING TWO CASES: XXX BUGGO FIXME
    #               THE('"',  stream'') =>  NULL; #  error --- not escaped 
    #               THE('\'', stream'') =>  NULL; #  error --- not escaped 


                        THE (c, stream'')
                            =>
                            if (is_print c)   THE (c, stream'');
                            else              NULL;
                            fi;
                    esac;

          end;

        from_cstring
            =
            number_string::scan_string scan_c;

        fun to_cstring '\a' => "\\a";
            to_cstring '\b' => "\\b";
            to_cstring '\t' => "\\t";
            to_cstring '\n' => "\\n";
            to_cstring '\v' => "\\v";
            to_cstring '\f' => "\\f";
            to_cstring '\r' => "\\r";
            to_cstring '"'  => "\\\"";
            to_cstring '\\' => "\\\\";
            to_cstring '?'  => "\\?";
            to_cstring '\'' => "\\'";

            to_cstring '\000' => "\\0";

            to_cstring c
                =>
                if (is_print c)

                    inline_t::poly_vector::get (prestring::chars, to_int c);
                else
                    i = inline_t::i1::from_int (to_int c);

                    prefix
                        =
                        if (inline_t::i1::(<) (i, 8))
                            #
                            "\\00";
                        else
                            inline_t::i1::(<) (i, 64)
                              ??  "\\0"
                              ::  "\\";
                        fi;

                    prestring::meld2 (prefix, num_format::format_int number_string::OCTAL i);
                fi;
        end;

    };                          # package char 
end;


Comments and suggestions to: bugs@mythryl.org

PreviousUpNext