


## char.pkg
# Compiled by:
# src/lib/std/src/standard-core.sublib### "Almost everything that distinguishes the modern world
### from earlier centuries is attributable to science,
### which achieved its most spectacular triumphs
### in the seventeenth century."
###
### -- Bertrand Russell
stipulate
package rt = runtime; # runtime is from src/lib/core/init/built-in.pkg.
herein
package char: (weak)
api {
include Char; # Char is from src/lib/std/src/char.api scan_c: number_string::Reader( Char, X ) -> number_string::Reader( Char, X );
#
# Internal scanning function for C-style escape sequences
}
{
package c = inline_t::char; # inline_t is from src/lib/core/init/built-in.pkg my (+) = inline_t::default_int::(+);
my (-) = inline_t::default_int::(-);
my (*) = inline_t::default_int::(*);
my itoc: Int -> Char = inline_t::cast;
my ctoi: Char -> Int = inline_t::cast;
Char = Char;
String = String;
my min_char: Char = c::chr 0;
my max_char: Char = c::chr c::max_ord;
max_ord = c::max_ord;
fun prior (c: Char) : Char
=
{ c' = (ctoi c - 1);
if (inline_t::default_int::(<) (c', 0))
#
raise exception exceptions_guts::BAD_CHAR; # exceptions_guts is from src/lib/std/src/exceptions-guts.pkg else
(itoc c');
fi;
};
fun next (c: Char) : Char
=
{ c' = (ctoi c + 1);
if (inline_t::default_int::(<) (max_ord, c'))
#
raise exception exceptions_guts::BAD_CHAR;
else
(itoc c');
fi;
};
from_int = c::chr;
to_int = c::ord;
my (<) = c::(<);
my (<=) = c::(<=);
my (>) = c::(>);
my (>=) = c::(>=);
fun compare (c1: Char, c2: Char)
=
if (c1 == c2) EQUAL;
elif (c1 < c2) LESS;
else GREATER;
fi;
# Testing character membership:
#
stipulate
#
fun make_array (s, s_len)
=
{ cv = rt::asm::make_string (max_ord+1); # "rt" == "runtime" -- from src/lib/core/init/built-in.pkg fun init i
=
if (inline_t::default_int::(<=) (i, max_ord))
#
inline_t::vector_of_chars::set (cv, i, '\000');
init (i+1);
fi;
fun ins i
=
if (inline_t::default_int::(<) (i, s_len))
#
inline_t::vector_of_chars::set (
cv,
to_int (inline_t::vector_of_chars::get (s, i)),
'\001'
);
ins (i+1);
fi;
init 0;
ins 0;
cv;
};
herein
fun contains ""
=>
(fn c = FALSE);
contains s
=>
{ s_len = inline_t::vector_of_chars::length s;
if (s_len == 1)
c' = inline_t::vector_of_chars::get (s, 0);
fn c = (c == c');
else
cv = make_array (s, s_len);
fn c = (inline_t::vector_of_chars::get (cv, to_int c) != '\000');
fi;
};
end;
fun not_contains ""
=>
fn c = TRUE;
not_contains s
=>
{ s_len = inline_t::vector_of_chars::length s;
if (s_len == 1)
c' = inline_t::vector_of_chars::get (s, 0);
fn c = (c != c');
else
cv = make_array (s, s_len);
fn c = (inline_t::vector_of_chars::get (cv, to_int c) == '\000');
fi;
};
end;
end; # stipulate
# For each character code we have an 8-bit vector, which is interpreted
# as follows:
# 0x01 == set for upper-case letters
# 0x02 == set for lower-case letters
# 0x04 == set for digits
# 0x08 == set for white space characters
# 0x10 == set for punctuation characters
# 0x20 == set for control characters
# 0x40 == set for hexadecimal characters
# 0x80 == set for SPACE
ctype_table = "\
\\032\032\032\032\032\032\032\032\032\040\040\040\040\040\032\032\
\\032\032\032\032\032\032\032\032\032\032\032\032\032\032\032\032\
\\136\016\016\016\016\016\016\016\016\016\016\016\016\016\016\016\
\\068\068\068\068\068\068\068\068\068\068\016\016\016\016\016\016\
\\016\065\065\065\065\065\065\001\001\001\001\001\001\001\001\001\
\\001\001\001\001\001\001\001\001\001\001\001\016\016\016\016\016\
\\016\066\066\066\066\066\066\002\002\002\002\002\002\002\002\002\
\\002\002\002\002\002\002\002\002\002\002\002\016\016\016\016\032\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\";
fun in_set (c, s)
=
{ m = to_int (inline_t::vector_of_chars::get (ctype_table, to_int c));
(inline_t::default_int::bitwise_and (m, s) != 0);
};
# Predicates on integer coding of Ascii values:
#
fun is_alpha c = in_set (c, 0x03);
fun is_upper c = in_set (c, 0x01);
fun is_lower c = in_set (c, 0x02);
fun is_digit c = in_set (c, 0x04);
fun is_hex_digit c = in_set (c, 0x40);
fun is_alphanumeric c = in_set (c, 0x07);
fun is_space c = in_set (c, 0x08);
fun is_punct c = in_set (c, 0x10);
fun is_graph c = in_set (c, 0x17);
fun is_print c = in_set (c, 0x97);
fun is_cntrl c = in_set (c, 0x20);
fun is_ascii c = inline_t::default_int::(<) (to_int c, 128);
offset = ctoi 'a' - ctoi 'A';
fun to_upper c = if (is_lower c) itoc (ctoi c - offset); else c;fi;
fun to_lower c = if (is_upper c) itoc (ctoi c + offset); else c;fi;
fun scan_digits is_digit getc n stream
=
scan (stream, n, [])
where
fun scan (stream, 0, l)
=>
(list::reverse l, stream);
scan (stream, i, l)
=>
case (getc stream)
NULL
=>
(list::reverse l, stream);
THE (c, stream')
=>
is_digit c
?? scan (stream', i - 1, c ! l)
:: (list::reverse l, stream);
esac;
end;
end;
fun check_digits radix (l, stream)
=
{ fun next (x ! r) => THE (x, r);
next [] => NULL;
end;
case ( num_scan::scan_int radix next l)
#
THE (i, _)
=>
inline_t::i1::(<) (i, 256)
?? THE (from_int (inline_t::i1::to_int i), stream)
:: NULL;
NULL => NULL;
esac;
};
# Conversions between characters
# and printable representations:
fun scan getc
=
scan'
where
fun scan' rep
=
{ fun get2 rep
=
case (getc rep)
THE (c1, rep')
=>
case (getc rep')
THE (c2, rep'') => THE (c1, c2, rep'');
_ => NULL;
esac;
_ => NULL;
esac;
case (getc rep)
NULL => NULL;
THE('\\', rep')
=>
case (getc rep')
NULL => NULL;
THE('\\',rep'') => THE('\\', rep'');
THE('"', rep'') => THE('"', rep'');
THE('a', rep'') => THE('\a', rep'');
THE('b', rep'') => THE('\b', rep'');
THE('t', rep'') => THE('\t', rep'');
THE('n', rep'') => THE('\n', rep'');
THE('v', rep'') => THE('\v', rep'');
THE('f', rep'') => THE('\f', rep'');
THE('r', rep'') => THE('\r', rep'');
THE('^', rep'')
=>
case (getc rep'')
THE (c, rep''')
=>
if (('@' <= c) and (c <= '_')) THE (from_int (to_int c - to_int '@'), rep''');
else NULL;
fi;
NULL => NULL;
esac;
THE (d1, rep'')
=>
if (is_digit d1)
case (get2 rep'')
THE (d2, d3, rep''')
=>
{ fun convert d
=
(to_int d - to_int '0');
if (is_digit d2 and is_digit d3)
n = 100*(convert d1) + 10*(convert d2) + (convert d3);
if (inline_t::default_int::(<) (n, 256))
THE (from_int n, rep''');
else NULL;
fi;
else
NULL;
fi;
};
NULL => NULL;
esac;
elif (is_space d1)
# Skip over \<ws>+\
#
fun skip_ws stream
=
case (getc stream)
NULL => NULL;
THE('\\', stream')
=>
scan' stream';
THE (c, stream')
=>
if (is_space c) skip_ws stream';
else NULL;
fi;
esac;
skip_ws rep'';
else
NULL;
fi;
esac;
THE ('"', rep')
=>
NULL;
THE (c, rep')
=>
if (is_print c) THE (c, rep');
else NULL;
fi;
esac;
}; # fun scan'
end; # fun scan
from_string
=
number_string::scan_string scan;
itoa =
(num_format::format_int number_string::DECIMAL)
o
inline_t::i1::from_int;
fun to_string '\a' => "\\a";
to_string '\b' => "\\b";
to_string '\t' => "\\t";
to_string '\n' => "\\n";
to_string '\v' => "\\v";
to_string '\f' => "\\f";
to_string '\r' => "\\r";
to_string '"' => "\\\"";
to_string '\\' => "\\\\";
to_string c
=>
if (is_print c)
inline_t::poly_vector::get (prestring::chars, to_int c);
#
# NOTE: we should probably recognize the control characters XXX BUGGO FIXME
else
c' = to_int c;
if (inline_t::default_int::(>) (c', 32))
prestring::meld2 ("\\", itoa c');
else prestring::meld2 ("\\^", inline_t::poly_vector::get (prestring::chars, c'+64));
fi;
fi;
end;
# Scanning function for C escape sequences
fun scan_c getc
=
scan
where
fun is_oct_digit d
=
'0' <= d and
d <= '7';
fun scan stream
=
case (getc stream)
NULL => NULL;
THE ('\\', stream')
=>
case (getc stream')
NULL => NULL;
THE ('a', stream'') => THE ('\a', stream'');
THE ('b', stream'') => THE ('\b', stream'');
THE ('t', stream'') => THE ('\t', stream'');
THE ('n', stream'') => THE ('\n', stream'');
THE ('v', stream'') => THE ('\v', stream'');
THE ('f', stream'') => THE ('\f', stream'');
THE ('r', stream'') => THE ('\r', stream'');
THE ('\\', stream'') => THE ('\\', stream'');
THE ('"', stream'') => THE ('"', stream'');
THE ('\'', stream'') => THE ('\'', stream'');
THE ('?', stream'') => THE ('?', stream'');
THE ('x', stream'')
=>
# Hex escape code
#
check_digits number_string::HEX
(scan_digits is_hex_digit getc -1 stream'');
_ =>
# Should be octal escape code
check_digits number_string::OCTAL
(scan_digits is_oct_digit getc 3 stream');
esac;
# NOT SURE ABOUT THE FOLLOWING TWO CASES: XXX BUGGO FIXME
# THE('"', stream'') => NULL; # error --- not escaped
# THE('\'', stream'') => NULL; # error --- not escaped
THE (c, stream'')
=>
if (is_print c) THE (c, stream'');
else NULL;
fi;
esac;
end;
from_cstring
=
number_string::scan_string scan_c;
fun to_cstring '\a' => "\\a";
to_cstring '\b' => "\\b";
to_cstring '\t' => "\\t";
to_cstring '\n' => "\\n";
to_cstring '\v' => "\\v";
to_cstring '\f' => "\\f";
to_cstring '\r' => "\\r";
to_cstring '"' => "\\\"";
to_cstring '\\' => "\\\\";
to_cstring '?' => "\\?";
to_cstring '\'' => "\\'";
to_cstring '\000' => "\\0";
to_cstring c
=>
if (is_print c)
inline_t::poly_vector::get (prestring::chars, to_int c);
else
i = inline_t::i1::from_int (to_int c);
prefix
=
if (inline_t::i1::(<) (i, 8))
#
"\\00";
else
inline_t::i1::(<) (i, 64)
?? "\\0"
:: "\\";
fi;
prestring::meld2 (prefix, num_format::format_int number_string::OCTAL i);
fi;
end;
}; # package char
end;


