


## string-chartype.pkg
#
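# Predicates on characters, modelled after the Unix C library <ctype.h>
# family (isalpha, isdigit, ...).  Each predicate comes in two forms: one
# that works on integer character codes, and one that works on the character
# at a given index of a string.  This file provides the string-indexed form
# (every predicate here takes a (string, index) pair).  The meanings of the
# predicates are documented in Section 3 of the Unix manual; see ctype(3).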
# Compiled by:
#     src/lib/std/src/standard-core.sublib

# See also:
#     src/lib/std/src/char.pkg
#     src/lib/std/src/int-chartype.pkg

package string_chartype
: String_Chartype		# String_Chartype is from   src/lib/std/src/string-chartype.api
{
    # Conversions between characters and their integer codes,
    # both implemented as inline_t::cast.
    # (itoc is not referenced anywhere else in this package body.)
    #
    my itoc:  Int -> Char =  inline_t::cast;
    my ctoi:  Char -> Int =  inline_t::cast;

    # For each character code we have an 8-bit vector, which is interpreted
    # as follows:
    #   0x01  ==  set for upper-case letters
    #   0x02  ==  set for lower-case letters
    #   0x04  ==  set for digits
    #   0x08  ==  set for white space characters
    #   0x10  ==  set for punctuation characters
    #   0x20  ==  set for control characters
    #   0x40  ==  set for hexadecimal characters
    #   0x80  ==  set for SPACE
    #
    # The table below holds 16 rows of 16 decimal-escaped entries, one
    # entry per character code 0..255.  Codes 128..255 have no bits set.
    # The continuation lines are kept flush-left so that the string
    # literal is reproduced exactly.
    #
    ctype_table = "\
\\032\032\032\032\032\032\032\032\032\040\040\040\040\040\032\032\
\\032\032\032\032\032\032\032\032\032\032\032\032\032\032\032\032\
\\136\016\016\016\016\016\016\016\016\016\016\016\016\016\016\016\
\\068\068\068\068\068\068\068\068\068\068\016\016\016\016\016\016\
\\016\065\065\065\065\065\065\001\001\001\001\001\001\001\001\001\
\\001\001\001\001\001\001\001\001\001\001\001\016\016\016\016\016\
\\016\066\066\066\066\066\066\002\002\002\002\002\002\002\002\002\
\\002\002\002\002\002\002\002\002\002\002\002\016\016\016\016\032\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\";
    # XXX BUGGO FIXME This table is duplicated from char.pkg, should share it.

    # in_set (c, s):
    #   c  --  integer character code, used directly as an index into ctype_table.
    #   s  --  bit mask built from the class bits listed above.
    # Returns TRUE iff the table entry for c has at least one bit in common with s.
    #
    # The fetched table entry is a Char; it is converted to its integer code
    # with the package-local ctoi so this function relies only on names
    # defined in this file (plus inline_t).
    #
    fun in_set (c, s)
        =
        {   m = ctoi (inline_t::vector_of_chars::get (ctype_table, c));
            (inline_t::default_int::bitwise_and (m, s) != 0);
        };

    #  fun in_set (c, s)
    #      =
    #      bits::bitwise_and (ordof (ctype_table, c), s) != 0;

    # Character fetch used by all the predicates below.
    # NOTE(review): the name suggests this accessor does no bounds checking,
    # in which case callers must supply a valid index -- confirm against inline_t.
    #
    unsafe_get = inline_t::vector_of_chars::get;

    # Private helper: integer code of the character at index i of string s.
    #
    fun code_at (s, i)
        =
        ctoi (unsafe_get (s, i));

    # Predicates on indexed strings.
    # Each takes (s, i) and classifies the character at index i of s
    # by testing its ctype_table entry against a mask of class bits.
    #
    fun is_alpha        (s, i) =  in_set (code_at (s, i), 0x03);		# upper | lower
    fun is_upper        (s, i) =  in_set (code_at (s, i), 0x01);
    fun is_lower        (s, i) =  in_set (code_at (s, i), 0x02);
    fun is_digit        (s, i) =  in_set (code_at (s, i), 0x04);
    fun is_hex_digit    (s, i) =  in_set (code_at (s, i), 0x40);
    fun is_alphanumeric (s, i) =  in_set (code_at (s, i), 0x07);		# upper | lower | digit
    fun is_space        (s, i) =  in_set (code_at (s, i), 0x08);
    fun is_punct        (s, i) =  in_set (code_at (s, i), 0x10);
    fun is_graph        (s, i) =  in_set (code_at (s, i), 0x17);		# upper | lower | digit | punct
    fun is_print        (s, i) =  in_set (code_at (s, i), 0x97);		# is_graph classes | SPACE
    fun is_cntrl        (s, i) =  in_set (code_at (s, i), 0x20);

    # ASCII test needs no table lookup: just compare the code against 128.
    #
    fun is_ascii        (s, i) =  code_at (s, i) < 128;
};									# package string_chartype


