


## string-guts.pkg
# Compiled by:
# src/lib/std/src/standard-core.sublib### "Harp not on that string."
###
### -- William Shakespeare, "Henry VI"
package string_guts: (weak) String { # String is from src/lib/std/src/string.api # inline_t is from src/lib/core/init/built-in.pkg my (+) = inline_t::default_int::(+);
my (-) = inline_t::default_int::(-);
my (<) = inline_t::default_int::(<);
my (<=) = inline_t::default_int::(<=);
my (>) = inline_t::default_int::(>);
my (>=) = inline_t::default_int::(>=);
# (==) = inline_t::(==);
unsafe_get = inline_t::vector_of_chars::get;
unsafe_set = inline_t::vector_of_chars::set;
# List reverse
#
fun reverse ([], l) => l;
reverse (x ! r, l) => reverse (r, x ! l);
end;
Char = Char;
String = String;
max_size = core::max_length;
# The length of a string:
#
length = inline_t::vector_of_chars::length;
unsafe_create
=
runtime::asm::make_string;
# Allocate an uninitialized string of given length
#
fun create n
=
if (inline_t::default_int::ltu (max_size, n))
#
raise exception exceptions_guts::SIZE; # exceptions_guts is from src/lib/std/src/exceptions-guts.pkg else
runtime::asm::make_string n;
fi;
# Convert a character into a single character string
#
fun from_char (c: char::Char) : String
=
inline_t::poly_vector::get (prestring::chars, inline_t::cast c);
# Get a character from a string
#
my get: ((String, Int)) -> Char
=
inline_t::vector_of_chars::check_sub;
# The (_[]) enables 'vec[index]' notation;
#
my (_[]): (String, Int) -> Char
=
inline_t::vector_of_chars::check_sub;
# Return the n-character substring of s starting at position i.
# NOTE: we use words to check the right bound so as to avoid
# raising overflow.
#
stipulate
package w = inline_t::default_unt; # inline_t is from src/lib/core/init/built-in.pkg herein
fun substring (s, i, n)
=
if (((i < 0) or (n < 0)
or
w::(<) (w::from_int (size s), w::(+) (w::from_int i, w::from_int n)))
)
raise exception exceptions_guts::SUBSCRIPT; # exceptions_guts is from src/lib/std/src/exceptions-guts.pkg else
prestring::unsafe_substring (s, i, n);
fi;
end;
fun extract (v, base, opt_len)
=
{ len = size v;
fun new_vec n
=
{ new_v = runtime::asm::make_string n;
fun fill i
=
if (i < n)
#
unsafe_set (new_v, i, unsafe_get (v, base+i));
fill (i+1);
fi;
fill 0;
new_v;
};
case (base, opt_len)
#
(0, NULL) => v;
(_, THE 0)
=>
if (base < 0 or len < base)
#
raise exception exceptions_guts::SUBSCRIPT; # exceptions_guts is from src/lib/std/src/exceptions-guts.pkg else "";
fi;
(_, NULL)
=>
if (base < 0 or len < base)
#
raise exception exceptions_guts::SUBSCRIPT; # exceptions_guts is from src/lib/std/src/exceptions-guts.pkg elif (base == len)
"";
else
new_vec (len - base);
fi;
(_, THE 1)
=>
if (base < 0 or len < base+1)
raise exception exceptions_guts::SUBSCRIPT; # exceptions_guts is from src/lib/std/src/exceptions-guts.pkg else str (unsafe_get (v, base));
fi;
(_, THE n)
=>
if (base < 0 or n < 0 or len < base+n)
#
raise exception exceptions_guts::SUBSCRIPT;
else
new_vec n;
fi;
esac;
};
# Concatenate a list of strings:
#
fun cat [ string ]
=>
string;
cat (sl: List( String ))
=>
{ fun length (i, [])
=>
i;
length (i, s ! rest)
=>
length (i+size s, rest);
end;
case (length (0, sl))
#
0 => "";
1 => find sl
where
fun find ("" ! r) => find r;
find ( s ! _) => s;
find _ => ""; # Impossible.
end;
end;
tot_len
=>
{ ss = create tot_len;
fun copy ([], _)
=>
();
copy (s ! r, i)
=>
{ len = size s;
fun copy' j
=
if (j != len)
#
unsafe_set (ss, i+j, unsafe_get (s, j));
copy'(j+1);
fi;
copy' 0;
copy (r, i+len);
};
end;
copy (sl, 0);
ss;
};
esac;
};
end; # cat
# Concatenate a list of strings using the
# given separator string, so
# join " " ["an", "example"]
# ->
# "an example"
#
fun join _ [] => "";
join _ [x] => x;
join sep (h ! t)
=>
cat (
reverse (
fold_left
(fn (x, l) = x ! sep ! l)
[h]
t,
[]
)
);
end;
# As above, with null delimiters:
# Implode a list of characters into a string:
fun implode [] => "";
implode cl
=>
{ fun length ([], n) => n;
length (_ ! r, n) => length (r, n+1);
end;
prestring::implode (length (cl, 0), cl);
};
end;
# Explode a string into a list of characters:
#
fun explode s
=
f (NIL, size s - 1)
where
fun f (l, -1) => l;
f (l, i) => f (unsafe_get (s, i) ! l, i - 1);
end;
end;
fun map f vec
=
case (size vec)
#
0 => "";
#
len => { new_vec = runtime::asm::make_string len;
mapf 0
where
fun mapf i
=
if (i < len)
#
unsafe_set (new_vec, i, f (unsafe_get (vec, i)));
mapf (i+1);
fi;
end;
new_vec;
};
esac;
# Map a translation function across the characters of a string
#
fun translate tr s
=
prestring::translate (tr, s, 0, size s);
fun tokens is_delimiter s # Tokenize a string using the given predicate
= # to define the delimiter characters.
reverse (scan_token (0, 0, []), [])
where
n = size s;
fun substr (i, j, l)
=
if (i == j) l;
else prestring::unsafe_substring (s, i, j-i) ! l;
fi;
fun scan_token (i, j, toks)
=
if (j < n)
#
if (is_delimiter (unsafe_get (s, j))) skip_delimiters (j+1, substr (i, j, toks));
else scan_token (i, j+1, toks);
fi;
else
substr (i, j, toks);
fi
also
fun skip_delimiters (j, toks)
=
if (j < n)
#
if (is_delimiter (unsafe_get (s, j))) skip_delimiters (j+1, toks);
else scan_token (j, j+1, toks);
fi;
else
toks;
fi;
end;
fun fields is_delimiter s
=
{ n = size s;
reverse (scan_token (0, 0, []), [])
where
fun scan_token (i, j, toks)
=
if (j < n)
#
if (is_delimiter (unsafe_get (s, j))) scan_token (j+1, j+1, substr (i, j, toks));
else scan_token (i, j+1, toks);
fi;
else
substr (i, j, toks);
fi
where
fun substr (i, j, l)
=
prestring::unsafe_substring (s, i, j-i) ! l;
end;
end;
};
# String comparisons
#
fun is_prefix s1 s2
=
prestring::is_prefix (s1, s2, 0, size s2);
fun is_suffix s1 s2
=
{ sz2 = size s2;
#
prestring::is_prefix (s1, s2, sz2 - size s1, sz2);
};
fun is_substring s
=
{ stringsearch = prestring::kmp s;
#
fun search s'
=
{ epos = size s';
stringsearch (s', 0, epos) < epos;
};
search;
};
fun compare (a, b)
=
prestring::compare (a, 0, size a, b, 0, size b);
fun collate compare_g (a, b)
=
prestring::collate compare_g (a, 0, size a, b, 0, size b);
fun has_alpha string = list::exists char::is_alpha (explode string); # For efficiency, should really have string::exists and string::all someday. XXX SUCKO FIXME.
fun has_upper string = list::exists char::is_upper (explode string);
fun has_lower string = list::exists char::is_lower (explode string);
fun is_alpha string = length string > 0 and list::all char::is_alpha (explode string);
fun is_upper string = length string > 0 and list::all char::is_upper (explode string);
fun is_lower string = length string > 0 and list::all char::is_lower (explode string);
fun is_mixed string = is_alpha string and has_upper string and has_lower string;
# String greater or equal
#
fun string_gt (a, b)
=
compare 0
where
al = size a;
bl = size b;
n = if (al < bl) al;
else bl;
fi;
fun compare i
=
if (i == n)
#
al > bl;
else
ai = unsafe_get (a, i);
bi = unsafe_get (b, i);
char::(>) (ai, bi)
or
( (ai == bi)
and
compare (i+1)
);
fi;
end;
fun (<=) (a, b) = if (string_gt (a, b) ) FALSE; else TRUE; fi;
fun (<) (a, b) = string_gt (b, a);
fun (>=) (a, b)
=
b <= a;
my (>) = string_gt;
fun from_string' scan_char s
=
accum (0, [])
where
len = size s;
fun getc i
=
if (inline_t::default_int::(<) (i, len))
#
THE (unsafe_get (s, i), i+1);
else
NULL;
fi;
scan_char = scan_char getc;
fun accum (i, chars)
=
case (scan_char i)
#
NULL
=>
if (inline_t::default_int::(<) (i, len)) NULL; # Bad format
else THE (implode (list::reverse chars));
fi;
#
THE (c, i')
=>
accum (i', c ! chars);
esac;
end;
fun (+) ("", s) => s;
(+) (s, "") => s;
(+) (x, y) => prestring::meld2 (x, y);
end;
# Concatenate a list of strings using the
# given separator and delimiter strings, so
# join' "(" " " ")" ["an", "example"]
# ->
# "(an example)"
#
fun join' _ _ _ [] => "";
#
join' start _ stop [x] => start + x + stop;
#
join' start sep stop (h ! t)
=>
cat (
start
!
h
!
fold_right
(fn (x, l) = sep ! x ! l)
[ stop ]
t
);
end;
# Drop trailing newline on string, if present:
#
fun chomp ""
=>
"";
chomp string
=>
{ len = length string;
if (get (string, len - 1) != '\n') string;
else extract (string, 0, THE (len - 1));
fi;
};
end;
# There's a shorter definition of this fn in src/lib/compiler/toplevel/interact/read-eval-print-loop-g.pkg # -- should we use it instead? XXX BUGGO FIXME
to_lower = map char::to_lower;
to_upper = map char::to_upper;
fun to_mixed string # "THIS_is_tExt" -> "This_Is_Text"
=
to_mixed' (' ', explode string, [])
where
fun to_mixed' (_, [], chars)
=>
(implode (list::reverse chars));
to_mixed' (last, this ! rest, chars)
=>
if (not (char::is_alpha this)) to_mixed' (this, rest, this ! chars);
elif (not (char::is_alpha last)) to_mixed' (this, rest, char::to_upper this ! chars);
else to_mixed' (this, rest, char::to_lower this ! chars);
fi;
end;
end;
from_string = from_string' char::scan;
to_string = translate char::to_string;
from_cstring = from_string' char::scan_c;
to_cstring = translate char::to_cstring;
}; # package string


