


## substring.pkg
# Compiled by:
# src/lib/core/init/init.cmi
### "There has never been an intelligent person of the age of sixty
### who would consent to live his life over again.
###
### "His or anyone else's."
###
### -- Mark Twain,
### Letters from the Earth
stipulate
infix val 80 * / % mod div ;
infix val 70 $ ^ + - ;
infix val 40 := o ;
infix val 50 > < >= <= != == ;
infixr val 60 . ! @ ;
infix val 10 before ;
include base_types;
herein
package substring
: Substring # Substring is from src/lib/core/init/substring.api where Char == base_types::Char
where String == base_types::String
=
package {
include pre_pervasive;
package w= inline_t::default_unt; # inline_t is from src/lib/core/init/built-in.pkg my (+) = inline_t::default_int::(+);
my (-) = inline_t::default_int::(-);
my (<) = inline_t::default_int::(<);
my (<=) = inline_t::default_int::(<=);
my (>) = inline_t::default_int::(>);
my (>=) = inline_t::default_int::(>=);
# my (==) = inline_t::(==);
unsafe_sub = inline_t::vector_of_chars::get;
string_size = inline_t::vector_of_chars::length;
# list reverse
#
fun reverse ([], l) => l;
reverse (x ! r, l) => reverse (r, x ! l);
end;
Char = base_types::Char;
String = base_types::String;
Substring
=
SUBSTRING ((String, Int, Int));
fun base (SUBSTRING arg)
=
arg;
fun to_string (SUBSTRING arg)
=
prestring::unsafe_substring arg;
# NOTE: we use words to check the right bound
# so as to avoid raising overflow.
#
fun make_substring (s, i, n)
=
if (((i < 0) or (n < 0)
or w::(<) (w::from_int (string_size s), w::(+) (w::from_int i, w::from_int n)))
)
raise exception core::SUBSCRIPT;
else
SUBSTRING (s, i, n);
fi;
fun extract (s, i, NULL)
=>
{ len = string_size s;
if ((0 <= i) and (i <= len))
SUBSTRING (s, i, len - i);
else
raise exception core::SUBSCRIPT;
fi;
};
extract (s, i, THE n)
=>
make_substring (s, i, n);
end;
fun from_string s
=
SUBSTRING (s, 0, string_size s);
fun is_empty (SUBSTRING(_, _, 0)) => TRUE;
is_empty _ => FALSE;
end;
fun getc (SUBSTRING (s, i, 0)) => NULL;
getc (SUBSTRING (s, i, n)) => THE (unsafe_sub (s, i), SUBSTRING (s, i+1, n - 1));
end;
fun first (SUBSTRING (s, i, 0)) => NULL;
first (SUBSTRING (s, i, n)) => THE (unsafe_sub (s, i));
end;
fun drop_first k (SUBSTRING (s, i, n))
=
if (k < 0)
raise exception core::SUBSCRIPT;
else if (k >= n)
SUBSTRING (s, i+n, 0);
else
SUBSTRING (s, i+k, n-k);
fi;
fi;
fun drop_last k (SUBSTRING (s, i, n))
=
if (k < 0)
raise exception core::SUBSCRIPT;
else
if (k >= n)
SUBSTRING (s, i, 0);
else
SUBSTRING (s, i, n-k);
fi;
fi;
fun get (SUBSTRING (s, i, n), j)
=
if (inline_t::default_int::geu (j, n))
raise exception core::SUBSCRIPT;
else
unsafe_sub (s, i+j);
fi;
fun size (SUBSTRING(_, _, n))
=
n;
fun make_slice (SUBSTRING (s, i, n), j, NULL)
=>
if (0 <= j and j <= n)
SUBSTRING (s, i+j, n-j);
else
raise exception core::SUBSCRIPT;
fi;
make_slice (SUBSTRING (s, i, n), j, THE m)
=>
# NOTE: we use words to check the right bound so as to avoid
# raising overflow.
if (((j < 0)
or (m < 0)
or w::(<) (w::from_int n, w::(+) (w::from_int j, w::from_int m)))
)
raise exception core::SUBSCRIPT;
else
SUBSTRING (s, i+j, m);
fi;
end;
# Concatenate a list of substrings:
#
fun cat ssl
=
{ fun length (len, sl, [])
=>
(len, sl);
length (len, sl, (SUBSTRING (s, i, n) ! rest))
=>
length (len + n, prestring::unsafe_substring (s, i, n) ! sl, rest);
end;
prestring::rev_meld (length (0, [], ssl));
};
# Concatenate a list of substrings using the
# given separator string:
#
fun join _ [] => "";
join _ [x] => to_string x;
join sep (h ! t)
=>
{ sep' = from_string sep;
fun loop ([], l) => cat (reverse (l, []));
loop (h ! t, l) => loop (t, h ! sep' ! l);
end;
loop (t, [h]);
};
end;
fun join' _ _ _ [] => "";
join' start _ stop [x] => cat [ (from_string start), x, (from_string stop) ]; # XXX BUGGO FIXME there's likely a better expression here.
join' start sep stop (h ! t)
=>
{ sep' = from_string sep;
fun loop ([], l) => cat (reverse (l, [from_string stop]));
loop (h ! t, l) => loop (t, h ! sep' ! l);
end;
loop (t, [h, from_string start]);
};
end;
# Explode a substring into a list of characters
#
fun explode (SUBSTRING (s, i, n))
=
{ fun f (l, j)
=
if (j < i)
l;
else
f (unsafe_sub (s, j) ! l, j - 1);
fi;
f (NIL, (i + n) - 1);
};
# substring comparisons
#
fun is_prefix s1 (SUBSTRING (s2, i2, n2))
=
prestring::is_prefix (s1, s2, i2, n2);
fun is_suffix s1 (SUBSTRING (s2, i2, n2))
=
prestring::is_prefix (s1, s2, i2 + n2 - string_size s1, n2);
fun is_substring s
=
{ stringsearch = prestring::kmp s;
fun search (SUBSTRING (s', i, n))
=
{ epos = i + n;
stringsearch (s', i, epos) < epos
;};
search
;};
fun compare (SUBSTRING (s1, i1, n1), SUBSTRING (s2, i2, n2))
=
prestring::compare (s1, i1, n1, s2, i2, n2);
fun collate compare_g (SUBSTRING (s1, i1, n1), SUBSTRING (s2, i2, n2))
=
prestring::collate compare_g (s1, i1, n1, s2, i2, n2);
fun split_at (SUBSTRING (s, i, n), k)
=
if (inline_t::default_int::ltu (n, k) )
raise exception core::SUBSCRIPT;
else
( SUBSTRING (s, i, k),
SUBSTRING (s, i+k, n-k)
);
fi;
stipulate
# Call 'chop' on the longest prefix of substring
# for which 'predicate' is true of each character:
#
fun scan_from_left chop predicate (SUBSTRING (s, i, n))
=
chop (s, i, n, scan i - i)
where
stop = i + n;
fun scan j
=
if (j != stop and predicate (unsafe_sub (s, j)))
scan (j+1);
else
j;
fi;
end;
# Call 'chop' on the longest suffix of substring
# for which 'predicate' is true of each character:
#
fun scan_from_right chop predicate (SUBSTRING (s, i, n))
=
{ stop = i - 1;
fun scan j
=
if (j != stop and predicate (unsafe_sub (s, j)))
scan (j - 1);
else
j;
fi;
chop (s, i, n, (scan (i+n - 1) - i) + 1);
};
herein
# Return the longest prefix/suffix
# whose chars each satisfy predicate.
#
# These have type (Char -> Bool) -> Substring -> Substring
#
get_prefix = scan_from_left (fn (s, i, n, k) = SUBSTRING (s, i, k));
get_suffix = scan_from_right (fn (s, i, n, k) = SUBSTRING (s, i+k, n-k));
# Opposite of above: return all of string
# except longest prefix/suffix whose chars
# satisfy predicate.
#
# These also have type (Char -> Bool) -> Substring -> Substring
#
drop_prefix = scan_from_left (fn (s, i, n, k) = SUBSTRING (s, i+k, n-k));
drop_suffix = scan_from_right (fn (s, i, n, k) = SUBSTRING (s, i, k));
# Split substring into two substrings:
# First is the longest prefix whose chars
# all satisfy given predicate, second is the rest:
#
# This has type (Char -> Bool) -> Substring -> (Substring, Substring)
#
split_off_prefix
=
scan_from_left
(fn (s, i, n, k) = (SUBSTRING (s, i, k), SUBSTRING (s, i+k, n-k)));
# Converse of above: Split substring into
# two substrings, second of which is the
# longest suffix whose chars all satisfy
# given predicate, first of which is the rest:
#
# This also has type (Char -> Bool) -> Substring -> (Substring, Substring)
#
split_off_suffix = scan_from_right (fn (s, i, n, k) = (SUBSTRING (s, i, k), SUBSTRING (s, i+k, n-k)));
end; # with
# This is using the KMP (Knuth-Morris-Pratt) matcher from prestring.
#
fun position s
=
{ stringsearch = prestring::kmp s;
fun search (ss as SUBSTRING (s', i, n))
=
{ epos = i + n;
match = stringsearch (s', i, epos);
(SUBSTRING (s', i, match - i), SUBSTRING (s', match, epos - match))
;};
search;
};
fun span (SUBSTRING (s1, i1, n1), SUBSTRING (s2, i2, n2))
=
if (s1 == s2
and i1 <= i2 + n2
)
SUBSTRING (s1, i1, (i2+n2)-i1);
else
raise exception SPAN;
fi;
fun translate tr (SUBSTRING (s, i, n))
=
prestring::translate (tr, s, i, n);
fun tokens is_delim (SUBSTRING (s, i, n))
=
{ stop = i+n;
fun substr (i, j, l)
=
if (i == j)
l;
else
SUBSTRING (s, i, j-i) ! l;
fi;
fun scan_tok (i, j, toks)
=
if (j < stop)
if (is_delim (unsafe_sub (s, j)))
skip_sep (j+1, substr (i, j, toks));
else scan_tok (i, j+1, toks); fi;
else
substr (i, j, toks);
fi
also
fun skip_sep (j, toks)
=
if (j < stop)
if (is_delim (unsafe_sub (s, j)))
skip_sep (j+1, toks);
else scan_tok (j, j+1, toks); fi;
else
toks;
fi;
reverse (scan_tok (i, i, []), []);
};
fun fields is_delim (SUBSTRING (s, i, n))
=
{ stop = i+n;
fun substr (i, j, l)
=
SUBSTRING (s, i, j-i) ! l;
fun scan_tok (i, j, toks)
=
if (j < stop)
if (is_delim (unsafe_sub (s, j)))
scan_tok (j+1, j+1, substr (i, j, toks));
else scan_tok (i, j+1, toks); fi;
else
substr (i, j, toks);
fi;
reverse (scan_tok (i, i, []), []);
};
fun fold_left f init (SUBSTRING (s, i, n))
=
iter (i, init)
where
stop = i+n;
fun iter (j, accum)
=
if (j < stop)
iter (j+1, f (unsafe_sub (s, j), accum));
else
accum;
fi;
end;
fun fold_right f init (SUBSTRING (s, i, n))
=
iter (i+n - 1, init)
where
fun iter (j, accum)
=
if (j >= i)
iter (j - 1, f (unsafe_sub (s, j), accum));
else
accum;
fi;
end;
fun apply f (SUBSTRING (s, i, n))
=
iter i
where
stop = i + n;
fun iter j
=
if (j < stop)
f (unsafe_sub (s, j));
iter (j+1);
fi;
end;
}; # package substring.
end;


