


## univariate-sample.pkg
## Author: Matthias Blume (blume@tti-c.org)
# Compiled by:
# src/lib/std/standard.lib# Some statistical functions on unweighted univariate samples.
### "It's kind of fun to do the impossible."
###
### -- Walt Disney
package univariate_sample : api {
# We distinguish between two kinds of samples. Only the "heavy"
# kind permits calculation of average deviation and median.
# It is also considerably more expensive because it keeps an
# rw_vector of all points while the "light" variety is constant-size.
Light;
Heavy;
Sample(X); # Light or heavy.
Evaluation(X); # Light or heavy.
# Samples are built incrementally by adding points to an initially
# empty sample:
lempty: Sample( Light );
hempty: Void -> Sample( Heavy );
ladd: (Float, Sample( Light )) -> Sample( Light ); # Constant
hadd: (Float, Sample( Heavy )) -> Sample( Heavy ); # Amortized constant
# Evaluate the sample.
# This completes all the expensive work except
# for things that depend on "heavy" samples:
#
evaluate: Sample(X) -> Evaluation(X); # Constant
# Extracting "cheap" information (constant-time):
#
nn: Evaluation(X) -> Int;
n: Evaluation(X) -> Float; # nn as real
mean: Evaluation(X) -> Float;
variance: Evaluation(X) -> Float;
standard_deviation: Evaluation(X) -> Float;
skew: Evaluation(X) -> Float;
kurtosis: Evaluation(X) -> Float;
# Extracting "expensive" information:
#
median: Evaluation( Heavy ) -> Float; # randomized linear
average_deviation: Evaluation( Heavy ) -> Float; # linear
}
{
infix val 90 @@@ ; my (@@@) = unsafe::rw_vector::get;
infix val 40 <- ; fun (a, i) <- x = unsafe::rw_vector::set (a, i, x);
# Two kinds of "extra info":
#
Light = Void; # nothing
Heavy = (Rw_Vector( Float ), Int); # rubber rw_vector of points, size
# A sample: extra info * nn * sum x^4 * sum x^3 * sum x^2 * sum x:
Sample(X) = (X, Int, Float, Float, Float, Float);
# An evaluation: extra info * nn * nn as real *
# mean * variance * standard deviation *
# skew * kurtosis :
Evaluation X
=
EVALUATION {
ext_info: X, # Extra info
ni: Int, # Number of points
nr: Float, # Number of points (as real)
mean: Float,
sd2: Float, # sd * sd = variance
sd: Float, # standard deviation
skew: Float,
kurtosis: Float
};
min_size = 1024; # minimum allocated size of heavy rw_vector
lempty = ((), 0, 0.0, 0.0, 0.0, 0.0);
fun hempty ()
=
((rw_vector::make_rw_vector (min_size, 0.0), min_size), 0, 0.0, 0.0, 0.0, 0.0);
fun ladd (x: Float, ((), n, sx4, sx3, sx2, sx1))
=
{ x2 = x*x; my (x3, x4) = (x2*x, x2*x2);
((), n+1, sx4+x4, sx3+x3, sx2+x2, sx1+x);
};
fun hadd (x: Float, ((a, size), n, sx4, sx3, sx2, sx1))
=
{ x2 = x*x; my (x3, x4) = (x2*x, x2*x2);
my (a, size)
=
if (n < size)
(a, size);
else
size = size+size;
b = rw_vector::tabulate (
size,
fn i = if (i < n ) a @@@ i;
else 0.0; fi
);
(b, size);
fi;
(a, n) <- x;
((a, size), n+1, sx4+x4, sx3+x3, sx2+x2, sx1+x);
};
fun evaluate (ext_info, ni, sx4, sx3, sx2, sx1)
=
{ n = real ni;
n' = n - 1.0;
m = sx1 // n;
m2 = m*m;
m3 = m2*m;
sd2 = (sx2 + m*(n*m - 2.0*sx1)) // n';
sd = math::sqrt sd2;
my (sd3, sd4)
=
(sd*sd2, sd2*sd2);
sk = (sx3-m*(3.0*(sx2-sx1*m)+n*m2)) // (n*sd3);
k = ((sx4+m*(6.0*sx2*m - 4.0*(sx3+sx1*m2)+n*m3)) // (n*sd4)) - 3.0;
EVALUATION {
ext_info,
ni,
nr => n,
mean => m,
sd2,
sd,
skew => sk,
kurtosis => k
};
};
fun un_e (EVALUATION r)
=
r;
fun nn e = .ni (un_e e);
fun n e = .nr (un_e e);
fun mean e = .mean (un_e e);
fun variance e = .sd2 (un_e e);
fun standard_deviation e = .sd (un_e e);
fun skew e = .skew (un_e e);
fun kurtosis e = .kurtosis (un_e e);
fun median (EVALUATION { ext_info => (a, _), ni, ... } )
=
random_sample::median' (rw_vector_slice::make_slice (a, 0, THE ni));
fun average_deviation (EVALUATION { ext_info => (a, _), ni, nr, mean => m, ... } )
=
ad (0, 0.0)
where
fun ad (i, ds)
=
if ( i>= ni) ds // nr;
else ad (i+1, ds + abs (a@@@i-m));
fi;
end;
};


