package dataprocessing;

import java.util.ArrayList;
import java.util.Random;

/** A naive, memory-inefficient version which stores all the
 *  data and calculates the average and standard deviation
 *  in a batch
 * 
 *  This version works with Doubles but a search and replace
 *  will convert it to work with e.g. ints. Ideally however it 
 *  should be made generic.
 *
 * @author Tim Kovacs
 */
public class Averager {
    private ArrayList<Double> values;

    public Averager() {
	values = new ArrayList<Double>();
    };
    
    public Averager(int size) {
	values = new ArrayList<Double>(size);
    };
    
    public void add(double next) {
	values.add(next);
    }

    public double average() {
	double sum=0;
	for (double current: values)
	    sum += current;
	return sum / (double) values.size();
    }

    /** Sample variance is the sum of the squared deviations from the average **/
    public double variance() {
	double avg = average();
	double sumOfSquaredDiff = 0.0;
	for (double current: values) {
	    double difference = current - avg;
	    sumOfSquaredDiff += difference * difference;
	}
	return sumOfSquaredDiff;
    }

    public double stdDev() {
	return Math.sqrt(variance() / values.size());
    }

    public double upper95ConfidenceLimit() {
	// Use 2.576 for 99% confidence intervals instead of 1.96
	double range = 1.96 * stdDev() / Math.sqrt(size());
	return average() + range;
    }

    public double lower95ConfidenceLimit() {
	double range = 1.96 * stdDev() / Math.sqrt(size());
	return average() - range;
    }

    public int size() { 
	return values.size(); 
    };

    public void clear() {
	values.clear();
    }

    /** @return the t statistic for the averages of two samples of equal size drawn
     *  from distributions with assumed equal variance.
     *  The lower the returned value, the less likely the averages are drawn from the same
     *  distribution. 
     *  The usual thresholds for statistical significance are 0.1, 0.05 and 0.01 **/
    public double equalSizeEqualVarianceTTest(Averager avg2) {
	return equalSizeEqualVarianceTTestDetails(this.average(), avg2.average(), 
						  this.variance(), avg2.variance(), 
						  this.size());
    }

    private double equalSizeEqualVarianceTTestDetails(double avg1, double avg2, double var1, double var2, double size) {
	double diffOfAveragers = avg1 - avg2;
	//System.out.println("diff of averagers = " + diffOfAveragers);
	double pooledStdDev = Math.sqrt((var1 + var2) / 2.0);
	//System.out.println("pooled std. dev. = " + pooledStdDev);
	double denominator = pooledStdDev * Math.sqrt(2.0/size);	
	//System.out.println("denominator = " + denominator);
	return diffOfAveragers / denominator;
    }



    // ---------
    // TEST CODE
    // ---------

    
    /** Test equalSizeEqualVarianceTTestDetails **/
    public static void testTTest1() {
	Averager avg = new Averager();
	System.out.println(avg.equalSizeEqualVarianceTTestDetails(100, 101, 
						  5, 6, 
						  1000)
			   );	
    }

    /** Generate some averages by sampling and compare them **/
    public static void testTTest2() {
	Random rand = new Random();
	Averager avg1 = new Averager();
	Averager avg2 = new Averager();
	final int SAMPLE_SIZE = 10;
	final int DIFFERENCE = 100000; // difference in means

	// we generate ints here rather than longs so that we 
	// can add someting to one of them without overflowing
	for (int i=0; i<SAMPLE_SIZE; i++) {
	    avg1.add(rand.nextInt() + DIFFERENCE);
	    avg2.add(rand.nextInt());
	}	
	System.out.println("t = " + avg1.equalSizeEqualVarianceTTest(avg2));
    }

    // a simple test of the calculations
    public static void statsTest() {
	Averager avg = new Averager();
	avg.add(2);
	avg.add(4);
	avg.add(6);
	System.out.println("average " + avg.average()); // sqrt(8/3) = 1.6329...
	System.out.println("size " + avg.size());
	System.out.println("variance " + avg.variance());
	System.out.println("stdDev " + avg.stdDev());
	System.out.println("lower limit of 95% confidence interval " + avg.lower95ConfidenceLimit());
	System.out.println("upper limit of 95% confidence interval " + avg.upper95ConfidenceLimit());
    }

    /** A worked example from http://en.wikipedia.org/wiki/T-test
     *  which unfortunately used real values so the result is different. **/
    public static void statsTest2() {	
	Averager avg1 = new Averager();
	Averager avg2 = new Averager();
	long[] arr1 = {3002, 2999, 3011, 2997, 3001, 2999};
	long[] arr2 = {2989, 2993, 2972, 2998, 3002, 2998};
	for (int i=0; i<arr1.length; i++) {
	    avg1.add(arr1[i]);
	    avg2.add(arr2[i]);
	}
	System.out.println("t = " + avg1.equalSizeEqualVarianceTTest(avg2));
	System.out.println("should be 0.084");
    }

    public static void main(String[] args) {
	statsTest();
    }
}

