Show:

File: ia\utils\Statistics.js

/** 
 * Provides statistical information about an array of numbers (<code>data</code>).
 *
 * <p>All statistics are exposed as plain properties and are recalculated
 * every time <code>setData</code> is called. The array passed in is never
 * modified.</p>
 *
 * @author J Clare
 * @class ia.Statistics
 * @constructor
 * @param {Number[]} data An array of numbers. If omitted, no statistics are
 * calculated until <code>setData</code> is called.
 */
ia.Statistics = function(data) 
{
	if (data !== undefined) this.setData(data);
};

/** 
 * The values of <code>sorted</code> with duplicates removed (ascending order).
 *
 * @property unique
 * @type Number[]
 */
ia.Statistics.prototype.unique;

/** 
 * A copy of the data array sorted numerically ascending, with non-numeric
 * values removed. Numeric strings are kept as they were supplied.
 *
 * @property sorted
 * @type Number[]
 */
ia.Statistics.prototype.sorted;

//--------------------------------------------------------------------------
//
// Central tendency of a dataset, i.e. the centre of a frequency distribution.
//
//--------------------------------------------------------------------------

/** 
 * The sum of the values in <code>sorted</code>.
 *
 * @property sum
 * @type Number
 */
ia.Statistics.prototype.sum;

/** 
 * The average value of the dataset, i.e. the sum of all the data divided 
 * by the number of values. The arithmetic mean is commonly called 
 * the "average". When the word "mean" is used without a modifier, it 
 * usually refers to the arithmetic mean.
 * 
 * <p>The mean is a good measure of central tendency for symmetrical 
 * (e.g. normal) distributions but can be misleading in skewed distributions 
 * since it is influenced by outliers. The mean, median, and mode are equal 
 * in symmetrical frequency distributions. The mean is higher than the median 
 * in positively (right) skewed distributions and lower than the median in 
 * negatively (left) skewed distributions.</p>
 * 
 * <p>Therefore, other statistics such as the median may be more informative 
 * for distributions that are frequently very skewed.</p>
 *
 * <p>NaN when the dataset holds no numeric values.</p>
 *
 * @property mean
 * @type Number
 */
ia.Statistics.prototype.mean;

/** 
 * The middle value in the dataset, i.e. half the values 
 * are greater than the median and the other half 
 * are less. The median is less sensitive to 
 * outliers (extreme scores) than the mean and thus a 
 * better measure than the mean for highly skewed distributions.
 * Calculated as <code>getPercentile(0.5, sorted)</code>.
 *
 * @property median
 * @type Number
 */
ia.Statistics.prototype.median;

/** 
 * The most frequently occurring value(s) in the dataset, in ascending order. 
 * Every value that occurs <code>modeCount</code> times is listed, so a
 * dataset without repeated values lists all of its values.
 * Easy to determine, but subject to variation and of limited value.
 *
 * @property mode
 * @type Number[]
 */
ia.Statistics.prototype.mode;

/** 
 * The most frequently occurring value in the dataset, as text. 
 *
 * <p>NOTE(review): nothing in this class assigns this property, so it is
 * undefined unless set from outside - confirm the intended format before
 * relying on it.</p>
 *
 * @property modeString
 * @type String
 */
ia.Statistics.prototype.modeString;

/** 
 * The number of occurrences of the mode value (0 for an empty dataset).
 *
 * @property modeCount
 * @type Number
 */
ia.Statistics.prototype.modeCount;

//--------------------------------------------------------------------------
//
// Variability (or dispersion) measures the amount of scatter in a dataset.
//
//--------------------------------------------------------------------------

/** 
 * The minimum value, i.e. the first entry of <code>sorted</code>.
 *
 * @property minValue
 * @type Number
 */
ia.Statistics.prototype.minValue;

/** 
 * The maximum value, i.e. the last entry of <code>sorted</code>.
 *
 * @property maxValue
 * @type Number
 */
ia.Statistics.prototype.maxValue;

/** 
 * The difference between the largest and the smallest value in the dataset. 
 * Since the range only takes into account two values from the entire dataset, 
 * it may be heavily influenced by outliers in the data.
 *
 * @property range
 * @type Number
 */
ia.Statistics.prototype.range;

/** 
 * The lower quartile, calculated as <code>getPercentile(0.25, sorted)</code>.
 *
 * @property lowerQuartile
 * @type Number
 */
ia.Statistics.prototype.lowerQuartile;

/** 
 * The upper quartile, calculated as <code>getPercentile(0.75, sorted)</code>.
 *
 * @property upperQuartile
 * @type Number
 */
ia.Statistics.prototype.upperQuartile;

/** 
 * The interquartile range (upper quartile minus lower quartile).
 *
 * @property interquartileRange
 * @type Number
 */
ia.Statistics.prototype.interquartileRange;

/** 
 * The variance, using the deviation score method: the sum of the squared
 * deviations from the mean divided by <code>n - 1</code>.
 *
 * <p>NOTE(review): the text output labels this value "Population Variance",
 * but a divisor of <code>n - 1</code> is the sample estimator (a population
 * variance divides by <code>n</code>). The calculation is left as it was so
 * existing results do not change - confirm which one is intended.</p>
 *
 * <p>NaN for a single-value dataset (0 / 0).</p>
 *
 * @property variance
 * @type Number
 */
ia.Statistics.prototype.variance;

/** 
 * The standard deviation: the square root of the absolute value of
 * <code>variance</code>, so it shares the <code>n - 1</code> divisor.
 *
 * @property standardDeviation
 * @type Number
 */
ia.Statistics.prototype.standardDeviation;

/** 
 * Gets the data array exactly as it was passed to <code>setData</code>
 * (unsorted and unfiltered).
 *
 * @method getData
 * @return {Number[]} the array.
 */
ia.Statistics.prototype.getData = function()
{
	return this._data;
};

/** 
 * Sets the data array and recalculates every statistic.
 *
 * @method setData
 * @param {Number[]} data the array.
 */
ia.Statistics.prototype.setData = function(data)
{
	this._data = data;
	this._invalidateData();
};

/** 
 * Returns the percentile at the given position in the dataset.
 *
 * <p>The 1-based rank of the percentile is <code>percentile * (n + 1)</code>;
 * when the rank falls between two values the result is linearly 
 * interpolated between them. A rank before the first value or at/after the
 * last value cannot be interpolated and yields the first or last value
 * respectively.</p>
 * 
 * @method getPercentile
 * @param {Number} percentile Valid values are 0 to 1.
 * <ul>
 * <li>Q1 (Lower Quartile): The 25th percentile (0.25).</li>
 * <li>Q2 (Median): The 50th percentile (0.5).</li>
 * <li>Q3 (Upper Quartile): The 75th percentile (0.75).</li>
 * </ul>
 * @param {Number[]} a The array of numbers to use - sorted or unique.
 * Must already be in ascending order.
 * @return {Number} The value of the given percentile or NaN if
 * the percentile is outside the valid range (0 - 1) or the array is empty.
 */
ia.Statistics.prototype.getPercentile = function(percentile, a)
{	
	if ((percentile < 0) || (percentile > 1)) return NaN;

	var n = a.length;
	if (n === 0) return NaN;

	var pos = percentile * (n + 1);
	var fpos = Math.floor(pos);

	// Clamp ranks that have no neighbour to interpolate with. Without this
	// a[-1] or a[n] (both undefined) would turn the result into NaN, e.g. for
	// the median of a single value or the quartiles of two values.
	if (fpos < 1) return parseFloat(a[0]);
	if (fpos >= n) return parseFloat(a[n - 1]);

	var dif = pos - fpos;
	var lower = a[fpos - 1];
	var upper = a[fpos];

	// parseFloat keeps the addition numeric when the values are numeric strings.
	return parseFloat(lower) + (dif * (upper - lower));
};

/** 
 * Get a simple text representation of this object: the raw, sorted and 
 * unique data followed by the output of <code>statsToString</code>.
 * 
 * @method toString
 * @return {String} A text string.
 */
ia.Statistics.prototype.toString = function() 
{
	return "-----Data-----" +
		"\n Data: "+this.getData() +
		"\n Sorted data: "+this.sorted +
		"\n Unique data: "+this.unique +
		"\n\n " + this.statsToString();
};		

/** 
 * Get a simple text representation of the main statistics.
 * 
 * @method statsToString
 * @return {String} A text string.
 */
ia.Statistics.prototype.statsToString = function() 
{
	// NOTE(review): the "Population" labels are kept as they were, although
	// the variance is calculated with an n - 1 divisor.
	var txt = "-----Central tendency-----" +
		"\n Mean: "+this.mean +
		"\n Median: "+this.median +
		"\n Mode: "+this.mode +
		"\n Mode Count: "+this.modeCount +

		"\n\n -----Variability-----" +
		"\n Min: "+this.minValue +
		"\n Max: "+this.maxValue +
		"\n Range: "+this.range +
		"\n Lower quartile: "+this.lowerQuartile +
		"\n Upper quartile: "+this.upperQuartile +
		"\n Interquartile range: "+this.interquartileRange +
		"\n Population Variance: "+this.variance +
		"\n Population Standard deviation: "+this.standardDeviation;
	return txt;
};

/** 
 * Called when the data has been changed to recalculate statistics.
 * Rebuilds <code>sorted</code> and <code>unique</code> and reassigns every
 * statistic property; the data array itself is left untouched.
 * 
 * @method _invalidateData
 * @private
 */
ia.Statistics.prototype._invalidateData = function() 
{
	// Copy the data, keeping only values that are numeric under both
	// conversions used below (parseFloat for the sum, arithmetic coercion
	// for sorting and deviations). A single null / NaN / "abc" would
	// otherwise turn every statistic into NaN.
	this.sorted = this._data.filter(function isNumeric(v)
	{
		return !isNaN(parseFloat(v)) && !isNaN(Number(v));
	});

	// Sort data.
	this.sorted.sort(function sortNumber(a,b) {return a - b;});

	var n = this.sorted.length;
	var i;
	var v;

	// Unique, sum and mode are gathered in one pass. Because the data is
	// sorted, equal values sit next to each other in "runs".
	var unique = [];
	var sum = 0;
	var mode = [];
	var modeCount = 0;
	var runValue;
	var runLength = 0;

	// Registers a finished run of equal values as a mode candidate:
	// a longer run replaces the modes found so far, an equal one joins them.
	function closeRun()
	{
		if (runLength === 0) return;
		if (runLength > modeCount)
		{
			mode = [runValue];
			modeCount = runLength;
		}
		else if (runLength === modeCount) mode.push(runValue);
	}

	for (i = 0; i < n; i++)
	{
		v = this.sorted[i];

		if (i === 0 || v !== runValue) 
		{
			closeRun();
			unique.push(v);
			runValue = v;
			runLength = 1;
		}
		else
			runLength++;

		sum += parseFloat(v);
	}

	// The last run is not followed by a different value, so close it here.
	closeRun();

	this.unique = unique;
	this.sum = sum;
	this.mode = mode;
	this.modeCount = modeCount;

	// Mean calculation.
	this.mean = this.sum / n;

	// Percentile Calculation.
	this.lowerQuartile = this.getPercentile(0.25, this.sorted);
	this.median = this.getPercentile(0.5, this.sorted);
	this.upperQuartile = this.getPercentile(0.75, this.sorted);
	this.interquartileRange = this.upperQuartile - this.lowerQuartile;

	// Variance calculation (deviation score method).
	var sumOfDeviationSquared = 0;
	for (i = 0; i < n; i++)
	{
		var deviation = this.sorted[i] - this.mean;
		sumOfDeviationSquared += deviation * deviation;
	}

	// Standard deviation calculation. The n - 1 divisor is kept from the
	// previous implementation (see the note on the variance property).
	this.variance = sumOfDeviationSquared / (n-1);
	this.standardDeviation = Math.sqrt(Math.abs(this.variance));

	// Calculate simple statistical info.
	this.minValue = this.sorted[0];
	this.maxValue = this.sorted[n-1];
	this.range = this.maxValue - this.minValue;
};