Histogram.java

/*
 * Java Genetic Algorithm Library (@__identifier__@).
 * Copyright (c) @__year__@ Franz Wilhelmstötter
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Author:
 *    Franz Wilhelmstötter (franz.wilhelmstoetter@gmx.at)
 */
package org.jenetics.stat;

import static java.lang.Math.max;
import static java.lang.Math.round;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static org.jenetics.internal.util.object.NonNull;
import static org.jenetics.internal.util.object.eq;
import static org.jenetics.util.arrays.forEach;
import static org.jenetics.util.functions.DoubleToFloat64;
import static org.jenetics.util.functions.LongToInteger64;
import static org.jenetics.util.math.statistics.sum;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;

import org.jscience.mathematics.number.Float64;
import org.jscience.mathematics.number.Integer64;

import org.jenetics.internal.util.HashBuilder;

import org.jenetics.util.Function;
import org.jenetics.util.MappedAccumulator;
import org.jenetics.util.arrays;

/**
 * To create an <i>Histogram Accumulator</i> you have to define the <i>class
 * border</i> which define the histogram classes. A value is part of the
 * <i>i</i><sup>th</sup> histogram array element:
 * <p>
 * <img
 *     src="doc-files/histogram-class.gif"
 *     alt="i=\left\{\begin{matrix}  0 & when & v < c_0 \\
 *         len(c) & when & v \geq c_{len(c)-1} \\
 *         j & when & c_j< v \leq c_{j-1}  \\  \end{matrix}\right."
 * />
 * </p>
 *
 * Example:
 * <pre>
 * Separators:             0    1    2    3    4    5    6    7    8    9
 *                  -------+----+----+----+----+----+----+----+----+----+------
 * Frequencies:        20  | 12 | 14 | 17 | 12 | 11 | 13 | 11 | 10 | 19 |  18
 *                  -------+----+----+----+----+----+----+----+----+----+------
 * Histogram index:     0     1    2    3    4    5    6    7    8    9    10
 * </pre>
 * <p/>
 * <strong>Note that this implementation is not synchronized.</strong> If
 * multiple threads access this object concurrently, and at least one of the
 * threads modifies it, it must be synchronized externally.
 *
 * @author <a href="mailto:franz.wilhelmstoetter@gmx.at">Franz Wilhelmstötter</a>
 * @since 1.0
 * @version 1.0 &mdash; <em>$Date: 2014-03-01 $</em>
 */
public class Histogram<C> extends MappedAccumulator<C> {

	private final C[] _separators;
	private final Comparator<C> _comparator;
	private final long[] _histogram;

	/**
	 * Create a new Histogram with the given class separators. The number of
	 * classes is {@code separators.length + 1}. A valid histogram consists of
	 * at least two classes (with one separator).
	 *
	 * @see #valueOf(Comparable...)
	 *
	 * @param comparator the comparator for the separators.
	 * @param separators the class separators.
	 * @throws NullPointerException if the classes of one of its elements
	 *         or the {@code comparator} is {@code null}.
	 * @throws IllegalArgumentException if the given separators array is empty.
	 */
	@SafeVarargs
	public Histogram(final Comparator<C> comparator, final C... separators) {
		_separators = check(separators);
		_comparator = requireNonNull(comparator, "Comparator");
		_histogram = new long[separators.length + 1];

		Arrays.sort(_separators, _comparator);
		Arrays.fill(_histogram, 0L);
	}

	@SafeVarargs
	private Histogram(
		final long[] histogram,
		final Comparator<C> comparator,
		final C... separators
	) {
		_histogram = histogram;
		_comparator = comparator;
		_separators = separators;
	}

	@SuppressWarnings("unchecked")
	private static <C> C[] check(final C... classes) {
		forEach(classes, NonNull);
		if (classes.length == 0) {
			throw new IllegalArgumentException("Given classes array is empty.");
		}

		return classes;
	}

	@Override
	public void accumulate(final C value) {
		++_histogram[index(value)];
		++_samples;
	}

	/**
	 * Do binary search for the index to use.
	 *
	 * @param value the value to search.
	 * @return the histogram index.
	 */
	final int index(final C value) {
		int low = 0;
		int high = _separators.length - 1;

		while (low <= high) {
			if (_comparator.compare(value, _separators[low]) < 0) {
				return low;
			}
			if (_comparator.compare(value, _separators[high]) >= 0) {
				return high + 1;
			}

			final int mid = (low + high) >>> 1;
			if (_comparator.compare(value, _separators[mid]) < 0) {
				high = mid;
			} else if (_comparator.compare(value, _separators[mid]) >= 0) {
				low = mid + 1;
			}
		}

		assert (false): "This line will never be reached.";
		return -1;
	}

	/**
	 * Add the given {@code histogram} to this in a newly created one.
	 *
	 * @param histogram the histogram to add.
	 * @return a new histogram with the added values of this and the given one.
	 * @throws IllegalArgumentException if the {@link #length()} and the
	 *         separators of {@code this} and the given {@code histogram} are
	 *         not the same.
	 * @throws NullPointerException if the given {@code histogram} is {@code null}.
	 */
	public Histogram<C> plus(final Histogram<C> histogram) {
		if (!_comparator.equals(histogram._comparator)) {
			throw new IllegalArgumentException(
				"The histogram comparators are not equals."
			);
		}
		if (!Arrays.equals(_separators, histogram._separators)) {
			throw new IllegalArgumentException(
				"The histogram separators are not equals."
			);
		}

		final long[] data = new long[_histogram.length];
		for (int i = 0; i < data.length; ++i) {
			data[i] = _histogram[i] + histogram._histogram[i];
		}

		return new Histogram<>(data, _comparator, _separators);
	}

	/**
	 * Return the comparator used for class search.
	 *
	 * @return the comparator.
	 */
	public Comparator<C> getComparator() {
		return _comparator;
	}

	/**
	 * Return a copy of the class separators.
	 *
	 * @return the class separators.
	 */
	public C[] getSeparators() {
		return _separators.clone();
	}

	/**
	 * Copy the histogram into the given array. If the array is big enough
	 * the same array is returned, otherwise a new array is created and
	 * returned. The length of the histogram array is the number of separators
	 * plus one ({@code getSeparators().length + 1}).
	 *
	 * @param histogram array to copy the histogram.
	 * @return the histogram array.
	 * @throws NullPointerException if the given array is {@code null}.
	 */
	public long[] getHistogram(final long[] histogram) {
		requireNonNull(histogram);

		long[] hist = histogram;
		if (histogram.length >= _histogram.length) {
			System.arraycopy(_histogram, 0, hist, 0, _histogram.length);
		} else {
			hist = _histogram.clone();
		}

		return hist;
	}

	/**
	 * Return a copy of the current histogram.
	 *
	 * @return a copy of the current histogram.
	 */
	public long[] getHistogram() {
		return getHistogram(new long[_histogram.length]);
	}

	/**
	 * Return the number of classes of this histogram.
	 *
	 * @return the number of classes of this histogram.
	 */
	public int length() {
		return _histogram.length;
	}

	/**
	 * Return the <i>histogram</i> as probability array.
	 *
	 * @return the class probabilities.
	 */
	public double[] getProbabilities() {
		final double[] probabilities = new double[_histogram.length];

		assert (sum(_histogram) == _samples);
		for (int i = 0; i < probabilities.length; ++i) {
			probabilities[i] = (double)_histogram[i]/(double)_samples;
		}

		return probabilities;
	}

	/**
	 * Calculate the χ2 value of the current histogram for the assumed
	 * <a href="http://en.wikipedia.org/wiki/Cumulative_distribution_function">
	 * Cumulative density function</a> {@code cdf}.
	 *
	 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_test">χ2-test</a>
	 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_distribution">χ2-distribution</a>
	 *
	 * @param cdf the assumed Probability density function.
	 * @param min the lower limit of the CDF domain. A {@code null} value means
	 *         an open interval.
	 * @param max the upper limit of the CDF domain. A {@code null} value means
	 *         an open interval.
	 * @return the χ2 value of the current histogram.
	 * @throws NullPointerException if {@code cdf} is {@code null}.
	 */
	public double χ2(final Function<C, Float64> cdf, final C min, final C max) {
		double χ2 = 0;
		for (int j = 0; j < _histogram.length; ++j) {
			final long n0j = n0(j, cdf, min, max);
			χ2 += ((_histogram[j] - n0j)*(_histogram[j] - n0j))/(double)n0j;
		}
		return χ2;
	}

	/**
	 * Calculate the χ2 value of the current histogram for the assumed
	 * <a href="http://en.wikipedia.org/wiki/Cumulative_distribution_function">
	 * Cumulative density function</a> {@code cdf}.
	 *
	 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_test">χ2-test</a>
	 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_distribution">χ2-distribution</a>
	 *
	 * @param cdf the assumed Probability density function.
	 * @return the χ2 value of the current histogram.
	 * @throws NullPointerException if {@code cdf} is {@code null}.
	 */
	public double χ2(final Function<C, Float64> cdf) {
		return χ2(cdf, null, null);
	}

	private long n0(final int j, final Function<C, Float64> cdf, final C min, final C max) {
		Float64 p0j = Float64.ZERO;
		if (j == 0) {
			p0j = cdf.apply(_separators[0]);
			if (min != null) {
				p0j = p0j.minus(cdf.apply(min));
			}
		} else if (j == _histogram.length - 1) {
			if (max != null) {
				p0j = cdf.apply(max).minus(cdf.apply(_separators[_separators.length - 1]));
			} else {
				p0j = Float64.ONE.minus(cdf.apply(_separators[_separators.length - 1]));
			}
		} else {
			p0j = cdf.apply(_separators[j]).minus(cdf.apply(_separators[j - 1]));
		}

		return max(round(p0j.doubleValue()*_samples), 1L);
	}

	/**
	 * @see #χ2(Function)
	 */
	public double chisqr(final Function<C, Float64> cdf) {
		return χ2(cdf);
	}

	/**
	 * @see #χ2(Function, Object, Object)
	 */
	public double chisqr(final Function<C, Float64> cdf, final C min, final C max) {
		return χ2(cdf, min, max);
	}

	@Override
	public int hashCode() {
		return HashBuilder.of(getClass()).
				and(super.hashCode()).
				and(_separators).
				and(_histogram).value();
	}

	@Override
	public boolean equals(final Object obj) {
		if (obj == this) {
			return true;
		}
		if (obj == null || getClass() != obj.getClass()) {
			return false;
		}

		final Histogram<?> histogram = (Histogram<?>)obj;
		return 	eq(_separators, histogram._separators) &&
				eq(_histogram, histogram._histogram) &&
				super.equals(obj);
	}

	@Override
	public String toString() {
		return Arrays.toString(_separators) + "\n" + Arrays.toString(getHistogram()) +
				"\nSamples: " + _samples;
	}

	@Override
	public Histogram<C> clone() {
		return (Histogram<C>)super.clone();
	}


	/**
	 * Create a new Histogram with the given class separators. The classes are
	 * sorted by its natural order.
	 *
	 * @param separators the class separators.
	 * @return a new Histogram.
	 * @throws NullPointerException if the {@code separators} are {@code null}.
	 * @throws IllegalArgumentException if {@code separators.length == 0}.
	 *
	 * @deprecated Use {@link #of(Comparable[])} instead.
	 */
	@Deprecated
	@SuppressWarnings("unchecked")
	public static <C extends Comparable<? super C>> Histogram<C> valueOf(
		final C... separators
	) {
		return new Histogram<C>(COMPARATOR, separators);
	}

	/**
	 * Create a new Histogram with the given class separators. The classes are
	 * sorted by its natural order.
	 *
	 * @param separators the class separators.
	 * @return a new Histogram.
	 * @throws NullPointerException if the {@code separators} are {@code null}.
	 * @throws IllegalArgumentException if {@code separators.length == 0}.
	 */
	@SuppressWarnings("unchecked")
	public static <C extends Comparable<? super C>> Histogram<C> of(
		final C... separators
	) {
		return new Histogram<C>(COMPARATOR, separators);
	}

	@SuppressWarnings({"rawtypes", "unchecked"})
	private static final Comparator COMPARATOR = new Comparator() {
		@Override
		public int compare(final Object o1, final Object o2) {
			return ((Comparable)o1).compareTo(o2);
		}
	};

	/**
	 * Return a <i>histogram</i> for {@link Double} values. The <i>histogram</i>
	 * array of the returned {@link Histogram} will look like this:
	 *
	 * <pre>
	 *    min                            max
	 *     +----+----+----+----+  ~  +----+
	 *     | 1  | 2  | 3  | 4  |     | nc |
	 *     +----+----+----+----+  ~  +----+
	 * </pre>
	 *
	 * The range of all classes will be equal: {@code (max - min)/nclasses}.
	 *
	 * @param min the minimum range value of the returned histogram.
	 * @param max the maximum range value of the returned histogram.
	 * @param nclasses the number of classes of the returned histogram. The
	 *        number of separators will be {@code nclasses - 1}.
	 * @return a new <i>histogram</i> for {@link Double} values.
	 * @throws NullPointerException if {@code min} or {@code max} is {@code null}.
	 * @throws IllegalArgumentException if {@code min.compareTo(max) >= 0} or
	 *         {@code nclasses < 2}.
	 *
	 * @deprecated Use {@link #of(Double, Double, int)} instead.
	 */
	@Deprecated
	public static Histogram<Double> valueOf(
		final Double min,
		final Double max,
		final int nclasses
	) {
		return of(toSeparators(min, max, nclasses));
	}

	/**
	 * Return a <i>histogram</i> for {@link Double} values. The <i>histogram</i>
	 * array of the returned {@link Histogram} will look like this:
	 *
	 * <pre>
	 *    min                            max
	 *     +----+----+----+----+  ~  +----+
	 *     | 1  | 2  | 3  | 4  |     | nc |
	 *     +----+----+----+----+  ~  +----+
	 * </pre>
	 *
	 * The range of all classes will be equal: {@code (max - min)/nclasses}.
	 *
	 * @param min the minimum range value of the returned histogram.
	 * @param max the maximum range value of the returned histogram.
	 * @param nclasses the number of classes of the returned histogram. The
	 *        number of separators will be {@code nclasses - 1}.
	 * @return a new <i>histogram</i> for {@link Double} values.
	 * @throws NullPointerException if {@code min} or {@code max} is {@code null}.
	 * @throws IllegalArgumentException if {@code min.compareTo(max) >= 0} or
	 *         {@code nclasses < 2}.
	 */
	public static Histogram<Double> of(
		final Double min,
		final Double max,
		final int nclasses
	) {
		return of(toSeparators(min, max, nclasses));
	}

	private static Double[] toSeparators(
		final Double min,
		final Double max,
		final int nclasses
	) {
		check(min, max, nclasses);

		final double stride = (max - min)/nclasses;
		final Double[] separators = new Double[nclasses - 1];
		for (int i = 0; i < separators.length; ++i) {
			separators[i] = min + stride*(i + 1);
		}

		return separators;
	}

	/**
	 * @deprecated Use {@link #of(Double, Double, int)} instead.
	 */
	@Deprecated
	public static Histogram<Float64> valueOf(
		final Float64 min,
		final Float64 max,
		final int nclasses
	) {
		return of(arrays.map(
			toSeparators(min.doubleValue(), max.doubleValue(), nclasses),
			new Float64[nclasses - 1],
			DoubleToFloat64
		));
	}

	/**
	 * @deprecated Use {@link #of(Long, Long, int)} instead.
	 */
	@Deprecated
	public static Histogram<Integer64> valueOf(
		final Integer64 min,
		final Integer64 max,
		final int nclasses
	) {
		return of(arrays.map(
			toSeparators(min.longValue(), max.longValue(), nclasses),
			new Integer64[0],
			LongToInteger64
		));
	}

	/**
	 * Return a <i>histogram</i> for {@link Long} values. The <i>histogram</i>
	 * array of the returned {@link Histogram} will look like this:
	 *
	 * <pre>
	 *    min                            max
	 *     +----+----+----+----+  ~  +----+
	 *     | 1  | 2  | 3  | 4  |     | nc |
	 *     +----+----+----+----+  ~  +----+
	 * </pre>
	 *
	 * The range of all classes are more or less the same. But this is not
	 * always possible due to integer rounding issues. Calling this method with
	 * {@code min = 13} and {@code max = 99} will generate the following class
	 * separators for the given number of classes:
	 * <pre>
	 *  nclasses = 2: [56]
	 *  nclasses = 3: [41, 70]
	 *  nclasses = 4: [34, 55, 77]
	 *  nclasses = 5: [30, 47, 64, 81]
	 *  nclasses = 6: [27, 41, 55, 69, 84]
	 *  nclasses = 7: [25, 37, 49, 61, 73, 86]
	 *  nclasses = 8: [23, 33, 44, 55, 66, 77, 88]
	 *  nclasses = 9: [22, 31, 40, 49, 59, 69, 79, 89]
	 * </pre>
	 *
	 * @param min the minimum range value of the returned histogram.
	 * @param max the maximum range value of the returned histogram.
	 * @param nclasses the number of classes of the returned histogram. The
	 *        number of separators will be {@code nclasses - 1}.
	 * @return a new <i>histogram</i> for {@link Long} values.
	 * @throws NullPointerException if {@code min} or {@code max} is {@code null}.
	 * @throws IllegalArgumentException if {@code min.compareTo(max) >= 0} or
	 *         {@code nclasses < 2}.
	 */
	public static Histogram<Long> of(
		final Long min,
		final Long max,
		final int nclasses
	) {
		return of(toSeparators(min, max, nclasses));
	}

	/**
	 * @deprecated Use {@link #of(Long, Long, int)} instead.
	 */
	@Deprecated
	public static Histogram<Long> valueOf(
		final Long min,
		final Long max,
		final int nclasses
	) {
		return of(toSeparators(min, max, nclasses));
	}

	private static Long[] toSeparators(
		final Long min,
		final Long max,
		final int nclasses
	) {
		check(min, max, nclasses);

		final int size = (int)(max - min);
		final int pts = Math.min(size, nclasses);
		final int bulk = size/pts;
		final int rest = size%pts;
		assert ((bulk*pts + rest) == size);

		final Long[] separators = new Long[pts - 1];
		for (int i = 1, n = pts - rest; i < n; ++i) {
			separators[i - 1] = i*bulk + min;
		}
		for (int i = 0; i < rest; ++i) {
			separators[separators.length - rest + i] =
					(pts - rest)*bulk + i*(bulk + 1) + min;
		}

		return separators;
	}

	/*
	 * Check the input values of the valueOf methods.
	 */
	private static <C extends Comparable<? super C>> void
	check(final C min, final C max, final int nclasses)
	{
		requireNonNull(min, "Minimum");
		requireNonNull(max, "Maximum");
		if (min.compareTo(max) >= 0) {
			throw new IllegalArgumentException(format(
					"Min must be smaller than max: %s < %s failed.", min, max
				));
		}
		if (nclasses < 2) {
			throw new IllegalArgumentException(format(
				"nclasses should be < 2, but was %s.", nclasses
			));
		}
	}


	public <A extends Appendable> A print(final A out) throws IOException {
		Object min = "...";
		Object max = null;
		for (int i = 0; i < length() - 1; ++i) {
			max = _separators[i];
			out.append("[" + min + "," + max + ")");
			out.append(" " + _histogram[i] + "\n");
			min = max;
		}
		if (length() - 1 > 0) {
			out.append("[" + min + ",...)");
			out.append(" " + _histogram[length() - 1] + "\n");
		}

		return out;
	}

}