001 /*
002 * Java Genetic Algorithm Library (jenetics-2.0.2).
003 * Copyright (c) 2007-2014 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmx.at)
019 */
020 package org.jenetics.stat;
021
022 import static java.lang.Math.max;
023 import static java.lang.Math.round;
024 import static java.lang.String.format;
025 import static java.util.Objects.requireNonNull;
026 import static org.jenetics.internal.util.object.NonNull;
027 import static org.jenetics.internal.util.object.eq;
028 import static org.jenetics.util.arrays.forEach;
029 import static org.jenetics.util.math.statistics.sum;
030
031 import java.io.IOException;
032 import java.util.Arrays;
033 import java.util.Comparator;
034
035 import org.jenetics.internal.util.HashBuilder;
036
037 import org.jenetics.util.Function;
038 import org.jenetics.util.MappedAccumulator;
039
040 /**
041 * To create an <i>Histogram Accumulator</i> you have to define the <i>class
042 * border</i> which define the histogram classes. A value is part of the
043 * <i>i</i><sup>th</sup> histogram array element:
044 * <p>
045 * <img
046 * src="doc-files/histogram-class.gif"
047 * alt="i=\left\{\begin{matrix} 0 & when & v < c_0 \\
048 * len(c) & when & v \geq c_{len(c)-1} \\
049 * j & when & c_j< v \leq c_{j-1} \\ \end{matrix}\right."
050 * >
051 * </p>
052 *
053 * Example:
054 * <pre>
055 * Separators: 0 1 2 3 4 5 6 7 8 9
056 * -------+----+----+----+----+----+----+----+----+----+------
057 * Frequencies: 20 | 12 | 14 | 17 | 12 | 11 | 13 | 11 | 10 | 19 | 18
058 * -------+----+----+----+----+----+----+----+----+----+------
059 * Histogram index: 0 1 2 3 4 5 6 7 8 9 10
060 * </pre>
061 * <p>
062 * <strong>Note that this implementation is not synchronized.</strong> If
063 * multiple threads access this object concurrently, and at least one of the
064 * threads modifies it, it must be synchronized externally.
065 *
066 * @author <a href="mailto:franz.wilhelmstoetter@gmx.at">Franz Wilhelmstötter</a>
067 * @since 1.0
068 * @version 2.0 — <em>$Date: 2014-03-31 $</em>
069 */
070 public class Histogram<C> extends MappedAccumulator<C> {
071
072 private final C[] _separators;
073 private final Comparator<C> _comparator;
074 private final long[] _histogram;
075
076 /**
077 * Create a new Histogram with the given class separators. The number of
078 * classes is {@code separators.length + 1}. A valid histogram consists of
079 * at least two classes (with one separator).
080 *
081 * @see #of(Comparable...)
082 *
083 * @param comparator the comparator for the separators.
084 * @param separators the class separators.
085 * @throws NullPointerException if the classes of one of its elements
086 * or the {@code comparator} is {@code null}.
087 * @throws IllegalArgumentException if the given separators array is empty.
088 */
089 @SafeVarargs
090 public Histogram(final Comparator<C> comparator, final C... separators) {
091 _separators = check(separators);
092 _comparator = requireNonNull(comparator, "Comparator");
093 _histogram = new long[separators.length + 1];
094
095 Arrays.sort(_separators, _comparator);
096 Arrays.fill(_histogram, 0L);
097 }
098
099 @SafeVarargs
100 private Histogram(
101 final long[] histogram,
102 final Comparator<C> comparator,
103 final C... separators
104 ) {
105 _histogram = histogram;
106 _comparator = comparator;
107 _separators = separators;
108 }
109
110 @SuppressWarnings("unchecked")
111 private static <C> C[] check(final C... classes) {
112 forEach(classes, NonNull);
113 if (classes.length == 0) {
114 throw new IllegalArgumentException("Given classes array is empty.");
115 }
116
117 return classes;
118 }
119
120 @Override
121 public void accumulate(final C value) {
122 ++_histogram[index(value)];
123 ++_samples;
124 }
125
126 /**
127 * Do binary search for the index to use.
128 *
129 * @param value the value to search.
130 * @return the histogram index.
131 */
132 final int index(final C value) {
133 int low = 0;
134 int high = _separators.length - 1;
135
136 while (low <= high) {
137 if (_comparator.compare(value, _separators[low]) < 0) {
138 return low;
139 }
140 if (_comparator.compare(value, _separators[high]) >= 0) {
141 return high + 1;
142 }
143
144 final int mid = (low + high) >>> 1;
145 if (_comparator.compare(value, _separators[mid]) < 0) {
146 high = mid;
147 } else if (_comparator.compare(value, _separators[mid]) >= 0) {
148 low = mid + 1;
149 }
150 }
151
152 assert (false): "This line will never be reached.";
153 return -1;
154 }
155
156 /**
157 * Add the given {@code histogram} to this in a newly created one.
158 *
159 * @param histogram the histogram to add.
160 * @return a new histogram with the added values of this and the given one.
161 * @throws IllegalArgumentException if the {@link #length()} and the
162 * separators of {@code this} and the given {@code histogram} are
163 * not the same.
164 * @throws NullPointerException if the given {@code histogram} is {@code null}.
165 */
166 public Histogram<C> plus(final Histogram<C> histogram) {
167 if (!_comparator.equals(histogram._comparator)) {
168 throw new IllegalArgumentException(
169 "The histogram comparators are not equals."
170 );
171 }
172 if (!Arrays.equals(_separators, histogram._separators)) {
173 throw new IllegalArgumentException(
174 "The histogram separators are not equals."
175 );
176 }
177
178 final long[] data = new long[_histogram.length];
179 for (int i = 0; i < data.length; ++i) {
180 data[i] = _histogram[i] + histogram._histogram[i];
181 }
182
183 return new Histogram<>(data, _comparator, _separators);
184 }
185
186 /**
187 * Return the comparator used for class search.
188 *
189 * @return the comparator.
190 */
191 public Comparator<C> getComparator() {
192 return _comparator;
193 }
194
195 /**
196 * Return a copy of the class separators.
197 *
198 * @return the class separators.
199 */
200 public C[] getSeparators() {
201 return _separators.clone();
202 }
203
204 /**
205 * Copy the histogram into the given array. If the array is big enough
206 * the same array is returned, otherwise a new array is created and
207 * returned. The length of the histogram array is the number of separators
208 * plus one ({@code getSeparators().length + 1}).
209 *
210 * @param histogram array to copy the histogram.
211 * @return the histogram array.
212 * @throws NullPointerException if the given array is {@code null}.
213 */
214 public long[] getHistogram(final long[] histogram) {
215 requireNonNull(histogram);
216
217 long[] hist = histogram;
218 if (histogram.length >= _histogram.length) {
219 System.arraycopy(_histogram, 0, hist, 0, _histogram.length);
220 } else {
221 hist = _histogram.clone();
222 }
223
224 return hist;
225 }
226
227 /**
228 * Return a copy of the current histogram.
229 *
230 * @return a copy of the current histogram.
231 */
232 public long[] getHistogram() {
233 return getHistogram(new long[_histogram.length]);
234 }
235
236 /**
237 * Return the number of classes of this histogram.
238 *
239 * @return the number of classes of this histogram.
240 */
241 public int length() {
242 return _histogram.length;
243 }
244
245 /**
246 * Return the <i>histogram</i> as probability array.
247 *
248 * @return the class probabilities.
249 */
250 public double[] getProbabilities() {
251 final double[] probabilities = new double[_histogram.length];
252
253 assert (sum(_histogram) == _samples);
254 for (int i = 0; i < probabilities.length; ++i) {
255 probabilities[i] = (double)_histogram[i]/(double)_samples;
256 }
257
258 return probabilities;
259 }
260
261 /**
262 * Calculate the χ2 value of the current histogram for the assumed
263 * <a href="http://en.wikipedia.org/wiki/Cumulative_distribution_function">
264 * Cumulative density function</a> {@code cdf}.
265 *
266 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_test">χ2-test</a>
267 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_distribution">χ2-distribution</a>
268 *
269 * @param cdf the assumed Probability density function.
270 * @param min the lower limit of the CDF domain. A {@code null} value means
271 * an open interval.
272 * @param max the upper limit of the CDF domain. A {@code null} value means
273 * an open interval.
274 * @return the χ2 value of the current histogram.
275 * @throws NullPointerException if {@code cdf} is {@code null}.
276 */
277 public double χ2(final Function<C, Double> cdf, final C min, final C max) {
278 double χ2 = 0;
279 for (int j = 0; j < _histogram.length; ++j) {
280 final long n0j = n0(j, cdf, min, max);
281 χ2 += ((_histogram[j] - n0j)*(_histogram[j] - n0j))/(double)n0j;
282 }
283 return χ2;
284 }
285
286 /**
287 * Calculate the χ2 value of the current histogram for the assumed
288 * <a href="http://en.wikipedia.org/wiki/Cumulative_distribution_function">
289 * Cumulative density function</a> {@code cdf}.
290 *
291 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_test">χ2-test</a>
292 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_distribution">χ2-distribution</a>
293 *
294 * @param cdf the assumed Probability density function.
295 * @return the χ2 value of the current histogram.
296 * @throws NullPointerException if {@code cdf} is {@code null}.
297 */
298 public double χ2(final Function<C, Double> cdf) {
299 return χ2(cdf, null, null);
300 }
301
302 private long n0(final int j, final Function<C, Double> cdf, final C min, final C max) {
303 double p0j = 0.0;
304 if (j == 0) {
305 p0j = cdf.apply(_separators[0]);
306 if (min != null) {
307 p0j = p0j - cdf.apply(min);
308 }
309 } else if (j == _histogram.length - 1) {
310 if (max != null) {
311 p0j = cdf.apply(max) - cdf.apply(_separators[_separators.length - 1]);
312 } else {
313 p0j = 1.0 - cdf.apply(_separators[_separators.length - 1]);
314 }
315 } else {
316 p0j = cdf.apply(_separators[j]) - cdf.apply(_separators[j - 1]);
317 }
318
319 return max(round(p0j*_samples), 1L);
320 }
321
322 /**
323 * @see #χ2(Function)
324 *
325 * @param cdf the cumulative density function
326 * @return the chi square value of the given function
327 */
328 public double chisqr(final Function<C, Double> cdf) {
329 return χ2(cdf);
330 }
331
332 /**
333 * @see #χ2(Function, Object, Object)
334 *
335 * @param cdf the cumulative density function
336 * @param min the lower limit
337 * @param max the upper limit
338 * @return the chi square value of the given function
339 */
340 public double chisqr(final Function<C, Double> cdf, final C min, final C max) {
341 return χ2(cdf, min, max);
342 }
343
344 @Override
345 public int hashCode() {
346 return HashBuilder.of(getClass()).
347 and(super.hashCode()).
348 and(_separators).
349 and(_histogram).value();
350 }
351
352 @Override
353 public boolean equals(final Object obj) {
354 if (obj == this) {
355 return true;
356 }
357 if (obj == null || getClass() != obj.getClass()) {
358 return false;
359 }
360
361 final Histogram<?> histogram = (Histogram<?>)obj;
362 return eq(_separators, histogram._separators) &&
363 eq(_histogram, histogram._histogram) &&
364 super.equals(obj);
365 }
366
367 @Override
368 public String toString() {
369 return Arrays.toString(_separators) + "\n" + Arrays.toString(getHistogram()) +
370 "\nSamples: " + _samples;
371 }
372
373 @Override
374 public Histogram<C> clone() {
375 return (Histogram<C>)super.clone();
376 }
377
378 /**
379 * Create a new Histogram with the given class separators. The classes are
380 * sorted by its natural order.
381 *
382 * @param <C> the separator types
383 * @param separators the class separators.
384 * @return a new Histogram.
385 * @throws NullPointerException if the {@code separators} are {@code null}.
386 * @throws IllegalArgumentException if {@code separators.length == 0}.
387 */
388 @SuppressWarnings("unchecked")
389 public static <C extends Comparable<? super C>> Histogram<C> of(
390 final C... separators
391 ) {
392 return new Histogram<C>(COMPARATOR, separators);
393 }
394
395 @SuppressWarnings({"rawtypes", "unchecked"})
396 private static final Comparator COMPARATOR = new Comparator() {
397 @Override
398 public int compare(final Object o1, final Object o2) {
399 return ((Comparable)o1).compareTo(o2);
400 }
401 };
402
403 /**
404 * Return a <i>histogram</i> for {@link Double} values. The <i>histogram</i>
405 * array of the returned {@link Histogram} will look like this:
406 *
407 * <pre>
408 * min max
409 * +----+----+----+----+ ~ +----+
410 * | 1 | 2 | 3 | 4 | | nc |
411 * +----+----+----+----+ ~ +----+
412 * </pre>
413 *
414 * The range of all classes will be equal: {@code (max - min)/nclasses}.
415 *
416 * @param min the minimum range value of the returned histogram.
417 * @param max the maximum range value of the returned histogram.
418 * @param nclasses the number of classes of the returned histogram. The
419 * number of separators will be {@code nclasses - 1}.
420 * @return a new <i>histogram</i> for {@link Double} values.
421 * @throws NullPointerException if {@code min} or {@code max} is {@code null}.
422 * @throws IllegalArgumentException if {@code min.compareTo(max) >= 0} or
423 * {@code nclasses < 2}.
424 */
425 public static Histogram<Double> of(
426 final Double min,
427 final Double max,
428 final int nclasses
429 ) {
430 return of(toSeparators(min, max, nclasses));
431 }
432
433 private static Double[] toSeparators(
434 final Double min,
435 final Double max,
436 final int nclasses
437 ) {
438 check(min, max, nclasses);
439
440 final double stride = (max - min)/nclasses;
441 final Double[] separators = new Double[nclasses - 1];
442 for (int i = 0; i < separators.length; ++i) {
443 separators[i] = min + stride*(i + 1);
444 }
445
446 return separators;
447 }
448
449 /**
450 * Return a <i>histogram</i> for {@link Long} values. The <i>histogram</i>
451 * array of the returned {@link Histogram} will look like this:
452 *
453 * <pre>
454 * min max
455 * +----+----+----+----+ ~ +----+
456 * | 1 | 2 | 3 | 4 | | nc |
457 * +----+----+----+----+ ~ +----+
458 * </pre>
459 *
460 * The range of all classes are more or less the same. But this is not
461 * always possible due to integer rounding issues. Calling this method with
462 * {@code min = 13} and {@code max = 99} will generate the following class
463 * separators for the given number of classes:
464 * <pre>
465 * nclasses = 2: [56]
466 * nclasses = 3: [41, 70]
467 * nclasses = 4: [34, 55, 77]
468 * nclasses = 5: [30, 47, 64, 81]
469 * nclasses = 6: [27, 41, 55, 69, 84]
470 * nclasses = 7: [25, 37, 49, 61, 73, 86]
471 * nclasses = 8: [23, 33, 44, 55, 66, 77, 88]
472 * nclasses = 9: [22, 31, 40, 49, 59, 69, 79, 89]
473 * </pre>
474 *
475 * @param min the minimum range value of the returned histogram.
476 * @param max the maximum range value of the returned histogram.
477 * @param nclasses the number of classes of the returned histogram. The
478 * number of separators will be {@code nclasses - 1}.
479 * @return a new <i>histogram</i> for {@link Long} values.
480 * @throws NullPointerException if {@code min} or {@code max} is {@code null}.
481 * @throws IllegalArgumentException if {@code min.compareTo(max) >= 0} or
482 * {@code nclasses < 2}.
483 */
484 public static Histogram<Long> of(
485 final Long min,
486 final Long max,
487 final int nclasses
488 ) {
489 return of(toSeparators(min, max, nclasses));
490 }
491
492 private static Long[] toSeparators(
493 final Long min,
494 final Long max,
495 final int nclasses
496 ) {
497 check(min, max, nclasses);
498
499 final int size = (int)(max - min);
500 final int pts = Math.min(size, nclasses);
501 final int bulk = size/pts;
502 final int rest = size%pts;
503 assert ((bulk*pts + rest) == size);
504
505 final Long[] separators = new Long[pts - 1];
506 for (int i = 1, n = pts - rest; i < n; ++i) {
507 separators[i - 1] = i*bulk + min;
508 }
509 for (int i = 0; i < rest; ++i) {
510 separators[separators.length - rest + i] =
511 (pts - rest)*bulk + i*(bulk + 1) + min;
512 }
513
514 return separators;
515 }
516
517 /*
518 * Check the input values of the valueOf methods.
519 */
520 private static <C extends Comparable<? super C>> void
521 check(final C min, final C max, final int nclasses)
522 {
523 requireNonNull(min, "Minimum");
524 requireNonNull(max, "Maximum");
525 if (min.compareTo(max) >= 0) {
526 throw new IllegalArgumentException(format(
527 "Min must be smaller than max: %s < %s failed.", min, max
528 ));
529 }
530 if (nclasses < 2) {
531 throw new IllegalArgumentException(format(
532 "nclasses should be < 2, but was %s.", nclasses
533 ));
534 }
535 }
536
537
538 public <A extends Appendable> A print(final A out) throws IOException {
539 Object min = "...";
540 Object max = null;
541 for (int i = 0; i < length() - 1; ++i) {
542 max = _separators[i];
543 out.append("[" + min + "," + max + ")");
544 out.append(" " + _histogram[i] + "\n");
545 min = max;
546 }
547 if (length() - 1 > 0) {
548 out.append("[" + min + ",...)");
549 out.append(" " + _histogram[length() - 1] + "\n");
550 }
551
552 return out;
553 }
554
555 }
|