001 /*
002 * Java Genetic Algorithm Library (jenetics-1.6.0).
003 * Copyright (c) 2007-2014 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmx.at)
019 */
020 package org.jenetics.stat;
021
022 import static java.lang.Math.max;
023 import static java.lang.Math.round;
024 import static java.lang.String.format;
025 import static java.util.Objects.requireNonNull;
026 import static org.jenetics.internal.util.object.NonNull;
027 import static org.jenetics.internal.util.object.eq;
028 import static org.jenetics.util.arrays.forEach;
029 import static org.jenetics.util.functions.DoubleToFloat64;
030 import static org.jenetics.util.functions.LongToInteger64;
031 import static org.jenetics.util.math.statistics.sum;
032
033 import java.io.IOException;
034 import java.util.Arrays;
035 import java.util.Comparator;
036
037 import org.jscience.mathematics.number.Float64;
038 import org.jscience.mathematics.number.Integer64;
039
040 import org.jenetics.internal.util.HashBuilder;
041
042 import org.jenetics.util.Function;
043 import org.jenetics.util.MappedAccumulator;
044 import org.jenetics.util.arrays;
045
046 /**
047 * To create an <i>Histogram Accumulator</i> you have to define the <i>class
048 * border</i> which define the histogram classes. A value is part of the
049 * <i>i</i><sup>th</sup> histogram array element:
050 * <p>
051 * <img
052 * src="doc-files/histogram-class.gif"
053 * alt="i=\left\{\begin{matrix} 0 & when & v < c_0 \\
054 * len(c) & when & v \geq c_{len(c)-1} \\
055 * j & when & c_j< v \leq c_{j-1} \\ \end{matrix}\right."
056 * />
057 * </p>
058 *
059 * Example:
060 * <pre>
061 * Separators: 0 1 2 3 4 5 6 7 8 9
062 * -------+----+----+----+----+----+----+----+----+----+------
063 * Frequencies: 20 | 12 | 14 | 17 | 12 | 11 | 13 | 11 | 10 | 19 | 18
064 * -------+----+----+----+----+----+----+----+----+----+------
065 * Histogram index: 0 1 2 3 4 5 6 7 8 9 10
066 * </pre>
067 * <p/>
068 * <strong>Note that this implementation is not synchronized.</strong> If
069 * multiple threads access this object concurrently, and at least one of the
070 * threads modifies it, it must be synchronized externally.
071 *
072 * @author <a href="mailto:franz.wilhelmstoetter@gmx.at">Franz Wilhelmstötter</a>
073 * @since 1.0
074 * @version 1.0 — <em>$Date: 2014-03-01 $</em>
075 */
076 public class Histogram<C> extends MappedAccumulator<C> {
077
078 private final C[] _separators;
079 private final Comparator<C> _comparator;
080 private final long[] _histogram;
081
082 /**
083 * Create a new Histogram with the given class separators. The number of
084 * classes is {@code separators.length + 1}. A valid histogram consists of
085 * at least two classes (with one separator).
086 *
087 * @see #valueOf(Comparable...)
088 *
089 * @param comparator the comparator for the separators.
090 * @param separators the class separators.
091 * @throws NullPointerException if the classes of one of its elements
092 * or the {@code comparator} is {@code null}.
093 * @throws IllegalArgumentException if the given separators array is empty.
094 */
095 @SafeVarargs
096 public Histogram(final Comparator<C> comparator, final C... separators) {
097 _separators = check(separators);
098 _comparator = requireNonNull(comparator, "Comparator");
099 _histogram = new long[separators.length + 1];
100
101 Arrays.sort(_separators, _comparator);
102 Arrays.fill(_histogram, 0L);
103 }
104
105 @SafeVarargs
106 private Histogram(
107 final long[] histogram,
108 final Comparator<C> comparator,
109 final C... separators
110 ) {
111 _histogram = histogram;
112 _comparator = comparator;
113 _separators = separators;
114 }
115
116 @SuppressWarnings("unchecked")
117 private static <C> C[] check(final C... classes) {
118 forEach(classes, NonNull);
119 if (classes.length == 0) {
120 throw new IllegalArgumentException("Given classes array is empty.");
121 }
122
123 return classes;
124 }
125
126 @Override
127 public void accumulate(final C value) {
128 ++_histogram[index(value)];
129 ++_samples;
130 }
131
132 /**
133 * Do binary search for the index to use.
134 *
135 * @param value the value to search.
136 * @return the histogram index.
137 */
138 final int index(final C value) {
139 int low = 0;
140 int high = _separators.length - 1;
141
142 while (low <= high) {
143 if (_comparator.compare(value, _separators[low]) < 0) {
144 return low;
145 }
146 if (_comparator.compare(value, _separators[high]) >= 0) {
147 return high + 1;
148 }
149
150 final int mid = (low + high) >>> 1;
151 if (_comparator.compare(value, _separators[mid]) < 0) {
152 high = mid;
153 } else if (_comparator.compare(value, _separators[mid]) >= 0) {
154 low = mid + 1;
155 }
156 }
157
158 assert (false): "This line will never be reached.";
159 return -1;
160 }
161
162 /**
163 * Add the given {@code histogram} to this in a newly created one.
164 *
165 * @param histogram the histogram to add.
166 * @return a new histogram with the added values of this and the given one.
167 * @throws IllegalArgumentException if the {@link #length()} and the
168 * separators of {@code this} and the given {@code histogram} are
169 * not the same.
170 * @throws NullPointerException if the given {@code histogram} is {@code null}.
171 */
172 public Histogram<C> plus(final Histogram<C> histogram) {
173 if (!_comparator.equals(histogram._comparator)) {
174 throw new IllegalArgumentException(
175 "The histogram comparators are not equals."
176 );
177 }
178 if (!Arrays.equals(_separators, histogram._separators)) {
179 throw new IllegalArgumentException(
180 "The histogram separators are not equals."
181 );
182 }
183
184 final long[] data = new long[_histogram.length];
185 for (int i = 0; i < data.length; ++i) {
186 data[i] = _histogram[i] + histogram._histogram[i];
187 }
188
189 return new Histogram<>(data, _comparator, _separators);
190 }
191
192 /**
193 * Return the comparator used for class search.
194 *
195 * @return the comparator.
196 */
197 public Comparator<C> getComparator() {
198 return _comparator;
199 }
200
201 /**
202 * Return a copy of the class separators.
203 *
204 * @return the class separators.
205 */
206 public C[] getSeparators() {
207 return _separators.clone();
208 }
209
210 /**
211 * Copy the histogram into the given array. If the array is big enough
212 * the same array is returned, otherwise a new array is created and
213 * returned. The length of the histogram array is the number of separators
214 * plus one ({@code getSeparators().length + 1}).
215 *
216 * @param histogram array to copy the histogram.
217 * @return the histogram array.
218 * @throws NullPointerException if the given array is {@code null}.
219 */
220 public long[] getHistogram(final long[] histogram) {
221 requireNonNull(histogram);
222
223 long[] hist = histogram;
224 if (histogram.length >= _histogram.length) {
225 System.arraycopy(_histogram, 0, hist, 0, _histogram.length);
226 } else {
227 hist = _histogram.clone();
228 }
229
230 return hist;
231 }
232
233 /**
234 * Return a copy of the current histogram.
235 *
236 * @return a copy of the current histogram.
237 */
238 public long[] getHistogram() {
239 return getHistogram(new long[_histogram.length]);
240 }
241
242 /**
243 * Return the number of classes of this histogram.
244 *
245 * @return the number of classes of this histogram.
246 */
247 public int length() {
248 return _histogram.length;
249 }
250
251 /**
252 * Return the <i>histogram</i> as probability array.
253 *
254 * @return the class probabilities.
255 */
256 public double[] getProbabilities() {
257 final double[] probabilities = new double[_histogram.length];
258
259 assert (sum(_histogram) == _samples);
260 for (int i = 0; i < probabilities.length; ++i) {
261 probabilities[i] = (double)_histogram[i]/(double)_samples;
262 }
263
264 return probabilities;
265 }
266
267 /**
268 * Calculate the χ2 value of the current histogram for the assumed
269 * <a href="http://en.wikipedia.org/wiki/Cumulative_distribution_function">
270 * Cumulative density function</a> {@code cdf}.
271 *
272 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_test">χ2-test</a>
273 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_distribution">χ2-distribution</a>
274 *
275 * @param cdf the assumed Probability density function.
276 * @param min the lower limit of the CDF domain. A {@code null} value means
277 * an open interval.
278 * @param max the upper limit of the CDF domain. A {@code null} value means
279 * an open interval.
280 * @return the χ2 value of the current histogram.
281 * @throws NullPointerException if {@code cdf} is {@code null}.
282 */
283 public double χ2(final Function<C, Float64> cdf, final C min, final C max) {
284 double χ2 = 0;
285 for (int j = 0; j < _histogram.length; ++j) {
286 final long n0j = n0(j, cdf, min, max);
287 χ2 += ((_histogram[j] - n0j)*(_histogram[j] - n0j))/(double)n0j;
288 }
289 return χ2;
290 }
291
292 /**
293 * Calculate the χ2 value of the current histogram for the assumed
294 * <a href="http://en.wikipedia.org/wiki/Cumulative_distribution_function">
295 * Cumulative density function</a> {@code cdf}.
296 *
297 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_test">χ2-test</a>
298 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_distribution">χ2-distribution</a>
299 *
300 * @param cdf the assumed Probability density function.
301 * @return the χ2 value of the current histogram.
302 * @throws NullPointerException if {@code cdf} is {@code null}.
303 */
304 public double χ2(final Function<C, Float64> cdf) {
305 return χ2(cdf, null, null);
306 }
307
308 private long n0(final int j, final Function<C, Float64> cdf, final C min, final C max) {
309 Float64 p0j = Float64.ZERO;
310 if (j == 0) {
311 p0j = cdf.apply(_separators[0]);
312 if (min != null) {
313 p0j = p0j.minus(cdf.apply(min));
314 }
315 } else if (j == _histogram.length - 1) {
316 if (max != null) {
317 p0j = cdf.apply(max).minus(cdf.apply(_separators[_separators.length - 1]));
318 } else {
319 p0j = Float64.ONE.minus(cdf.apply(_separators[_separators.length - 1]));
320 }
321 } else {
322 p0j = cdf.apply(_separators[j]).minus(cdf.apply(_separators[j - 1]));
323 }
324
325 return max(round(p0j.doubleValue()*_samples), 1L);
326 }
327
328 /**
329 * @see #χ2(Function)
330 */
331 public double chisqr(final Function<C, Float64> cdf) {
332 return χ2(cdf);
333 }
334
335 /**
336 * @see #χ2(Function, Object, Object)
337 */
338 public double chisqr(final Function<C, Float64> cdf, final C min, final C max) {
339 return χ2(cdf, min, max);
340 }
341
342 @Override
343 public int hashCode() {
344 return HashBuilder.of(getClass()).
345 and(super.hashCode()).
346 and(_separators).
347 and(_histogram).value();
348 }
349
350 @Override
351 public boolean equals(final Object obj) {
352 if (obj == this) {
353 return true;
354 }
355 if (obj == null || getClass() != obj.getClass()) {
356 return false;
357 }
358
359 final Histogram<?> histogram = (Histogram<?>)obj;
360 return eq(_separators, histogram._separators) &&
361 eq(_histogram, histogram._histogram) &&
362 super.equals(obj);
363 }
364
365 @Override
366 public String toString() {
367 return Arrays.toString(_separators) + "\n" + Arrays.toString(getHistogram()) +
368 "\nSamples: " + _samples;
369 }
370
371 @Override
372 public Histogram<C> clone() {
373 return (Histogram<C>)super.clone();
374 }
375
376
377 /**
378 * Create a new Histogram with the given class separators. The classes are
379 * sorted by its natural order.
380 *
381 * @param separators the class separators.
382 * @return a new Histogram.
383 * @throws NullPointerException if the {@code separators} are {@code null}.
384 * @throws IllegalArgumentException if {@code separators.length == 0}.
385 *
386 * @deprecated Use {@link #of(Comparable[])} instead.
387 */
388 @Deprecated
389 @SuppressWarnings("unchecked")
390 public static <C extends Comparable<? super C>> Histogram<C> valueOf(
391 final C... separators
392 ) {
393 return new Histogram<C>(COMPARATOR, separators);
394 }
395
396 /**
397 * Create a new Histogram with the given class separators. The classes are
398 * sorted by its natural order.
399 *
400 * @param separators the class separators.
401 * @return a new Histogram.
402 * @throws NullPointerException if the {@code separators} are {@code null}.
403 * @throws IllegalArgumentException if {@code separators.length == 0}.
404 */
405 @SuppressWarnings("unchecked")
406 public static <C extends Comparable<? super C>> Histogram<C> of(
407 final C... separators
408 ) {
409 return new Histogram<C>(COMPARATOR, separators);
410 }
411
412 @SuppressWarnings({"rawtypes", "unchecked"})
413 private static final Comparator COMPARATOR = new Comparator() {
414 @Override
415 public int compare(final Object o1, final Object o2) {
416 return ((Comparable)o1).compareTo(o2);
417 }
418 };
419
420 /**
421 * Return a <i>histogram</i> for {@link Double} values. The <i>histogram</i>
422 * array of the returned {@link Histogram} will look like this:
423 *
424 * <pre>
425 * min max
426 * +----+----+----+----+ ~ +----+
427 * | 1 | 2 | 3 | 4 | | nc |
428 * +----+----+----+----+ ~ +----+
429 * </pre>
430 *
431 * The range of all classes will be equal: {@code (max - min)/nclasses}.
432 *
433 * @param min the minimum range value of the returned histogram.
434 * @param max the maximum range value of the returned histogram.
435 * @param nclasses the number of classes of the returned histogram. The
436 * number of separators will be {@code nclasses - 1}.
437 * @return a new <i>histogram</i> for {@link Double} values.
438 * @throws NullPointerException if {@code min} or {@code max} is {@code null}.
439 * @throws IllegalArgumentException if {@code min.compareTo(max) >= 0} or
440 * {@code nclasses < 2}.
441 *
442 * @deprecated Use {@link #of(Double, Double, int)} instead.
443 */
444 @Deprecated
445 public static Histogram<Double> valueOf(
446 final Double min,
447 final Double max,
448 final int nclasses
449 ) {
450 return of(toSeparators(min, max, nclasses));
451 }
452
453 /**
454 * Return a <i>histogram</i> for {@link Double} values. The <i>histogram</i>
455 * array of the returned {@link Histogram} will look like this:
456 *
457 * <pre>
458 * min max
459 * +----+----+----+----+ ~ +----+
460 * | 1 | 2 | 3 | 4 | | nc |
461 * +----+----+----+----+ ~ +----+
462 * </pre>
463 *
464 * The range of all classes will be equal: {@code (max - min)/nclasses}.
465 *
466 * @param min the minimum range value of the returned histogram.
467 * @param max the maximum range value of the returned histogram.
468 * @param nclasses the number of classes of the returned histogram. The
469 * number of separators will be {@code nclasses - 1}.
470 * @return a new <i>histogram</i> for {@link Double} values.
471 * @throws NullPointerException if {@code min} or {@code max} is {@code null}.
472 * @throws IllegalArgumentException if {@code min.compareTo(max) >= 0} or
473 * {@code nclasses < 2}.
474 */
475 public static Histogram<Double> of(
476 final Double min,
477 final Double max,
478 final int nclasses
479 ) {
480 return of(toSeparators(min, max, nclasses));
481 }
482
483 private static Double[] toSeparators(
484 final Double min,
485 final Double max,
486 final int nclasses
487 ) {
488 check(min, max, nclasses);
489
490 final double stride = (max - min)/nclasses;
491 final Double[] separators = new Double[nclasses - 1];
492 for (int i = 0; i < separators.length; ++i) {
493 separators[i] = min + stride*(i + 1);
494 }
495
496 return separators;
497 }
498
499 /**
500 * @deprecated Use {@link #of(Double, Double, int)} instead.
501 */
502 @Deprecated
503 public static Histogram<Float64> valueOf(
504 final Float64 min,
505 final Float64 max,
506 final int nclasses
507 ) {
508 return of(arrays.map(
509 toSeparators(min.doubleValue(), max.doubleValue(), nclasses),
510 new Float64[nclasses - 1],
511 DoubleToFloat64
512 ));
513 }
514
515 /**
516 * @deprecated Use {@link #of(Long, Long, int)} instead.
517 */
518 @Deprecated
519 public static Histogram<Integer64> valueOf(
520 final Integer64 min,
521 final Integer64 max,
522 final int nclasses
523 ) {
524 return of(arrays.map(
525 toSeparators(min.longValue(), max.longValue(), nclasses),
526 new Integer64[0],
527 LongToInteger64
528 ));
529 }
530
531 /**
532 * Return a <i>histogram</i> for {@link Long} values. The <i>histogram</i>
533 * array of the returned {@link Histogram} will look like this:
534 *
535 * <pre>
536 * min max
537 * +----+----+----+----+ ~ +----+
538 * | 1 | 2 | 3 | 4 | | nc |
539 * +----+----+----+----+ ~ +----+
540 * </pre>
541 *
542 * The range of all classes are more or less the same. But this is not
543 * always possible due to integer rounding issues. Calling this method with
544 * {@code min = 13} and {@code max = 99} will generate the following class
545 * separators for the given number of classes:
546 * <pre>
547 * nclasses = 2: [56]
548 * nclasses = 3: [41, 70]
549 * nclasses = 4: [34, 55, 77]
550 * nclasses = 5: [30, 47, 64, 81]
551 * nclasses = 6: [27, 41, 55, 69, 84]
552 * nclasses = 7: [25, 37, 49, 61, 73, 86]
553 * nclasses = 8: [23, 33, 44, 55, 66, 77, 88]
554 * nclasses = 9: [22, 31, 40, 49, 59, 69, 79, 89]
555 * </pre>
556 *
557 * @param min the minimum range value of the returned histogram.
558 * @param max the maximum range value of the returned histogram.
559 * @param nclasses the number of classes of the returned histogram. The
560 * number of separators will be {@code nclasses - 1}.
561 * @return a new <i>histogram</i> for {@link Long} values.
562 * @throws NullPointerException if {@code min} or {@code max} is {@code null}.
563 * @throws IllegalArgumentException if {@code min.compareTo(max) >= 0} or
564 * {@code nclasses < 2}.
565 */
566 public static Histogram<Long> of(
567 final Long min,
568 final Long max,
569 final int nclasses
570 ) {
571 return of(toSeparators(min, max, nclasses));
572 }
573
574 /**
575 * @deprecated Use {@link #of(Long, Long, int)} instead.
576 */
577 @Deprecated
578 public static Histogram<Long> valueOf(
579 final Long min,
580 final Long max,
581 final int nclasses
582 ) {
583 return of(toSeparators(min, max, nclasses));
584 }
585
586 private static Long[] toSeparators(
587 final Long min,
588 final Long max,
589 final int nclasses
590 ) {
591 check(min, max, nclasses);
592
593 final int size = (int)(max - min);
594 final int pts = Math.min(size, nclasses);
595 final int bulk = size/pts;
596 final int rest = size%pts;
597 assert ((bulk*pts + rest) == size);
598
599 final Long[] separators = new Long[pts - 1];
600 for (int i = 1, n = pts - rest; i < n; ++i) {
601 separators[i - 1] = i*bulk + min;
602 }
603 for (int i = 0; i < rest; ++i) {
604 separators[separators.length - rest + i] =
605 (pts - rest)*bulk + i*(bulk + 1) + min;
606 }
607
608 return separators;
609 }
610
611 /*
612 * Check the input values of the valueOf methods.
613 */
614 private static <C extends Comparable<? super C>> void
615 check(final C min, final C max, final int nclasses)
616 {
617 requireNonNull(min, "Minimum");
618 requireNonNull(max, "Maximum");
619 if (min.compareTo(max) >= 0) {
620 throw new IllegalArgumentException(format(
621 "Min must be smaller than max: %s < %s failed.", min, max
622 ));
623 }
624 if (nclasses < 2) {
625 throw new IllegalArgumentException(format(
626 "nclasses should be < 2, but was %s.", nclasses
627 ));
628 }
629 }
630
631
632 public <A extends Appendable> A print(final A out) throws IOException {
633 Object min = "...";
634 Object max = null;
635 for (int i = 0; i < length() - 1; ++i) {
636 max = _separators[i];
637 out.append("[" + min + "," + max + ")");
638 out.append(" " + _histogram[i] + "\n");
639 min = max;
640 }
641 if (length() - 1 > 0) {
642 out.append("[" + min + ",...)");
643 out.append(" " + _histogram[length() - 1] + "\n");
644 }
645
646 return out;
647 }
648
649 }
|