hmbdc
simplify-high-performance-messaging-programming
StatHistogram.hpp
1 #include "hmbdc/Copyright.hpp"
2 #pragma once
3 
4 #include "hmbdc/Exception.hpp"
5 #include <map>
6 #include <vector>
7 #include <utility>
8 #include <limits>
9 #include <algorithm>
10 #include <stdexcept>
11 #include <ext/mt_allocator.h>
12 
13 namespace hmbdc { namespace numeric {
14 namespace stathistogram_detail {
15 using namespace std;
16 
/**
 * @brief write a one-line percentile summary of a histogram to a stream
 * @details emits "<percentage>%=<value>," for every requested percentage,
 * followed by "sample=<sampleSize>"; no trailing newline is written
 *
 * @tparam Hist type providing report(percentages) whose result is indexable
 * @param os stream to write to
 * @param hist histogram queried through hist.report(percentages)
 * @param sampleSize sample count printed at the end of the line
 * @param percentages percentages to show, forwarded unchanged to report()
 */
template <typename Hist>
static
void display(std::ostream& os, Hist const& hist, std::size_t sampleSize
    , std::vector<float> percentages = {0, 1, 10, 50, 90, 99, 100}) {
    auto values = hist.report(percentages);
    std::size_t slot = 0;
    for (float const pct : percentages) {
        os << pct << "%=" << values[slot] << ',';
        ++slot;
    }
    os << "sample=" << sampleSize;
}
28 
29 };
30 
31 /**
32  * @brief collect sample values and keep histogram for top percentages
33  * @details top values are the smaller values
34  *
35  * @tparam T value type that supports less than operator
36  * @tparam DETAILED if false, the samples are kept in coarser grain and the class's
37  * speed performance is better
38  */
39 template <typename T, bool DETAILED = true>
41 : private StatHistogramBase {
43  : threshold_(numeric_limits<T>::max())
44  , worst_(numeric_limits<T>::min())
45  , sampleSize_(0ul)
46  {}
47 
48  explicit StatHistogram(T threshold)
49  : threshold_(threshold)
50  , worst_(numeric_limits<T>::min())
51  , sampleSize_(0ul){}
52 
53  bool add(T sample) {
54  ++sampleSize_;
55  if (sample < threshold_)
56  buckets_[sample]++;
57  else
58  buckets_[threshold_]++;
59 
60  if (sample > worst_) {
61  worst_ = sample;
62  return true;
63  }
64  return false;
65  }
66 
67  size_t sampleSize() const {
68  return sampleSize_;
69  }
70 
71  StatHistogram<T>& operator += (StatHistogram<T> const& other) {
72  if (threshold_ == other.threshold_) {
73  for (auto const& v : other.buckets_) {
74  buckets_[v.first] += v.second;
75  }
76  worst_ = max(worst_, other.worst_);
77  } else {
78  HMBDC_THROW(runtime_error, "histogram collection parameters mismatch - failed");
79  }
80  sampleSize_ += other.sampleSize_;
81  return *this;
82  }
83 
84  vector<T> report(vector<float> percentages
85  = {0, 1, 10, 50, 90, 99, 100}) const {
86  vector<T> p(percentages.size());
87  if (!buckets_.empty() && !p.empty()) {
88  *p.begin() = buckets_.begin()->first;
89  *p.rbegin() = worst_;
90  }
91  size_t count = 0;
92  size_t perIndex = 1;
93  for(auto& i : buckets_) {
94  count += i.second;
95  for (auto j = perIndex; j < percentages.size() - 1; ++j) {
96  if (count * 100ul >= percentages[j] * sampleSize_) {
97  p[j] = i.first;
98  perIndex++;
99  } else {
100  break;
101  }
102  }
103  }
104 
105  return p;
106  }
107 
108  void display(ostream& os
109  , vector<float> percentages = {0, 1, 10, 50, 90, 99, 100}) const {
110  StatHistogramBase::display(os, *this, sampleSize_, percentages);
111  }
112 
113  friend
114  ostream& operator << (ostream& os, StatHistogram const& hist) {
115  hist.display(os);
116  return os;
117  }
118 private:
119 
120  using Buckets = map<T, size_t, less<T>
121  , __gnu_cxx::__mt_alloc<pair<const T, size_t>>>;
122  Buckets buckets_;
123  T threshold_;
124  T worst_;
125  size_t sampleSize_;
126 };
127 
128 template <typename T>
129 struct StatHistogram<T, false>
130 : private StatHistogramBase {
132  T thresholdMin
133  , T thresholdMax
134  , size_t bucketCount = 1000u)
135  : thresholdMin_(thresholdMin)
136  , thresholdMax_(thresholdMax)
137  , best_(numeric_limits<T>::max())
138  , worst_(numeric_limits<T>::min())
139  , sampleSize_(0ul)
140  , unit_((thresholdMax - thresholdMin) / bucketCount)
141  , buckets_(bucketCount + 1) {
142  if (thresholdMax <= thresholdMin) {
143  HMBDC_THROW(runtime_error, "thresholdMax <= thresholdMin");
144  }
145  }
146 
147  int add(T sample) {
148  ++sampleSize_;
149 
150  if (sample < thresholdMin_)
151  buckets_[0]++;
152  else if (sample < thresholdMax_)
153  buckets_[(sample - thresholdMin_) / unit_]++;
154  else
155  buckets_[buckets_.size() - 1]++;
156 
157  auto res = 0;
158  if (sample < best_) {
159  best_ = sample;
160  res = -1;
161  }
162  if (sample > worst_) {
163  worst_ = sample;
164  res = 1;
165  }
166 
167  return res;
168  }
169 
170  size_t sampleSize() const {
171  return sampleSize_;
172  }
173 
174  StatHistogram<T, false>& operator += (StatHistogram<T, false> const& other) {
175  if (thresholdMax_ == other.thresholdMax_ &&
176  thresholdMin_ == other.thresholdMin_ &&
177  buckets_.size() == other.buckets_.size()) {
178  for (auto i = 0u; i < buckets_.size(); ++i) {
179  buckets_[i] += other.buckets_[i];
180  }
181  worst_ = max(worst_, other.worst_);
182  best_ = min(best_, other.best_);
183  sampleSize_ += other.sampleSize_;
184  } else {
185  HMBDC_THROW(runtime_error, "thresholds or bucketCount mismatch - failed");
186  }
187  return *this;
188  }
189 
190  vector<T> report(vector<float> percentages
191  = {0, 1, 10, 50, 90, 99, 100}) const {
192 
193  vector<T> p(percentages.size());
194  if (sampleSize_ && !p.empty()) {
195  *p.begin() = best_;
196  *p.rbegin() = worst_;
197  size_t count = 0;
198  auto val = thresholdMin_;
199  size_t perIndex = 1;
200  for(auto& i : buckets_) {
201  count += i;
202  val += unit_;
203  for (auto j = perIndex; j < percentages.size() - 1; ++j) {
204  if (count * 100ul >= percentages[j] * sampleSize_) {
205  p[j] = min(val, worst_);
206  perIndex++;
207  } else {
208  break;
209  }
210  }
211  }
212  }
213 
214  return p;
215  }
216 
217  void display(ostream& os
218  , vector<float> percentages = {0, 1, 10, 50, 90, 99, 100}) const {
219  StatHistogramBase::display(os, *this, sampleSize_, percentages);
220  }
221 
222  friend
223  ostream& operator << (ostream& os, StatHistogram const& hist) {
224  hist.display(os);
225  return os;
226  }
227 
228 private:
229  T thresholdMin_;
230  T thresholdMax_;
231  T best_;
232  T worst_;
233  size_t sampleSize_;
234  using Buckets = vector<size_t>;
235  T unit_;
236  Buckets buckets_;
237 };
238 
239 } //stathistogram_detail
240 
241 template <typename T, bool DETAILED = true>
243 }}
244 
245 
Definition: TypedString.hpp:74
collect sample values and keep histogram for top percentages
Definition: StatHistogram.hpp:40
Definition: Base.hpp:12