hmbdc
simplify-high-performance-messaging-programming
StatHistogram.hpp
1 #include "hmbdc/Copyright.hpp"
2 #pragma once
3 
4 #include "hmbdc/Exception.hpp"
5 #include <map>
6 #include <vector>
7 #include <utility>
8 #include <limits>
9 #include <algorithm>
10 #include <stdexcept>
11 #include <ext/mt_allocator.h>
12 
13 namespace hmbdc { namespace numeric {
14 using namespace std;
15 
/**
 * @brief print the values a histogram reports for a set of percentages
 * @details output has the form "<pct>%=<value>," repeated once per percentage,
 * followed by "sample=<sampleSize>"; no trailing newline is written
 *
 * @tparam Hist a type providing report(percentages) whose result is indexable
 * @param os stream to write to
 * @param hist histogram to query
 * @param sampleSize number printed after "sample="
 * @param percentages percentages handed to hist.report() and echoed in the output
 */
template <typename Hist>
static
void display(ostream& os, Hist const& hist, size_t sampleSize
    , vector<float> percentages = {0, 1, 10, 50, 90, 99, 100}) {
    auto values = hist.report(percentages);
    size_t index = 0;
    for (auto const percent : percentages) {
        os << percent << "%=" << values[index] << ',';
        ++index;
    }
    os << "sample=" << sampleSize;
}
27 
28 };
29 
30 /**
31  * @brief collect sample values and keep histogram for top percentages
32  * @details top values are the smaller values
33  *
34  * @tparam T value type that supports less than operator
35  * @tparam DETAILED if false, the samples are kept in coarser grain and the class's
36  * speed performance is better
37  */
38 template <typename T, bool DETAILED = true>
40 : private StatHistogramBase {
42  : threshold_(numeric_limits<T>::max())
43  , worst_(numeric_limits<T>::min())
44  , sampleSize_(0ul)
45  {}
46 
47  explicit StatHistogram(T threshold)
48  : threshold_(threshold)
49  , worst_(numeric_limits<T>::min())
50  , sampleSize_(0ul){}
51 
52  bool add(T sample) {
53  ++sampleSize_;
54  if (sample < threshold_)
55  buckets_[sample]++;
56  else
57  buckets_[threshold_]++;
58 
59  if (sample > worst_) {
60  worst_ = sample;
61  return true;
62  }
63  return false;
64  }
65 
66  size_t sampleSize() const {
67  return sampleSize_;
68  }
69 
70  StatHistogram<T>& operator += (StatHistogram<T> const& other) {
71  if (threshold_ == other.threshold_) {
72  for (auto const& v : other.buckets_) {
73  buckets_[v.first] += v.second;
74  }
75  worst_ = max(worst_, other.worst_);
76  } else {
77  HMBDC_THROW(runtime_error, "histogram collection parameters mismatch - failed");
78  }
79  sampleSize_ += other.sampleSize_;
80  return *this;
81  }
82 
83  vector<T> report(vector<float> percentages
84  = {0, 1, 10, 50, 90, 99, 100}) const {
85  vector<T> p(percentages.size());
86  if (!buckets_.empty() && !p.empty()) {
87  *p.begin() = buckets_.begin()->first;
88  *p.rbegin() = worst_;
89  }
90  size_t count = 0;
91  size_t perIndex = 1;
92  for(auto& i : buckets_) {
93  count += i.second;
94  for (auto j = perIndex; j < percentages.size() - 1; ++j) {
95  if (count * 100ul >= percentages[j] * sampleSize_) {
96  p[j] = i.first;
97  perIndex++;
98  } else {
99  break;
100  }
101  }
102  }
103 
104  return p;
105  }
106 
107  void display(ostream& os
108  , vector<float> percentages = {0, 1, 10, 50, 90, 99, 100}) const {
109  StatHistogramBase::display(os, *this, sampleSize_, percentages);
110  }
111 
112  friend
113  ostream& operator << (ostream& os, StatHistogram const& hist) {
114  hist.display(os);
115  return os;
116  }
117 private:
118 
119  using Buckets = map<T, size_t, less<T>
120  , __gnu_cxx::__mt_alloc<pair<const T, size_t>>>;
121  Buckets buckets_;
122  T threshold_;
123  T worst_;
124  size_t sampleSize_;
125 };
126 
127 template <typename T>
128 struct StatHistogram<T, false>
129 : private StatHistogramBase {
131  T thresholdMin
132  , T thresholdMax
133  , size_t bucketCount = 1000u)
134  : thresholdMin_(thresholdMin)
135  , thresholdMax_(thresholdMax)
136  , best_(numeric_limits<T>::max())
137  , worst_(numeric_limits<T>::min())
138  , sampleSize_(0ul)
139  , unit_((thresholdMax - thresholdMin) / bucketCount)
140  , buckets_(bucketCount + 1) {
141  if (thresholdMax <= thresholdMin) {
142  HMBDC_THROW(runtime_error, "thresholdMax <= thresholdMin");
143  }
144  }
145 
146  int add(T sample) {
147  ++sampleSize_;
148 
149  if (sample < thresholdMin_)
150  buckets_[0]++;
151  else if (sample < thresholdMax_)
152  buckets_[(sample - thresholdMin_) / unit_]++;
153  else
154  buckets_[buckets_.size() - 1]++;
155 
156  auto res = 0;
157  if (sample < best_) {
158  best_ = sample;
159  res = -1;
160  }
161  if (sample > worst_) {
162  worst_ = sample;
163  res = 1;
164  }
165 
166  return res;
167  }
168 
169  size_t sampleSize() const {
170  return sampleSize_;
171  }
172 
173  StatHistogram<T, false>& operator += (StatHistogram<T, false> const& other) {
174  if (thresholdMax_ == other.thresholdMax_ &&
175  thresholdMin_ == other.thresholdMin_ &&
176  buckets_.size() == other.buckets_.size()) {
177  for (auto i = 0u; i < buckets_.size(); ++i) {
178  buckets_[i] += other.buckets_[i];
179  }
180  worst_ = max(worst_, other.worst_);
181  best_ = min(best_, other.best_);
182  sampleSize_ += other.sampleSize_;
183  } else {
184  HMBDC_THROW(runtime_error, "thresholds or bucketCount mismatch - failed");
185  }
186  return *this;
187  }
188 
189  vector<T> report(vector<float> percentages
190  = {0, 1, 10, 50, 90, 99, 100}) const {
191 
192  vector<T> p(percentages.size());
193  if (sampleSize_ && !p.empty()) {
194  *p.begin() = best_;
195  *p.rbegin() = worst_;
196  size_t count = 0;
197  auto val = thresholdMin_;
198  size_t perIndex = 1;
199  for(auto& i : buckets_) {
200  count += i;
201  val += unit_;
202  for (auto j = perIndex; j < percentages.size() - 1; ++j) {
203  if (count * 100ul >= percentages[j] * sampleSize_) {
204  p[j] = min(val, worst_);
205  perIndex++;
206  } else {
207  break;
208  }
209  }
210  }
211  }
212 
213  return p;
214  }
215 
216  void display(ostream& os
217  , vector<float> percentages = {0, 1, 10, 50, 90, 99, 100}) const {
218  StatHistogramBase::display(os, *this, sampleSize_, percentages);
219  }
220 
221  friend
222  ostream& operator << (ostream& os, StatHistogram const& hist) {
223  hist.display(os);
224  return os;
225  }
226 
227 private:
228  T thresholdMin_;
229  T thresholdMax_;
230  T best_;
231  T worst_;
232  size_t sampleSize_;
233  using Buckets = vector<size_t>;
234  T unit_;
235  Buckets buckets_;
236 };
237 }}
238 
239 
Definition: TypedString.hpp:74
Definition: StatHistogram.hpp:128
collect sample values and keep histogram for top percentages
Definition: StatHistogram.hpp:39
Definition: StatHistogram.hpp:16
Definition: Client.hpp:11