Chi-Tech
cdfsampler.cc
Go to the documentation of this file.
1#include <math/chi_math.h>
2
3#include "cdfsampler.h"
4
5#include "chi_runtime.h"
6#include "chi_log.h"
7
8#include <unistd.h>
9
10//###################################################################
11/**Constructor for a sub interval*/
13 int ibin, int fbin,
14 std::vector<double> &in_cdf,
15 int subdiv_factor,
16 int final_res,
17 bool inhibit) :
18 ref_cdf(in_cdf)
19{
20 inhibited = inhibit;
21 cbin_i = ibin;
22 cbin_f = fbin;
23
24 if (!inhibited)
25 {
26 size_t cdf_size = cbin_f - cbin_i + 1;
27 size_t intvl_size = ceil(cdf_size/(double)subdiv_factor);
28
29 if (intvl_size < final_res)
30 {
31 sub_intvls.push_back(new SubIntvl(offset+std::string(" "),
32 ibin,fbin,
33 ref_cdf,
34 subdiv_factor,
35 final_res,
36 true));
37 }
38 else
39 {
40 sub_intvls.resize(subdiv_factor);
41 for (int i=0; i<subdiv_factor; i++)
42 {
43 int beg = ibin + i*intvl_size;
44 int end = ibin + (i+1)*intvl_size-1;
45
46 if (i == (subdiv_factor-1))
47 end = fbin;
48
49// chi::log.Log()
50// << offset
51// << "Sub-interval " << beg
52// << " " << end;
53
54 sub_intvls[i] = new SubIntvl(offset+std::string(" "),
55 beg,end,
56 ref_cdf,
57 subdiv_factor,
58 final_res);
59
60 }
61 }
62 }
63
64
65}
66
67//###################################################################
68/**Default constructor.*/
69chi_math::CDFSampler::CDFSampler(std::vector<double> &in_cdf,
70 int subdiv_factor,
71 int final_res) :
72 ref_cdf_(in_cdf)
73{
74 //=================================== Setting sub-division factor
75 if (subdiv_factor >= 1)
76 this->subdiv_factor_ = subdiv_factor;
77 else
78 {
79 if (in_cdf.size() <= 10)
80 this->subdiv_factor_ = 1;
81 else if (in_cdf.size() <= 10000)
82 this->subdiv_factor_ = 10;
83 else
84 this->subdiv_factor_ = 10; //sqrt(in_cdf.size());
85 }
86
87 //=================================== Setting final resolution
88 if (final_res >= 3)
89 this->final_res_ = final_res;
90 else
91 {
92 this->final_res_ = 100;
93 }
94
95// chi::log.Log()
96// << "Factors: " << this->subdiv_factor
97// << " " << this->final_res;
98
99 //=================================== Sub-dividing the interval
100 size_t cdf_size = in_cdf.size();
101 size_t intvl_size = ceil(cdf_size/(double)this->subdiv_factor_);
102
103 if (intvl_size < this->final_res_)
104 sub_intvls_.push_back(new SubIntvl(std::string(" "), 0, cdf_size - 1,
105 ref_cdf_,
106 this->subdiv_factor_,
107 this->final_res_,
108 true));
109 else
110 {
111 sub_intvls_.resize(this->subdiv_factor_);
112 for (int i=0; i<this->subdiv_factor_; i++)
113 {
114 int beg = i*intvl_size;
115 int end = (i+1)*intvl_size-1;
116
117 if (i == (this->subdiv_factor_ - 1))
118 end = cdf_size-1;
119
120// chi::log.Log()
121// << "Sub-interval " << beg
122// << " " << end;
123
124 sub_intvls_[i] = new SubIntvl(std::string(" "), beg, end,
125 ref_cdf_,
126 this->subdiv_factor_,
127 this->final_res_);
128 }
129 }
130
131}
132
133//###################################################################
134/**Initiates the sampling process.*/
136{
137 int ret_val=-1;
138 int cdf_size = ref_cdf_.size();
139
140 //============================================= Check bracket lo and hi
141 if (x <= ref_cdf_[0])
142 ret_val = 0;
143 else if (x >= ref_cdf_[cdf_size - 1])
144 ret_val = cdf_size-1;
145 //============================================= Check internal
146 else
147 {
148 std::pair<int,int> range(0,cdf_size-1);
149
150 //================================= Sample sub-intvls for range
151 int num_sub_intvls = sub_intvls_.size();
152 for (int s=0; s<num_sub_intvls; s++)
153 {
154 if (sub_intvls_[s]->Sample(x, range))
155 break;
156 }
157
158 //================================= Sample range
159// chi::log.Log()
160// << "Sampling " << x
161// << " in range " << range.first << "-" << range.second;
162 for (int k=range.first; k<=range.second; k++)
163 {
164 if (k==0)
165 {
166 if (x < ref_cdf_[k])
167 {
168 ret_val = k;
169 break;
170 }
171 }
172 else if ((x >= ref_cdf_[k - 1]) and (x < ref_cdf_[k]))
173 {
174 ret_val = k;
175 break;
176 }
177 }//for k
178
179 }//if internal
180
181 if (ret_val < 0)
182 {
184 << "chi_math::CDFSampler::Sample. Error in CDF sampling routine. "
185 << "A bin was not found.";
186 Chi::Exit(EXIT_FAILURE);
187 }
188
189 return ret_val;
190}
191
192//###################################################################
193/**Sampling a sub-interval.*/
195 std::pair<int, int> &range)
196{
197 //======================================== If this was an inhibited intvl
198 if (inhibited)
199 {
200 if (cbin_i == 0)
201 {
202 if (x < ref_cdf[cbin_i])
203 {
204 range.first = cbin_i;
205 range.second = cbin_f;
206
207 return true;
208 }
209 }
210
211 if ((x >= ref_cdf[cbin_i-1]) and (x < ref_cdf[cbin_f]))
212 {
213 range.first = cbin_i;
214 range.second = cbin_f;
215
216 return true;
217 }
218
219 }
220 //======================================== If not inhibited
221 // sample sub-intvls
222 else
223 {
224 int num_sub_intvls = sub_intvls.size();
225 for (int s=0; s<num_sub_intvls; s++)
226 {
227 if (sub_intvls[s]->Sample(x,range))
228 return true;
229 }
230 }
231
232 return false;
233}
234
235
236
237
238
239//###################################################################
240/**Sample a Cumulative Distribution Function (CDF) given a probability.
241 *
242 * The supplied vector should contain the upper bin boundary for each
243 * bin and will return the bin associated with the bin that brackets
244 * the supplied probability.
245 *
246 * Example:
247 * Suppose we sample bins 0-9. Suppose also that the probalities for each
248 * bin is as follows:
249 * - 0.1 bin 0
250 * - 0.1 bin 1
251 * - 0.5 bin 5
252 * - 0.3 bin 8
253 *
254 * The CDF for this probability distribution will look like this
255 * - bin 0 = 0.1
256 * - bin 1 = 0.2
257 * - bin 2 = 0.2
258 * - bin 3 = 0.2
259 * - bin 4 = 0.2
260 * - bin 5 = 0.7
261 * - bin 6 = 0.7
262 * - bin 7 = 0.7
263 * - bin 8 = 1.0
264 * - bin 9 = 1.0
265 *
266 * Supplying a random number between 0 and 1 should indicate sampling one
267 * of the bins 0,1,5 or 8. The most inefficient way to do this is to
268 * linearly loop through the cdf and check \f$ cdf_{i-1} \ge \theta < cdf_i \f$.
269 * An optimized version of this sampling would be to perform a recursive
270 * block search which starts with a course view of the cdf and then gradually
271 * refines the view until the final linear search can be performed.*/
272int chi_math::SampleCDF(double x, std::vector<double> cdf_bin)
273{
274 size_t fine_limit = 5;
275 size_t cdf_size = cdf_bin.size();
276
277 size_t lookup_i = 0;
278 size_t lookup_f = cdf_size-1;
279
280 //======================================== Initial coursest level
281 size_t indA = 0;
282 size_t indB = std::ceil(cdf_size/2.0)-1;
283 size_t indC = cdf_size-1;
284
285 bool refine_limit_reached = false;
286
287 if ((indB-indA) <= fine_limit)
288 refine_limit_reached = true;
289
290// chi::log.Log() << "************ new prob x=" << x
291// << " " << indA << " " << indB << " " << indC;
292// refine_limit_reached = true;
293 //======================================== Recursively refine
294 int refine_count = 0;
295 while (!refine_limit_reached)
296 {
297 int intvl_size = 0;
298 if (x <= cdf_bin[indA])
299 refine_limit_reached = true;
300 else if (x > cdf_bin[indC])
301 refine_limit_reached = true;
302 else if ((x >= cdf_bin[indA]) and (x < cdf_bin[indB]))
303 {
304 intvl_size = indB-indA+1;
305
306 indC = indB;
307 indB = indA + std::ceil(intvl_size/2.0)-1;
308 }
309 else
310 {
311 intvl_size = indC-indB+1;
312
313 indA = indB;
314 indB = indA + std::ceil(intvl_size/2.0)-1;
315 }
316
317// chi::log.Log()
318// << refine_count << " "
319// << "newA=" << indA
320// << "newB=" << indB
321// << "newC=" << indC;
322 refine_count++;
323
324 if (intvl_size <= fine_limit)
325 {
326 refine_limit_reached = true;
327 lookup_i = indA;
328 lookup_f = indC;
329 }
330 }
331
332// chi::log.Log()
333// << "Final lookup range=" << lookup_i << "-" << lookup_f << " " << x;
334
335
336 //======================================== Perform final lookup
337 int ret_val = -1;
338
339 if (x <= cdf_bin[0])
340 ret_val = 0;
341 else if (x >= cdf_bin[cdf_size-1])
342 ret_val = cdf_size-1;
343 else
344 {
345 for (int k=lookup_i; k<=lookup_f; k++)
346 {
347 if (k==0)
348 {
349 if (x < cdf_bin[k])
350 {
351 ret_val = k;
352 break;
353 }
354 }
355 else if ((x >= cdf_bin[k-1]) and (x < cdf_bin[k]))
356 {
357 ret_val = k;
358 break;
359 }
360 }//for k
361 }
362
363
364
365 if (ret_val < 0)
366 {
368 << "chi_math::SampleCDF. Error in CDF sampling routine. "
369 << "A bin was not found."
370 << " i=" << lookup_i
371 << " f=" << lookup_f
372 << " x=" << x;
373 Chi::Exit(EXIT_FAILURE);
374 }
375
376
377// chi::log.Log() << ret_val;
378// usleep(100000);
379
380 return ret_val;
381}
static void Exit(int error_code)
Definition: chi_runtime.cc:342
static chi::ChiLog & log
Definition: chi_runtime.h:81
LogStream LogAllError()
Definition: chi_log.h:239
CDFSampler(std::vector< double > &in_cdf, int subdiv_factor=AUTO_SUBDIV, int final_res=AUTO_FINERES)
Definition: cdfsampler.cc:69
std::vector< SubIntvl * > sub_intvls_
Definition: cdfsampler.h:37
std::vector< double > & ref_cdf_
Definition: cdfsampler.h:36
int Sample(double x)
Definition: cdfsampler.cc:135
int SampleCDF(double x, std::vector< double > cdf_bin)
Definition: cdfsampler.cc:272
bool Sample(double x, std::pair< int, int > &range)
Definition: cdfsampler.cc:194
std::vector< double > & ref_cdf
Definition: cdfsampler.h:53
std::vector< SubIntvl * > sub_intvls
Definition: cdfsampler.h:56
SubIntvl(std::string offset, int ibin, int fbin, std::vector< double > &in_cdf, int subdiv_factor=10, int final_res=10, bool inhibit=false)
Definition: cdfsampler.cc:12