├── .gitignore
├── README.rst
├── LICENSE.rst
└── bb.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | 
3 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | Bayesian Blocks
 2 | =======================================
 3 | 
 4 | This program computes Bayesian Blocks, including a visualisation, 
 5 | from a sequence of event times.
 6 | 
 7 | Usage
 8 | ------
 9 | 
10 | $ ./bb.py mydatafile
11 | 
12 | * Will create a plot "mydatafilebblocks.pdf"
13 | * And a data file "mydatafilebblocks.txt", containing three columns:
14 | 
15 |   * start time
16 |   * stop time
17 |   * rate
18 | 
19 | License
20 | --------
21 | 
The implementation is adapted from astroML, under the BSD 3-clause license.
23 | 
24 | 


--------------------------------------------------------------------------------
/LICENSE.rst:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2012-2013, Jacob Vanderplas
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 5 | 
 6 |     Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 7 |     Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 8 | 
 9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
10 | 
11 | 


--------------------------------------------------------------------------------
/bb.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | """
  3 | Bayesian Block implementation
  4 | =============================
  5 | 
  6 | Dynamic programming algorithm for finding the optimal adaptive-width histogram.
  7 | 
  8 | Based on Scargle et al 2012 [1]_
  9 | 
 10 | References
 11 | ----------
 12 | .. [1] http://adsabs.harvard.edu/abs/2012arXiv1207.5578S
 13 | """
 14 | import numpy as np
 15 | # TODO: implement other fitness functions from appendix B of Scargle 2012
 16 | 
 17 | 
 18 | class FitnessFunc(object):
 19 |     """Base class for fitness functions
 20 | 
 21 |     Each fitness function class has the following:
 22 |     - fitness(...) : compute fitness function.
 23 |        Arguments accepted by fitness must be among [T_k, N_k, a_k, b_k, c_k]
 24 |     - prior(N, Ntot) : compute prior on N given a total number of points Ntot
 25 |     """
 26 |     def __init__(self, p0=0.05, gamma=None):
 27 |         self.p0 = p0
 28 |         self.gamma = gamma
 29 | 
 30 |     def validate_input(self, t, x, sigma):
 31 |         """Check that input is valid"""
 32 |         pass
 33 | 
 34 |     def fitness(**kwargs):
 35 |         raise NotImplementedError()
 36 | 
 37 |     def prior(self, N, Ntot):
 38 |         if self.gamma is None:
 39 |             return self.p0_prior(N, Ntot)
 40 |         else:
 41 |             return self.gamma_prior(N, Ntot)
 42 | 
 43 |     def p0_prior(self, N, Ntot):
 44 |         # eq. 21 from Scargle 2012
 45 |         return 4 - np.log(73.53 * self.p0 * (N ** -0.478))
 46 | 
 47 |     def gamma_prior(self, N, Ntot):
 48 |         """Basic prior, parametrized by gamma (eq. 3 in Scargle 2012)"""
 49 |         if self.gamma == 1:
 50 |             return 0
 51 |         else:
 52 |             return (np.log(1 - self.gamma)
 53 |                     - np.log(1 - self.gamma ** (Ntot + 1))
 54 |                     + N * np.log(self.gamma))
 55 | 
 56 |     # the fitness_args property will return the list of arguments accepted by
 57 |     # the method fitness().  This allows more efficient computation below.
 58 |     @property
 59 |     def args(self):
 60 |         try:
 61 |             # Python 2
 62 |             return self.fitness.func_code.co_varnames[1:]
 63 |         except AttributeError:
 64 |             return self.fitness.__code__.co_varnames[1:]
 65 | 
 66 | 
 67 | class Events(FitnessFunc):
 68 |     """Fitness for binned or unbinned events
 69 | 
 70 |     Parameters
 71 |     ----------
 72 |     p0 : float
 73 |         False alarm probability, used to compute the prior on N
 74 |         (see eq. 21 of Scargle 2012).  Default prior is for p0 = 0.
 75 |     gamma : float or None
 76 |         If specified, then use this gamma to compute the general prior form,
 77 |         p ~ gamma^N.  If gamma is specified, p0 is ignored.
 78 |     """
 79 |     def fitness(self, N_k, T_k):
 80 |         # eq. 19 from Scargle 2012
 81 |         return N_k * (np.log(N_k) - np.log(T_k))
 82 | 
 83 |     def prior(self, N, Ntot):
 84 |         if self.gamma is not None:
 85 |             return self.gamma_prior(N, Ntot)
 86 |         else:
 87 |             # eq. 21 from Scargle 2012
 88 |             return 4 - np.log(73.53 * self.p0 * (N ** -0.478))
 89 | 
 90 | 
 91 | class RegularEvents(FitnessFunc):
 92 |     """Fitness for regular events
 93 | 
 94 |     This is for data which has a fundamental "tick" length, so that all
 95 |     measured values are multiples of this tick length.  In each tick, there
 96 |     are either zero or one counts.
 97 | 
 98 |     Parameters
 99 |     ----------
100 |     dt : float
101 |         tick rate for data
102 |     gamma : float
103 |         specifies the prior on the number of bins: p ~ gamma^N
104 |     """
105 |     def __init__(self, dt, p0=0.05, gamma=None):
106 |         self.dt = dt
107 |         self.p0 = p0
108 |         self.gamma = gamma
109 | 
110 |     def validate_input(self, t, x, sigma):
111 |         unique_x = np.unique(x)
112 |         if list(unique_x) not in ([0], [1], [0, 1]):
113 |             raise ValueError("Regular events must have only 0 and 1 in x")
114 | 
115 |     def fitness(self, T_k, N_k):
116 |         # Eq. 75 of Scargle 2012
117 |         M_k = T_k / self.dt
118 |         N_over_M = N_k * 1. / M_k
119 | 
120 |         eps = 1E-8
121 |         if np.any(N_over_M > 1 + eps):
122 |             import warnings
123 |             warnings.warn('regular events: N/M > 1.  '
124 |                           'Is the time step correct?')
125 | 
126 |         one_m_NM = 1 - N_over_M
127 |         N_over_M[N_over_M <= 0] = 1
128 |         one_m_NM[one_m_NM <= 0] = 1
129 | 
130 |         return N_k * np.log(N_over_M) + (M_k - N_k) * np.log(one_m_NM)
131 | 
132 | 
class PointMeasures(FitnessFunc):
    """Fitness function for point measurements.

    Parameters
    ----------
    p0 : float or None
        if given (and gamma is not), the p0-based prior is used
    gamma : float or None
        specifies the prior on the number of bins: p ~ gamma^N.
        If neither gamma nor p0 is specified, a prior based on simulations
        is used (see sec 3.3 of Scargle 2012)
    """
    def __init__(self, p0=None, gamma=None):
        self.p0, self.gamma = p0, gamma

    def fitness(self, a_k, b_k):
        # eq. 41 from Scargle 2012
        return b_k * b_k / (4 * a_k)

    def prior(self, N, Ntot):
        # precedence: explicit gamma, then explicit p0, then the default
        if self.gamma is not None:
            return self.gamma_prior(N, Ntot)
        if self.p0 is not None:
            return self.p0_prior(N, Ntot)
        # eq. at end of sec 3.3 in Scargle 2012
        return 1.32 + 0.577 * np.log10(N)
159 | 
160 | 
def bayesian_blocks(t, x=None, sigma=None,
                    fitness='events', **kwargs):
    """Bayesian Blocks Implementation

    This is a flexible implementation of the Bayesian Blocks algorithm
    described in Scargle 2012 [1]_

    Parameters
    ----------
    t : array_like
        data times (one dimensional, length N)
    x : array_like (optional)
        data values
    sigma : array_like or float (optional)
        data errors
    fitness : str or object
        the fitness function to use.
        If a string, the following options are supported:

        - 'events' : binned or unbinned event data
            extra arguments are `p0`, which gives the false alarm probability
            to compute the prior, or `gamma` which gives the slope of the
            prior on the number of bins.
        - 'regular_events' : non-overlapping events measured at multiples
            of a fundamental tick rate, `dt`, which must be specified as an
            additional argument.  The prior can be specified through `gamma`,
            which gives the slope of the prior on the number of bins.
        - 'measures' : fitness for a measured sequence with Gaussian errors
            The prior can be specified using `gamma`, which gives the slope
            of the prior on the number of bins.  If `gamma` is not specified,
            then a simulation-derived prior will be used.

        Alternatively, the fitness can be a user-specified object of
        type derived from the FitnessFunc class.  Any object providing
        `args`, `fitness`, `prior` and `validate_input` is accepted.

    Returns
    -------
    edges : ndarray
        array containing the edges of the optimal blocks (at least two
        values; one more than the number of blocks)

    Raises
    ------
    ValueError
        if `t` is empty or not one-dimensional, if `x`/`sigma` are
        inconsistent with `t` or with the chosen fitness, or if `fitness`
        is not understood.

    Examples
    --------
    Event data:

    >>> t = np.random.normal(size=100)
    >>> bins = bayesian_blocks(t, fitness='events', p0=0.01)

    Event data with repeats:

    >>> t = np.random.normal(size=100)
    >>> t[80:] = t[:20]
    >>> bins = bayesian_blocks(t, fitness='events', p0=0.01)

    Regular event data:

    >>> dt = 0.01
    >>> t = dt * np.arange(1000)
    >>> x = np.zeros(len(t))
    >>> x[np.random.randint(0, len(t), len(t) // 10)] = 1
    >>> bins = bayesian_blocks(t, x, fitness='regular_events', dt=dt, gamma=0.9)

    Measured point data with errors:

    >>> t = 100 * np.random.random(100)
    >>> x = np.exp(-0.5 * (t - 50) ** 2)
    >>> sigma = 0.1
    >>> x_obs = np.random.normal(x, sigma)
    >>> bins = bayesian_blocks(t, x_obs, sigma, fitness='measures')

    References
    ----------
    .. [1] Scargle, J `et al.` (2012)
           http://adsabs.harvard.edu/abs/2012arXiv1207.5578S

    See Also
    --------
    astroML.plotting.hist : histogram plotting function which can make use
                            of bayesian blocks.
    """
    # validate array input
    t = np.asarray(t, dtype=float)
    if t.ndim != 1:
        # an explicit error instead of an assert, which vanishes under -O
        raise ValueError("t must be one-dimensional")
    if t.size == 0:
        raise ValueError("t must contain at least one data point")
    if x is not None:
        x = np.asarray(x)
    if sigma is not None:
        sigma = np.asarray(sigma)

    # verify the fitness function
    if fitness == 'events':
        if x is not None and np.any(x % 1 > 0):
            raise ValueError("x must be integer counts for fitness='events'")
        fitfunc = Events(**kwargs)
    elif fitness == 'regular_events':
        if x is not None and (np.any(x % 1 > 0) or np.any(x > 1)):
            raise ValueError("x must be 0 or 1 for fitness='regular_events'")
        fitfunc = RegularEvents(**kwargs)
    elif fitness == 'measures':
        if x is None:
            raise ValueError("x must be specified for fitness='measures'")
        fitfunc = PointMeasures(**kwargs)
    else:
        if not (hasattr(fitness, 'args') and
                hasattr(fitness, 'fitness') and
                hasattr(fitness, 'prior')):
            raise ValueError("fitness not understood")
        fitfunc = fitness

    # find unique values of t (np.unique also sorts them)
    unq_t, unq_ind, unq_inv = np.unique(t, return_index=True,
                                        return_inverse=True)

    # if x is not specified, x will be counts at each time
    if x is None:
        if sigma is not None:
            raise ValueError("If sigma is specified, x must be specified")

        if len(unq_t) == len(t):
            x = np.ones_like(t)
        else:
            x = np.bincount(unq_inv)

        t = unq_t
        sigma = 1

    # if x is specified, then we need to sort t and x together
    else:
        if len(t) != len(x):
            raise ValueError("Size of t and x does not match")

        if len(unq_t) != len(t):
            raise ValueError("Repeated values in t not supported when "
                             "x is specified")

        # per-point errors must follow the same reordering as x, otherwise
        # they end up attached to the wrong measurements for unsorted t
        if sigma is not None and sigma.shape == t.shape:
            sigma = sigma[unq_ind]

        t = unq_t
        x = x[unq_ind]

    # verify the given sigma value
    N = t.size
    if sigma is not None:
        sigma = np.asarray(sigma)
        if sigma.shape not in [(), (1,), (N,)]:
            raise ValueError('sigma does not match the shape of x')
    else:
        sigma = 1

    # validate the input
    fitfunc.validate_input(t, x, sigma)

    # look up the inputs requested by the fitness function only once
    fit_args = fitfunc.args
    need_T = 'T_k' in fit_args
    need_N = 'N_k' in fit_args
    need_a = 'a_k' in fit_args
    need_b = 'b_k' in fit_args
    need_c = 'c_k' in fit_args

    # compute values needed for computation, below
    if need_a:
        ak_raw = np.ones_like(x) / sigma / sigma
    if need_b:
        bk_raw = x / sigma / sigma
    if need_c:
        ck_raw = x * x / sigma / sigma

    # create length-(N + 1) array of cell edges
    edges = np.concatenate([t[:1],
                            0.5 * (t[1:] + t[:-1]),
                            t[-1:]])
    block_length = t[-1] - edges

    # arrays to store the best configuration
    best = np.zeros(N, dtype=float)
    last = np.zeros(N, dtype=int)

    #-----------------------------------------------------------------
    # Start with first data cell; add one cell at each iteration
    #-----------------------------------------------------------------
    for R in range(N):
        # Compute fit_vec : fitness of putative last block (end at R)
        kwds = {}

        # T_k: width/duration of each block
        if need_T:
            kwds['T_k'] = block_length[:R + 1] - block_length[R + 1]

        # N_k: number of elements in each block
        if need_N:
            kwds['N_k'] = np.cumsum(x[:R + 1][::-1])[::-1]

        # a_k: eq. 31
        if need_a:
            kwds['a_k'] = 0.5 * np.cumsum(ak_raw[:R + 1][::-1])[::-1]

        # b_k: eq. 32
        if need_b:
            kwds['b_k'] = - np.cumsum(bk_raw[:R + 1][::-1])[::-1]

        # c_k: eq. 33
        if need_c:
            kwds['c_k'] = 0.5 * np.cumsum(ck_raw[:R + 1][::-1])[::-1]

        # evaluate fitness function
        fit_vec = fitfunc.fitness(**kwds)

        A_R = fit_vec - fitfunc.prior(R + 1, N)
        A_R[1:] += best[:R]

        i_max = np.argmax(A_R)
        last[R] = i_max
        best[R] = A_R[i_max]

    #-----------------------------------------------------------------
    # Now find changepoints by iteratively peeling off the last block
    #-----------------------------------------------------------------
    # Up to N + 1 change points can occur (every cell its own block), so
    # collect them in a list rather than a fixed length-N buffer, which
    # would wrap around and clobber the result in that case.
    change_points = [N]
    ind = N
    while ind > 0:
        # last[ind - 1] <= ind - 1, so ind strictly decreases down to 0
        ind = last[ind - 1]
        change_points.append(ind)
    change_points = np.array(change_points[::-1], dtype=int)

    return edges[change_points]
381 | 
def bbhist(x):
    """Compute a Bayesian Blocks histogram of the event times ``x``.

    Parameters
    ----------
    x : array_like
        event times, passed unchanged to ``bayesian_blocks``

    Returns
    -------
    edges : ndarray
        block edges as returned by ``bayesian_blocks``
    values : ndarray
        event rate of each block: number of events in the block divided
        by the block width
    """
    edges = bayesian_blocks(x)
    # np.histogram uses half-open bins [lo, hi) except for the last one,
    # which is closed, so events at the final edge are counted as well
    counts, _ = np.histogram(x, bins=edges)
    values = counts / np.diff(edges)
    return edges, values
388 | 
if __name__ == '__main__':
    # Command line use: ./bb.py <datafile>
    # Writes <datafile>bblocks.txt (start time, stop time, rate) and a plot
    # <datafile>bblocks.pdf.
    import sys
    if len(sys.argv) < 2:
        sys.exit("usage: %s <datafile>" % sys.argv[0])
    filename = sys.argv[1]
    data = np.loadtxt(filename)
    edges, values = bbhist(data)
    # build a real 2-D array: on Python 3 zip() is a lazy iterator that
    # np.savetxt cannot write
    table = np.column_stack([edges[:-1], edges[1:], values])
    np.savetxt(filename + 'bblocks.txt', table)
    import matplotlib.pyplot as plt
    # the bar positions are passed positionally because the keyword was
    # renamed from `left` to `x`; align='edge' makes each bar start at the
    # left edge of its block
    plt.bar(edges[:-1], values, width=np.diff(edges), align='edge')
    plt.savefig(filename + 'bblocks.pdf', bbox_inches='tight')
    plt.close()
399 | 	
400 | 
401 | 


--------------------------------------------------------------------------------