├── setup.cfg
├── pybursts
    ├── __init__.py
    ├── .DS_Store
    └── pybursts.py
├── .gitignore
├── CHANGELOG.md
├── setup.py
├── LICENSE.txt
└── README.md


/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | 


--------------------------------------------------------------------------------
/pybursts/__init__.py:
--------------------------------------------------------------------------------
"""pybursts package: exposes the Kleinberg burst-detection function."""
# An explicit relative import is required here. The previous
# ``from pybursts import *`` was an implicit relative import, which only
# resolves to the sibling module on Python 2; on Python 3 it targets the
# package itself, so ``pybursts.kleinberg`` was never bound.
from .pybursts import kleinberg

# The submodule name is kept for callers that used ``pybursts.pybursts``;
# importing from it above also binds it as an attribute of the package.
__all__ = ["pybursts", "kleinberg"]
3 | 


--------------------------------------------------------------------------------
/pybursts/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/romain-fontugne/pybursts/HEAD/pybursts/.DS_Store


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__/
 2 | *.py[cod]
 3 | 
 4 | build/
 5 | develop-eggs/
 6 | dist/
 7 | eggs/
 8 | lib/
 9 | lib64/
10 | parts/
11 | sdist/
12 | var/
13 | *.egg-info/
14 | .installed.cfg
15 | *.egg
16 | 
17 | pip-log.txt
18 | pip-delete-this-directory.txt
19 | 
20 | MANIFEST
21 | .DS_Store
22 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ### 0.1.1
2 | * Update readme [view commit](http://github.com/rpoddighe/pybursts/commit/92e695f30ab8faf7375d81030f1124b73b903fa5)
3 | * Tidy up module imports [view commit](http://github.com/rpoddighe/pybursts/commit/c665e5ffee63d3087eae99bc6781773ea4d64aef)
4 | * Add .gitignore [view commit](http://github.com/rpoddighe/pybursts/commit/d8ed0480afe89193e4f56c008a7edf4922571855)
5 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from distutils.core import setup
 2 | 
 3 | setup(
 4 | 	name = 'pybursts',
 5 | 	packages = ['pybursts'], # this must be the same as the name above
 6 | 	version = '0.1.1',
 7 | 	description = 'A Python port from the \'burst detection\' algorithm by Kleinberg, originally implemented in R',
 8 | 	author = 'Renzo Poddighe',
 9 | 	author_email = 'poddighe.renzo@gmail.com',
10 | 	url = 'https://github.com/rpoddighe/pybursts', # use the URL to the github repo
11 | 	download_url = 'https://github.com/rpoddighe/pybursts/tarball/0.1.1', # I'll explain this in a second
12 | 	keywords = ['burst detection', 'data mining', 'text mining'], # arbitrary keywords
13 | 	classifiers = [],
14 | )
15 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2014 Renzo Poddighe
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Pybursts
 2 | 
 3 | ## Changelog
 4 | 
 5 | ### 0.1.2
 6 | * Update readme [view commit](http://github.com/rpoddighe/pybursts/commit/92e695f30ab8faf7375d81030f1124b73b903fa5)
 7 | * Tidy up module imports [view commit](http://github.com/rpoddighe/pybursts/commit/c665e5ffee63d3087eae99bc6781773ea4d64aef)
 8 | * Add .gitignore [view commit](http://github.com/rpoddighe/pybursts/commit/d8ed0480afe89193e4f56c008a7edf4922571855)
 9 | 
10 | ## Description
11 | This is a Python port of the [R implementation](http://cran.r-project.org/web/packages/bursts/index.html) of Kleinberg's algorithm (described in ['Bursty and Hierarchical Structure in Streams'](http://www.cs.cornell.edu/home/kleinber/bhs.pdf)). The algorithm models activity bursts in a time series as an infinite hidden Markov model.
12 | 
13 | ## Installation
14 | 
15 | ```shell
16 | pip install pybursts
17 | ```
18 | 
19 | or
20 | 
21 | ```shell
22 | easy_install pybursts
23 | ```
24 | 
25 | ## Dependencies
26 | * [NumPy](http://www.numpy.org/)
27 | 
28 | 
29 | ## Usage
30 | ```python
31 | 
32 | import pybursts
33 | 
34 | offsets = [4, 17, 23, 27, 33, 35, 37, 76, 77, 82, 84, 88, 90, 92]
35 | print(pybursts.kleinberg(offsets, s=2, gamma=0.1))
36 | 
37 | ```
38 | 
39 | ## Input
40 | 
41 | * *offsets*: a list of time offsets (numeric)
42 | * *s*: the base of the exponential distribution that is used for modeling the event frequencies
43 | * *gamma*: coefficient for the transition costs between states
44 | 
45 | This version also allows setting n and T in order to obtain a fixed cost function (independent of the given offsets), which is needed if you want to compare bursts across different inputs.
46 | 
47 | ## Output
48 | 
49 | An array of intervals in which a burst of activity was detected. The first column denotes the level within the hierarchy; the second column the start value of the interval; the third column the end value. The first row is always the top-level activity (the complete interval from start to finish).
50 | 
51 | ## References
52 | 
53 | * [CRAN - Package bursts](http://cran.r-project.org/web/packages/bursts/index.html)
54 | * [J. Kleinberg. Bursty and Hierarchical Structure in Streams. Proc. 8th ACM SIGKDD Intl. Conf. on Knowledge Discovery and Data Mining, 2002.](http://www.cs.cornell.edu/home/kleinber/bhs.pdf)
55 | 


--------------------------------------------------------------------------------
/pybursts/pybursts.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | import numpy as np
  3 | import math
  4 | 
  5 | def kleinberg(offsets, s=2, gamma=1, n=None, T=None, k=None):
  6 |     """Kleinberg's algorithm (described in 'Bursty and Hierarchical Structure 
  7 |     in Streams'). The algorithm models activity bursts in a time series as an 
  8 |     infinite hidden Markov model.
  9 |     
 10 |     Input:
 11 |         offsets: a list of time offsets (numeric)
 12 |         s: the base of the exponential distribution that is used for modeling 
 13 |         the event frequencies
 14 |         gamma: coefficient for the transition costs between states
 15 |         n, T: to have a fixed cost function (not dependent of the given offsets). 
 16 |         Which is needed if you want to compare bursts for different inputs.
 17 |         k: maximum burst level"""
 18 | 
 19 |     if s <= 1:
 20 |         raise ValueError("s must be greater than 1!")
 21 |     if gamma <= 0:
 22 |         raise ValueError("gamma must be positive!")
 23 |     if not n is None and n <= 0:
 24 |         raise ValueError("n must be positive!")
 25 |     if not T is None and T <= 0:
 26 |         raise ValueError("T must be positive!")
 27 |     if len(offsets) < 1:
 28 |         raise ValueError("offsets must be non-empty!")
 29 | 
 30 |     offsets = np.array(offsets, dtype=object)
 31 |     
 32 |     if offsets.size == 1:
 33 |         bursts = np.array([0, offsets[0], offsets[0]], ndmin=2, dtype=object)
 34 |         return bursts
 35 | 
 36 |     offsets = np.sort(offsets)
 37 |     gaps = np.diff(offsets)
 38 | 
 39 |     if not np.all(gaps):
 40 |         raise ValueError("Input cannot contain events with zero time between!")
 41 | 
 42 |     if T is None:
 43 |         T = np.sum(gaps)
 44 | 
 45 |     if n is None:
 46 |         n = np.size(gaps)
 47 | 
 48 |     g_hat = T / n
 49 |     gamma_log_n = gamma * math.log(n)
 50 | 
 51 |     if k is None:
 52 |         k = int(math.ceil(float(1 + math.log(T, s) + math.log(1 / np.amin(gaps), s))))
 53 | 
 54 |     def tau(i, j):
 55 |         if i >= j:
 56 |             return 0
 57 |         else:
 58 |             return (j - i) * gamma_log_n
 59 |     
 60 |     alpha_function = np.vectorize(lambda x: s ** x / g_hat)
 61 |     alpha = alpha_function(np.arange(k))
 62 | 
 63 |     def f(j, x):
 64 |         return alpha[j] * math.exp(-alpha[j] * x)
 65 | 
 66 |     C = np.repeat(float("inf"), k)
 67 |     C[0] = 0
 68 | 
 69 |     q = np.empty((k, 0))
 70 |     for t in range(np.size(gaps)):
 71 |         C_prime = np.repeat(float("inf"), k)
 72 |         q_prime = np.empty((k, t+1))
 73 |         q_prime.fill(np.nan)
 74 | 
 75 |         for j in range(k):
 76 |             cost_function = np.vectorize(lambda x: C[x] + tau(x, j))
 77 |             cost = cost_function(np.arange(0, k))
 78 | 
 79 |             el = np.argmin(cost)
 80 | 
 81 |             if f(j, gaps[t]) > 0:
 82 |                 C_prime[j] = cost[el] - math.log(f(j, gaps[t]))
 83 |             
 84 |             if t > 0:
 85 |                 q_prime[j,:t] = q[el,:]
 86 | 
 87 |             q_prime[j, t] = j + 1
 88 | 
 89 |         C = C_prime
 90 |         q = q_prime
 91 | 
 92 |     j = np.argmin(C)
 93 |     q = q[j,:]
 94 | 
 95 |     prev_q = 0
 96 |     
 97 |     N = 0
 98 |     for t in range(np.size(gaps)):
 99 |         if q[t] > prev_q:
100 |             N = N + q[t] - prev_q
101 |         prev_q = q[t]
102 | 
103 |     bursts = np.array([np.repeat(np.nan, N), np.repeat(offsets[0],N),np.repeat(offsets[0], N)], ndmin=2, dtype=object).transpose()
104 | 
105 |     burst_counter = -1
106 |     prev_q = 0
107 |     stack = np.zeros(int(N), dtype=int)
108 |     stack_counter = -1
109 |     for t in range(np.size(gaps)):
110 |         if q[t] > prev_q:
111 |             num_levels_opened = q[t] - prev_q
112 |             for i in range(int(num_levels_opened)):
113 |                 burst_counter += 1
114 |                 bursts[burst_counter, 0] = prev_q + i
115 |                 bursts[burst_counter, 1] = offsets[t]
116 |                 stack_counter += 1
117 |                 stack[stack_counter] = int(burst_counter)
118 |         elif q[t] < prev_q:
119 |             num_levels_closed = prev_q - q[t]
120 |             for i in range(int(num_levels_closed)):
121 |                 bursts[stack[stack_counter], 2] = offsets[t]
122 |                 stack_counter -= 1
123 |         prev_q = q[t] 
124 | 
125 |     while stack_counter >= 0:
126 |         bursts[stack[stack_counter], 2] = offsets[np.size(gaps)]
127 |         stack_counter -= 1
128 | 
129 |     return bursts
130 |     
131 | 


--------------------------------------------------------------------------------