├── .gitignore
├── LICENSE
├── README.md
├── RELEASE_NOTES
├── examples
│   └── kde_example.ipynb
├── kde
│   ├── __init__.py
│   ├── classes.py
│   ├── cudakde.py
│   ├── kde.c
│   ├── pykde.py
│   ├── stat_tools.py
│   └── test_kde.py
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore precompiled packages
2 | *.py[cod]
3 |
4 | # Ignore cached
5 |
6 | # OSX finder files
7 | .DS_Store
8 |
9 | # Ignore auto-generated libraries and files
10 | *.so
11 | *.o
12 |
13 | # Packages
14 | dist
15 | build
16 | *.egg-info
17 |
18 | # Common editor remnants
19 | *~
20 | *.swp
21 |
22 | # Unison backups
23 | .backup
24 |
25 | # Plots
26 | *.pdf
27 | *.png
28 |
29 | # IPython notebook backups
30 | .ipynb_checkpoints
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 The IceCube Collaboration
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | kde
2 | ---
3 | Multi-dimensional Kernel Density Estimation (KDE) including adaptive
4 | bandwidths and C and CUDA implementations for specific cases.
5 |
6 |
7 | Authors
8 | -------
9 | Sebastian Schoenen (schoenen@physik.rwth-aachen.de) and Martin Leuermann for
10 | the IceCube collaboration.
11 |
12 |
13 | Installation Instructions
14 | -------------------------
15 | Download the software into a directory of your choice. There should be a subdirectory
16 | named "kde" within that directory.
17 |
18 | To install in a location independent of your system Python files, install via
19 | the following command:
20 |
21 | $ pip install .[cuda] --user
22 | 
23 | where "[cuda]" is optional and enables GPU support.
24 | 
25 | To install with references to the source code where it is downloaded (so that
26 | changes in the source code are reflected immediately):
27 | 
28 | $ pip install -e .[cuda] --user
29 |
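Usage
-----
A minimal usage sketch (assuming the CPU implementation is importable as
`kde.pykde.gaussian_kde`; check `kde/__init__.py` for what the package
re-exports at the top level):

    import numpy as np
    from kde.pykde import gaussian_kde

    data = np.random.normal(size=(2, 1000))   # shape: (n_dims, n_samples)
    k = gaussian_kde(data, adaptive=True, alpha=0.3, bw_method='silverman')

    grid = np.vstack([np.linspace(-3, 3, 50), np.linspace(-3, 3, 50)])
    densities = k(grid)                       # one density value per column of grid

The GPU path, `kde.cudakde.gaussian_kde`, accepts the same arguments plus
`use_cuda` and requires pycuda (which the optional `[cuda]` extra is meant to
provide).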
--------------------------------------------------------------------------------
/RELEASE_NOTES:
--------------------------------------------------------------------------------
1 | October 18, 2016 Sebastian Schoenen (schoenen@physik.rwth-aachen.de)
2 | ----------------------------------------------------------------------------
3 | Release V00-00-02
4 | - C code now supports N-dimensional KDEs
5 | - pykde: bw_method default changed from 'None' to 'silverman'
6 |
--------------------------------------------------------------------------------
/kde/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icecube/kde/9f65f3de7d228b61a27a4433e87b951de96ffec2/kde/__init__.py
--------------------------------------------------------------------------------
/kde/classes.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=line-too-long, invalid-name
2 |
3 |
4 | from __future__ import absolute_import, division, print_function
5 |
6 | __license__ = """MIT License
7 |
8 | Copyright (c) 2014-2019 Sebastian Schoenen and Martin Leuermann
9 |
10 | Permission is hereby granted, free of charge, to any person obtaining a copy
11 | of this software and associated documentation files (the "Software"), to deal
12 | in the Software without restriction, including without limitation the rights
13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 | copies of the Software, and to permit persons to whom the Software is
15 | furnished to do so, subject to the following conditions:
16 |
17 | The above copyright notice and this permission notice shall be included in all
18 | copies or substantial portions of the Software.
19 |
20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 | SOFTWARE.
27 | """
28 |
29 | from warnings import warn
30 |
31 | import numpy as np
32 | from scipy.interpolate import RectBivariateSpline as spl2D
33 |
34 | from .kde import getLambda_ND, kde_ND
35 | from .pykde import gaussian_kde
36 | from .stat_tools import weighted_cov
37 |
38 |
39 | class KDE(object):
40 | """Initialize KDE object
41 |
42 | Parameters
43 | ----------
44 | data : array_like, shape (n_dims, n_samples)
45 | use_cuda : bool; evaluate with the PyCUDA kernels instead of the C extension
46 | weights : array_like of length n_samples, optional; normalized internally
47 | alpha : float in [0, 1], optional; sensitivity of the adaptive per-point bandwidth
48 | method : {'silverman', 'scott'} or float, optional; global bandwidth rule or fixed value
49 |
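Examples
--------
A minimal sketch, assuming the compiled C extension (kde.kde) is available
and use_cuda=False; the data below are purely illustrative:

>>> import numpy as np
>>> data = np.random.normal(size=(2, 500))
>>> k = KDE(data, use_cuda=False, alpha=0.3, method='silverman')
>>> k.calcLambdas()
>>> k.kde(data[:, :10])
>>> k.values  # density estimates at the first 10 sample points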
50 | """
51 | def __init__(self, data, use_cuda, weights=None, alpha=0.3, method='silverman'):
52 | self.use_cuda = use_cuda
53 | if self.use_cuda:
54 | import pycuda.driver as cuda
55 | import pycuda.autoinit # pylint: disable=unused-variable
56 | self.cuda = cuda
57 |
58 | self.data = np.atleast_2d(data)
59 |
60 | self.d, self.n = self.data.shape
61 |
62 | self.alpha = alpha
63 |
64 | if weights is None or len(weights) == 0:
65 | self.w = np.full(shape=self.n, fill_value=1/self.n, dtype=float)
66 | self.setCovariance(weights=False)
67 | elif len(weights) == self.n:
68 | self.w = np.asarray(weights, dtype=float) / np.sum(weights)
69 | self.setCovariance(weights=True)
70 | else:
71 | raise AssertionError("Length of data (%d) and length of weights"
72 | " (%d) incompatible."
73 | % (self.n, len(weights)))
74 |
75 | self.hMethod = method
76 | self.lambdas = None
77 | self.points = None
78 | self.m = None
79 | self.d_pt = None
80 | self.values = None
81 | self.h = None
82 | self.weights = None
83 | self.w_norm = None
84 | self.w_norm_lambdas = None
85 | self.preFac = None
86 | self.logSum = None
87 | self.invGlob = None
88 |
89 | def setCovariance(self, weights=False):
90 | """Set covariance from data and weights
91 |
92 | Parameters
93 | ----------
94 | weights : bool, optional
95 |
96 | """
97 | if weights:
98 | self.c = weighted_cov(self.data, weights=self.w, bias=False)
99 | else:
100 | self.c = np.cov(self.data)
101 |
102 | if self.d != 1:
103 | self.c_inv = np.linalg.inv(self.c)
104 | self.detC = np.linalg.det(self.c_inv)
105 | else:
106 | self.c_inv = 1.0/self.c
107 | self.detC = self.c_inv
108 |
109 | def calcLambdas(self, weights=False, weightedCov=False, use_grid=False):
110 | """Calculate bandwidth lambda for data points in KDE function
111 |
112 | Parameters
113 | ----------
114 | weights : bool, optional
115 | weightedCov : bool, optional
116 | use_grid : bool, optional
117 |
118 | """
119 | self.configure("lambdas", weights=weights, weightedCov=weightedCov)
120 |
121 | if self.use_cuda:
122 | self.cuda_calc_lambdas()
123 | else:
124 | if use_grid and self.d == 2:
125 | # grid #
126 | N_grid = 100
127 | ext = np.zeros((2, self.d))
128 | for i in range(self.d):
129 | diff = (np.max(self.data[i]) - np.min(self.data[i])) * 0.1
130 | ext[i, 0] = np.min(self.data[i]) - diff
131 | ext[i, 1] = np.max(self.data[i]) + diff
132 | spaces1D = [np.linspace(t[0], t[1], N_grid) for t in ext]
133 | grid = np.array(np.meshgrid(*spaces1D))
134 | grid = grid.reshape((self.d, grid.size // self.d))
135 |
136 | # kde #
137 | n_w = len(self.w)
138 | w = self.w if weights else np.full(shape=n_w, fill_value=1/n_w)
139 | ss_kde = gaussian_kde(self.data, weights=w, adaptive=False,
140 | weight_adaptive_bw=False,
141 | alpha=self.alpha, bw_method=self.hMethod)
142 |
143 | vals = ss_kde(grid)  # evaluate the non-adaptive KDE on the coarse grid
144 | spline1 = spl2D(*spaces1D, z=(np.array(vals)).reshape([N_grid, N_grid]).T, kx=3, ky=3)  # transpose so the spline axes match (x, y)
145 | kde_vals2 = np.array([spline1(x, y)[0][0] for x, y in self.data.T])  # interpolate the gridded KDE back onto the sample points
146 |
147 | #print("Ratio direct:")
148 | #print(( np.array(kde_vals2) - np.array(kde_vals) ) / np.array(kde_vals))
149 |
150 | # lambdas #
151 | glob_sum = np.exp(np.sum(np.log(kde_vals2) / len(kde_vals2)))
152 | self.lambdas = np.power(kde_vals2 / glob_sum, (-1.0) * self.alpha) #self.alpha*(-1.0)) # hack !!!
153 | else:
154 | self.lambdas = getLambda_ND(
155 | int(self.d),
156 | list(self.c_inv.flatten()),
157 | list(self.data.flatten()),
158 | list(self.w_norm_lambdas),
159 | float(self.detC),
160 | self.h,
161 | self.alpha
162 | )
163 |
164 | def kde(self, points, weights=True, weightedCov=True):
165 | """Evaluate kde function
166 |
167 | Parameters
168 | ----------
169 | points
170 | weights : bool, optional
171 | weightedCov : bool, optional
172 |
173 | """
174 | self.points = np.atleast_2d(points)
175 | self.d_pt, self.m = self.points.shape
176 |
177 | if self.d > 1 and self.d != self.d_pt:
178 | assert self.d == self.m
179 | points = list(zip(*points[::1]))
180 | self.d_pt, self.m = np.array(points).shape
181 | warn("Dimensions of given points do not match the initialized kde"
182 | " function. Rotating given sample and proceeding with fingers"
183 | " crossed.")
184 |
185 | self.configure("kde", weights=weights, weightedCov=weightedCov)
186 |
187 | if self.use_cuda:
188 | self.cuda_kde(points)
189 | else:
190 | self.values = kde_ND(
191 | int(self.d),
192 | list(self.c_inv.flatten()),
193 | list(self.data.flatten()),
194 | list(self.points.flatten()),
195 | list(self.w_norm),
196 | list(self.preFac),
197 | float(self.detC),
198 | self.h,
199 | )
200 |
201 | def configure(self, mode, weights=False, weightedCov=False):
202 | """Get h, tempNorm, w_norm, w_lambdas, preFac
203 |
204 | Parameters
205 | ----------
206 | mode
207 | weights : bool
208 | weightedCov : bool
209 |
210 | """
211 | if isinstance(self.hMethod, str):
212 | if self.hMethod == 'silverman':
213 | # (n * (d + 2) / 4.)**(-1. / (d + 4)).
214 | self.h = np.power(1.0/(self.n*(self.d+2.0)/4.0), 1.0/(self.d+4.0))
215 | elif self.hMethod == 'scott':
216 | self.h = np.power(1.0/(self.n), 1.0/(self.d+4.0))
217 | else:
218 | raise ValueError("'%s' is an unknown string for the normalization"
219 | " constant. Implemented are 'scott' and"
220 | " 'silverman'." % (self.hMethod,))
221 | elif isinstance(self.hMethod, (int, float)):
222 | self.h = self.hMethod
223 | else:
224 | raise ValueError("Normalization constant must be of type int,"
225 | " float or str!")
226 |
227 | self.setCovariance(weights=weightedCov)
228 |
229 | if weights:
230 | self.weights = self.w
231 | else:
232 | self.weights = np.full(shape=self.n, fill_value=1/self.n,
233 | dtype=float)
234 |
235 | if mode == "lambdas":
236 | self.w_norm_lambdas = self.weights * np.sqrt(self.detC / np.power(2.0*np.pi*self.h*self.h, self.d))
237 | self.preFac = -0.5/np.power(self.h, 2)
238 | elif mode == "kde":
239 | self.w_norm = self.weights * np.sqrt(self.detC / np.power(2.0*np.pi*self.h*self.h*np.array(self.lambdas)*np.array(self.lambdas), self.d))
240 | self.preFac = -0.5/np.power(self.h*np.array(self.lambdas), 2)
241 | else:
242 | raise ValueError("Could not configure kde object. Unknown mode: %s" %(mode,))
243 |
244 | def cuda_calc_lambdas(self):
245 | """Calculate lambdas using cuda implementation"""
246 | from pycuda.compiler import SourceModule
247 |
248 | # conversion of python variables
249 | n = np.int32(self.n)
250 | logSum = np.zeros(n)
251 | kde_val_la = np.zeros(n)
252 |
253 | h_kde_val_la = np.array(kde_val_la).astype(np.float64)
254 | h_logSum = logSum.astype(np.float64)
255 | h_w_norm_lambdas = np.array(self.w_norm_lambdas).astype(np.float32)
256 |
257 | # reservation of memory on gpu
258 | d_kde_val_la = self.cuda.mem_alloc(h_kde_val_la.nbytes)
259 | d_logSum = self.cuda.mem_alloc(h_logSum.nbytes)
260 | d_w_norm_lambdas = self.cuda.mem_alloc(h_w_norm_lambdas.nbytes)
261 |
262 | # memory copy to gpu
263 | self.cuda.memcpy_htod(d_kde_val_la, h_kde_val_la)
264 | self.cuda.memcpy_htod(d_logSum, h_logSum)
265 | self.cuda.memcpy_htod(d_w_norm_lambdas, h_w_norm_lambdas)
266 |
267 | # dimension-dependent memory allocation
268 | if self.d == 2:
269 | h_x1 = np.array(self.data[0]).astype(np.float32)
270 | h_x2 = np.array(self.data[1]).astype(np.float32)
271 | d_x1 = self.cuda.mem_alloc(h_x1.nbytes)
272 | d_x2 = self.cuda.mem_alloc(h_x2.nbytes)
273 | self.cuda.memcpy_htod(d_x1, h_x1)
274 | self.cuda.memcpy_htod(d_x2, h_x2)
275 | addParam = "const float *x2, const double c11, const double c12, const double c21, const double c22"
276 | addDeclare = "double ent2;"
277 | calculation = """
278 | ent1 = x1[j]-x1[idx];
279 | ent2 = x2[j]-x2[idx];
280 | thisKde += w_norm_lambda[j] * exp(preFac * (ent1*(c11*ent1+c12*ent2) + ent2*(c21*ent1+c22*ent2)));
281 | """
282 | elif self.d == 1:
283 | h_x1 = np.array(self.data[0]).astype(np.float32)
284 | d_x1 = self.cuda.mem_alloc(h_x1.nbytes)
285 | self.cuda.memcpy_htod(d_x1, h_x1)
286 | addParam = "const double c"
287 | addDeclare = ""
288 | calculation = """
289 | ent1 = x1[j]-x1[idx];
290 | thisKde += w_norm_lambda[j] * exp(preFac * (ent1*c*ent1));
291 | """
292 |
293 | # define function on gpu to be executed
294 | mod = SourceModule("""
295 | __global__ void CalcLambda(const float *x1, """+addParam+""", const int n, const double preFac, const float *w_norm_lambda, double *logSum, double *kde){
296 | int idx = threadIdx.x + blockIdx.x*blockDim.x;
297 | if (idx < n){
298 | double thisKde, ent1;
299 | """+addDeclare+"""
300 | int j;
301 | thisKde = 0.0;
302 | for (j=0; j < n; j++) {
303 | """+calculation+"""
304 | } // for
305 | logSum[idx] = 1.0/n * log(thisKde);
306 | kde[idx] = thisKde;
307 | } // if
308 | __syncthreads();
309 | } // CalcLambda
310 | """)
311 |
312 | if n >= 512:
313 | bx = np.int32(512)
314 | else:
315 | bx = np.int32(n)
316 | gx = np.int32(n/bx)
317 | if n % bx != 0:
318 | gx += 1
319 |
320 | func = mod.get_function("CalcLambda") # code compiling
321 | if self.d == 2:
322 | # call of gpu function
323 | func(d_x1, d_x2,
324 | self.c_inv[0, 0], self.c_inv[1, 0],
325 | self.c_inv[0, 1], self.c_inv[1, 1],
326 | n,
327 | self.preFac,
328 | d_w_norm_lambdas, d_logSum, d_kde_val_la,
329 | block=(int(bx), 1, 1),
330 | grid=(int(gx), 1, 1))
331 | elif self.d == 1:
332 | # call of gpu function
333 | func(d_x1,
334 | self.c_inv,
335 | n,
336 | self.preFac,
337 | d_w_norm_lambdas, d_logSum, d_kde_val_la,
338 | block=(int(bx), 1, 1),
339 | grid=(int(gx), 1, 1))
340 |
341 | # backward copy from gpu to cpu memory
342 | self.cuda.memcpy_dtoh(h_logSum, d_logSum)
343 | self.cuda.memcpy_dtoh(h_kde_val_la, d_kde_val_la)
344 |
345 | self.logSum = sum(h_logSum)
346 | self.invGlob = 1.0/np.exp(self.logSum)
347 | self.lambdas = np.array(1.0/np.power(self.invGlob*np.array(h_kde_val_la), self.alpha))
348 |
349 | def cuda_kde(self, points, weights=True):
350 | """Calculate kde values using CUDA implementation
351 |
352 | Parameters
353 | ----------
354 | points
355 | weights : bool, optional
356 |
357 | """
358 | from pycuda.compiler import SourceModule
359 |
360 | self.points = np.atleast_2d(points)
361 | self.d_pt, self.m = self.points.shape
362 |
363 | # conversion of python variables
364 | n = np.int32(self.n)
365 | m = np.int32(self.m)
366 | kde_val = np.zeros(self.m)
367 |
368 | h_preFac = np.array(self.preFac).astype(np.float64)
369 | h_w_norm = np.array(self.w_norm).astype(np.float64)
370 | h_kde_val = np.array(kde_val).astype(np.float64)
371 |
372 | # reservation of memory on gpu
373 | d_preFac = self.cuda.mem_alloc(h_preFac.nbytes)
374 | d_w_norm = self.cuda.mem_alloc(h_w_norm.nbytes)
375 | d_kde_val = self.cuda.mem_alloc(h_kde_val.nbytes)
376 |
377 | # memory copy to gpu
378 | self.cuda.memcpy_htod(d_preFac, h_preFac)
379 | self.cuda.memcpy_htod(d_w_norm, h_w_norm)
380 | self.cuda.memcpy_htod(d_kde_val, h_kde_val)
381 |
382 | # dimension-dependent memory allocation
383 | if self.d == 2:
384 | h_x1 = np.array(self.data[0]).astype(np.float32)
385 | h_x2 = np.array(self.data[1]).astype(np.float32)
386 | h_y1 = np.array(self.points[0]).astype(np.float32)
387 | h_y2 = np.array(self.points[1]).astype(np.float32)
388 | d_x1 = self.cuda.mem_alloc(h_x1.nbytes)
389 | d_x2 = self.cuda.mem_alloc(h_x2.nbytes)
390 | d_y1 = self.cuda.mem_alloc(h_y1.nbytes)
391 | d_y2 = self.cuda.mem_alloc(h_y2.nbytes)
392 | self.cuda.memcpy_htod(d_x1, h_x1)
393 | self.cuda.memcpy_htod(d_x2, h_x2)
394 | self.cuda.memcpy_htod(d_y1, h_y1)
395 | self.cuda.memcpy_htod(d_y2, h_y2)
396 | addDeclare = "double ent2;"
397 | addParam = "const float *x2, const float *y2, const double c11, const double c12, const double c21, const double c22"
398 | calculation = """
399 | ent1 = x1[j]-y1[idx];
400 | ent2 = x2[j]-y2[idx];
401 | thisKde += w_norm[j] * exp(preFac[j] * (ent1*(c11*ent1+c12*ent2) + ent2*(c21*ent1+c22*ent2)));
402 | """
403 | elif self.d == 1:
404 | h_x1 = np.array(self.data[0]).astype(np.float32)
405 | h_y1 = np.array(self.points[0]).astype(np.float32)
406 | d_x1 = self.cuda.mem_alloc(h_x1.nbytes)
407 | d_y1 = self.cuda.mem_alloc(h_y1.nbytes)
408 | self.cuda.memcpy_htod(d_x1, h_x1)
409 | self.cuda.memcpy_htod(d_y1, h_y1)
410 | addParam = "const double c"
411 | addDeclare = ""
412 | calculation = """
413 | ent1 = x1[j]-y1[idx];
414 | thisKde += w_norm[j] * exp(preFac[j] * c * pow(ent1, 2));
415 | """
416 |
417 | # define executed function
418 | mod = SourceModule("""
419 | __global__ void CalcKde(const float *x1, const float *y1, """+addParam+""", const int n, const int m, const double *preFac, const double *w_norm, double *kde){
420 | int idx = threadIdx.x + blockIdx.x*blockDim.x;
421 | if (idx < m){
422 | double thisKde, ent1;
423 | """+addDeclare+"""
424 | int j;
425 | thisKde = 0.0;
426 | for (j=0; j < n; j++) {
427 | """+calculation+"""
428 | } // for
429 | kde[idx] = thisKde;
430 | } // if
431 | __syncthreads();
432 | } // CalcKde
433 | """)
434 |
435 | if n >= 512:
436 | bx = np.int32(512)
437 | else:
438 | bx = np.int32(n)
439 | gx = np.int32(self.m/bx)
440 | if m % bx != 0:
441 | gx += 1
442 |
443 | # code compiling
444 | func = mod.get_function("CalcKde")
445 | if self.d == 2:
446 | # call of gpu function
447 | func(d_x1, d_y1, d_x2, d_y2,
448 | self.c_inv[0, 0], self.c_inv[1, 0],
449 | self.c_inv[0, 1], self.c_inv[1, 1],
450 | n, m,
451 | d_preFac, d_w_norm, d_kde_val,
452 | block=(int(bx), 1, 1),
453 | grid=(int(gx), 1, 1))
454 | elif self.d == 1:
455 | # call of gpu function
456 | func(d_x1, d_y1,
457 | self.c_inv,
458 | n, m,
459 | d_preFac, d_w_norm, d_kde_val,
460 | block=(int(bx), 1, 1),
461 | grid=(int(gx), 1, 1))
462 |
463 | # backward copy from gpu to cpu memory
464 | self.cuda.memcpy_dtoh(h_kde_val, d_kde_val)
465 |
466 | self.values = h_kde_val
467 |
--------------------------------------------------------------------------------
/kde/cudakde.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=invalid-name
2 |
3 |
4 | from __future__ import absolute_import, division
5 |
6 | __license__ = """MIT License
7 |
8 | Copyright (c) 2014-2019 Sebastian Schoenen and Martin Leuermann
9 |
10 | Permission is hereby granted, free of charge, to any person obtaining a copy
11 | of this software and associated documentation files (the "Software"), to deal
12 | in the Software without restriction, including without limitation the rights
13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 | copies of the Software, and to permit persons to whom the Software is
15 | furnished to do so, subject to the following conditions:
16 |
17 | The above copyright notice and this permission notice shall be included in all
18 | copies or substantial portions of the Software.
19 |
20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 | SOFTWARE.
27 | """
28 |
29 | import warnings
30 | import numpy as n
31 | from .classes import KDE
32 |
33 |
34 | class gaussian_kde(KDE):
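"""Adaptive Gaussian KDE evaluated through the compiled C extension or,
when `use_cuda=True`, through the PyCUDA kernels defined in `KDE`.

A rough usage sketch (assumes the C extension is available for the
`use_cuda=False` path; the data below are purely illustrative):

>>> import numpy as np
>>> from kde.cudakde import gaussian_kde
>>> data = np.random.normal(size=(2, 1000))
>>> k = gaussian_kde(data, use_cuda=False, adaptive=True)
>>> values = k(data[:, :10])
"""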
35 | def __init__(self, data, weights=None, kde_values=None, use_cuda=True,
36 | adaptive=False, weight_adaptive_bw=False, alpha=0.3,
37 | bw_method='silverman'):
38 | if kde_values is not None:
39 | raise NotImplementedError("`kde_values` is not supported for"
40 | " cudakde.")
41 | KDE.__init__(self, data, use_cuda, weights=weights, alpha=alpha,
42 | method=bw_method)
43 |
44 | self.weighted = False if weights is None or len(weights) == 0 else True
45 |
46 | if adaptive:
47 | if not self.weighted and weight_adaptive_bw:
48 | warnings.warn("Since `weights` aren't given"
49 | " `weight_adaptive_bw` will have no effect!")
50 | self.calcLambdas(weights=weight_adaptive_bw,
51 | weightedCov=weight_adaptive_bw)
52 | else:
53 | self.lambdas = n.ones(self.n)
54 |
55 | def __call__(self, points):
56 | points = n.atleast_2d(points)
57 | self.kde(points, weights=self.weighted, weightedCov=self.weighted)
58 | return n.array(self.values)
59 |
60 |
61 | class bootstrap_kde(object):
62 | def __init__(self, data, niter=10, weights=None, **kwargs):
63 | assert int(niter) == float(niter)
64 | niter = int(niter)
65 |
66 | self.kernels = []
67 | self.bootstrap_indices = []
68 |
69 | self.data = n.atleast_2d(data)
70 | self.d, self.n = self.data.shape
71 | self.weighted = False if weights is None or len(weights) == 0 else True
72 |
73 | for _ in range(niter):
74 | indices = n.array(self.get_bootstrap_indices())
75 | self.bootstrap_indices.append(indices)
76 | if self.weighted:
77 | kernel = gaussian_kde(data[..., indices],
78 | weights=weights[indices],
79 | **kwargs)
80 | else:
81 | kernel = gaussian_kde(data[..., indices], **kwargs)
82 | self.kernels.append(kernel)
83 |
84 | def __call__(self, points):
85 | return self.evaluate(points)
86 |
87 | def evaluate(self, points):
88 | points = n.atleast_2d(points)
89 | _, m = points.shape
90 | means, sqmeans = n.zeros(m), n.zeros(m)
91 | for kernel in self.kernels:
92 | values = kernel(points)
93 | means += values
94 | sqmeans += values**2
95 | means /= len(self.kernels)
96 | sqmeans /= len(self.kernels)
97 | errors = n.sqrt(sqmeans - means**2)
98 | return means, errors
99 |
100 | def get_bootstrap_indices(self):
101 | bootstrap_indices = n.random.choice(self.n, size=self.n, replace=True)
102 | return bootstrap_indices
103 |
--------------------------------------------------------------------------------
/kde/kde.c:
--------------------------------------------------------------------------------
1 | ////////////////////////////////////////////////////////////
2 | ///// KDE CLASS - C IMPLEMENTATION //////
3 | ///// copyright (C) 2014 Martin Leuermann (May 2014) //////
4 | ////////////////////////////////////////////////////////////
5 | //
6 | // MIT License
7 | //
8 | // Copyright (c) 2014-2019 Martin Leuermann
9 | //
10 | // Permission is hereby granted, free of charge, to any person obtaining a copy
11 | // of this software and associated documentation files (the "Software"), to deal
12 | // in the Software without restriction, including without limitation the rights
13 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 | // copies of the Software, and to permit persons to whom the Software is
15 | // furnished to do so, subject to the following conditions:
16 | //
17 | // The above copyright notice and this permission notice shall be included in all
18 | // copies or substantial portions of the Software.
19 | //
20 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 | // SOFTWARE.
27 |
28 | #include <Python.h>
29 | #include <stdio.h>
30 | #include <stdlib.h>
31 | 
32 | #include <sys/time.h>
33 | #define _USE_MATH_DEFINES
34 | #include <math.h>
35 |
36 | int diff_ms(struct timeval t1, struct timeval t2)
37 | {
38 | return (((t1.tv_sec - t2.tv_sec) * 1000000) + (t1.tv_usec - t2.tv_usec))/1000;
39 | }
40 |
41 | ///////////////////////////////////////////////////
42 | //////// CALCULATE KDE VALUES FOR /////////////////
43 | //////// DATASET X AND POINTS Y /////////////////
44 | ///////////////////////////////////////////////////
45 |
46 | ////////////////////////
47 | /// FOR 1 DIMENSION ///
48 | ////////////////////////
49 | static PyObject *pr_kde_1d(PyObject *self, PyObject *args){
50 |
51 | ////////////////// DECLARATIONS ///////////////////////
52 | int ListSize1, ListSize2, i,j;
53 | double c, res, ent, h;
54 | PyObject *objx, *objy, *objpreFac, *objw_norm;
55 | PyObject *ListItem, *ListItem2, *ListItem3;
56 | double *x, *y, *preFac, *w_norm;
57 |
58 | /////////////////// GET INPUT //////////////////////////
59 | if (!PyArg_ParseTuple(args, "dOOdOO", &c, &objx, &objy, &h, &objpreFac, &objw_norm))
60 | return NULL;
61 |
62 |
63 | ////////// GET FIRST LIST-GROUP FROM PYTHON ////////////
64 | ListSize1 = PyList_Size(objx);
65 | x = (double*) malloc(sizeof(double)*ListSize1);
66 | preFac = (double*) malloc(sizeof(double)*ListSize1);
67 | w_norm = (double*) malloc(sizeof(double)*ListSize1);
68 |
69 | for(i=0; i < ListSize1; i++ ) {
70 | ListItem = PyList_GetItem(objx, i);
71 | ListItem2 = PyList_GetItem(objw_norm, i);
72 | ListItem3 = PyList_GetItem(objpreFac, i);
73 | if( PyFloat_Check(ListItem) && PyFloat_Check(ListItem2) && PyFloat_Check(ListItem3) ){
74 | x[i] = PyFloat_AsDouble(ListItem);
75 | w_norm[i] = PyFloat_AsDouble(ListItem2);
76 | preFac[i] = PyFloat_AsDouble(ListItem3);
77 | } else {
78 | printf("Error: lists contain a non-float value.\n");
79 | exit(1);
80 | }
81 | }
82 |
83 | ////////// GET SECOND LIST-GROUP FROM PYTHON ////////////
84 | ListSize2 = PyList_Size(objy);
85 | y = (double*) malloc(sizeof(double)*ListSize2);
86 |
87 | for(i=0; i < ListSize2; i++ ) {
88 | ListItem = PyList_GetItem(objy, i);
89 | if( PyFloat_Check(ListItem) ) {
90 | y[i] = PyFloat_AsDouble(ListItem);
91 | }else{
92 | printf("Error: lists contain a non-float value.\n");
93 | exit(1);
94 | }
95 | }
96 |
97 | /////////////// RUN CALCULATIONS /////////////////////
98 | PyObject *pylist;
99 | pylist = PyList_New(ListSize2);
100 |
101 | for(i=0; i < ListSize2; i++) {
102 | res = 0.0;
103 | for (j=0; j < ListSize1; j++) {
104 | ent = x[j]-y[i];
105 | res += w_norm[j] * exp(preFac[j] * c*pow(ent, 2));
106 | }
107 | PyList_SET_ITEM(pylist, i, PyFloat_FromDouble(res));
108 | }
109 |
110 | ///////// FREE MEMORY ///////////
111 | free(x);
112 | free(preFac);
113 | free(w_norm);
114 |
115 | ///////// RETURN RESULTING VALUES FOR y /////////
116 | return pylist;
117 | }
118 |
119 |
120 | ////////////////////////
121 | /// FOR 2 DIMENSIONS ///
122 | ////////////////////////
123 | static PyObject *pr_kde_2d(PyObject *self, PyObject *args){
124 |
125 | ////////////////// DECLARATIONS ///////////////////////
126 | int ListSize1, ListSize2, i,j;
127 | double c11, c12, c21, c22, res, ent1, ent2, h;
128 | PyObject *objx1, *objx2, *objy1, *objy2, *objpreFac, *objw_norm;
129 | PyObject *ListItem, *ListItem2, *ListItem3, *ListItem4;
130 | double *x1, *x2, *y1,*y2, *preFac, *w_norm;
131 |
132 | /////////////////// GET INPUT //////////////////////////
133 | if (!PyArg_ParseTuple(args, "ddddOOOOdOO", &c11, &c12, &c21, &c22, &objx1, &objx2, &objy1, &objy2, &h, &objpreFac, &objw_norm))
134 | return NULL;
135 |
136 | ////////// GET FIRST LIST-GROUP FROM PYTHON ////////////
137 | ListSize1 = PyList_Size(objx2);
138 | x1 = (double*) malloc(sizeof(double)*ListSize1);
139 | x2 = (double*) malloc(sizeof(double)*ListSize1);
140 | preFac = (double*) malloc(sizeof(double)*ListSize1);
141 | w_norm = (double*) malloc(sizeof(double)*ListSize1);
142 |
143 | for(i=0; i < ListSize1; i++ ) {
144 | ListItem = PyList_GetItem(objx1, i);
145 | ListItem2 = PyList_GetItem(objx2, i);
146 | ListItem3 = PyList_GetItem(objpreFac, i);
147 | ListItem4 = PyList_GetItem(objw_norm, i);
148 | if( PyFloat_Check(ListItem) && PyFloat_Check(ListItem2) && PyFloat_Check(ListItem3) && PyFloat_Check(ListItem4) ){
149 | x1[i] = PyFloat_AsDouble(ListItem);
150 | x2[i] = PyFloat_AsDouble(ListItem2);
151 | preFac[i] = PyFloat_AsDouble(ListItem3);
152 | w_norm[i] = PyFloat_AsDouble(ListItem4);
153 | } else {
154 | printf("Error: lists contain a non-float value.\n");
155 | exit(1);
156 | }
157 | }
158 |
159 | ////////// GET SECOND LIST-GROUP FROM PYTHON ////////////
160 | ListSize2 = PyList_Size(objy1);
161 | y1 = (double*) malloc(sizeof(double)*ListSize2);
162 | y2 = (double*) malloc(sizeof(double)*ListSize2);
163 |
164 | for(i=0; i < ListSize2; i++ ) {
165 | ListItem = PyList_GetItem(objy1, i);
166 | ListItem2 = PyList_GetItem(objy2, i);
167 | if( PyFloat_Check(ListItem) && PyFloat_Check(ListItem2) ) {
168 | y1[i] = PyFloat_AsDouble(ListItem);
169 | y2[i] = PyFloat_AsDouble(ListItem2);
170 | } else {
171 | printf("Error: lists contain a non-float value.\n");
172 | exit(1);
173 | }
174 | }
175 |
176 | /////////////// RUN CALCULATIONS /////////////////////
177 | PyObject *pylist;
178 |
179 | pylist = PyList_New(ListSize2);
180 |
181 | for(i=0; i < ListSize2; i++) {
182 | res = 0.0;
183 | for (j=0; j < ListSize1; j++) {
184 | ent1 = x1[j]-y1[i];
185 | ent2 = x2[j]-y2[i];
186 | res += w_norm[j] * exp(preFac[j] * (ent1*(c11*ent1+c12*ent2) + ent2*(c21*ent1+c22*ent2)));
187 | }
188 | PyList_SET_ITEM(pylist, i, PyFloat_FromDouble(res));
189 | }
190 |
191 | ///////// FREE MEMORY ///////////
192 | free(x1);
193 | free(x2);
194 | free(preFac);
195 | free(w_norm);
196 |
197 | ///////// RETURN RESULTING VALUES FOR y /////////
198 | return pylist;
199 | }
200 |
201 |
202 |
203 | ///////////////////////////////////////////////////
204 | //////// GET LAMBDA FOR DATASET OF X //////////////
205 | ///////////////////////////////////////////////////
206 |
207 | ////////////////////////
208 | /// FOR 1 DIMENSION ///
209 | ////////////////////////
210 |
211 | static PyObject *pr_getLambda_1d(PyObject *self, PyObject *args){
212 |
213 | ////////////////// DECLARATIONS ///////////////////////
214 | int ListSize1, i,j;
215 | double c, thisKde, ent, invGlob, logSum, alpha, h, preFac; //, tempNorm, weight, tempNormOld;
216 | PyObject *objx;
217 | PyObject *ListItem, *ListItem2;
218 | PyObject *obj_w_norm;
219 | double *x, *lambda, *kde, *w_norm;
220 |
221 | /////////////////// GET INPUT //////////////////////////
222 | if (!PyArg_ParseTuple(args, "dOOdd", &c, &objx, &obj_w_norm, &h, &alpha ) )
223 | return NULL;
224 |
225 | ////////// GET FIRST LIST-GROUP FROM PYTHON ////////////
226 | ListSize1 = PyList_Size(objx);
227 | x = (double*) malloc(sizeof(double)*ListSize1);
228 | lambda = (double*) malloc(sizeof(double)*ListSize1);
229 | kde = (double*) malloc(sizeof(double)*ListSize1);
230 | w_norm = (double*) malloc(sizeof(double)*ListSize1);
231 |
232 | for(i=0; i < ListSize1; i++ ) {
233 | ListItem = PyList_GetItem(objx, i);
234 | ListItem2 = PyList_GetItem(obj_w_norm, i);
235 |
236 | if( PyFloat_Check(ListItem) && PyFloat_Check(ListItem2) ) {
237 | x[i] = PyFloat_AsDouble(ListItem);
238 | w_norm[i] = PyFloat_AsDouble(ListItem2);
239 | } else {
240 | printf("Error: lists contain a non-float value.\n");
241 | exit(1);
242 | }
243 | }
244 |
245 | /////////////// RUN CALCULATIONS /////////////////////
246 | PyObject *lambdaList;
247 | lambdaList = PyList_New(ListSize1);
248 |
249 | invGlob = 0.0;
250 | logSum = 0.0;
251 | preFac = -0.5/pow(h, 2);
252 |
253 | for(i=0; i < ListSize1; i++) {
254 | thisKde = 0.0;
255 | for (j=0; j < ListSize1; j++) {
256 | ent = x[j]-x[i];
257 | thisKde += w_norm[j] * exp(preFac * (ent*c*ent));
258 | }
259 | logSum += 1.0/ListSize1 * log(thisKde);
260 | kde[i] = thisKde;
261 | }
262 | invGlob = 1.0/exp(logSum);
263 |
264 | for(i=0; i< ListSize1; i++) {
265 | lambda[i] = 1.0/pow(invGlob*kde[i], alpha);
266 | PyList_SET_ITEM(lambdaList, i, PyFloat_FromDouble(lambda[i]));
267 | }
268 |
269 | ///////// FREE MEMORY ///////////
270 | free(x);
271 | free(lambda);
272 | free(kde);
273 | free(w_norm);
274 |
275 | ///////// RETURN RESULTING VALUES FOR LAMBDAS /////////
276 | return lambdaList;
277 | }
278 |
279 | ////////////////////////
280 | /// FOR 2 DIMENSIONS ///
281 | ////////////////////////
282 |
283 | static PyObject *pr_getLambda_2d(PyObject *self, PyObject *args){
284 | ////////////////// DECLARATIONS ///////////////////////
285 | int ListSize1, i,j;
286 | double c11, c12, c21, c22, thisKde, ent1, ent2, invGlob, logSum, alpha, h, preFac; // , tempNorm, weight;
287 | PyObject *objx1, *objx2, *obj_w_norm;
288 | PyObject *ListItem, *ListItem2, *ListItem3;
289 | double *x1, *x2, *lambda, *kde, *w_norm;
290 |
291 | /////////////////// GET INPUT //////////////////////////
292 | if (!PyArg_ParseTuple(args, "ddddOOOdd", &c11, &c12, &c21, &c22, &objx1, &objx2, &obj_w_norm, &h, &alpha ) )
293 | return NULL;
294 |
295 | ////////// GET FIRST LIST-GROUP FROM PYTHON ////////////
296 | ListSize1 = PyList_Size(objx2);
297 | x1 = (double*) malloc(sizeof(double)*ListSize1);
298 | x2 = (double*) malloc(sizeof(double)*ListSize1);
299 | lambda = (double*) malloc(sizeof(double)*ListSize1);
300 | kde = (double*) malloc(sizeof(double)*ListSize1);
301 | w_norm = (double*) malloc(sizeof(double)*ListSize1);
302 |
303 | for(i=0; i < ListSize1; i++ ) {
304 | ListItem = PyList_GetItem(objx1, i);
305 | ListItem2 = PyList_GetItem(objx2, i);
306 | ListItem3 = PyList_GetItem(obj_w_norm, i);
307 |
308 | if( PyFloat_Check(ListItem) && PyFloat_Check(ListItem2) && PyFloat_Check(ListItem3)) {
309 | x1[i] = PyFloat_AsDouble(ListItem);
310 | x2[i] = PyFloat_AsDouble(ListItem2);
311 | w_norm[i] = PyFloat_AsDouble(ListItem3);
312 | } else {
313 | printf("Error: lists contain a non-float value.\n");
314 | exit(1);
315 | }
316 | }
317 |
318 | /////////////// RUN CALCULATIONS /////////////////////
319 | PyObject *lambdaList;
320 | lambdaList = PyList_New(ListSize1);
321 |
322 |
323 | invGlob = 0.0;
324 | logSum = 0.0;
325 | preFac = -0.5/(h*h);
326 |
327 | for(i=0; i < ListSize1; i++) {
328 | thisKde = 0.0;
329 | for (j=0; j < ListSize1; j++) {
330 | ent1 = x1[j]-x1[i];
331 | ent2 = x2[j]-x2[i];
332 | thisKde += w_norm[j] * exp(preFac * (ent1*(c11*ent1+c12*ent2) + ent2*(c21*ent1+c22*ent2)));
333 | }
334 | logSum += 1.0/ListSize1 * log(thisKde);
335 | kde[i] = thisKde;
336 | }
337 | invGlob = 1.0/exp(logSum);
338 |
339 | for(i=0; i< ListSize1; i++) {
340 | lambda[i] = 1.0/pow(invGlob*kde[i], alpha);
341 | PyList_SET_ITEM(lambdaList, i, PyFloat_FromDouble(lambda[i]));
342 | }
343 |
344 | ///////// FREE MEMORY ///////////
345 | free(x1);
346 | free(x2);
347 | free(lambda);
348 | free(kde);
349 | free(w_norm);
350 |
351 | ///////// RETURN RESULTING VALUES FOR LAMBDAS /////////
352 | return lambdaList;
353 | }
354 |
355 | ////////////////////////
356 | /// FOR N DIMENSIONS ///
357 | ////////////////////////
358 |
359 | static PyObject *pr_kde_ND(PyObject *self, PyObject *args){
360 |
361 | ////////////////// DECLARATIONS ///////////////////////
362 | int ndata, neval, nelems_c, ndim, i,j,d,k;
363 | double det_inv_cov, h;
364 | PyObject *obj_entries, *obj_preFac, *obj_w_norm, *obj_evals;
365 | PyObject *ListItem, *ListItem2, *obj_inv_cov;
366 | double *x, *preFac, *w_norm, *inv_cov, *evals;
367 |
368 | /////////////////// GET INPUT //////////////////////////
369 | if (!PyArg_ParseTuple(args, "iOOOOOdd", &ndim, &obj_inv_cov, &obj_entries, &obj_evals, &obj_w_norm, &obj_preFac, &det_inv_cov, &h) )
370 | return NULL;
371 |
372 | ////////// GET FIRST LIST-GROUP FROM PYTHON ////////////
373 | ndata = PyList_Size(obj_w_norm);
374 | neval = PyList_Size(obj_evals) / ndim ;
375 |
376 | x = (double*) malloc(ndim * sizeof(double)*ndata);
377 | w_norm = (double*) malloc(sizeof(double)*ndata);
378 | preFac = (double*) malloc(sizeof(double)*ndata);
379 | evals = (double*) malloc(sizeof(double)*neval*ndim);
380 |
381 | nelems_c= PyList_Size(obj_inv_cov);
382 | inv_cov = (double*) malloc(sizeof(double)*nelems_c);
383 |
384 | // get data //
385 | for( i=0; i < ndim*ndata; i++ ) {
386 | ListItem = PyList_GetItem(obj_entries , i);
387 |
388 | if( PyFloat_Check(ListItem) ) {
389 | x[i] = PyFloat_AsDouble(ListItem);
390 | } else {
391 | printf("Error: lists contain a non-float value.\n");
392 | exit(1);
393 | }
394 | }
395 |
396 | // get evals //
397 | for( i=0; i < neval*ndim; i++ ) {
398 | ListItem = PyList_GetItem(obj_evals , i);
399 |
400 | if( PyFloat_Check(ListItem) ) {
401 | evals[i] = PyFloat_AsDouble(ListItem);
402 | } else {
403 | printf("Error: lists contain a non-float value.\n");
404 | exit(1);
405 | }
406 | }
407 |
408 | // get inv_cov //
409 | for( i=0; i < nelems_c; i++ ) {
410 | ListItem = PyList_GetItem(obj_inv_cov , i);
411 |
412 | if( PyFloat_Check(ListItem) ) {
413 | inv_cov[i] = PyFloat_AsDouble(ListItem);
414 | } else {
415 | printf("Error: lists contain a non-float value.\n");
416 | exit(1);
417 | }
418 | }
419 |
420 |
421 | // get preFacs and weights //
422 | for( i=0; i < ndata; i++ ) {
423 | ListItem = PyList_GetItem(obj_preFac, i);
424 | ListItem2 = PyList_GetItem(obj_w_norm, i);
425 |
426 | if( PyFloat_Check(ListItem) && PyFloat_Check(ListItem2)) {
427 | preFac[i] = PyFloat_AsDouble(ListItem);
428 | w_norm[i] = PyFloat_AsDouble(ListItem2);
429 | } else {
430 | printf("Error: lists contain a non-float value.\n");
431 | exit(1);
432 | }
433 | }
434 |
435 | // new stuff for ND //
436 | PyObject *pylist;
437 | pylist = PyList_New(neval);
438 |
439 | for( j = 0; j < neval; j++ ) {
--------------------------------------------------------------------------------
/kde/pykde.py:
--------------------------------------------------------------------------------
162 | >>> from scipy import stats
163 | >>> def measure(n):
164 | >>> "Measurement model, return two coupled measurements."
165 | >>> m1 = np.random.normal(size=n)
166 | >>> m2 = np.random.normal(scale=0.5, size=n)
167 | >>> return m1+m2, m1-m2
168 |
169 | >>> m1, m2 = measure(2000)
170 | >>> xmin = m1.min()
171 | >>> xmax = m1.max()
172 | >>> ymin = m2.min()
173 | >>> ymax = m2.max()
174 |
175 | Perform a kernel density estimate on the data:
176 |
177 | >>> X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
178 | >>> positions = np.vstack([X.ravel(), Y.ravel()])
179 | >>> values = np.vstack([m1, m2])
180 | >>> kernel = gaussian_kde(values)
181 | >>> Z = np.reshape(kernel(positions).T, X.shape)
182 |
183 | Plot the results:
184 |
185 | >>> import matplotlib.pyplot as plt
186 | >>> fig = plt.figure()
187 | >>> ax = fig.add_subplot(111)
188 | >>> ax.imshow(np.rot90(Z), cmap=plt.cm.gist_earth_r,
189 | ... extent=[xmin, xmax, ymin, ymax])
190 | >>> ax.plot(m1, m2, 'k.', markersize=2)
191 | >>> ax.set_xlim([xmin, xmax])
192 | >>> ax.set_ylim([ymin, ymax])
193 | >>> plt.show()
194 |
195 | """
196 | def __init__(self, dataset, weights=None, kde_values=None,
197 | adaptive=False, weight_adaptive_bw=False, alpha=0.3,
198 | bw_method='silverman'):
199 | self.inv_cov12 = None
200 | self.ds = None
201 | self._normalized_weights = None
202 |
203 | self.dataset = np.atleast_2d(dataset)
204 | self.d, self.n = self.dataset.shape
205 |
206 | max_array_length = 1e8
207 | """Maximum amount of data in memory (~2GB, scales linearly)"""
208 |
209 | self.m_max = int(np.floor(max_array_length/self.n))
210 | if self.n > max_array_length:
211 | raise ValueError("`dataset` is too large (too many array entries)!")
212 |
213 | if weights is not None and len(weights) == self.n:
214 | self.weights = weights
215 | elif weights is None:
216 | self.weights = np.ones(self.n)
217 | else:
218 | raise ValueError("unequal dimension of `dataset` and `weights`.")
219 |
220 | self.kde_values = kde_values
221 | if self.kde_values is not None:
222 | print("Warning: By giving `kde_values`, `weight_adaptive_bw` is"
223 | " useless. You have to be sure what was used to calculate"
224 | " those values!")
225 | if len(self.kde_values) != self.n:
226 | raise ValueError("unequal dimension of `dataset` and `kde_values`.")
227 | if not self.dataset.size > 1:
228 | raise ValueError("`dataset` input should have multiple elements.")
229 |
230 | # compute covariance matrix
231 | self.set_bandwidth(bw_method=bw_method)
232 |
233 | self.adaptive = adaptive
234 | if self.adaptive:
235 | self.weight_adaptive_bw = weight_adaptive_bw
236 | try:
237 | self.alpha = float(alpha)
238 | except (TypeError, ValueError):
239 | raise ValueError("`alpha` has to be a number.")
240 | if self.alpha < 0. or self.alpha > 1.:
241 | raise ValueError("`alpha` has to be in the range [0,1].")
242 | self._compute_adaptive_covariance()
243 | elif not self.adaptive and self.kde_values is not None:
244 | raise ValueError("Giving `kde_values`, `adaptive` cannot be False!")
245 |
246 | def evaluate(self, points, adaptive=False):
247 | """Evaluate the estimated pdf on a set of points.
248 |
249 | Parameters
250 | ----------
251 | points : (# of dimensions, # of points)-array
252 | Alternatively, a (# of dimensions,) vector can be passed in and
253 | treated as a single point.
254 |
255 | Returns
256 | -------
257 | values : (# of points,)-array
258 | The values at each point.
259 |
260 | Raises
261 | ------
262 | ValueError : if the dimensionality of the input points is different than
263 | the dimensionality of the KDE.
264 |
265 | """
266 | points = np.dot(self.inv_cov12, np.atleast_2d(points))
267 | ds = self.ds # pylint: disable=unused-variable
268 | normalized_weights = self._normalized_weights # pylint: disable=unused-variable
269 |
270 | d, m = points.shape
271 | if d != self.d:
272 | if d == 1 and m == self.d:
273 | # points was passed in as a row vector
274 | points = np.reshape(points, (m, d))
275 | d, m = points.shape
276 | else:
277 | msg = "points have dimension %s, dataset has dimension %s" % (d, self.d)
278 | raise ValueError(msg)
279 |
280 | nloops = int(np.ceil(m/self.m_max))
281 | dm = self.m_max
282 | modulo_dm = m%dm
283 | results = np.empty((m,), dtype=float)
284 | if adaptive:
285 | inv_loc_bw = self.inv_loc_bw # pylint: disable=unused-variable
286 |
287 | for i in range(nloops):
288 | index = i*dm
289 | if modulo_dm and i == (nloops-1):
290 | dm = modulo_dm
291 | pt = points[:, index:index+dm].T.reshape(dm, self.d, 1)
292 |
293 | # has to be done due to BUG in `numexpr` (`sum` in `numexpr` != `numpy.sum`)
294 | if self.d == 1:
295 | energy = numexpr.evaluate( # pylint: disable=unused-variable
296 | "(ds - pt)**2",
297 | optimization='aggressive'
298 | ).reshape(dm, self.n)
299 | else:
300 | energy = numexpr.evaluate( # pylint: disable=unused-variable
301 | "sum((ds - pt)**2, axis=1)",
302 | optimization='aggressive'
303 | )
304 |
305 | results[index:index+dm] = numexpr.evaluate(
306 | "sum(normalized_weights * exp(-0.5 * energy * inv_loc_bw), axis=1)",
307 | optimization='aggressive'
308 | )
309 | del pt
310 |
311 | else:
312 | for i in range(nloops):
313 | index = i*dm
314 | if modulo_dm and i == (nloops-1):
315 | dm = modulo_dm
316 | pt = points[:, index:index+dm].T.reshape(dm, self.d, 1)
317 |
318 | # has to be done due to BUG in `numexpr` (`sum` in `numexpr` != `numpy.sum`)
319 | if self.d == 1:
320 | energy = numexpr.evaluate(
321 | "(ds - pt)**2",
322 | optimization='aggressive'
323 | ).reshape(dm, self.n)
324 | else:
325 | energy = numexpr.evaluate(
326 | "sum((ds - pt)**2, axis=1)",
327 | optimization='aggressive'
328 | )
329 |
330 | results[index:index+dm] = numexpr.evaluate(
331 | "sum(normalized_weights * exp(-0.5 * energy), axis=1)",
332 | optimization='aggressive'
333 | )
334 | del pt
335 |
336 | return results
337 |
338 | def __call__(self, points):
339 | return self.evaluate(points, adaptive=self.adaptive)
340 |
341 | def scotts_factor(self):
342 | return self.n ** (-1 / (self.d + 4))
343 |
344 | def silverman_factor(self):
345 | return (self.n * (self.d + 2) / 4) ** (-1 / (self.d + 4))
346 |
347 | # Default method to calculate bandwidth, can be overwritten by subclass
348 | covariance_factor = scotts_factor
349 |
350 | def set_bandwidth(self, bw_method=None):
351 | """Compute the estimator bandwidth with given method.
352 |
353 | The new bandwidth calculated after a call to `set_bandwidth` is used
354 | for subsequent evaluations of the estimated density.
355 |
356 | Parameters
357 | ----------
358 | bw_method : str, scalar or callable, optional
359 | The method used to calculate the estimator bandwidth. This can be
360 | 'scott', 'silverman', a scalar constant or a callable. If a
361 | scalar, this will be used directly as `kde.factor`. If a callable,
362 | it should take a `gaussian_kde` instance as only parameter and
363 | return a scalar. If None (default), nothing happens; the current
364 | `kde.covariance_factor` method is kept.
365 |
366 | Examples
367 | --------
368 | >>> x1 = np.array([-7, -5, 1, 4, 5.])
369 | >>> kde = stats.gaussian_kde(x1)
370 | >>> xs = np.linspace(-10, 10, num=50)
371 | >>> y1 = kde(xs)
372 | >>> kde.set_bandwidth(bw_method='silverman')
373 | >>> y2 = kde(xs)
374 | >>> kde.set_bandwidth(bw_method=kde.factor / 3.)
375 | >>> y3 = kde(xs)
376 |
377 | >>> fig = plt.figure()
378 | >>> ax = fig.add_subplot(111)
379 | >>> ax.plot(x1, np.ones(x1.shape) / (4. * x1.size), 'bo',
380 | ... label='Data points (rescaled)')
381 | >>> ax.plot(xs, y1, label='Scott (default)')
382 | >>> ax.plot(xs, y2, label='Silverman')
383 | >>> ax.plot(xs, y3, label='Const (1/3 * Silverman)')
384 | >>> ax.legend()
385 | >>> plt.show()
386 |
387 | """
388 | if bw_method is None:
389 | pass
390 | elif bw_method == 'scott':
391 | self.covariance_factor = self.scotts_factor
392 | elif bw_method == 'silverman':
393 | self.covariance_factor = self.silverman_factor
394 | elif np.isscalar(bw_method) and not isinstance(bw_method, str):
395 | self._bw_method = 'use constant'
396 | self.covariance_factor = lambda: bw_method
397 | elif callable(bw_method):
398 | self._bw_method = bw_method
399 | self.covariance_factor = lambda: self._bw_method(self)
400 | else:
401 | msg = "`bw_method` should be 'scott', 'silverman', a scalar " \
402 | "or a callable."
403 | raise ValueError(msg)
404 |
405 | self._compute_covariance()
406 |
407 | def _compute_covariance(self):
408 | """Computes the covariance matrix for each Gaussian kernel using
409 | covariance_factor().
410 | """
411 | factor = self.covariance_factor()
412 | # Cache covariance and inverse covariance of the data
413 | data_covariance = np.atleast_2d(weighted_cov(self.dataset, weights=self.weights, bias=False))
414 | data_inv_cov = linalg.inv(data_covariance)
415 |
416 | covariance = data_covariance * factor**2
417 | inv_cov = data_inv_cov / factor**2
418 | self.inv_cov12 = linalg.cholesky(inv_cov).T
419 |
420 | self.ds = np.dot(self.inv_cov12, self.dataset)
421 |
422 | norm_factor = np.sqrt(linalg.det(2*np.pi*covariance))
423 | #inv_norm_factor = 1. / (norm_factor * sum(self.weights))
424 | self._normalized_weights = self.weights / (norm_factor * sum(self.weights))
425 |
426 | def _compute_adaptive_covariance(self):
427 | """Computes an adaptive covariance matrix for each Gaussian kernel using
428 | _compute_covariance().
429 | """
430 | # evaluate dataset for kde without adaptive kernel:
431 | if self.kde_values is None:
432 | if self.weight_adaptive_bw:
433 | self.kde_values = self.evaluate(self.dataset, adaptive=False)
434 | else:
435 | weights_temp = copy(self.weights)
436 | self.weights = np.ones(self.n)
437 | self._compute_covariance()
438 | self.kde_values = self.evaluate(self.dataset, adaptive=False)
439 | self.weights = weights_temp
440 | self._compute_covariance()
441 |
442 | # Define global bandwidth `glob_bw` by using the kde without adaptive kernel:
443 | # NOTE: is this really self.n or should it be sum(weights)?
444 | glob_bw = np.exp(1./self.n * np.sum(np.log(self.kde_values)))
445 | # Define local bandwidth `loc_bw`:
446 | self.inv_loc_bw = np.power(self.kde_values/glob_bw, 2.*self.alpha)
447 |
448 | #inv_local_norm_factors = self._inv_norm_factor * power(self.inv_loc_bw, 0.5*self.d)
449 | self._normalized_weights = self._normalized_weights * np.power(self.inv_loc_bw, 0.5*self.d)
450 |
451 | class bootstrap_kde(object):
452 | """Bootstrapping to estimate uncertainty in KDE.
453 |
454 | Parameters
455 | ----------
456 | dataset
457 | niter : int > 0
458 | **kwargs
459 | Passed on to `gaussian_kde`, except 'weights' which, if present, is
460 | extracted and re-sampled in the same manner as `dataset`.
461 |
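Examples
--------
A rough sketch (the data below are purely illustrative; `dataset` has shape
(n_dims, n_samples)):

>>> import numpy as np
>>> data = np.random.normal(size=(2, 1000))
>>> bkde = bootstrap_kde(data, niter=10, adaptive=True)
>>> grid = np.vstack([np.linspace(-3, 3, 50), np.linspace(-3, 3, 50)])
>>> means, errors = bkde(grid)  # density estimate and bootstrap error per point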
462 | """
463 | def __init__(self, dataset, niter=10, **kwargs):
464 | self.kernels = []
465 | self.bootstrap_indices = []
466 |
467 | self.dataset = np.atleast_2d(dataset)
468 | self.d, self.n = self.dataset.shape
469 | if "weights" in kwargs:
470 | weights = kwargs.pop("weights")
471 | else:
472 | weights = None
473 |
474 | for _ in range(niter):
475 | indices = self.get_bootstrap_indices()
476 | self.bootstrap_indices.append(indices)
477 | if weights is not None:
478 | kernel = gaussian_kde(self.dataset[:, indices], weights=weights[indices], **kwargs)
479 | self.kernels.append(kernel)
480 | else:
481 | kernel = gaussian_kde(self.dataset[:, indices], **kwargs)
482 | self.kernels.append(kernel)
483 |
484 | def __call__(self, points):
485 | return self.evaluate(points)
486 |
487 | def evaluate(self, points):
488 | points = np.atleast_2d(points)
489 | _, m = points.shape
490 | means, sqmeans = np.zeros(m), np.zeros(m)
491 | for kernel in self.kernels:
492 | values = kernel(points)
493 | means += values
494 | sqmeans += values**2
495 | means /= len(self.kernels)
496 | sqmeans /= len(self.kernels)
497 | errors = np.sqrt(sqmeans - means**2)
498 | return means, errors
499 |
500 | def get_bootstrap_indices(self):
501 | """Get random indices used to resample (with replacement) `dataset`.
502 |
503 | Returns
504 | -------
505 | bootstrap_indices : array
506 |
507 | """
508 | return np.random.choice(self.n, size=self.n, replace=True)
509 |
--------------------------------------------------------------------------------
/kde/stat_tools.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=line-too-long, invalid-name
2 |
3 |
4 | from __future__ import absolute_import, division, print_function
5 |
6 | __license__ = """MIT License
7 |
8 | Copyright (c) 2014-2019 Sebastian Schoenen and Martin Leuermann
9 |
10 | Permission is hereby granted, free of charge, to any person obtaining a copy
11 | of this software and associated documentation files (the "Software"), to deal
12 | in the Software without restriction, including without limitation the rights
13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 | copies of the Software, and to permit persons to whom the Software is
15 | furnished to do so, subject to the following conditions:
16 |
17 | The above copyright notice and this permission notice shall be included in all
18 | copies or substantial portions of the Software.
19 |
20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 | SOFTWARE.
27 | """
28 |
29 | import numpy as np
30 |
31 |
32 | def rebin(a, *args, **kwargs):
33 | """Rebin ndarray data into a smaller ndarray of the same rank whose
34 | dimensions are factors of the original dimensions. eg. An array with 6
35 | columns and 4 rows can be reduced to have 6,3,2 or 1 columns and 4,2 or 1
36 | rows.
37 |
38 | Examples
39 | --------
40 | >>> a = np.random.rand(6, 4)
41 | >>> b = rebin(a, 3, 2)
42 | >>> print(b.shape)
43 | (3, 2)
44 | 
45 | >>> a = np.random.rand(6)
46 | >>> b = rebin(a, 2)
47 | >>> print(b.shape)
48 | (2,)
49 |
50 | """
51 | method = kwargs.get("method", "sum")
52 | verbose = kwargs.get("verbose", False)
53 |
54 | shape = a.shape
55 | lenShape = len(shape)
56 | factor = np.asarray(shape) // np.asarray(args)  # pylint: disable=unused-variable
57 | evList = (
58 | ['a.reshape('] +
59 | ['args[%d],factor[%d],'%(i, i) for i in range(lenShape)] +
60 | [')'] + ['.sum(%d)'%(i+1) for i in range(lenShape)]
61 | )
62 |
63 | if method == "sum":
64 | pass
65 | elif method == "average":
66 | evList += ['/factor[%d]'%i for i in range(lenShape)]
67 | else:
68 | raise AttributeError("method: %s not defined" % method)
69 |
70 | evStr = ''.join(evList)
71 |
72 | if verbose:
73 | print(evStr)
74 |
75 | return eval(evStr) # pylint: disable=eval-used
76 |
77 |
78 | def covariance_form(point, mean, cov):
79 | """Calculate 2D map of covariance form (2D quadratic approximation to
80 | -2lnL)
81 |
82 | """
83 | cov_inv = np.linalg.inv(cov)
84 | diff = point - mean
85 |
86 | stats = []
87 | for y_i in range(len(diff)):
88 | current_y = []
89 | for x_i in range(len(diff[y_i])):
90 | a = np.matrix(diff[y_i][x_i])
91 | current_y.append((a * cov_inv * a.transpose()).item(0))
92 | stats.append(current_y)
93 | return np.array(stats)
94 |
95 |
96 | def estimate_cov_from_contour(xaxis, yaxis, zmesh, point):
97 | """Calculate estimate of covariance matrix from 2D Hessian of -2lnL
98 |
99 | Note:
100 | RectBivariateSpline expects zmesh to have shape (len(xaxis), len(yaxis))
101 | but my mesh has shape (len(yaxis), len(xaxis)) thus everything is mirrored
102 |
103 | """
104 | from scipy.interpolate import RectBivariateSpline
105 | x, y = point
106 | spline = RectBivariateSpline(yaxis, xaxis, np.asarray(zmesh))
107 | dx2 = 0.5 * spline(y, x, dx=0, dy=2, grid=False)
108 | dy2 = 0.5 * spline(y, x, dx=2, dy=0, grid=False)
109 | dxdy = 0.5 * spline(y, x, dx=1, dy=1, grid=False)
110 |
111 | hessian = np.matrix([[dx2, dxdy], [dxdy, dy2]])
112 | cov = np.linalg.inv(hessian)
113 | return cov
114 |
115 |
116 | def interpolate_statistic(xaxis, yaxis, zmesh, xaxis_new, yaxis_new):
117 | """Calculate 2D spline surface of -2lnL test-statistic.
118 |
119 | The same spline is used to calculate derivatives in
120 | "estimate_cov_from_contour(xaxis, yaxis, zmesh, point)"
121 |
122 | Note:
123 | RectBivariateSpline expects zmesh to have shape (len(xaxis), len(yaxis))
124 | but my mesh has shape (len(yaxis), len(xaxis))
125 | thus everything is mirrored
126 |
127 | """
128 | from scipy.interpolate import RectBivariateSpline
129 | spline = RectBivariateSpline(yaxis, xaxis, np.asarray(zmesh))
130 | stats = [[spline(yaxis_new[yi], xaxis_new[xi], dx=0, dy=0, grid=False)
131 | for xi in range(len(xaxis_new))]
132 | for yi in range(len(yaxis_new))]
133 | return np.array(stats)
134 |
135 |
136 | def wilks_test(profiles):
137 | """Calculate the compatibility of statistically independent measurements.
138 |
139 | Here, we assume that Wilks' theorem holds.
140 |
141 | Parameters
142 | ----------
143 | profiles : list of (x, y, llh) for different measurements
144 |
145 | """
146 | from scipy.stats import distributions
147 | from scipy.special import erfinv
148 |
149 | xmin, xmax = +np.inf, -np.inf
150 | ymin, ymax = +np.inf, -np.inf
151 | for x, y, _ in profiles:
152 | xmin_, xmax_ = np.min(x), np.max(x)
153 | if xmin_ < xmin:
154 | xmin = xmin_
155 | if xmax_ > xmax:
156 | xmax = xmax_
157 |
158 | ymin_, ymax_ = np.min(y), np.max(y)
159 | if ymin_ < ymin:
160 | ymin = ymin_
161 | if ymax_ > ymax:
162 | ymax = ymax_
163 |
164 | x = np.linspace(xmin, xmax, 1000)
165 | y = np.linspace(ymin, ymax, 1000)
166 |
167 | sum_llhs = 0
168 | for xpar, ypar, llhs in profiles:
169 | sum_llhs += interpolate_statistic(xpar, ypar, llhs, x, y)
170 |
171 | chi2 = np.min(sum_llhs)
172 | ndof = 2 * (len(profiles) - 1)
173 | pvalue = chi2_dist.sf(chi2, ndof)  # chi-square survival function
174 | nsigma = erfinv(1 - pvalue) * np.sqrt(2) # 2-sided significance
175 |
176 | return (chi2, ndof, pvalue, nsigma)
177 |
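A self-contained sketch of wilks_test on two synthetic profiles (made-up values; each llh mesh is a -2*Delta(lnL) surface on the shared (x, y) grid, and the internal 1000x1000 interpolation makes this slow for fine input grids).

import numpy as np

x = np.linspace(-3, 3, 61)
y = np.linspace(-3, 3, 61)
grid = np.dstack(np.meshgrid(x, y))
cov = np.array([[0.4, 0.0], [0.0, 0.4]])

# Two measurements whose best-fit points disagree by one unit in x
llh_a = covariance_form(grid, np.array([-0.5, 0.0]), cov)
llh_b = covariance_form(grid, np.array([+0.5, 0.0]), cov)

chi2, ndof, pvalue, nsigma = wilks_test([(x, y, llh_a), (x, y, llh_b)])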
178 |
179 | def walds_test(profile1, profile2):
180 | """Calculate the compatibility of two statistically independent
181 | measurements using normal approximation (Wald's method).
182 |
183 | This assumes that the log-likelihood surface is approximately elliptical.
184 |
185 | Parameters
186 | ----------
187 | profile1 : (x,y,llh) for measurement 1
188 | profile2 : (x,y,llh) for measurement 2
189 |
190 | """
191 | from scipy.stats import chi2 as chi2_dist
192 | from scipy.special import erfinv
193 | bestfits, covariances = [], []
194 | for x, y, llhs in [profile1, profile2]:
195 | idx_min = np.unravel_index(llhs.argmin(), llhs.shape)
196 | bestfit = x[idx_min[1]], y[idx_min[0]]
197 | bestfits.append(bestfit)
198 | covariance = estimate_cov_from_contour(x, y, llhs, bestfit)
199 | covariances.append(covariance)
200 |
201 | diff = np.matrix(bestfits[0]) - np.matrix(bestfits[1])
202 | cov_inv = np.linalg.inv(covariances[0] + covariances[1])
203 |
204 | chi2 = (diff * cov_inv * diff.transpose()).item(0)
205 | ndof = 2
206 | pvalue = chi2_dist.sf(chi2, ndof)
207 | nsigma = erfinv(1-pvalue) * np.sqrt(2) # 2-sided significance
208 |
209 | return (chi2, ndof, pvalue, nsigma)
210 |
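The same kind of synthetic profiles can be compared with walds_test, which replaces the full profile sum by best-fit points and Hessian-based covariance estimates (again a sketch with made-up values).

import numpy as np

x = np.linspace(-3, 3, 61)
y = np.linspace(-3, 3, 61)
grid = np.dstack(np.meshgrid(x, y))
cov = np.array([[0.4, 0.0], [0.0, 0.4]])

profile1 = (x, y, covariance_form(grid, np.array([-0.5, 0.0]), cov))
profile2 = (x, y, covariance_form(grid, np.array([+0.5, 0.0]), cov))

chi2, ndof, pvalue, nsigma = walds_test(profile1, profile2)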
211 |
212 | def _weighted_quantile_arg(values, weights, q=0.5):
213 | indices = np.argsort(values)
214 | sorted_indices = np.arange(len(values))[indices]
215 | medianidx = (weights[indices].cumsum()/weights[indices].sum()).searchsorted(q)
216 | if (medianidx >= 0) and (medianidx < len(values)):
217 | return sorted_indices[medianidx]
218 | return np.nan
219 |
220 |
221 | def weighted_quantile(values, weights, q=0.5):
222 | if len(values) != len(weights):
223 | raise ValueError("shapes of `values` and `weights` do not match")
224 | index = _weighted_quantile_arg(values, weights, q=q)
225 | if not np.isnan(index):
226 | return values[index]
227 | return np.nan
228 |
229 |
230 | def weighted_median(values, weights):
231 | return weighted_quantile(values, weights, q=0.5)
232 |
233 |
234 | def weighted_cov(m, y=None, weights=None, bias=0):
235 | """Estimate a (weighted) covariance matrix, given data.
236 |
237 | Covariance indicates the level to which two variables vary together.
238 | If we examine N-dimensional samples, :math:`X = [x_1, x_2, ... x_N]^T`,
239 | then the covariance matrix element :math:`C_{ij}` is the covariance of
240 | :math:`x_i` and :math:`x_j`. The element :math:`C_{ii}` is the variance
241 | of :math:`x_i`.
242 |
243 | Parameters
244 | ----------
245 | m : array_like
246 | A 1-D or 2-D array containing multiple variables and observations.
247 | Each row of `m` represents a variable, and each column a single
248 | observation of all those variables.
249 | y : array_like, optional
250 | An additional set of variables and observations. `y` has the same
251 | form as that of `m`.
252 | weights : array_like, optional
253 | A 1-D array containing the weights of the data points. This option
254 | should be used if data points have different weights in order to
255 | calculate the weighted covariance.
256 | bias : int, optional
257 | Default normalization is by ``(N - 1)``, where ``N`` is the number of
258 | observations given (unbiased estimate). If `bias` is 1, then
259 | normalization is by ``N``.
260 |
261 | Returns
262 | -------
263 | out : ndarray
264 | The covariance matrix of the variables.
265 |
266 | Examples
267 | --------
268 | Consider two variables, :math:`x_0` and :math:`x_1`, which
269 | correlate perfectly, but in opposite directions:
270 |
271 | >>> x = np.array([[0, 2], [1, 1], [2, 0]]).T
272 | >>> x
273 | array([[0, 1, 2],
274 | [2, 1, 0]])
275 |
276 | Note how :math:`x_0` increases while :math:`x_1` decreases. The covariance
277 | matrix shows this clearly:
278 |
279 | >>> weighted_cov(x)
280 | array([[ 1., -1.],
281 | [-1., 1.]])
282 |
283 | Note that element :math:`C_{0,1}`, which shows the correlation between
284 | :math:`x_0` and :math:`x_1`, is negative.
285 |
286 | Further, note how `x` and `y` are combined:
287 |
288 | >>> x = [-2.1, -1, 4.3]
289 | >>> y = [3, 1.1, 0.12]
290 | >>> X = np.vstack((x,y))
291 | >>> print(weighted_cov(X))
292 | [[ 11.71 -4.286 ]
293 | [ -4.286 2.14413333]]
294 | >>> print(weighted_cov(x, y))
295 | [[ 11.71 -4.286 ]
296 | [ -4.286 2.14413333]]
297 | >>> print(weighted_cov(x))
298 | 11.71
299 |
300 | """
301 | X = np.array(m, ndmin=2, dtype=float)
302 | if X.size == 0:
303 | # handle empty arrays
304 | return np.array(m)
305 |
306 | axis = 0
307 | tup = (slice(None), np.newaxis)
308 |
309 | N = X.shape[1]
310 |
311 | if weights is not None:
312 | weights = np.asarray(weights)/np.sum(weights)
313 | if len(weights) != N:
314 | raise ValueError("unequal dimension of `data` and `weights`.")
315 |
316 | if y is not None:
317 | y = np.array(y, ndmin=2, dtype=float)
318 | X = np.concatenate((X, y), axis)
319 |
320 | X -= np.average(X, axis=1-axis, weights=weights)[tup]
321 |
322 | if bias == 0:
323 | if weights is not None:
324 | fact = np.sum(weights) / (np.sum(weights)**2 - np.sum(weights**2))
325 | else:
326 | fact = 1 / (N - 1)
327 | else:
328 | if weights is not None:
329 | fact = 1 / np.sum(weights)
330 | else:
331 | fact = 1 / N
332 |
333 | if weights is not None:
334 | return (np.dot(weights * X, X.T.conj()) * fact).squeeze()
335 |
336 | return (np.dot(X, X.T.conj()) * fact).squeeze()
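Since the examples above only exercise the unweighted path, here is a small sketch of the weights argument (illustrative values): with equal weights the unbiased weighted estimate reproduces the unweighted one.

import numpy as np

x = np.array([[0., 1., 2.], [2., 1., 0.]])
w = np.ones(3)

print(weighted_cov(x, weights=w))
# [[ 1. -1.]
#  [-1.  1.]]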
337 |
--------------------------------------------------------------------------------
/kde/test_kde.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Test functions for the kde library.
5 | """
6 |
7 |
8 | from __future__ import absolute_import, division, print_function
9 |
10 | __license__ = """MIT License
11 |
12 | Copyright (c) 2014-2019 Sebastian Schoenen and Martin Leuermann
13 |
14 | Permission is hereby granted, free of charge, to any person obtaining a copy
15 | of this software and associated documentation files (the "Software"), to deal
16 | in the Software without restriction, including without limitation the rights
17 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18 | copies of the Software, and to permit persons to whom the Software is
19 | furnished to do so, subject to the following conditions:
20 |
21 | The above copyright notice and this permission notice shall be included in all
22 | copies or substantial portions of the Software.
23 |
24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 | SOFTWARE.
31 | """
32 |
33 | import numpy as np
34 |
35 |
36 | def test_kde(version, sampling_method, bw_method, n_samples, adaptive,
37 | alpha=0.3, weight_adaptive_bw=False):
38 | """Test the KDE routines of the kde package.
39 |
40 |
41 | Parameters
42 | ----------
43 | version : string
44 | One of "pykde" or "cudakde"
45 | sampling_method : string
46 | One of "uniform" or "exponential"
47 | bw_method : string
48 | One of "silverman" or "scott"
49 | n_samples : int > 0
50 | Number of random samples to use
51 | adaptive : bool
52 | Whether to use adaptive-bandwidth KDE
53 | alpha : float
54 | Alpha parameter (used only for adaptive-bandwidth KDE)
55 | weight_adaptive_bw : bool
56 | Whether to apply weights to samples
57 |
58 |
59 | Raises
60 | ------
61 | Exception if test fails
62 |
63 | """
64 | # Translate inputs
65 | version = version.strip().lower()
66 | sampling_method = sampling_method.strip().lower()
67 | bw_method = bw_method.strip().lower()
68 |
69 | if version == "pykde":
70 | from .pykde import bootstrap_kde, gaussian_kde
71 | elif version == "cudakde":
72 | from .cudakde import bootstrap_kde, gaussian_kde
73 | else:
74 | raise ValueError('`version` must be one of "pykde" or "cudakde".')
75 |
76 | # Define a data model and generate some random data between 0 and 10
77 | # Number of samples
78 | n_samples = int(n_samples)
79 |
80 | # Exponential Model
81 | expec = lambda x: 1./(np.exp(-10)-1.)**2 * np.exp(-x)
82 |
83 | # Generated data and reweighted to the exponential model
84 | np.random.seed(0)
85 | if sampling_method == "uniform":
86 | # Uniformly-generated data and weights
87 | x1 = np.random.uniform(0, 10, n_samples)
88 | x1_weights = np.exp(-x1)
89 | elif sampling_method == "exponential":
90 | # Exponentially-generated data and weights
91 | x1 = np.random.exponential(2, n_samples)
92 | x1_weights = np.exp(-0.5*x1)
93 | else:
94 | raise ValueError('`sampling_method` must be one of "uniform" or'
95 | ' "exponential".')
96 |
97 | # Exponentially generated data (w/o weights)
98 | x2 = np.random.exponential(1, n_samples)
99 |
100 | #
101 | # Get histograms
102 | #
103 |
104 | # Define bins
105 | bins = np.linspace(0, 10, 31)
106 |
107 | # Weighted data
108 | hist_weights = np.histogram(x1, bins=bins, weights=x1_weights,
109 | density=True)
110 |
111 | # Exponential data
112 | hist_expo = np.histogram(x2, bins=bins, density=True)
113 |
114 | #
115 | # Get KDE kernels
116 | #
117 |
118 | # Kernels for weighted data (w/o adaptive kernels)
119 | kernel_weights = gaussian_kde(x1, weights=x1_weights, bw_method=bw_method)
120 |
121 | # Kernels for weighted data (with adaptive kernels)
122 | kernel_weights_adaptive = gaussian_kde(
123 | x1, weights=x1_weights, bw_method=bw_method, adaptive=adaptive,
124 | weight_adaptive_bw=weight_adaptive_bw, alpha=alpha
125 | )
126 |
127 | # Kernels for exponential data (w/o adaptive kernels)
128 | kernel_expo = gaussian_kde(x2, bw_method=bw_method)
129 |
130 | # Kernels for exponential data (with adaptive kernels)
131 | kernel_expo_adaptive = gaussian_kde(x2, bw_method=bw_method,
132 | adaptive=adaptive, alpha=alpha)
133 |
134 | #
135 | # Plot histograms and KDEs
136 | #
137 |
138 | # Define evaluation points
139 | X = np.linspace(0, 10, 1001)
140 |
141 | # In the presence of boundaries, reflect the KDEs at the boundary (a standalone sketch of this follows the function)
142 |
143 | # Define reflection range
144 | x_below = (-2., 0.)
145 | # Reflection at the upper boundary is only necessary if the data are uniformly generated in [0, 10]
146 | x_above = (10., 12.)
147 |
148 | # Define evaluation points beyond the boundaries (below 0 and above 10)
149 | mask_below = (X <= (x_below[1]-(x_below[0]-x_below[1])))
150 | X_below = x_below[1] - (X[mask_below] - x_below[1])
151 |
152 | mask_above = (X >= (x_above[0]-(x_above[1]-x_above[0])))
153 | X_above = x_above[0] + (x_above[0] - X[mask_above])
154 |
155 | Y_weights = kernel_weights(X)
156 | Y_weights[mask_below] += kernel_weights(X_below)
157 | if sampling_method == "uniform":
158 | Y_weights[mask_above] += kernel_weights(X_above)
159 |
160 | Y_weights_adaptive = kernel_weights_adaptive(X)
161 | Y_weights_adaptive[mask_below] += kernel_weights_adaptive(X_below)
162 | if sampling_method == "uniform":
163 | Y_weights_adaptive[mask_above] += kernel_weights_adaptive(X_above)
164 |
165 | #
166 | # Plots for exponential data
167 | #
168 |
169 | Y_expo = kernel_expo(X)
170 | Y_expo[mask_below] += kernel_expo(X_below)
171 |
172 | Y_expo_adaptive = kernel_expo_adaptive(X)
173 | Y_expo_adaptive[mask_below] += kernel_expo_adaptive(X_below)
174 |
175 | #
176 | # For an error estimate on an evaluation point use bootstrapping
177 | #
178 |
179 | # Define the number of bootstrap iterations
180 | nbootstraps = 1000
181 |
182 | #
183 | # Get bootstrapped KDE kernels (settings as set above)
184 | #
185 |
186 | # Kernels for weighted data (w/o adaptive kernels)
187 | bootstrap_kernel_weights = bootstrap_kde(x1, weights=x1_weights,
188 | bw_method=bw_method,
189 | niter=nbootstraps)
190 |
191 | # Kernels for weighted data (with adaptive kernels)
192 | bootstrap_kernel_weights_adaptive = bootstrap_kde(
193 | x1, weights=x1_weights, bw_method=bw_method, adaptive=adaptive,
194 | weight_adaptive_bw=weight_adaptive_bw, alpha=alpha, niter=nbootstraps
195 | )
196 |
197 | # Kernels for exponential data (w/o adaptive kernels)
198 | bootstrap_kernel_expo = bootstrap_kde(x2, bw_method=bw_method,
199 | niter=nbootstraps)
200 |
201 | # Kernels for exponential data (with adaptive kernels)
202 | bootstrap_kernel_expo_adaptive = bootstrap_kde(x2, bw_method=bw_method,
203 | adaptive=adaptive,
204 | alpha=alpha,
205 | niter=nbootstraps)
206 |
207 | # Plots using reflection and bootstrapping
208 |
209 | # Plots for weighted data
210 |
211 | Y_weights = bootstrap_kernel_weights(X)
212 | Y_weights_below = bootstrap_kernel_weights(X_below)
213 | Y_weights_above = bootstrap_kernel_weights(X_above)
214 |
215 | Y_weights[0][mask_below] += Y_weights_below[0]
216 | Y_weights[1][mask_below] = np.sqrt(
217 | Y_weights[1][mask_below]**2 + Y_weights_below[1]**2
218 | )
219 | if sampling_method == "uniform":
220 | Y_weights[0][mask_above] += Y_weights_above[0]
221 | Y_weights[1][mask_above] = np.sqrt(
222 | Y_weights[1][mask_above]**2 + Y_weights_above[1]**2
223 | )
224 |
225 | Y_weights_adaptive = bootstrap_kernel_weights_adaptive(X)
226 | Y_weights_adaptive_below = bootstrap_kernel_weights_adaptive(X_below)
227 | Y_weights_adaptive_above = bootstrap_kernel_weights_adaptive(X_above)
228 |
229 | Y_weights_adaptive[0][mask_below] += Y_weights_adaptive_below[0]
230 | Y_weights_adaptive[1][mask_below] = np.sqrt(
231 | Y_weights_adaptive[1][mask_below]**2 + Y_weights_adaptive_below[1]**2
232 | )
233 | if sampling_method == "uniform":
234 | Y_weights_adaptive[0][mask_above] += Y_weights_adaptive_above[0]
235 | Y_weights_adaptive[1][mask_above] = np.sqrt(
236 | Y_weights_adaptive[1][mask_above]**2
237 | + Y_weights_adaptive_above[1]**2
238 | )
239 |
240 | Y_expo = bootstrap_kernel_expo(X)
241 | Y_expo_below = bootstrap_kernel_expo(X_below)
242 | Y_expo_above = bootstrap_kernel_expo(X_above)
243 |
244 | Y_expo[0][mask_below] += Y_expo_below[0]
245 | Y_expo[1][mask_below] = np.sqrt(
246 | Y_expo[1][mask_below]**2 + Y_expo_below[1]**2
247 | )
248 |
249 | Y_expo_adaptive = bootstrap_kernel_expo_adaptive(X)
250 | Y_expo_adaptive_below = bootstrap_kernel_expo_adaptive(X_below)
251 | Y_expo_adaptive_above = bootstrap_kernel_expo_adaptive(X_above)
252 |
253 | Y_expo_adaptive[0][mask_below] += Y_expo_adaptive_below[0]
254 | Y_expo_adaptive[1][mask_below] = np.sqrt(
255 | Y_expo_adaptive[1][mask_below]**2 + Y_expo_adaptive_below[1]**2
256 | )
257 |
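A standalone sketch of the boundary-reflection trick used above, for a density supported on x >= 0 (the data and bandwidth choice are made up; gaussian_kde is taken from kde.pykde exactly as imported in this test).

import numpy as np
from kde.pykde import gaussian_kde

data = np.abs(np.random.normal(size=1000))  # samples supported on x >= 0
kernel = gaussian_kde(data, bw_method="silverman")

xs = np.linspace(0, 3, 301)
# Reflect at the x = 0 boundary: f_reflected(x) = f(x) + f(-x)
density = kernel(xs) + kernel(-xs)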
258 |
259 | def main():
260 | """Main"""
261 | test_kde(version='pykde',
262 | sampling_method='exponential',
263 | bw_method='silverman',
264 | n_samples=100,
265 | adaptive=True,
266 | alpha=0.3,
267 | weight_adaptive_bw=False)
268 | print("<< test_kde.py / pykde : PASS >>")
269 | test_kde(version='cudakde',
270 | sampling_method='exponential',
271 | bw_method='silverman',
272 | n_samples=100,
273 | adaptive=True,
274 | alpha=0.3,
275 | weight_adaptive_bw=True)
276 | print("<< test_kde.py / cudakde : PASS >>")
277 |
278 |
279 | if __name__ == "__main__":
280 | main()
281 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 |
4 | from setuptools import setup, Extension
5 |
6 |
7 | if __name__ == "__main__":
8 | ckde = Extension(
9 | name='kde.kde',
10 | sources=['kde/kde.c'],
11 | extra_compile_args=['-Wall', '-O3', '-fPIC', '-Werror']
12 | )
13 |
14 | setup(
15 | name='kde',
16 | version='0.1',
17 | description=('Multi-dimensional Kernel Density Estimation (KDE)'
18 | ' including adaptive bandwidths and C and'
19 | ' CUDA implementations for specific cases.'),
20 | author='Sebastian Schoenen, Martin Leuermann',
21 | author_email='schoenen@physik.rwth-aachen.de',
22 | url='https://github.com/icecubeopensource/kde',
23 | install_requires=[
24 | 'numexpr',
25 | 'numpy',
26 | 'scipy',
27 | ],
28 | extras_require={'cuda': ['pycuda']},
29 | ext_modules=[ckde],
30 | packages=['kde'],
31 | entry_points={
32 | 'console_scripts': ['test_kde.py = kde.test_kde:main']
33 | }
34 | )
35 |
--------------------------------------------------------------------------------