├── .gitignore ├── LICENSE ├── README.md ├── RELEASE_NOTES ├── examples └── kde_example.ipynb ├── kde ├── __init__.py ├── classes.py ├── cudakde.py ├── kde.c ├── pykde.py ├── stat_tools.py └── test_kde.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore precompiled packages 2 | *.py[cod] 3 | 4 | # Ignore cached 5 | 6 | # OSX finder files 7 | .DS_Store 8 | 9 | # Ignore auto-generated libraries and files 10 | *.so 11 | *.o 12 | 13 | # Packages 14 | dist 15 | build 16 | *.egg-info 17 | 18 | # Common editor remnants 19 | *~ 20 | *.swp 21 | 22 | # Unison backups 23 | .backup 24 | 25 | # Plots 26 | *.pdf 27 | *.png 28 | 29 | # IPython notebook backups 30 | .ipynb_checkpoints 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 The IceCube Collaboration 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | kde 2 | --- 3 | Multi-dimensional Kernel Density Estimation (KDE) with adaptive 4 | bandwidths, including C and CUDA implementations for specific cases. 5 | 6 | 7 | Authors 8 | ------- 9 | Sebastian Schoenen (schoenen@physik.rwth-aachen.de) and Martin Leuermann for 10 | the IceCube collaboration. 11 | 12 | 13 | Installation Instructions 14 | ------------------------- 15 | Download the software into a directory of your choice. There should be a subdirectory 16 | named "kde" within that directory. 17 | 18 | To install in a location independent of your system Python files, install via 19 | the following command: 20 | 21 | $ pip install [cuda] --user 22 | 23 | where the optional [cuda] extra enables GPU support. 
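For example, if the repository was downloaded to `./kde-source` (an illustrative path; substitute your own), the command above becomes

    $ pip install "./kde-source[cuda]" --user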
24 | 25 | To install in editable mode, with references to the source code where it was downloaded (so that 26 | changes to the source code take effect immediately): 27 | 28 | $ pip install -e [cuda] --user 29 | -------------------------------------------------------------------------------- /RELEASE_NOTES: -------------------------------------------------------------------------------- 1 | October 18, 2016 Sebastian Schoenen (schoenen@physik.rwth-aachen.de) 2 | ---------------------------------------------------------------------------- 3 | Release V00-00-02 4 | - C code now supports n-dimensional KDEs 5 | - pykde: bw_method default changed from 'None' to 'silverman' 6 | -------------------------------------------------------------------------------- /kde/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icecube/kde/9f65f3de7d228b61a27a4433e87b951de96ffec2/kde/__init__.py -------------------------------------------------------------------------------- /kde/classes.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=line-too-long, invalid-name 2 | 3 | 4 | from __future__ import absolute_import, division, print_function 5 | 6 | __license__ = """MIT License 7 | 8 | Copyright (c) 2014-2019 Sebastian Schoenen and Martin Leuermann 9 | 10 | Permission is hereby granted, free of charge, to any person obtaining a copy 11 | of this software and associated documentation files (the "Software"), to deal 12 | in the Software without restriction, including without limitation the rights 13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | copies of the Software, and to permit persons to whom the Software is 15 | furnished to do so, subject to the following conditions: 16 | 17 | The above copyright notice and this permission notice shall be included in all 18 | copies or substantial portions of the Software. 19 | 20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 | SOFTWARE. 
27 | """ 28 | 29 | from warnings import warn 30 | 31 | import numpy as np 32 | from scipy.interpolate import RectBivariateSpline as spl2D 33 | 34 | from .kde import getLambda_ND, kde_ND 35 | from .pykde import gaussian_kde 36 | from .stat_tools import weighted_cov 37 | 38 | 39 | class KDE(object): 40 | """Initialize KDE object 41 | 42 | Parameters 43 | ---------- 44 | data 45 | use_cuda 46 | weights 47 | alpha 48 | method 49 | 50 | """ 51 | def __init__(self, data, use_cuda, weights=None, alpha=0.3, method='silverman'): 52 | self.use_cuda = use_cuda 53 | if self.use_cuda: 54 | import pycuda.driver as cuda 55 | import pycuda.autoinit # pylint: disable=unused-variable 56 | self.cuda = cuda 57 | 58 | self.data = np.atleast_2d(data) 59 | 60 | self.d, self.n = self.data.shape 61 | 62 | self.alpha = alpha 63 | 64 | if weights is None or len(weights) == 0: 65 | self.w = np.full(shape=self.n, fill_value=1/self.n, dtype=float) 66 | self.setCovariance(weights=False) 67 | elif len(weights) == self.n: 68 | self.w = np.asarray(weights, dtype=float) / np.sum(weights) 69 | self.setCovariance(weights=True) 70 | else: 71 | raise AssertionError("Length of data (%d) and length of weights" 72 | " (%d) incompatible." 73 | % (self.n, len(weights))) 74 | 75 | self.hMethod = method 76 | self.lambdas = None 77 | self.points = None 78 | self.m = None 79 | self.d_pt = None 80 | self.values = None 81 | self.h = None 82 | self.weights = None 83 | self.w_norm = None 84 | self.w_norm_lambdas = None 85 | self.preFac = None 86 | self.logSum = None 87 | self.invGlob = None 88 | 89 | def setCovariance(self, weights=False): 90 | """Set covariance from data and weights 91 | 92 | Parameters 93 | ---------- 94 | weights : bool, optional 95 | 96 | """ 97 | if weights: 98 | self.c = weighted_cov(self.data, weights=self.w, bias=False) 99 | else: 100 | self.c = np.cov(self.data) 101 | 102 | if self.d != 1: 103 | self.c_inv = np.linalg.inv(self.c) 104 | self.detC = np.linalg.det(self.c_inv) 105 | else: 106 | self.c_inv = 1.0/self.c 107 | self.detC = self.c_inv 108 | 109 | def calcLambdas(self, weights=False, weightedCov=False, use_grid=False): 110 | """Calculate bandwidth lambda for data points in KDE function 111 | 112 | Parameters 113 | ---------- 114 | weights : bool, optional 115 | weightedCov : bool, optional 116 | use_grid : bool, optional 117 | 118 | """ 119 | self.configure("lambdas", weights=weights, weightedCov=weightedCov) 120 | 121 | if self.use_cuda: 122 | self.cuda_calc_lambdas() 123 | else: 124 | if use_grid and self.d == 2: 125 | # grid # 126 | N_grid = 100 127 | ext = np.zeros((2, self.d)) 128 | for i in range(self.d): 129 | diff = (np.max(self.data[i]) - np.min(self.data[i])) * 0.1 130 | ext[i, 0] = np.min(self.data[i]) - diff 131 | ext[i, 1] = np.max(self.data[i]) + diff 132 | spaces1D = [np.linspace(t[0], t[1], N_grid) for t in ext] 133 | grid = np.array(np.meshgrid(*spaces1D)) 134 | grid = grid.reshape((self.d, grid.size / self.d)) 135 | 136 | # kde # 137 | n_w = len(self.w) 138 | w = self.w if weights else np.full(shape=n_w, fill_value=1/n_w) 139 | ss_kde = gaussian_kde(self.data, weights=w, adaptive=False, 140 | weight_adaptive_bw=False, 141 | alpha=self.alpha, bw_method=self.hMethod) 142 | 143 | vals = ss_kde(grid) # evaluate in log instead of linear to avoid negative values 144 | spline1 = spl2D(*spaces1D, z=(np.array(vals)).reshape([N_grid, N_grid]).T, kx=3, ky=3) # TRANSPOSE HERE!!!! 
145 | kde_vals2 = np.array([spline1(x, y)[0][0] for x, y in self.data.T]) # go back to linear world 146 | 147 | #print("Ratio direct:") 148 | #print(( np.array(kde_vals2) - np.array(kde_vals) ) / np.array(kde_vals)) 149 | 150 | # lambdas # 151 | glob_sum = np.exp(np.sum(np.log(kde_vals2) / len(kde_vals2))) 152 | self.lambdas = np.power(kde_vals2 / glob_sum, (-1.0) * self.alpha) #self.alpha*(-1.0)) # hack !!! 153 | else: 154 | self.lambdas = getLambda_ND( 155 | int(self.d), 156 | list(self.c_inv.flatten()), 157 | list(self.data.flatten()), 158 | list(self.w_norm_lambdas), 159 | float(self.detC), 160 | self.h, 161 | self.alpha 162 | ) 163 | 164 | def kde(self, points, weights=True, weightedCov=True): 165 | """Evaluate kde function 166 | 167 | Parameters 168 | ---------- 169 | points 170 | weights : bool, optional 171 | weightedCov : bool, optional 172 | 173 | """ 174 | self.points = np.atleast_2d(points) 175 | self.d_pt, self.m = self.points.shape 176 | 177 | if self.d > 1 and self.d != self.d_pt: 178 | assert self.d == self.m 179 | points = list(zip(*points[::1])) 180 | self.d_pt, self.m = np.array(points).shape 181 | warn("Dimensions of given points did not fit initialized kde" 182 | " function. Rotate given sample and proceed with fingers" 183 | " crossed.") 184 | 185 | self.configure("kde", weights=weights, weightedCov=weightedCov) 186 | 187 | if self.use_cuda: 188 | self.cuda_kde(points) 189 | else: 190 | self.values = kde_ND( 191 | int(self.d), 192 | list(self.c_inv.flatten()), 193 | list(self.data.flatten()), 194 | list(self.points.flatten()), 195 | list(self.w_norm), 196 | list(self.preFac), 197 | float(self.detC), 198 | self.h, 199 | ) 200 | 201 | def configure(self, mode, weights=False, weightedCov=False): 202 | """Get h, tempNorm, w_norm, w_lambdas, preFac 203 | 204 | Parameters 205 | ---------- 206 | mode 207 | weights : bool 208 | weightedCov : bool 209 | 210 | """ 211 | if isinstance(self.hMethod, str): 212 | if self.hMethod == 'silverman': 213 | # (n * (d + 2) / 4.)**(-1. / (d + 4)). 214 | self.h = np.power(1.0/(self.n*(self.d+2.0)/4.0), 1.0/(self.d+4.0)) 215 | elif self.hMethod == 'scott': 216 | self.h = np.power(1.0/(self.n), 1.0/(self.d+4.0)) 217 | else: 218 | raise ValueError("%s unknown string as normalization" 219 | " constant. Implemented are 'scott'," 220 | " 'silverman'" %(self.hMethod,)) 221 | elif isinstance(self.hMethod, (int, float)): 222 | self.h = self.hMethod 223 | else: 224 | raise ValueError("Normalization constant must be of type int," 225 | " float or str!") 226 | 227 | self.setCovariance(weights=weightedCov) 228 | 229 | if weights: 230 | self.weights = self.w 231 | else: 232 | self.weights = np.full(shape=self.n, fill_value=1/self.n, 233 | dtype=float) 234 | 235 | if mode == "lambdas": 236 | self.w_norm_lambdas = self.weights * np.sqrt(self.detC / np.power(2.0*np.pi*self.h*self.h, self.d)) 237 | self.preFac = -0.5/np.power(self.h, 2) 238 | elif mode == "kde": 239 | self.w_norm = self.weights * np.sqrt(self.detC / np.power(2.0*np.pi*self.h*self.h*np.array(self.lambdas)*np.array(self.lambdas), self.d)) 240 | self.preFac = -0.5/np.power(self.h*np.array(self.lambdas), 2) 241 | else: 242 | raise ValueError("Could not configure kde object. 
Unknown mode: %s" %(mode,)) 243 | 244 | def cuda_calc_lambdas(self): 245 | """Calculate lambdas using cuda implementation""" 246 | from pycuda.compiler import SourceModule 247 | 248 | # conversion of python variables 249 | n = np.int32(self.n) 250 | logSum = np.zeros(n) 251 | kde_val_la = np.zeros(n) 252 | 253 | h_kde_val_la = np.array(kde_val_la).astype(np.float64) 254 | h_logSum = logSum.astype(np.float64) 255 | h_w_norm_lambdas = np.array(self.w_norm_lambdas).astype(np.float32) 256 | 257 | # reservation of memory on gpu 258 | d_kde_val_la = self.cuda.mem_alloc(h_kde_val_la.nbytes) 259 | d_logSum = self.cuda.mem_alloc(h_logSum.nbytes) 260 | d_w_norm_lambdas = self.cuda.mem_alloc(h_w_norm_lambdas.nbytes) 261 | 262 | # memory copy to gpu 263 | self.cuda.memcpy_htod(d_kde_val_la, h_kde_val_la) 264 | self.cuda.memcpy_htod(d_logSum, h_logSum) 265 | self.cuda.memcpy_htod(d_w_norm_lambdas, h_w_norm_lambdas) 266 | 267 | # dimension-dependent memory allocation 268 | if self.d == 2: 269 | h_x1 = np.array(self.data[0]).astype(np.float32) 270 | h_x2 = np.array(self.data[1]).astype(np.float32) 271 | d_x1 = self.cuda.mem_alloc(h_x1.nbytes) 272 | d_x2 = self.cuda.mem_alloc(h_x2.nbytes) 273 | self.cuda.memcpy_htod(d_x1, h_x1) 274 | self.cuda.memcpy_htod(d_x2, h_x2) 275 | addParam = "const float *x2, const double c11, const double c12, const double c21, const double c22" 276 | addDeclare = "double ent2;" 277 | calculation = """ 278 | ent1 = x1[j]-x1[idx]; 279 | ent2 = x2[j]-x2[idx]; 280 | thisKde += w_norm_lambda[j] * exp(preFac * (ent1*(c11*ent1+c12*ent2) + ent2*(c21*ent1+c22*ent2))); 281 | """ 282 | elif self.d == 1: 283 | h_x1 = np.array(self.data[0]).astype(np.float32) 284 | d_x1 = self.cuda.mem_alloc(h_x1.nbytes) 285 | self.cuda.memcpy_htod(d_x1, h_x1) 286 | addParam = "const double c" 287 | addDeclare = "" 288 | calculation = """ 289 | ent1 = x1[j]-x1[idx]; 290 | thisKde += w_norm_lambda[j] * exp(preFac * (ent1*c*ent1)); 291 | """ 292 | 293 | # define function on gpu to be executed 294 | mod = SourceModule(""" 295 | __global__ void CalcLambda(const float *x1, """+addParam+""", const int n, const double preFac, const float *w_norm_lambda, double *logSum, double *kde){ 296 | int idx = threadIdx.x + blockIdx.x*blockDim.x; 297 | if (idx < n){ 298 | double thisKde, ent1; 299 | """+addDeclare+""" 300 | int j; 301 | thisKde = 0.0; 302 | for (j=0; j < n; j++) { 303 | """+calculation+""" 304 | } // for 305 | logSum[idx] = 1.0/n * log(thisKde); 306 | kde[idx] = thisKde; 307 | } // if 308 | __syncthreads(); 309 | } // CalcLambda_2d 310 | """) 311 | 312 | if n >= 512: 313 | bx = np.int32(512) 314 | else: 315 | bx = np.int32(n) 316 | gx = np.int32(n/bx) 317 | if n % bx != 0: 318 | gx += 1 319 | 320 | func = mod.get_function("CalcLambda") # code compiling 321 | if self.d == 2: 322 | # call of gpu function 323 | func(d_x1, d_x2, 324 | self.c_inv[0, 0], self.c_inv[1, 0], 325 | self.c_inv[0, 1], self.c_inv[1, 1], 326 | n, 327 | self.preFac, 328 | d_w_norm_lambdas, d_logSum, d_kde_val_la, 329 | block=(int(bx), 1, 1), 330 | grid=(int(gx), 1, 1)) 331 | elif self.d == 1: 332 | # call of gpu function 333 | func(d_x1, 334 | self.c_inv, 335 | n, 336 | self.preFac, 337 | d_w_norm_lambdas, d_logSum, d_kde_val_la, 338 | block=(int(bx), 1, 1), 339 | grid=(int(gx), 1, 1)) 340 | 341 | # backward copy from gpu to cpu memory 342 | self.cuda.memcpy_dtoh(h_logSum, d_logSum) 343 | self.cuda.memcpy_dtoh(h_kde_val_la, d_kde_val_la) 344 | 345 | self.logSum = sum(h_logSum) 346 | self.invGlob = 1.0/np.exp(self.logSum) 347 | self.lambdas 
= np.array(1.0/np.power(self.invGlob*np.array(h_kde_val_la), self.alpha)) 348 | 349 | def cuda_kde(self, points, weights=True): 350 | """Calculate kde values using CUDA implementation 351 | 352 | Parameters 353 | ---------- 354 | points 355 | weights : bool, optional 356 | 357 | """ 358 | from pycuda.compiler import SourceModule 359 | 360 | self.points = np.atleast_2d(points) 361 | self.d_pt, self.m = self.points.shape 362 | 363 | # conversion of python variables 364 | n = np.int32(self.n) 365 | m = np.int32(self.m) 366 | kde_val = np.zeros(self.m) 367 | 368 | h_preFac = np.array(self.preFac).astype(np.float64) 369 | h_w_norm = np.array(self.w_norm).astype(np.float64) 370 | h_kde_val = np.array(kde_val).astype(np.float64) 371 | 372 | # reservation of memory on gpu 373 | d_preFac = self.cuda.mem_alloc(h_preFac.nbytes) 374 | d_w_norm = self.cuda.mem_alloc(h_w_norm.nbytes) 375 | d_kde_val = self.cuda.mem_alloc(h_kde_val.nbytes) 376 | 377 | # memory copy to gpu 378 | self.cuda.memcpy_htod(d_preFac, h_preFac) 379 | self.cuda.memcpy_htod(d_w_norm, h_w_norm) 380 | self.cuda.memcpy_htod(d_kde_val, h_kde_val) 381 | 382 | # dimension-dependent memory allocation 383 | if self.d == 2: 384 | h_x1 = np.array(self.data[0]).astype(np.float32) 385 | h_x2 = np.array(self.data[1]).astype(np.float32) 386 | h_y1 = np.array(self.points[0]).astype(np.float32) 387 | h_y2 = np.array(self.points[1]).astype(np.float32) 388 | d_x1 = self.cuda.mem_alloc(h_x1.nbytes) 389 | d_x2 = self.cuda.mem_alloc(h_x2.nbytes) 390 | d_y1 = self.cuda.mem_alloc(h_y1.nbytes) 391 | d_y2 = self.cuda.mem_alloc(h_y2.nbytes) 392 | self.cuda.memcpy_htod(d_x1, h_x1) 393 | self.cuda.memcpy_htod(d_x2, h_x2) 394 | self.cuda.memcpy_htod(d_y1, h_y1) 395 | self.cuda.memcpy_htod(d_y2, h_y2) 396 | addDeclare = "double ent2;" 397 | addParam = "const float *x2, const float *y2, const double c11, const double c12, const double c21, const double c22" 398 | calculation = """ 399 | ent1 = x1[j]-y1[idx]; 400 | ent2 = x2[j]-y2[idx]; 401 | thisKde += w_norm[j] * exp(preFac[j] * (ent1*(c11*ent1+c12*ent2) + ent2*(c21*ent1+c22*ent2))); 402 | """ 403 | elif self.d == 1: 404 | h_x1 = np.array(self.data[0]).astype(np.float32) 405 | h_y1 = np.array(self.points[0]).astype(np.float32) 406 | d_x1 = self.cuda.mem_alloc(h_x1.nbytes) 407 | d_y1 = self.cuda.mem_alloc(h_y1.nbytes) 408 | self.cuda.memcpy_htod(d_x1, h_x1) 409 | self.cuda.memcpy_htod(d_y1, h_y1) 410 | addParam = "const double c" 411 | addDeclare = "" 412 | calculation = """ 413 | ent1 = x1[j]-y1[idx]; 414 | thisKde += w_norm[j] * exp(preFac[j] * c * pow(ent1, 2)); 415 | """ 416 | 417 | # define executed function 418 | mod = SourceModule(""" 419 | __global__ void CalcKde(const float *x1, const float *y1, """+addParam+""", const int n, const int m, const double *preFac, const double *w_norm, double *kde){ 420 | int idx = threadIdx.x + blockIdx.x*blockDim.x; 421 | if (idx < m){ 422 | double thisKde, ent1; 423 | """+addDeclare+""" 424 | int j; 425 | thisKde = 0.0; 426 | for (j=0; j < n; j++) { 427 | """+calculation+""" 428 | } // for 429 | kde[idx] = thisKde; 430 | } // if 431 | __syncthreads(); 432 | } // CalcKde_2d 433 | """) 434 | 435 | if n >= 512: 436 | bx = np.int32(512) 437 | else: 438 | bx = np.int32(n) 439 | gx = np.int32(self.m/bx) 440 | if n / bx != 0.0: 441 | gx += 1 442 | 443 | # code compiling 444 | func = mod.get_function("CalcKde") 445 | if self.d == 2: 446 | # call of gpu function 447 | func(d_x1, d_y1, d_x2, d_y2, 448 | self.c_inv[0, 0], self.c_inv[1, 0], 449 | self.c_inv[0, 1], self.c_inv[1, 1], 
450 | n, m, 451 | d_preFac, d_w_norm, d_kde_val, 452 | block=(int(bx), 1, 1), 453 | grid=(int(gx), 1, 1)) 454 | elif self.d == 1: 455 | # call of gpu function 456 | func(d_x1, d_y1, 457 | self.c_inv, 458 | n, m, 459 | d_preFac, d_w_norm, d_kde_val, 460 | block=(int(bx), 1, 1), 461 | grid=(int(gx), 1, 1)) 462 | 463 | # backward copy from gpu to cpu memory 464 | self.cuda.memcpy_dtoh(h_kde_val, d_kde_val) 465 | 466 | self.values = h_kde_val 467 | -------------------------------------------------------------------------------- /kde/cudakde.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=invalid-name 2 | 3 | 4 | from __future__ import absolute_import, division 5 | 6 | __license__ = """MIT License 7 | 8 | Copyright (c) 2014-2019 Sebastian Schoenen and Martin Leuermann 9 | 10 | Permission is hereby granted, free of charge, to any person obtaining a copy 11 | of this software and associated documentation files (the "Software"), to deal 12 | in the Software without restriction, including without limitation the rights 13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | copies of the Software, and to permit persons to whom the Software is 15 | furnished to do so, subject to the following conditions: 16 | 17 | The above copyright notice and this permission notice shall be included in all 18 | copies or substantial portions of the Software. 19 | 20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 | SOFTWARE. 
27 | """ 28 | 29 | import warnings 30 | import numpy as n 31 | from .classes import KDE 32 | 33 | 34 | class gaussian_kde(KDE): 35 | def __init__(self, data, weights=None, kde_values=None, use_cuda=True, 36 | adaptive=False, weight_adaptive_bw=False, alpha=0.3, 37 | bw_method='silverman'): 38 | if kde_values != None: 39 | raise NotImplementedError("`kde_values` is not supported for" 40 | " cudakde.") 41 | KDE.__init__(self, data, use_cuda, weights=weights, alpha=alpha, 42 | method=bw_method) 43 | 44 | self.weighted = False if weights is None or len(weights) == 0 else True 45 | 46 | if adaptive: 47 | if not self.weighted and weight_adaptive_bw: 48 | warnings.warn("Since `weights` aren't given" 49 | " `weight_adaptive_bw` will have no effect!") 50 | self.calcLambdas(weights=weight_adaptive_bw, 51 | weightedCov=weight_adaptive_bw) 52 | else: 53 | self.lambdas = n.ones(self.n) 54 | 55 | def __call__(self, points): 56 | points = n.atleast_2d(points) 57 | self.kde(points, weights=self.weighted, weightedCov=self.weighted) 58 | return n.array(self.values) 59 | 60 | 61 | class bootstrap_kde(object): 62 | def __init__(self, data, niter=10, weights=None, **kwargs): 63 | assert int(niter) == float(niter) 64 | niter = int(niter) 65 | 66 | self.kernels = [] 67 | self.bootstrap_indices = [] 68 | 69 | self.data = n.atleast_2d(data) 70 | self.d, self.n = self.data.shape 71 | self.weighted = False if weights is None or len(weights) == 0 else True 72 | 73 | for _ in range(niter): 74 | indices = n.array(self.get_bootstrap_indices()) 75 | self.bootstrap_indices.append(indices) 76 | if self.weighted: 77 | kernel = gaussian_kde(data[..., indices], 78 | weights=weights[indices], 79 | **kwargs) 80 | else: 81 | kernel = gaussian_kde(data[..., indices], **kwargs) 82 | self.kernels.append(kernel) 83 | 84 | def __call__(self, points): 85 | return self.evaluate(points) 86 | 87 | def evaluate(self, points): 88 | points = n.atleast_2d(points) 89 | _, m = points.shape 90 | means, sqmeans = n.zeros(m), n.zeros(m) 91 | for kernel in self.kernels: 92 | values = kernel(points) 93 | means += values 94 | sqmeans += values**2 95 | means /= len(self.kernels) 96 | sqmeans /= len(self.kernels) 97 | errors = n.sqrt(sqmeans - means**2) 98 | return means, errors 99 | 100 | def get_bootstrap_indices(self): 101 | bootstrap_indices = n.random.choice(self.n, size=self.n, replace=True) 102 | return bootstrap_indices 103 | -------------------------------------------------------------------------------- /kde/kde.c: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////// 2 | ///// KDE CLASS - C IMPLEMENTATION ////// 3 | ///// copyright (C) 2014 Martin Leuermann (May 2014) ////// 4 | //////////////////////////////////////////////////////////// 5 | // 6 | // MIT License 7 | // 8 | // Copyright (c) 2014-2019 Martin Leuermann 9 | // 10 | // Permission is hereby granted, free of charge, to any person obtaining a copy 11 | // of this software and associated documentation files (the "Software"), to deal 12 | // in the Software without restriction, including without limitation the rights 13 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | // copies of the Software, and to permit persons to whom the Software is 15 | // furnished to do so, subject to the following conditions: 16 | // 17 | // The above copyright notice and this permission notice shall be included in all 18 | // copies or substantial portions of the Software. 
19 | // 20 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 | // SOFTWARE. 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | #include 33 | #define _USE_MATH_DEFINES 34 | #include 35 | 36 | int diff_ms(struct timeval t1, struct timeval t2) 37 | { 38 | return (((t1.tv_sec - t2.tv_sec) * 1000000) + (t1.tv_usec - t2.tv_usec))/1000; 39 | } 40 | 41 | /////////////////////////////////////////////////// 42 | //////// CALCULATE KDE VALUES FOR ///////////////// 43 | //////// DATASET X AND POINTS Y ///////////////// 44 | /////////////////////////////////////////////////// 45 | 46 | //////////////////////// 47 | /// FOR 1 DIMENSION /// 48 | //////////////////////// 49 | static PyObject *pr_kde_1d(PyObject *self, PyObject *args){ 50 | 51 | ////////////////// DECLARATIONS /////////////////////// 52 | int ListSize1, ListSize2, i,j; 53 | double c, res, ent, h; 54 | PyObject *objx, *objy, *objpreFac, *objw_norm; 55 | PyObject *ListItem, *ListItem2, *ListItem3; 56 | double *x, *y, *preFac, *w_norm; 57 | 58 | /////////////////// GET INPUT ////////////////////////// 59 | if (!PyArg_ParseTuple(args, "dOOdOO", &c, &objx, &objy, &h, &objpreFac, &objw_norm)) 60 | return NULL; 61 | 62 | 63 | ////////// GET FIRST LIST-GROUP FROM PYTHON //////////// 64 | ListSize1 = PyList_Size(objx); 65 | x = (double*) malloc(sizeof(double)*ListSize1); 66 | preFac = (double*) malloc(sizeof(double)*ListSize1); 67 | w_norm = (double*) malloc(sizeof(double)*ListSize1); 68 | 69 | for(i=0; i < ListSize1; i++ ) { 70 | ListItem = PyList_GetItem(objx, i); 71 | ListItem2 = PyList_GetItem(objw_norm, i); 72 | ListItem3 = PyList_GetItem(objpreFac, i); 73 | if( PyFloat_Check(ListItem) && PyFloat_Check(ListItem2) && PyFloat_Check(ListItem3) ){ 74 | x[i] = PyFloat_AsDouble(ListItem); 75 | w_norm[i] = PyFloat_AsDouble(ListItem2); 76 | preFac[i] = PyFloat_AsDouble(ListItem3); 77 | } else { 78 | printf("Error: lists contain a non-float value.\n"); 79 | exit(1); 80 | } 81 | } 82 | 83 | ////////// GET SECOND LIST-GROUP FROM PYTHON //////////// 84 | ListSize2 = PyList_Size(objy); 85 | y = (double*) malloc(sizeof(double)*ListSize2); 86 | 87 | for(i=0; i < ListSize2; i++ ) { 88 | ListItem = PyList_GetItem(objy, i); 89 | if( PyFloat_Check(ListItem) ) { 90 | y[i] = PyFloat_AsDouble(ListItem); 91 | }else{ 92 | printf("Error: lists contain a non-float value.\n"); 93 | exit(1); 94 | } 95 | } 96 | 97 | /////////////// RUN CALCULATIONS ///////////////////// 98 | PyObject *pylist; 99 | pylist = PyList_New(ListSize2); 100 | 101 | for(i=0; i < ListSize2; i++) { 102 | res = 0.0; 103 | for (j=0; j < ListSize1; j++) { 104 | ent = x[j]-y[i]; 105 | res += w_norm[j] * exp(preFac[j] * c*pow(ent, 2)); 106 | } 107 | PyList_SET_ITEM(pylist, i, PyFloat_FromDouble(res)); 108 | } 109 | 110 | ///////// FREE MEMORY /////////// 111 | free(x); 112 | free(preFac); 113 | free(w_norm); 114 | 115 | ///////// RETURN RESULTING VALUES FOR y ///////// 116 | return pylist; 117 | } 118 | 119 | 120 | //////////////////////// 121 | /// FOR 2 DIMENSIONS /// 122 | //////////////////////// 123 | static PyObject *pr_kde_2d(PyObject *self, PyObject 
*args){ 124 | 125 | ////////////////// DECLARATIONS /////////////////////// 126 | int ListSize1, ListSize2, i,j; 127 | double c11, c12, c21, c22, res, ent1, ent2, h; 128 | PyObject *objx1, *objx2, *objy1, *objy2, *objpreFac, *objw_norm; 129 | PyObject *ListItem, *ListItem2, *ListItem3, *ListItem4; 130 | double *x1, *x2, *y1,*y2, *preFac, *w_norm; 131 | 132 | /////////////////// GET INPUT ////////////////////////// 133 | if (!PyArg_ParseTuple(args, "ddddOOOOdOO", &c11, &c12, &c21, &c22, &objx1, &objx2, &objy1, &objy2, &h, &objpreFac, &objw_norm)) 134 | return NULL; 135 | 136 | ////////// GET FIRST LIST-GROUP FROM PYTHON //////////// 137 | ListSize1 = PyList_Size(objx2); 138 | x1 = (double*) malloc(sizeof(double)*ListSize1); 139 | x2 = (double*) malloc(sizeof(double)*ListSize1); 140 | preFac = (double*) malloc(sizeof(double)*ListSize1); 141 | w_norm = (double*) malloc(sizeof(double)*ListSize1); 142 | 143 | for(i=0; i < ListSize1; i++ ) { 144 | ListItem = PyList_GetItem(objx1, i); 145 | ListItem2 = PyList_GetItem(objx2, i); 146 | ListItem3 = PyList_GetItem(objpreFac, i); 147 | ListItem4 = PyList_GetItem(objw_norm, i); 148 | if( PyFloat_Check(ListItem) && PyFloat_Check(ListItem2) && PyFloat_Check(ListItem3) && PyFloat_Check(ListItem4) ){ 149 | x1[i] = PyFloat_AsDouble(ListItem); 150 | x2[i] = PyFloat_AsDouble(ListItem2); 151 | preFac[i] = PyFloat_AsDouble(ListItem3); 152 | w_norm[i] = PyFloat_AsDouble(ListItem4); 153 | } else { 154 | printf("Error: lists contain a non-float value.\n"); 155 | exit(1); 156 | } 157 | } 158 | 159 | ////////// GET SECOND LIST-GROUP FROM PYTHON //////////// 160 | ListSize2 = PyList_Size(objy1); 161 | y1 = (double*) malloc(sizeof(double)*ListSize2); 162 | y2 = (double*) malloc(sizeof(double)*ListSize2); 163 | 164 | for(i=0; i < ListSize2; i++ ) { 165 | ListItem = PyList_GetItem(objy1, i); 166 | ListItem2 = PyList_GetItem(objy2, i); 167 | if( PyFloat_Check(ListItem) && PyFloat_Check(ListItem2) ) { 168 | y1[i] = PyFloat_AsDouble(ListItem); 169 | y2[i] = PyFloat_AsDouble(ListItem2); 170 | } else { 171 | printf("Error: lists contain a non-float value.\n"); 172 | exit(1); 173 | } 174 | } 175 | 176 | /////////////// RUN CALCULATIONS ///////////////////// 177 | PyObject *pylist; 178 | 179 | pylist = PyList_New(ListSize2); 180 | 181 | for(i=0; i < ListSize2; i++) { 182 | res = 0.0; 183 | for (j=0; j < ListSize1; j++) { 184 | ent1 = x1[j]-y1[i]; 185 | ent2 = x2[j]-y2[i]; 186 | res += w_norm[j] * exp(preFac[j] * (ent1*(c11*ent1+c12*ent2) + ent2*(c21*ent1+c22*ent2))); 187 | } 188 | PyList_SET_ITEM(pylist, i, PyFloat_FromDouble(res)); 189 | } 190 | 191 | ///////// FREE MEMORY /////////// 192 | free(x1); 193 | free(x2); 194 | free(preFac); 195 | free(w_norm); 196 | 197 | ///////// RETURN RESULTING VALUES FOR y ///////// 198 | return pylist; 199 | } 200 | 201 | 202 | 203 | /////////////////////////////////////////////////// 204 | //////// GET LAMBDA FOR DATASET OF X ////////////// 205 | /////////////////////////////////////////////////// 206 | 207 | //////////////////////// 208 | /// FOR 1 DIMENSION /// 209 | //////////////////////// 210 | 211 | static PyObject *pr_getLambda_1d(PyObject *self, PyObject *args){ 212 | 213 | ////////////////// DECLARATIONS /////////////////////// 214 | int ListSize1, i,j; 215 | double c, thisKde, ent, invGlob, logSum, alpha, h, preFac; //, tempNorm, weight, tempNormOld; 216 | PyObject *objx; 217 | PyObject *ListItem, *ListItem2; 218 | PyObject *obj_w_norm; 219 | double *x, *lambda, *kde, *w_norm; 220 | 221 | /////////////////// GET INPUT 
////////////////////////// 222 | if (!PyArg_ParseTuple(args, "dOOdd", &c, &objx, &obj_w_norm, &h, &alpha ) ) 223 | return NULL; 224 | 225 | ////////// GET FIRST LIST-GROUP FROM PYTHON //////////// 226 | ListSize1 = PyList_Size(objx); 227 | x = (double*) malloc(sizeof(double)*ListSize1); 228 | lambda = (double*) malloc(sizeof(double)*ListSize1); 229 | kde = (double*) malloc(sizeof(double)*ListSize1); 230 | w_norm = (double*) malloc(sizeof(double)*ListSize1); 231 | 232 | for(i=0; i < ListSize1; i++ ) { 233 | ListItem = PyList_GetItem(objx, i); 234 | ListItem2 = PyList_GetItem(obj_w_norm, i); 235 | 236 | if( PyFloat_Check(ListItem) && PyFloat_Check(ListItem2) ) { 237 | x[i] = PyFloat_AsDouble(ListItem); 238 | w_norm[i] = PyFloat_AsDouble(ListItem2); 239 | } else { 240 | printf("Error: lists contain a non-float value.\n"); 241 | exit(1); 242 | } 243 | } 244 | 245 | /////////////// RUN CALCULATIONS ///////////////////// 246 | PyObject *lambdaList; 247 | lambdaList = PyList_New(ListSize1); 248 | 249 | invGlob = 0.0; 250 | logSum = 0.0; 251 | preFac = -0.5/pow(h, 2); 252 | 253 | for(i=0; i < ListSize1; i++) { 254 | thisKde = 0.0; 255 | for (j=0; j < ListSize1; j++) { 256 | ent = x[j]-x[i]; 257 | thisKde += w_norm[j] * exp(preFac * (ent*c*ent)); 258 | } 259 | logSum += 1.0/ListSize1 * log(thisKde); 260 | kde[i] = thisKde; 261 | } 262 | invGlob = 1.0/exp(logSum); 263 | 264 | for(i=0; i< ListSize1; i++) { 265 | lambda[i] = 1.0/pow(invGlob*kde[i], alpha); 266 | PyList_SET_ITEM(lambdaList, i, PyFloat_FromDouble(lambda[i])); 267 | } 268 | 269 | ///////// FREE MEMORY /////////// 270 | free(x); 271 | free(lambda); 272 | free(kde); 273 | free(w_norm); 274 | 275 | ///////// RETURN RESULTING VALUES FOR LAMBDAS ///////// 276 | return lambdaList; 277 | } 278 | 279 | //////////////////////// 280 | /// FOR 2 DIMENSIONS /// 281 | //////////////////////// 282 | 283 | static PyObject *pr_getLambda_2d(PyObject *self, PyObject *args){ 284 | ////////////////// DECLARATIONS /////////////////////// 285 | int ListSize1, i,j; 286 | double c11, c12, c21, c22, thisKde, ent1, ent2, invGlob, logSum, alpha, h, preFac; // , tempNorm, weight; 287 | PyObject *objx1, *objx2, *obj_w_norm; 288 | PyObject *ListItem, *ListItem2, *ListItem3; 289 | double *x1, *x2, *lambda, *kde, *w_norm; 290 | 291 | /////////////////// GET INPUT ////////////////////////// 292 | if (!PyArg_ParseTuple(args, "ddddOOOdd", &c11, &c12, &c21, &c22, &objx1, &objx2, &obj_w_norm, &h, &alpha ) ) 293 | return NULL; 294 | 295 | ////////// GET FIRST LIST-GROUP FROM PYTHON //////////// 296 | ListSize1 = PyList_Size(objx2); 297 | x1 = (double*) malloc(sizeof(double)*ListSize1); 298 | x2 = (double*) malloc(sizeof(double)*ListSize1); 299 | lambda = (double*) malloc(sizeof(double)*ListSize1); 300 | kde = (double*) malloc(sizeof(double)*ListSize1); 301 | w_norm = (double*) malloc(sizeof(double)*ListSize1); 302 | 303 | for(i=0; i < ListSize1; i++ ) { 304 | ListItem = PyList_GetItem(objx1, i); 305 | ListItem2 = PyList_GetItem(objx2, i); 306 | ListItem3 = PyList_GetItem(obj_w_norm, i); 307 | 308 | if( PyFloat_Check(ListItem) && PyFloat_Check(ListItem2) && PyFloat_Check(ListItem3)) { 309 | x1[i] = PyFloat_AsDouble(ListItem); 310 | x2[i] = PyFloat_AsDouble(ListItem2); 311 | w_norm[i] = PyFloat_AsDouble(ListItem3); 312 | } else { 313 | printf("Error: lists contain a non-float value.\n"); 314 | exit(1); 315 | } 316 | } 317 | 318 | /////////////// RUN CALCULATIONS ///////////////////// 319 | PyObject *lambdaList; 320 | lambdaList = PyList_New(ListSize1); 321 | 322 | 323 | 
invGlob = 0.0; 324 | logSum = 0.0; 325 | preFac = -0.5/(h*h); 326 | 327 | for(i=0; i < ListSize1; i++) { 328 | thisKde = 0.0; 329 | for (j=0; j < ListSize1; j++) { 330 | ent1 = x1[j]-x1[i]; 331 | ent2 = x2[j]-x2[i]; 332 | thisKde += w_norm[j] * exp(preFac * (ent1*(c11*ent1+c12*ent2) + ent2*(c21*ent1+c22*ent2))); 333 | } 334 | logSum += 1.0/ListSize1 * log(thisKde); 335 | kde[i] = thisKde; 336 | } 337 | invGlob = 1.0/exp(logSum); 338 | 339 | for(i=0; i< ListSize1; i++) { 340 | lambda[i] = 1.0/pow(invGlob*kde[i], alpha); 341 | PyList_SET_ITEM(lambdaList, i, PyFloat_FromDouble(lambda[i])); 342 | } 343 | 344 | ///////// FREE MEMORY /////////// 345 | free(x1); 346 | free(x2); 347 | free(lambda); 348 | free(kde); 349 | free(w_norm); 350 | 351 | ///////// RETURN RESULTING VALUES FOR LAMBDAS ///////// 352 | return lambdaList; 353 | } 354 | 355 | //////////////////////// 356 | /// FOR N DIMENSIONS /// 357 | //////////////////////// 358 | 359 | static PyObject *pr_kde_ND(PyObject *self, PyObject *args){ 360 | 361 | ////////////////// DECLARATIONS /////////////////////// 362 | int ndata, neval, nelems_c, ndim, i,j,d,k; 363 | double det_inv_cov, h; 364 | PyObject *obj_entries, *obj_preFac, *obj_w_norm, *obj_evals; 365 | PyObject *ListItem, *ListItem2, *obj_inv_cov; 366 | double *x, *preFac, *w_norm, *inv_cov, *evals; 367 | 368 | /////////////////// GET INPUT ////////////////////////// 369 | if (!PyArg_ParseTuple(args, "iOOOOOdd", &ndim, &obj_inv_cov, &obj_entries, &obj_evals, &obj_w_norm, &obj_preFac, &det_inv_cov, &h) ) 370 | return NULL; 371 | 372 | ////////// GET FIRST LIST-GROUP FROM PYTHON //////////// 373 | ndata = PyList_Size(obj_w_norm); 374 | neval = PyList_Size(obj_evals) / ndim ; 375 | 376 | x = (double*) malloc(ndim * sizeof(double)*ndata); 377 | w_norm = (double*) malloc(sizeof(double)*ndata); 378 | preFac = (double*) malloc(sizeof(double)*ndata); 379 | evals = (double*) malloc(sizeof(double)*neval*ndim); 380 | 381 | nelems_c= PyList_Size(obj_inv_cov); 382 | inv_cov = (double*) malloc(sizeof(double)*nelems_c); 383 | 384 | // get data // 385 | for( i=0; i < ndim*ndata; i++ ) { 386 | ListItem = PyList_GetItem(obj_entries , i); 387 | 388 | if( PyFloat_Check(ListItem) ) { 389 | x[i] = PyFloat_AsDouble(ListItem); 390 | } else { 391 | printf("Error: lists contain a non-float value.\n"); 392 | exit(1); 393 | } 394 | } 395 | 396 | // get evals // 397 | for( i=0; i < neval*ndim; i++ ) { 398 | ListItem = PyList_GetItem(obj_evals , i); 399 | 400 | if( PyFloat_Check(ListItem) ) { 401 | evals[i] = PyFloat_AsDouble(ListItem); 402 | } else { 403 | printf("Error: lists contain a non-float value.\n"); 404 | exit(1); 405 | } 406 | } 407 | 408 | // get inv_cov // 409 | for( i=0; i < nelems_c; i++ ) { 410 | ListItem = PyList_GetItem(obj_inv_cov , i); 411 | 412 | if( PyFloat_Check(ListItem) ) { 413 | inv_cov[i] = PyFloat_AsDouble(ListItem); 414 | } else { 415 | printf("Error: lists contain a non-float value.\n"); 416 | exit(1); 417 | } 418 | } 419 | 420 | 421 | // get preFacs and weights // 422 | for( i=0; i < ndata; i++ ) { 423 | ListItem = PyList_GetItem(obj_preFac, i); 424 | ListItem2 = PyList_GetItem(obj_w_norm, i); 425 | 426 | if( PyFloat_Check(ListItem) && PyFloat_Check(ListItem2)) { 427 | preFac[i] = PyFloat_AsDouble(ListItem); 428 | w_norm[i] = PyFloat_AsDouble(ListItem2); 429 | } else { 430 | printf("Error: lists contain a non-float value.\n"); 431 | exit(1); 432 | } 433 | } 434 | 435 | // new stuff for ND // 436 | PyObject *pylist; 437 | pylist = PyList_New(neval); 438 | 439 | for( j = 0; j>> from 
scipy import stats 163 | >>> def measure(n): 164 | >>> "Measurement model, return two coupled measurements." 165 | >>> m1 = np.random.normal(size=n) 166 | >>> m2 = np.random.normal(scale=0.5, size=n) 167 | >>> return m1+m2, m1-m2 168 | 169 | >>> m1, m2 = measure(2000) 170 | >>> xmin = m1.min() 171 | >>> xmax = m1.max() 172 | >>> ymin = m2.min() 173 | >>> ymax = m2.max() 174 | 175 | Perform a kernel density estimate on the data: 176 | 177 | >>> X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j] 178 | >>> positions = np.vstack([X.ravel(), Y.ravel()]) 179 | >>> values = np.vstack([m1, m2]) 180 | >>> kernel = gaussian_kde(values) 181 | >>> Z = np.reshape(kernel(positions).T, X.shape) 182 | 183 | Plot the results: 184 | 185 | >>> import matplotlib.pyplot as plt 186 | >>> fig = plt.figure() 187 | >>> ax = fig.add_subplot(111) 188 | >>> ax.imshow(np.rot90(Z), cmap=plt.cm.gist_earth_r, 189 | ... extent=[xmin, xmax, ymin, ymax]) 190 | >>> ax.plot(m1, m2, 'k.', markersize=2) 191 | >>> ax.set_xlim([xmin, xmax]) 192 | >>> ax.set_ylim([ymin, ymax]) 193 | >>> plt.show() 194 | 195 | """ 196 | def __init__(self, dataset, weights=None, kde_values=None, 197 | adaptive=False, weight_adaptive_bw=False, alpha=0.3, 198 | bw_method='silverman'): 199 | self.inv_cov12 = None 200 | self.ds = None 201 | self._normalized_weights = None 202 | 203 | self.dataset = np.atleast_2d(dataset) 204 | self.d, self.n = self.dataset.shape 205 | 206 | max_array_length = 1e8 207 | """Maximum amount of data in memory (~2GB, scales linearly)""" 208 | 209 | self.m_max = int(np.floor(max_array_length/self.n)) 210 | if self.n > max_array_length: 211 | raise ValueError("`dataset` is too large (too many array entries)!") 212 | 213 | if weights is not None and len(weights) == self.n: 214 | self.weights = weights 215 | elif weights is None: 216 | self.weights = np.ones(self.n) 217 | else: 218 | raise ValueError("unequal dimension of `dataset` and `weights`.") 219 | 220 | self.kde_values = kde_values 221 | if self.kde_values is not None: 222 | print("Warning: By giving `kde_values`, `weight_adaptive_bw` is" 223 | " useless. You have to be sure what was used to calculate" 224 | " those values!") 225 | if len(self.kde_values) != self.n: 226 | raise ValueError("unequal dimension of `dataset` and `kde_values`.") 227 | if not self.dataset.size > 1: 228 | raise ValueError("`dataset` input should have multiple elements.") 229 | 230 | # compute covariance matrix 231 | self.set_bandwidth(bw_method=bw_method) 232 | 233 | self.adaptive = adaptive 234 | if self.adaptive: 235 | self.weight_adaptive_bw = weight_adaptive_bw 236 | try: 237 | self.alpha = float(alpha) 238 | except: 239 | raise ValueError("`alpha` has to be a number.") 240 | if self.alpha < 0. or self.alpha > 1.: 241 | raise ValueError("`alpha` has to be in the range [0,1].") 242 | self._compute_adaptive_covariance() 243 | elif not self.adaptive and self.kde_values is not None: 244 | raise ValueError("Giving `kde_values`, `adaptive` cannot be False!") 245 | 246 | def evaluate(self, points, adaptive=False): 247 | """Evaluate the estimated pdf on a set of points. 248 | 249 | Parameters 250 | ---------- 251 | points : (# of dimensions, # of points)-array 252 | Alternatively, a (# of dimensions,) vector can be passed in and 253 | treated as a single point. 254 | 255 | Returns 256 | ------- 257 | values : (# of points,)-array 258 | The values at each point. 
259 | 260 | Raises 261 | ------ 262 | ValueError : if the dimensionality of the input points is different than 263 | the dimensionality of the KDE. 264 | 265 | """ 266 | points = np.dot(self.inv_cov12, np.atleast_2d(points)) 267 | ds = self.ds # pylint: disable=unused-variable 268 | normalized_weights = self._normalized_weights # pylint: disable=unused-variable 269 | 270 | d, m = points.shape 271 | if d != self.d: 272 | if d == 1 and m == self.d: 273 | # points was passed in as a row vector 274 | points = np.reshape(points, (m, d)) 275 | d, m = points.shape 276 | else: 277 | msg = "points have dimension %s, dataset has dimension %s" % (d, self.d) 278 | raise ValueError(msg) 279 | 280 | nloops = int(np.ceil(m/self.m_max)) 281 | dm = self.m_max 282 | modulo_dm = m%dm 283 | results = np.empty((m,), dtype=float) 284 | if adaptive: 285 | inv_loc_bw = self.inv_loc_bw # pylint: disable=unused-variable 286 | 287 | for i in range(nloops): 288 | index = i*dm 289 | if modulo_dm and i == (nloops-1): 290 | dm = modulo_dm 291 | pt = points[:, index:index+dm].T.reshape(dm, self.d, 1) 292 | 293 | # has to be done due to BUG in `numexpr` (`sum` in `numexpr` != `numpy.sum`) 294 | if self.d == 1: 295 | energy = numexpr.evaluate( # pylint: disable=unused-variable 296 | "(ds - pt)**2", 297 | optimization='aggressive' 298 | ).reshape(dm, self.n) 299 | else: 300 | energy = numexpr.evaluate( # pylint: disable=unused-variable 301 | "sum((ds - pt)**2, axis=1)", 302 | optimization='aggressive' 303 | ) 304 | 305 | results[index:index+dm] = numexpr.evaluate( 306 | "sum(normalized_weights * exp(-0.5 * energy * inv_loc_bw), axis=1)", 307 | optimization='aggressive' 308 | ) 309 | del pt 310 | 311 | else: 312 | for i in range(nloops): 313 | index = i*dm 314 | if modulo_dm and i == (nloops-1): 315 | dm = modulo_dm 316 | pt = points[:, index:index+dm].T.reshape(dm, self.d, 1) 317 | 318 | # has to be done due to BUG in `numexpr` (`sum` in `numexpr` != `numpy.sum`) 319 | if self.d == 1: 320 | energy = numexpr.evaluate( 321 | "(ds - pt)**2", 322 | optimization='aggressive' 323 | ).reshape(dm, self.n) 324 | else: 325 | energy = numexpr.evaluate( 326 | "sum((ds - pt)**2, axis=1)", 327 | optimization='aggressive' 328 | ) 329 | 330 | results[index:index+dm] = numexpr.evaluate( 331 | "sum(normalized_weights * exp(-0.5 * energy), axis=1)", 332 | optimization='aggressive' 333 | ) 334 | del pt 335 | 336 | return results 337 | 338 | def __call__(self, points): 339 | return self.evaluate(points, adaptive=self.adaptive) 340 | 341 | def scotts_factor(self): 342 | return self.n ** (-1 / (self.d + 4)) 343 | 344 | def silverman_factor(self): 345 | return (self.n * (self.d + 2) / 4) ** (-1 / (self.d + 4)) 346 | 347 | # Default method to calculate bandwidth, can be overwritten by subclass 348 | covariance_factor = scotts_factor 349 | 350 | def set_bandwidth(self, bw_method=None): 351 | """Compute the estimator bandwidth with given method. 352 | 353 | The new bandwidth calculated after a call to `set_bandwidth` is used 354 | for subsequent evaluations of the estimated density. 355 | 356 | Parameters 357 | ---------- 358 | bw_method : str, scalar or callable, optional 359 | The method used to calculate the estimator bandwidth. This can be 360 | 'scott', 'silverman', a scalar constant or a callable. If a 361 | scalar, this will be used directly as `kde.factor`. If a callable, 362 | it should take a `gaussian_kde` instance as only parameter and 363 | return a scalar. 
If None (default), nothing happens; the current 364 | `kde.covariance_factor` method is kept. 365 | 366 | Examples 367 | -------- 368 | >>> x1 = np.array([-7, -5, 1, 4, 5.]) 369 | >>> kde = stats.gaussian_kde(x1) 370 | >>> xs = np.linspace(-10, 10, num=50) 371 | >>> y1 = kde(xs) 372 | >>> kde.set_bandwidth(bw_method='silverman') 373 | >>> y2 = kde(xs) 374 | >>> kde.set_bandwidth(bw_method=kde.factor / 3.) 375 | >>> y3 = kde(xs) 376 | 377 | >>> fig = plt.figure() 378 | >>> ax = fig.add_subplot(111) 379 | >>> ax.plot(x1, np.ones(x1.shape) / (4. * x1.size), 'bo', 380 | ... label='Data points (rescaled)') 381 | >>> ax.plot(xs, y1, label='Scott (default)') 382 | >>> ax.plot(xs, y2, label='Silverman') 383 | >>> ax.plot(xs, y3, label='Const (1/3 * Silverman)') 384 | >>> ax.legend() 385 | >>> plt.show() 386 | 387 | """ 388 | if bw_method is None: 389 | pass 390 | elif bw_method == 'scott': 391 | self.covariance_factor = self.scotts_factor 392 | elif bw_method == 'silverman': 393 | self.covariance_factor = self.silverman_factor 394 | elif np.isscalar(bw_method) and not isinstance(bw_method, basestring): 395 | self._bw_method = 'use constant' 396 | self.covariance_factor = lambda: bw_method 397 | elif callable(bw_method): 398 | self._bw_method = bw_method 399 | self.covariance_factor = lambda: self._bw_method(self) 400 | else: 401 | msg = "`bw_method` should be 'scott', 'silverman', a scalar " \ 402 | "or a callable." 403 | raise ValueError(msg) 404 | 405 | self._compute_covariance() 406 | 407 | def _compute_covariance(self): 408 | """Computes the covariance matrix for each Gaussian kernel using 409 | covariance_factor(). 410 | """ 411 | factor = self.covariance_factor() 412 | # Cache covariance and inverse covariance of the data 413 | data_covariance = np.atleast_2d(weighted_cov(self.dataset, weights=self.weights, bias=False)) 414 | data_inv_cov = linalg.inv(data_covariance) 415 | 416 | covariance = data_covariance * factor**2 417 | inv_cov = data_inv_cov / factor**2 418 | self.inv_cov12 = linalg.cholesky(inv_cov).T 419 | 420 | self.ds = np.dot(self.inv_cov12, self.dataset) 421 | 422 | norm_factor = np.sqrt(linalg.det(2*np.pi*covariance)) 423 | #inv_norm_factor = 1. / (norm_factor * sum(self.weights)) 424 | self._normalized_weights = self.weights / (norm_factor * sum(self.weights)) 425 | 426 | def _compute_adaptive_covariance(self): 427 | """Computes an adaptive covariance matrix for each Gaussian kernel using 428 | _compute_covariance(). 429 | """ 430 | # evaluate dataset for kde without adaptive kernel: 431 | if self.kde_values is None: 432 | if self.weight_adaptive_bw: 433 | self.kde_values = self.evaluate(self.dataset, adaptive=False) 434 | else: 435 | weights_temp = copy(self.weights) 436 | self.weights = np.ones(self.n) 437 | self._compute_covariance() 438 | self.kde_values = self.evaluate(self.dataset, adaptive=False) 439 | self.weights = weights_temp 440 | self._compute_covariance() 441 | 442 | # Define global bandwidth `glob_bw` by using the kde without adaptive kernel: 443 | # NOTE: is this really self.n or should it be sum(weights)? 
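        # glob_bw is the geometric mean of the pilot KDE values, exp(mean(log f_hat)).
        # inv_loc_bw below is (f_hat(x_i)/glob_bw)**(2*alpha), the inverse squared local
        # bandwidth factor: points in low-density regions (f_hat < glob_bw) receive wider
        # kernels, matching the lambdas computed in classes.KDE.calcLambdas.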
444 | glob_bw = np.exp(1./self.n * np.sum(np.log(self.kde_values))) 445 | # Define local bandwidth `loc_bw`: 446 | self.inv_loc_bw = np.power(self.kde_values/glob_bw, 2.*self.alpha) 447 | 448 | #inv_local_norm_factors = self._inv_norm_factor * power(self.inv_loc_bw, 0.5*self.d) 449 | self._normalized_weights = self._normalized_weights * np.power(self.inv_loc_bw, 0.5*self.d) 450 | 451 | class bootstrap_kde(object): 452 | """Bootstrapping to estimate uncertainty in KDE. 453 | 454 | Parameters 455 | ---------- 456 | dataset 457 | niter : int > 0 458 | **kwargs 459 | Passed on to `gaussian_kde`, except 'weights' which,if present, is 460 | extracted and re-sampled in the same manner as `dataset`. 461 | 462 | """ 463 | def __init__(self, dataset, niter=10, **kwargs): 464 | self.kernels = [] 465 | self.bootstrap_indices = [] 466 | 467 | self.dataset = np.atleast_2d(dataset) 468 | self.d, self.n = self.dataset.shape 469 | if "weights" in kwargs: 470 | weights = kwargs.pop("weights") 471 | else: 472 | weights = None 473 | 474 | for _ in range(niter): 475 | indices = self.get_bootstrap_indices() 476 | self.bootstrap_indices.append(indices) 477 | if weights is not None: 478 | kernel = gaussian_kde(self.dataset[:, indices], weights=weights[indices], **kwargs) 479 | self.kernels.append(kernel) 480 | else: 481 | kernel = gaussian_kde(self.dataset[:, indices], **kwargs) 482 | self.kernels.append(kernel) 483 | 484 | def __call__(self, points): 485 | return self.evaluate(points) 486 | 487 | def evaluate(self, points): 488 | points = np.atleast_2d(points) 489 | _, m = points.shape 490 | means, sqmeans = np.zeros(m), np.zeros(m) 491 | for kernel in self.kernels: 492 | values = kernel(points) 493 | means += values 494 | sqmeans += values**2 495 | means /= len(self.kernels) 496 | sqmeans /= len(self.kernels) 497 | errors = np.sqrt(sqmeans - means**2) 498 | return means, errors 499 | 500 | def get_bootstrap_indices(self): 501 | """Get random indices used to resample (with replacement) `dataset`. 502 | 503 | Returns 504 | ------- 505 | bootstrap_indices : array 506 | 507 | """ 508 | return np.random.choice(self.n, size=self.n, replace=True) 509 | -------------------------------------------------------------------------------- /kde/stat_tools.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=line-too-long, invalid-name 2 | 3 | 4 | from __future__ import absolute_import, division, print_function 5 | 6 | __license__ = """MIT License 7 | 8 | Copyright (c) 2014-2019 Sebastian Schoenen and Martin Leuermann 9 | 10 | Permission is hereby granted, free of charge, to any person obtaining a copy 11 | of this software and associated documentation files (the "Software"), to deal 12 | in the Software without restriction, including without limitation the rights 13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | copies of the Software, and to permit persons to whom the Software is 15 | furnished to do so, subject to the following conditions: 16 | 17 | The above copyright notice and this permission notice shall be included in all 18 | copies or substantial portions of the Software. 19 | 20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 23 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 | SOFTWARE. 27 | """ 28 | 29 | import numpy as np 30 | 31 | 32 | def rebin(a, *args, **kwargs): 33 | """Rebin ndarray data into a smaller ndarray of the same rank whose 34 | dimensions are factors of the original dimensions. eg. An array with 6 35 | columns and 4 rows can be reduced to have 6,3,2 or 1 columns and 4,2 or 1 36 | rows. 37 | 38 | Examples 39 | -------- 40 | >>> a = np.rand(6, 4) 41 | >>> b = rebin(a, 3, 2) 42 | >>> print(b.shape) 43 | (2, 2) 44 | 45 | >>> a = np.rand(6) 46 | >>> b = rebin(a, 2) 47 | >>> print b.shape 48 | (3,) 49 | 50 | """ 51 | method = kwargs.get("method", "sum") 52 | verbose = kwargs.get("verbose", False) 53 | 54 | shape = a.shape 55 | lenShape = len(shape) 56 | factor = np.asarray(shape) / np.asarray(args) # pylint: disable=unused-variable 57 | evList = ( 58 | ['a.reshape('] + 59 | ['args[%d],factor[%d],'%(i, i) for i in range(lenShape)] + 60 | [')'] + ['.sum(%d)'%(i+1) for i in range(lenShape)] 61 | ) 62 | 63 | if method == "sum": 64 | pass 65 | elif method == "average": 66 | evList += ['/factor[%d]'%i for i in range(lenShape)] 67 | else: 68 | raise AttributeError("method: %s not defined" % method) 69 | 70 | evStr = ''.join(evList) 71 | 72 | if verbose: 73 | print(evStr) 74 | 75 | return eval(evStr) # pylint: disable=eval-used 76 | 77 | 78 | def covariance_form(point, mean, cov): 79 | """Calculate 2D map of covariance form (2D quadratic approximation to 80 | -2lnL) 81 | 82 | """ 83 | cov_inv = np.linalg.inv(cov) 84 | diff = point - mean 85 | 86 | stats = [] 87 | for y_i in range(len(diff)): 88 | current_y = [] 89 | for x_i in range(len(diff[y_i])): 90 | a = np.matrix(diff[y_i][x_i]) 91 | current_y.append((a * cov_inv * a.transpose()).item(0)) 92 | stats.append(current_y) 93 | return np.array(stats) 94 | 95 | 96 | def estimate_cov_from_contour(xaxis, yaxis, zmesh, point): 97 | """Calculate estimate of covariance matrix from 2D Hessian of -2lnL 98 | 99 | Note: 100 | RectBivariateSpline expects zmesh to have shape (len(xaxis), len(yaxis)) 101 | but my mesh has shape (len(yaxis), len(xaxis)) thus everything is mirrored 102 | 103 | """ 104 | from scipy.interpolate import RectBivariateSpline 105 | x, y = point 106 | spline = RectBivariateSpline(yaxis, xaxis, np.asarray(zmesh)) 107 | dx2 = 0.5 * spline(y, x, mth=None, dx=0, dy=2, grid=False) 108 | dy2 = 0.5 * spline(y, x, mth=None, dx=2, dy=0, grid=False) 109 | dxdy = 0.5 * spline(y, x, mth=None, dx=1, dy=1, grid=False) 110 | 111 | hessian = np.matrix([[dx2, dxdy], [dxdy, dy2]]) 112 | cov = np.linalg.inv(hessian) 113 | return cov 114 | 115 | 116 | def interpolate_statistic(xaxis, yaxis, zmesh, xaxis_new, yaxis_new): 117 | """Calculate 2D spline surface of -2lnL test-statistic. 
118 | 119 | The same spline is used to calculate derivatives in 120 | "estimate_cov_from_contour(xaxis, yaxis, zmesh, point)" 121 | 122 | Note: 123 | RectBivariateSpline expects zmesh to have shape (len(xaxis), len(yaxis)) 124 | but my mesh has shape (len(yaxis), len(xaxis)) 125 | thus everything is mirrored 126 | 127 | """ 128 | from scipy.interpolate import RectBivariateSpline 129 | spline = RectBivariateSpline(yaxis, xaxis, np.asarray(zmesh)) 130 | stats = [[spline(yaxis_new[yi], xaxis_new[xi], mth=None, dx=0, dy=0, grid=False) 131 | for xi in range(len(xaxis_new))] 132 | for yi in range(len(yaxis_new))] 133 | return np.array(stats) 134 | 135 | 136 | def wilks_test(profiles): 137 | """Calculate the compatibility of statistically independent measurements. 138 | 139 | Here, we assume that Wilks' theorem holds. 140 | 141 | Parameters 142 | ---------- 143 | profiles : list of (x, y, llh) for different measurements 144 | 145 | """ 146 | from scipy.stats import chisqprob 147 | from scipy.special import erfinv 148 | 149 | xmin, xmax = +np.inf, -np.inf 150 | ymin, ymax = +np.inf, -np.inf 151 | for x, y, _ in profiles: 152 | xmin_, xmax_ = np.min(x), np.max(x) 153 | if xmin_ < xmin: 154 | xmin = xmin_ 155 | if xmax_ > xmax: 156 | xmax = xmax_ 157 | 158 | ymin_, ymax_ = np.min(y), np.max(y) 159 | if ymin_ < ymin: 160 | ymin = ymin_ 161 | if ymax_ > ymax: 162 | ymax = ymax_ 163 | 164 | x = np.linspace(xmin, xmax, 1000) 165 | y = np.linspace(ymin, ymax, 1000) 166 | 167 | sum_llhs = 0 168 | for xpar, ypar, llhs in profiles: 169 | sum_llhs += interpolate_statistic(xpar, ypar, llhs, x, y) 170 | 171 | chi2 = np.min(sum_llhs) 172 | ndof = 2 * (len(profiles) - 1) 173 | pvalue = chisqprob(chi2, ndof) 174 | nsigma = erfinv(1 - pvalue) * np.sqrt(2) # 2-sided significance 175 | 176 | return (chi2, ndof, pvalue, nsigma) 177 | 178 | 179 | def walds_test(profile1, profile2): 180 | """Calculate the compatibility of two statistically independent 181 | measurements using normal approximation (Wald's method). 182 | 183 | This assumes that the log-likelihood space is approximately elliptically. 
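    Concretely, with best-fit points :math:`\hat{\theta}_1, \hat{\theta}_2` and covariance
    estimates :math:`C_1, C_2` obtained from the two profiles, the statistic computed below
    is :math:`\chi^2 = (\hat{\theta}_1 - \hat{\theta}_2)^T (C_1 + C_2)^{-1} (\hat{\theta}_1 - \hat{\theta}_2)`,
    which is compared against a :math:`\chi^2` distribution with 2 degrees of freedom.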
184 | 
185 |     Parameters
186 |     ----------
187 |     profile1 : (x,y,llh) for measurement 1
188 |     profile2 : (x,y,llh) for measurement 2
189 | 
190 |     """
191 |     from scipy.stats import chi2 as chi2_dist
192 |     from scipy.special import erfinv
193 |     bestfits, covariances = [], []
194 |     for x, y, llhs in [profile1, profile2]:
195 |         idx_min = np.unravel_index(llhs.argmin(), llhs.shape)
196 |         bestfit = x[idx_min[1]], y[idx_min[0]]
197 |         bestfits.append(bestfit)
198 |         covariance = estimate_cov_from_contour(x, y, llhs, bestfit)
199 |         covariances.append(covariance)
200 | 
201 |     diff = np.matrix(bestfits[0]) - np.matrix(bestfits[1])
202 |     cov_inv = np.linalg.inv(covariances[0] + covariances[1])
203 | 
204 |     chi2 = diff*cov_inv*diff.transpose()
205 |     ndof = 2
206 |     pvalue = chi2_dist.sf(chi2, ndof)
207 |     nsigma = erfinv(1-pvalue) * np.sqrt(2)  # 2-sided significance
208 | 
209 |     return (chi2, ndof, pvalue, nsigma)
210 | 
211 | 
212 | def _weighted_quantile_arg(values, weights, q=0.5):
213 |     indices = np.argsort(values)
214 |     sorted_indices = np.arange(len(values))[indices]
215 |     medianidx = (weights[indices].cumsum()/weights[indices].sum()).searchsorted(q)
216 |     if (medianidx >= 0) and (medianidx < len(values)):
217 |         return sorted_indices[medianidx]
218 |     return np.nan
219 | 
220 | 
221 | def weighted_quantile(values, weights, q=0.5):
222 |     if len(values) != len(weights):
223 |         raise ValueError("shape of `values` and `weights` doesn't match!")
224 |     index = _weighted_quantile_arg(values, weights, q=q)
225 |     if not np.isnan(index):
226 |         return values[index]
227 |     return np.nan
228 | 
229 | 
230 | def weighted_median(values, weights):
231 |     return weighted_quantile(values, weights, q=0.5)
232 | 
233 | 
234 | def weighted_cov(m, y=None, weights=None, bias=0):
235 |     """Estimate a (weighted) covariance matrix, given data.
236 | 
237 |     Covariance indicates the level to which two variables vary together.
238 |     If we examine N-dimensional samples, :math:`X = [x_1, x_2, ... x_N]^T`,
239 |     then the covariance matrix element :math:`C_{ij}` is the covariance of
240 |     :math:`x_i` and :math:`x_j`. The element :math:`C_{ii}` is the variance
241 |     of :math:`x_i`.
242 | 
243 |     Parameters
244 |     ----------
245 |     m : array_like
246 |         A 1-D or 2-D array containing multiple variables and observations.
247 |         Each row of `m` represents a variable, and each column a single
248 |         observation of all those variables.
249 |     y : array_like, optional
250 |         An additional set of variables and observations. `y` has the same
251 |         form as that of `m`.
252 |     weights : array_like, optional
253 |         A 1-D array containing the weights of the data points. This option
254 |         should be used if data points have different weights in order to
255 |         calculate the weighted covariance.
256 |     bias : int, optional
257 |         Default normalization is by ``(N - 1)``, where ``N`` is the number of
258 |         observations given (unbiased estimate). If `bias` is 1, then
259 |         normalization is by ``N``.
260 | 
261 |     Returns
262 |     -------
263 |     out : ndarray
264 |         The covariance matrix of the variables.
265 | 
266 |     Examples
267 |     --------
268 |     Consider two variables, :math:`x_0` and :math:`x_1`, which
269 |     correlate perfectly, but in opposite directions:
270 | 
271 |     >>> x = np.array([[0, 2], [1, 1], [2, 0]]).T
272 |     >>> x
273 |     array([[0, 1, 2],
274 |            [2, 1, 0]])
275 | 
276 |     Note how :math:`x_0` increases while :math:`x_1` decreases.
The covariance 277 | matrix shows this clearly: 278 | 279 | >>> weighted_cov(x) 280 | array([[ 1., -1.], 281 | [-1., 1.]]) 282 | 283 | Note that element :math:`C_{0,1}`, which shows the correlation between 284 | :math:`x_0` and :math:`x_1`, is negative. 285 | 286 | Further, note how `x` and `y` are combined: 287 | 288 | >>> x = [-2.1, -1, 4.3] 289 | >>> y = [3, 1.1, 0.12] 290 | >>> X = np.vstack((x,y)) 291 | >>> print(weighted_cov(X)) 292 | [[ 11.71 -4.286 ] 293 | [ -4.286 2.14413333]] 294 | >>> print(weighted_cov(x, y)) 295 | [[ 11.71 -4.286 ] 296 | [ -4.286 2.14413333]] 297 | >>> print(weighted_cov(x)) 298 | 11.71 299 | 300 | """ 301 | X = np.array(m, ndmin=2, dtype=float) 302 | if X.size == 0: 303 | # handle empty arrays 304 | return np.array(m) 305 | 306 | axis = 0 307 | tup = (slice(None), np.newaxis) 308 | 309 | N = X.shape[1] 310 | 311 | if weights is not None: 312 | weights = np.asarray(weights)/np.sum(weights) 313 | if len(weights) != N: 314 | raise ValueError("unequal dimension of `data` and `weights`.") 315 | 316 | if y is not None: 317 | y = np.array(y, copy=False, ndmin=2, dtype=float) 318 | X = np.concatenate((X, y), axis) 319 | 320 | X -= np.average(X, axis=1-axis, weights=weights)[tup] 321 | 322 | if bias == 0: 323 | if weights is not None: 324 | fact = np.sum(weights) / (np.sum(weights)**2 - np.sum(weights**2)) 325 | else: 326 | fact = 1 / (N - 1) 327 | else: 328 | if weights is not None: 329 | fact = 1 / np.sum(weights) 330 | else: 331 | fact = 1 / N 332 | 333 | if weights is not None: 334 | return (np.dot(weights * X, X.T.conj()) * fact).squeeze() 335 | 336 | return (np.dot(X, X.T.conj()) * fact).squeeze() 337 | -------------------------------------------------------------------------------- /kde/test_kde.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Test functions for the kde library. 5 | """ 6 | 7 | 8 | from __future__ import absolute_import, division, print_function 9 | 10 | __license__ = """MIT License 11 | 12 | Copyright (c) 2014-2019 Sebastian Schoenen and Martin Leuermann 13 | 14 | Permission is hereby granted, free of charge, to any person obtaining a copy 15 | of this software and associated documentation files (the "Software"), to deal 16 | in the Software without restriction, including without limitation the rights 17 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 18 | copies of the Software, and to permit persons to whom the Software is 19 | furnished to do so, subject to the following conditions: 20 | 21 | The above copyright notice and this permission notice shall be included in all 22 | copies or substantial portions of the Software. 23 | 24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 27 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 29 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 | SOFTWARE. 31 | """ 32 | 33 | import numpy as np 34 | 35 | 36 | def test_kde(version, sampling_method, bw_method, n_samples, adaptive, 37 | alpha=0.3, weight_adaptive_bw=False): 38 | """Test the KDE routines of the kde package. 
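    The self-test in ``main()`` below drives both backends; a single run looks
    roughly like this (arguments exactly as used there):

    >>> test_kde(version='pykde', sampling_method='exponential',
    ...          bw_method='silverman', n_samples=100, adaptive=True,
    ...          alpha=0.3, weight_adaptive_bw=False)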
39 | 
40 | 
41 |     Parameters
42 |     ----------
43 |     version : string
44 |         One of "pykde" or "cudakde"
45 |     sampling_method : string
46 |         One of "uniform" or "exponential"
47 |     bw_method : string
48 |         One of "silverman" or "scott"
49 |     n_samples : int > 0
50 |         Number of random samples to use
51 |     adaptive : bool
52 |         Whether to use adaptive-bandwidth KDE
53 |     alpha : float
54 |         Alpha parameter (used only for adaptive-BW KDE)
55 |     weight_adaptive_bw : bool
56 |         Whether to use the sample weights when computing the adaptive bandwidths
57 | 
58 | 
59 |     Raises
60 |     ------
61 |     Exception if test fails
62 | 
63 |     """
64 |     # Translate inputs
65 |     version = version.strip().lower()
66 |     sampling_method = sampling_method.strip().lower()
67 |     bw_method = bw_method.strip().lower()
68 | 
69 |     if version == "pykde":
70 |         from .pykde import bootstrap_kde, gaussian_kde
71 |     elif version == "cudakde":
72 |         from .cudakde import bootstrap_kde, gaussian_kde
73 |     else:
74 |         raise ValueError('`version` must be one of "pykde" or "cudakde".')
75 | 
76 |     # Define a data model and generate some random data between 0 and 10
77 |     # Ensure an integer number of samples
78 |     n_samples = int(n_samples)
79 | 
80 |     # Exponential model
81 |     expec = lambda x: 1./(np.exp(-10)-1.)**2 * np.exp(-x)
82 | 
83 |     # Generated data, reweighted to the exponential model
84 |     np.random.seed(0)
85 |     if sampling_method == "uniform":
86 |         # Uniformly-generated data and weights
87 |         x1 = np.random.uniform(0, 10, n_samples)
88 |         x1_weights = np.exp(-x1)
89 |     elif sampling_method == "exponential":
90 |         # Exponentially-generated data and weights
91 |         x1 = np.random.exponential(2, n_samples)
92 |         x1_weights = np.exp(-0.5*x1)
93 |     else:
94 |         raise ValueError('`sampling_method` must be one of "uniform" or'
95 |                          ' "exponential".')
96 | 
97 |     # Exponentially generated data (w/o weights)
98 |     x2 = np.random.exponential(1, n_samples)
99 | 
100 |     #
101 |     # Get histograms
102 |     #
103 | 
104 |     # Define bins
105 |     bins = np.linspace(0, 10, 31)
106 | 
107 |     # Weighted data
108 |     hist_weights = np.histogram(x1, bins=bins, weights=x1_weights,
109 |                                 density=True)
110 | 
111 |     # Exponential data
112 |     hist_expo = np.histogram(x2, bins=bins, density=True)
113 | 
114 |     #
115 |     # Get KDE kernels
116 |     #
117 | 
118 |     # Kernels for weighted data (w/o adaptive kernels)
119 |     kernel_weights = gaussian_kde(x1, weights=x1_weights, bw_method=bw_method)
120 | 
121 |     # Kernels for weighted data (with adaptive kernels)
122 |     kernel_weights_adaptive = gaussian_kde(
123 |         x1, weights=x1_weights, bw_method=bw_method, adaptive=adaptive,
124 |         weight_adaptive_bw=weight_adaptive_bw, alpha=alpha
125 |     )
126 | 
127 |     # Kernels for exponential data (w/o adaptive kernels)
128 |     kernel_expo = gaussian_kde(x2, bw_method=bw_method)
129 | 
130 |     # Kernels for exponential data (with adaptive kernels)
131 |     kernel_expo_adaptive = gaussian_kde(x2, bw_method=bw_method,
132 |                                         adaptive=adaptive, alpha=alpha)
133 | 
134 |     #
135 |     # Plot histograms and KDEs
136 |     #
137 | 
138 |     # Define evaluation points
139 |     X = np.linspace(0, 10, 1001)
140 | 
141 |     # In the presence of boundaries, reflect the KDEs at the boundary
142 | 
143 |     # Define reflection range
144 |     x_below = (-2., 0.)
145 |     # Reflection above is only necessary if data is uniformly generated on [0, 10]
146 |     x_above = (10., 12.)
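    # Reflection trick: a Gaussian KDE leaks probability mass past a hard
    # boundary (here x = 0, and x = 10 for uniformly generated data).  The
    # leaked mass is folded back in by evaluating the KDE at points mirrored
    # about the boundary b, i.e. x' = 2*b - x, and adding that contribution
    # to the interior estimate:
    #
    #     f_reflected(x) = f_kde(x) + f_kde(2*b - x)    for x near b
    #
    # The masks and mirrored evaluation points for this are built below.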
147 | 148 | # Define evaluation points beyond the boundaries (below 0 and above 10) 149 | mask_below = (X <= (x_below[1]-(x_below[0]-x_below[1]))) 150 | X_below = x_below[1] - (X[mask_below] - x_below[1]) 151 | 152 | mask_above = (X >= (x_above[0]-(x_above[1]-x_above[0]))) 153 | X_above = x_above[0] + (x_above[0] - X[mask_above]) 154 | 155 | Y_weights = kernel_weights(X) 156 | Y_weights[mask_below] += kernel_weights(X_below) 157 | if sampling_method == "uniform": 158 | Y_weights[mask_above] += kernel_weights(X_above) 159 | 160 | Y_weights_adaptive = kernel_weights_adaptive(X) 161 | Y_weights_adaptive[mask_below] += kernel_weights_adaptive(X_below) 162 | if sampling_method == "uniform": 163 | Y_weights_adaptive[mask_above] += kernel_weights_adaptive(X_above) 164 | 165 | # 166 | # Plots for exponential data 167 | # 168 | 169 | Y_expo = kernel_expo(X) 170 | Y_expo[mask_below] += kernel_expo(X_below) 171 | 172 | Y_expo_adaptive = kernel_expo_adaptive(X) 173 | Y_expo_adaptive[mask_below] += kernel_expo_adaptive(X_below) 174 | 175 | # 176 | # For an error estimate on an evaluation point use bootstrapping 177 | # 178 | 179 | # Define the number of bootstrap iterations 180 | nbootstraps = 1000 181 | 182 | # 183 | # Get bootstrapped KDE kernels (settings as set above) 184 | # 185 | 186 | # Kernels for weighted data (w/o adaptive kernels) 187 | bootstrap_kernel_weights = bootstrap_kde(x1, weights=x1_weights, 188 | bw_method=bw_method, 189 | niter=nbootstraps) 190 | 191 | # Kernels for weighted data (with adaptive kernels) 192 | bootstrap_kernel_weights_adaptive = bootstrap_kde( 193 | x1, weights=x1_weights, bw_method=bw_method, adaptive=adaptive, 194 | weight_adaptive_bw=weight_adaptive_bw, alpha=alpha, niter=nbootstraps 195 | ) 196 | 197 | # Kernels for exponential data (w/o adaptive kernels) 198 | bootstrap_kernel_expo = bootstrap_kde(x2, bw_method=bw_method, 199 | niter=nbootstraps) 200 | 201 | # Kernels for exponential data (with adaptive kernels) 202 | bootstrap_kernel_expo_adaptive = bootstrap_kde(x2, bw_method=bw_method, 203 | adaptive=adaptive, 204 | alpha=alpha, 205 | niter=nbootstraps) 206 | 207 | # Plots using reflection and bootstrapping 208 | 209 | # Plots for weighted data 210 | 211 | Y_weights = bootstrap_kernel_weights(X) 212 | Y_weights_below = bootstrap_kernel_weights(X_below) 213 | Y_weights_above = bootstrap_kernel_weights(X_above) 214 | 215 | Y_weights[0][mask_below] += Y_weights_below[0] 216 | Y_weights[1][mask_below] = np.sqrt( 217 | Y_weights[1][mask_below]**2 + Y_weights_below[1]**2 218 | ) 219 | if sampling_method == "uniform": 220 | Y_weights[0][mask_above] += Y_weights_above[0] 221 | Y_weights[1][mask_above] = np.sqrt( 222 | Y_weights[1][mask_above]**2 + Y_weights_above[1]**2 223 | ) 224 | 225 | Y_weights_adaptive = bootstrap_kernel_weights_adaptive(X) 226 | Y_weights_adaptive_below = bootstrap_kernel_weights_adaptive(X_below) 227 | Y_weights_adaptive_above = bootstrap_kernel_weights_adaptive(X_above) 228 | 229 | Y_weights_adaptive[0][mask_below] += Y_weights_adaptive_below[0] 230 | Y_weights_adaptive[1][mask_below] = np.sqrt( 231 | Y_weights_adaptive[1][mask_below]**2 + Y_weights_adaptive_below[1]**2 232 | ) 233 | if sampling_method == "uniform": 234 | Y_weights_adaptive[0][mask_above] += Y_weights_adaptive_above[0] 235 | Y_weights_adaptive[1][mask_above] = np.sqrt( 236 | Y_weights_adaptive[1][mask_above]**2 237 | + Y_weights_adaptive_above[1]**2 238 | ) 239 | 240 | Y_expo = bootstrap_kernel_expo(X) 241 | Y_expo_below = bootstrap_kernel_expo(X_below) 242 | Y_expo_above = 
bootstrap_kernel_expo(X_above)
243 | 
244 |     Y_expo[0][mask_below] += Y_expo_below[0]
245 |     Y_expo[1][mask_below] = np.sqrt(
246 |         Y_expo[1][mask_below]**2 + Y_expo_below[1]**2
247 |     )
248 | 
249 |     Y_expo_adaptive = bootstrap_kernel_expo_adaptive(X)
250 |     Y_expo_adaptive_below = bootstrap_kernel_expo_adaptive(X_below)
251 |     Y_expo_adaptive_above = bootstrap_kernel_expo_adaptive(X_above)
252 | 
253 |     Y_expo_adaptive[0][mask_below] += Y_expo_adaptive_below[0]
254 |     Y_expo_adaptive[1][mask_below] = np.sqrt(
255 |         Y_expo_adaptive[1][mask_below]**2 + Y_expo_adaptive_below[1]**2
256 |     )
257 | 
258 | 
259 | def main():
260 |     """Main"""
261 |     test_kde(version='pykde',
262 |              sampling_method='exponential',
263 |              bw_method='silverman',
264 |              n_samples=100,
265 |              adaptive=True,
266 |              alpha=0.3,
267 |              weight_adaptive_bw=False)
268 |     print("<< test_kde.py / pykde : PASS >>")
269 |     test_kde(version='cudakde',
270 |              sampling_method='exponential',
271 |              bw_method='silverman',
272 |              n_samples=100,
273 |              adaptive=True,
274 |              alpha=0.3,
275 |              weight_adaptive_bw=True)
276 |     print("<< test_kde.py / cudakde : PASS >>")
277 | 
278 | 
279 | if __name__ == "__main__":
280 |     main()
281 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | 
4 | from setuptools import setup, Extension
5 | 
6 | 
7 | if __name__ == "__main__":
8 |     ckde = Extension(
9 |         name='kde.kde',
10 |         sources=['kde/kde.c'],
11 |         extra_compile_args=['-Wall', '-O3', '-fPIC', '-Werror']
12 |     )
13 | 
14 |     setup(
15 |         name='kde',
16 |         version='0.1',
17 |         description=('Multi-dimensional Kernel Density Estimation (KDE)'
18 |                      ' including adaptive bandwidths and C and'
19 |                      ' CUDA implementations for specific cases.'),
20 |         author='Sebastian Schoenen, Martin Leuermann',
21 |         author_email='schoenen@physik.rwth-aachen.de',
22 |         url='https://github.com/icecubeopensource/kde',
23 |         install_requires=[
24 |             'numexpr',
25 |             'numpy',
26 |             'scipy',
27 |         ],
28 |         extras_require={'cuda': ['pycuda']},
29 |         ext_modules=[ckde],
30 |         packages=['kde'],
31 |         entry_points={
32 |             'console_scripts': ['test_kde.py = kde.test_kde:main']
33 |         }
34 |     )
35 | 
--------------------------------------------------------------------------------
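Usage sketch: the following is a minimal example based only on the calls
exercised in kde/test_kde.py above; the authoritative signatures live in
kde/pykde.py and kde/cudakde.py. In particular, the assumption that a
bootstrap kernel returns a (value, uncertainty) pair per evaluation point is
inferred from how test_kde.py indexes its output.

    import numpy as np
    from kde.pykde import gaussian_kde, bootstrap_kde

    # Weighted 1-D sample, as in test_kde.py
    x = np.random.exponential(2, 1000)
    w = np.exp(-0.5 * x)

    # Adaptive-bandwidth KDE with Silverman's rule for the global bandwidth
    kernel = gaussian_kde(x, weights=w, bw_method="silverman",
                          adaptive=True, weight_adaptive_bw=False, alpha=0.3)

    # Evaluate the density on a grid
    grid = np.linspace(0, 10, 1001)
    density = kernel(grid)

    # Bootstrapped KDE; indexing follows test_kde.py (assumed: [0] = estimate,
    # [1] = per-point uncertainty)
    boot = bootstrap_kde(x, weights=w, bw_method="silverman", niter=100)
    estimate, error = boot(grid)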