├── .gitignore ├── MANIFEST ├── adaptivekde ├── __init__.py ├── sshist.py ├── sskernel.py └── ssvkernel.py ├── setup.py ├── README.txt └── LICENSE.txt /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | adaptivekde.egg-info/ 4 | dist/ 5 | -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | README.txt 3 | setup.py 4 | adaptivekde/__init__.py 5 | adaptivekde/sshist.py 6 | adaptivekde/sskernel.py 7 | adaptivekde/ssvkernel.py 8 | -------------------------------------------------------------------------------- /adaptivekde/__init__.py: -------------------------------------------------------------------------------- 1 | from .sshist import sshist 2 | from .sskernel import sskernel 3 | from .ssvkernel import ssvkernel 4 | 5 | __version__ = '1.0.0' 6 | 7 | __all__ = ('sshist', 8 | 'sskernel', 9 | 'ssvkernel') 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name = "adaptivekde", 5 | packages = [ "adaptivekde" ], 6 | version = "1.1.0", 7 | description = 'Optimal fixed or locally adaptive kernel density estimation', 8 | long_description=open('README.txt').read(), 9 | long_description_content_type='text/markdown', 10 | author = "Lee A.D. 
Cooper", 11 | author_email = 'cooperle@gmail.com', 12 | url = 'https://github.com/cooperlab/AdaptiveKDE', 13 | classifiers = ['Development Status :: 5 - Production/Stable', 14 | 'Environment :: Console', 15 | 'License :: OSI Approved :: Apache Software License', 16 | 'Operating System :: OS Independent', 17 | 'Programming Language :: Python :: 3', 18 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 19 | 'Topic :: Software Development :: Libraries :: Python Modules'], 20 | ) 21 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | This package implements adaptive kernel density estimation algorithms for 1-dimensional 2 | signals developed by Hideaki Shimazaki. This enables the generation of smoothed histograms 3 | that preserve important density features at multiple scales, as opposed to naive 4 | single-bandwidth kernel density methods that can either over or under smooth density 5 | estimates. These methods are described in Shimazaki's paper: 6 | 7 | H. Shimazaki and S. Shinomoto, "Kernel Bandwidth Optimization in Spike Rate Estimation," 8 | in Journal of Computational Neuroscience 29(1-2): 171–182, 2010 9 | http://dx.doi.org/10.1007/s10827-009-0180-4. 10 | 11 | License: 12 | All software in this package is licensed under the Apache License 2.0. 13 | See LICENSE.txt for more details. 14 | 15 | Authors: 16 | Hideaki Shimazaki (shimazaki.hideaki.8x@kyoto-u.jp) shimazaki on Github 17 | Lee A.D. Cooper (cooperle@gmail.com) cooperlab on GitHub 18 | Subhasis Ray (ray.subhasis@gmail.com) 19 | 20 | Three methods are implemented in this package: 21 | 1. sshist - can be used to determine the optimal number of histogram bins for independent 22 | identically distributed samples from an underlying one-dimensional distribution. 
# ===========================================================================
# README.txt (continued)
# ===========================================================================
#    The principle here is to minimize the L2 norm of the difference between
#    the histogram and the underlying distribution.
#
# 2. sskernel - implements kernel density estimation with a single
#    globally-optimized bandwidth.
#
# 3. ssvkernel - implements kernel density estimation with a locally variable
#    bandwidth.
#
# Dependencies: The functions in this package depend on NumPy for various
# operations including fast Fourier transforms and histogram generation.

# ===========================================================================
# /adaptivekde/sshist.py
# ===========================================================================
import numpy as np


def sshist(x, N=range(2, 501), SN=30):
    """
    Returns the optimal number of bins in a histogram used for density
    estimation.

    Optimization principle is to minimize expected L2 loss function between
    the histogram and an unknown underlying density function.
    An assumption made is merely that samples are drawn from the density
    independently of each other.

    The optimal binwidth D* is obtained as a minimizer of the formula,
    (2K-V) / D^2,
    where K and V are mean and variance of sample counts across bins with
    width D. Optimal number of bins is given as (max(x) - min(x)) / D.

    Parameters
    ----------
    x : array_like
        One-dimensional data to fit histogram to.
    N : array_like or int, optional
        Candidate bin counts. Only the largest value is used: every integer
        from 2 up to min(max(N), floor(range / (2 * dx))) is evaluated, where
        dx is the smallest non-zero gap between sorted samples.
        Default evaluates up to 500 bins.
    SN : int, optional
        Number of shifted bin placements averaged for each bin count.

    Returns
    -------
    optN : int
        Optimal number of bins to represent the data in x.
    optD : float
        Optimal width of bins.
    edges : ndarray
        The optN + 1 equally spaced edges of the optimized bins, spanning
        [min(x), max(x)].
    C : ndarray
        Cost function C[i] of evaluating histogram fit with N[i] bins.
    N : ndarray
        The bin counts that were actually evaluated.

    Raises
    ------
    ValueError
        If x is empty or all samples are identical (no bin width can be
        derived).

    See Also
    --------
    sskernel, ssvkernel

    References
    ----------
    .. [1] H. Shimazaki and S. Shinomoto, "A method for selecting the bin size
           of a time histogram," in Neural Computation 19(6), 1503-1527, 2007
           http://dx.doi.org/10.1162/neco.2007.19.6.1503
    """

    x = np.asarray(x, dtype=float).ravel()
    if x.size == 0:
        raise ValueError("sshist requires at least one sample")

    # determine range of input 'x'
    x_min = x.min()
    x_max = x.max()

    # smallest non-zero difference 'dx' between neighbouring sorted samples
    gaps = np.diff(np.sort(x))
    gaps = gaps[gaps > 0]
    if gaps.size == 0:
        raise ValueError("sshist requires at least two distinct sample "
                         "values")
    dx = gaps.min()

    # setup bins to evaluate; the bound must be an integer so that the
    # candidates can be enumerated, and never below the minimum of two bins
    n_min = 2
    n_max = int(min(np.floor((x_max - x_min) / (2 * dx)), np.max(N)))
    n_max = max(n_max, n_min)
    N = np.arange(n_min, n_max + 1)
    D = (x_max - x_min) / N

    # compute cost function over each possible number of bins
    SN = int(SN)
    Cs = np.zeros((N.size, SN))
    for i, (n, d) in enumerate(zip(N, D)):      # loop over number of bins
        for p, sh in enumerate(np.linspace(0, d, SN)):  # loop over shifts

            # define bin edges
            edges = np.linspace(x_min + sh - d / 2,
                                x_max + sh - d / 2, n + 1)

            # count number of events in these bins
            counts = np.histogram(x, edges)[0]

            # get mean and (biased) variance of events
            k = counts.mean()
            v = np.sum((counts - k) ** 2) / n

            Cs[i, p] = (2 * k - v) / d ** 2

    # average over shift window
    C = Cs.mean(axis=1)

    # get bin count that minimizes cost C
    idx = np.argmin(C)
    optN = int(N[idx])
    optD = D[idx]

    # optN bins are delimited by optN + 1 edges
    edges = np.linspace(x_min, x_max, optN + 1)

    return optN, optD, edges, C, N


# ===========================================================================
# /adaptivekde/sskernel.py
# ===========================================================================
def sskernel(x, tin=None, W=None, nbs=1000):
    r"""
    Generates a kernel density estimate with globally-optimized bandwidth.

    The optimal bandwidth is obtained as a minimizer of the formula, sum_{i,j}
    \int k(x - x_i) k(x - x_j) dx - 2 sum_{i~=j} k(x_i - x_j), where k(x) is
    the kernel function.

    Parameters
    ----------
    x : array_like
        The one-dimensional samples drawn from the underlying density.
    tin : array_like, optional
        The (increasing) values where the density estimate is to be evaluated
        in generating the output 'y'. By default an equally spaced grid of at
        most 1000 points spanning the data is used.
    W : array_like, optional
        The kernel bandwidths to use in optimization. Should not be chosen
        smaller than the sampling resolution of 'x'. When omitted, a
        golden-section search (at most 20 iterations) is used instead.
    nbs : int, optional
        The number of bootstrap samples to use in estimating the [0.05, 0.95]
        confidence interval of the output 'y'.

    Returns
    -------
    y : ndarray
        The estimated density, evaluated at points t / tin.
    t : ndarray
        The points where the density estimate 'y' is evaluated.
    optw : float
        The optimal global kernel bandwidth.
    W : ndarray
        The kernel bandwidths evaluated during optimization.
    C : ndarray
        The cost functions associated with the bandwidths 'W'.
    confb95 : ndarray
        The 5% and 95% confidence interval of the kernel density estimate 'y'.
        Has dimensions 2 x len(y). confb95[0,:] corresponds to the 5%
        interval, and confb95[1,:] corresponds to the 95% interval. Filled
        with NaN when nbs is 0.
    yb : ndarray
        The bootstrap samples used in estimating confb95. Each row corresponds
        to one bootstrap sample.

    Raises
    ------
    ValueError
        If the samples do not contain two distinct values, if 'tin' has fewer
        than two points or contains no samples, or if 'W' is given but holds
        no usable bandwidth.

    See Also
    --------
    sshist, ssvkernel

    References
    ----------
    .. [1] H. Shimazaki and S. Shinomoto, "Kernel Bandwidth Optimization in
           Spike Rate Estimation," in Journal of Computational Neuroscience
           29(1-2): 171-182, 2010 http://dx.doi.org/10.1007/s10827-009-0180-4
    """

    x = np.asarray(x, dtype=float).ravel()
    tin, t, x_ab, T = _evaluation_grid(x, tin)

    # calculate delta t
    dt = np.min(np.diff(t))

    # create the finest histogram (bins centred on the grid points)
    edges = np.append(t, t[-1] + dt) - dt / 2
    counts = np.histogram(x_ab, edges)[0]
    N = float(counts.sum())
    y_hist = counts / N / dt

    if W is not None:
        # global search over the user supplied bandwidths
        W = np.asarray(W, dtype=float).ravel()
        C = np.zeros(W.size)
        C_min = np.inf
        optw = None
        for k, w in enumerate(W):
            C[k], yh = _fixed_bandwidth_cost(y_hist, N, w, dt)
            if C[k] < C_min:
                C_min = C[k]
                optw = w
                # normalised exactly like the golden-section branch below
                y = yh / np.sum(yh * dt)
        if optw is None:
            raise ValueError("'W' must contain at least one bandwidth with "
                             "a finite cost")
    else:
        # optimized search using golden section, carried out on the
        # ilogexp-transformed bandwidth axis
        max_iter = 20
        C = np.zeros((max_iter, 1))
        W = np.zeros((max_iter, 1))
        Wmin = 2 * dt
        Wmax = T
        tol = 10e-5
        phi = (5 ** 0.5 + 1) / 2
        a = ilogexp(Wmin)
        b = ilogexp(Wmax)
        c1 = (phi - 1) * a + (2 - phi) * b
        c2 = (2 - phi) * a + (phi - 1) * b
        f1, yh1 = _fixed_bandwidth_cost(y_hist, N, logexp(c1), dt)
        f2, yh2 = _fixed_bandwidth_cost(y_hist, N, logexp(c2), dt)

        # seed the result with the better starting point so that 'optw' and
        # 'y' are defined even if the loop below never executes
        if f1 < f2:
            optw, y = logexp(c1), yh1 / np.sum(yh1 * dt)
        else:
            optw, y = logexp(c2), yh2 / np.sum(yh2 * dt)

        k = 0
        while (np.abs(b - a) > tol * (np.abs(c1) + np.abs(c2))
               and k < max_iter):
            if f1 < f2:
                b = c2
                c2 = c1
                c1 = (phi - 1) * a + (2 - phi) * b
                f2 = f1
                f1, yh1 = _fixed_bandwidth_cost(y_hist, N, logexp(c1), dt)
                W[k] = logexp(c1)
                C[k] = f1
                optw = logexp(c1)
                y = yh1 / np.sum(yh1 * dt)
            else:
                a = c1
                c1 = c2
                c2 = (2 - phi) * a + (phi - 1) * b
                f1 = f2
                f2, yh2 = _fixed_bandwidth_cost(y_hist, N, logexp(c2), dt)
                W[k] = logexp(c2)
                C[k] = f2
                optw = logexp(c2)
                y = yh2 / np.sum(yh2 * dt)

            # increment iteration counter
            k = k + 1

        # discard unused entries in W, C
        C = C[0:k]
        W = W[0:k]

    # estimate confidence intervals by bootstrapping
    nbs = int(nbs)
    yb = np.zeros((nbs, tin.size))
    for i in range(nbs):
        # randint's upper bound is exclusive, so x_ab.size lets every sample
        # (including the last one) be drawn
        idx = np.random.randint(0, x_ab.size, x_ab.size)
        xb = x_ab[idx]
        y_histb = np.histogram(xb, edges)[0] / dt / N
        yb_buf = fftkernel(y_histb, optw / dt)
        yb_buf = yb_buf / np.sum(yb_buf * dt)
        yb[i, :] = np.interp(tin, t, yb_buf)
    confb95 = _bootstrap_band(yb)

    # return outputs
    y = np.interp(tin, t, y)
    t = tin

    return y, t, optw, W, C, confb95, yb


# ===========================================================================
# /adaptivekde/ssvkernel.py
# ===========================================================================
def ssvkernel(x, tin=None, M=80, nbs=100, WinFunc='Boxcar'):
    """
    Generates a locally adaptive kernel-density estimate for one-dimensional
    data.

    The user provides a one-dimensional vector of samples drawn from some
    underlying unknown distribution, and optionally the values where they want
    to estimate the probability density of that distribution. The algorithm
    solves an optimization problem to identify variable bandwidths across the
    domain where the data is provided.

    The optimization is based on a principle of minimizing expected L2 loss
    function between the kernel estimate and an unknown underlying density
    function. An assumption is merely that samples are drawn from the density
    independently of each other.

    The locally adaptive bandwidth is obtained by iteratively computing
    optimal fixed-size bandwidths within local intervals. The optimal
    bandwidths are selected such that they are selected in the intervals that
    are gamma times larger than the optimal bandwidths themselves. The
    parameter gamma is optimized by minimizing the L2 risk estimate.

    Parameters
    ----------
    x : array_like
        The one-dimensional samples drawn from the underlying density.
    tin : array_like, optional
        The (increasing) values where the density estimate is to be evaluated
        in generating the output 'y'. Default value = None, in which case an
        equally spaced grid of at most 1000 points spanning the data is used.
    M : int, optional
        The number of window sizes to evaluate. Default value = 80.
    nbs : int, optional
        The number of bootstrap samples to use in estimating the [0.05, 0.95]
        confidence interval of the output 'y'.
    WinFunc : string, optional
        The type of window function to use in estimating local bandwidth.
        Choose from one of 'Boxcar', 'Laplace', 'Cauchy' and 'Gauss'. Any
        other value is treated as 'Gauss'. Default value = 'Boxcar'.

    Returns
    -------
    y : ndarray
        The estimated density, evaluated at points t / tin.
    t : ndarray
        The points where the density estimate 'y' is evaluated.
    optw : ndarray
        The optimal local kernel bandwidths at 't'.
    gs : ndarray
        The stiffness constants of the variable bandwidths evaluated.
    C : ndarray
        Cost functions associated with stiffness constants.
    confb95 : ndarray
        The 5% and 95% confidence interval of the kernel density estimate 'y'.
        Has dimensions 2 x len(y). confb95[0,:] corresponds to the 5%
        interval, and confb95[1,:] corresponds to the 95% interval. Filled
        with NaN when nbs is 0.
    yb : ndarray
        The bootstrap samples used in estimating confb95. Each row corresponds
        to one bootstrap sample.

    Raises
    ------
    ValueError
        If the samples do not contain two distinct values, or if 'tin' has
        fewer than two points or contains no samples.

    See Also
    --------
    sshist, sskernel

    References
    ----------
    .. [1] H. Shimazaki and S. Shinomoto, "Kernel Bandwidth Optimization in
           Spike Rate Estimation," in Journal of Computational Neuroscience
           29(1-2): 171-182, 2010 http://dx.doi.org/10.1007/s10827-009-0180-4
    """

    x = np.asarray(x, dtype=float).ravel()
    tin, t, x_ab, T = _evaluation_grid(x, tin)
    M = int(M)

    # calculate delta t
    dt = np.min(np.diff(t))

    # create the finest histogram (bins centred on the grid points)
    edges = np.append(t, t[-1] + dt) - dt / 2
    y_hist = np.histogram(x_ab, edges)[0] / dt
    L = y_hist.size
    N = float(np.sum(y_hist * dt))

    # initialize window sizes, equally spaced on the ilogexp axis
    W = logexp(np.linspace(ilogexp(5 * dt), ilogexp(T), M))

    # compute local cost functions for every fixed bandwidth
    c = np.zeros((M, L))
    for j in range(M):
        w = W[j]
        yh = fftkernel(y_hist, w / dt)
        c[j, :] = yh**2 - 2 * yh * y_hist + 2 / (2 * np.pi)**0.5 / w * y_hist

    # for every window size, pick the bandwidth with the smallest
    # window-averaged local cost at each grid point
    optws = np.zeros((M, L))
    for i in range(M):
        Win = W[i]
        C_local = np.zeros((M, L))
        for j in range(M):
            C_local[j, :] = fftkernelWin(c[j, :], Win / dt, WinFunc)
        n = np.argmin(C_local, axis=0)
        optws[i, :] = W[n]

    # golden section search for stiffness parameter of variable bandwidths
    max_iter = 30
    k = 0
    gs = np.zeros((max_iter, 1))
    C = np.zeros((max_iter, 1))
    tol = 1e-5
    a = 1e-12
    b = 1
    phi = (5**0.5 + 1) / 2
    c1 = (phi - 1) * a + (2 - phi) * b
    c2 = (2 - phi) * a + (phi - 1) * b
    f1 = _variable_bandwidth_cost(y_hist, N, t, dt, optws, W, WinFunc, c1)[0]
    f2 = _variable_bandwidth_cost(y_hist, N, t, dt, optws, W, WinFunc, c2)[0]
    while (np.abs(b - a) > tol * (abs(c1) + abs(c2))) and k < max_iter:
        if f1 < f2:
            b = c2
            c2 = c1
            c1 = (phi - 1) * a + (2 - phi) * b
            f2 = f1
            f1, yv1, optwp1 = _variable_bandwidth_cost(
                y_hist, N, t, dt, optws, W, WinFunc, c1)
            yopt = yv1 / np.sum(yv1 * dt)
            optw = optwp1
        else:
            a = c1
            c1 = c2
            c2 = (2 - phi) * a + (phi - 1) * b
            f1 = f2
            f2, yv2, optwp2 = _variable_bandwidth_cost(
                y_hist, N, t, dt, optws, W, WinFunc, c2)
            yopt = yv2 / np.sum(yv2 * dt)
            optw = optwp2

        # capture estimates and increment iteration counter
        gs[k] = c1
        C[k] = f1
        k = k + 1

    # discard unused entries in gs, C
    gs = gs[0:k]
    C = C[0:k]

    # estimate confidence intervals by bootstrapping
    nbs = int(nbs)
    yb = np.zeros((nbs, tin.size))
    for i in range(nbs):
        # Poisson-distributed resample size; at least one draw keeps the
        # normalisation below well defined
        Nb = max(1, np.random.poisson(lam=N))
        idx = np.random.randint(0, x_ab.size, Nb)
        xb = x_ab[idx]
        y_histb = np.histogram(xb, edges)[0]
        nz = y_histb.nonzero()
        y_histb_nz = y_histb[nz]
        t_nz = t[nz]
        yb_buf = np.zeros((L, ))
        for j in range(L):
            yb_buf[j] = np.sum(y_histb_nz * Gauss(t[j] - t_nz, optw[j])) / Nb
        yb_buf = yb_buf / np.sum(yb_buf * dt)
        yb[i, :] = np.interp(tin, t, yb_buf)
    confb95 = _bootstrap_band(yb)

    # return outputs
    y = np.interp(tin, t, yopt)
    optw = np.interp(tin, t, optw)
    t = tin

    return y, t, optw, gs, C, confb95, yb


# ===========================================================================
# Helpers of sskernel.py / ssvkernel.py. Both files carried their own copies
# of fftkernel, logexp and ilogexp and each defined a 'CostFunction'; they are
# listed once here, with 'CostFunction' accepting both legacy call shapes.
# ===========================================================================
def _evaluation_grid(x, tin):
    """Build the evaluation grid shared by sskernel and ssvkernel.

    Returns (tin, t, x_ab, T): the output grid, the internal computation
    grid, the samples lying inside the output grid and the data range.
    """
    if x.size == 0:
        raise ValueError("at least one sample is required")
    x_lo = x.min()
    x_hi = x.max()
    T = x_hi - x_lo

    # sampling resolution: smallest non-zero gap between sorted samples
    gaps = np.diff(np.sort(x))
    gaps = gaps[gaps > 0]
    if gaps.size == 0:
        raise ValueError("at least two distinct sample values are required")
    dt_samp = gaps.min()

    # at most 1000 grid points; at least two so that a spacing exists
    n_grid = max(2, int(min(np.ceil(T / dt_samp), 1e3)))

    if tin is None:
        tin = np.linspace(x_lo, x_hi, n_grid)
        t = tin
    else:
        tin = np.asarray(tin, dtype=float).ravel()
        if tin.size < 2:
            raise ValueError("'tin' must contain at least two points")
        if dt_samp > np.min(np.diff(tin)):
            # requested grid is finer than the data resolution: compute on a
            # coarser grid and interpolate back onto 'tin' afterwards
            t = np.linspace(tin.min(), tin.max(), n_grid)
        else:
            t = tin

    x_ab = x[(x >= tin.min()) & (x <= tin.max())]
    if x_ab.size == 0:
        raise ValueError("no samples fall inside the range of 'tin'")
    return tin, t, x_ab, T


def _bootstrap_band(yb):
    """Return the 2 x len(y) array of 5% / 95% bootstrap order statistics."""
    nbs = yb.shape[0]
    if nbs == 0:
        return np.full((2, yb.shape[1]), np.nan)
    ybsort = np.sort(yb, axis=0)
    y95b = ybsort[int(np.floor(0.05 * nbs)), :]
    y95u = ybsort[int(np.floor(0.95 * nbs)), :]
    return np.stack((y95b, y95u), axis=0)


def _fixed_bandwidth_cost(y_hist, N, w, dt):
    """Cost of a single Gaussian bandwidth 'w' (sskernel).

    Returns (C, yh) where yh is 'y_hist' smoothed with bandwidth w.
    """
    # build normal smoothing kernel
    yh = fftkernel(y_hist, w / dt)

    # formula for density
    C = np.sum(yh**2) * dt - 2 * np.sum(yh * y_hist) * dt + 2 \
        / (2 * np.pi)**0.5 / w / N
    C = C * N**2

    return C, yh


def _variable_bandwidth_cost(y_hist, N, t, dt, optws, WIN, WinFunc, g):
    """Cost of the variable-bandwidth estimate for stiffness 'g' (ssvkernel).

    Returns (Cg, yv, optwp): the cost, the balloon estimate and the smoothed
    local bandwidths on the grid 't'.
    """
    L = y_hist.size
    w_smallest = np.min(WIN)
    w_largest = np.max(WIN)

    # select, per grid point, the bandwidth whose ratio to its window size
    # matches the stiffness g
    optwv = np.zeros((L, ))
    for k in range(L):
        gs = optws[:, k] / WIN
        if g > np.max(gs):
            optwv[k] = w_smallest
        elif g < np.min(gs):
            optwv[k] = w_largest
        else:
            idx = np.nonzero(gs >= g)[0].max()
            optwv[k] = g * WIN[idx]

    # Nadaraya-Watson kernel regression of the selected bandwidths;
    # unknown window names fall back to the Gaussian window
    window = {'Boxcar': Boxcar,
              'Laplace': Laplace,
              'Cauchy': Cauchy}.get(WinFunc, Gauss)
    scale = optwv / g
    optwp = np.zeros((L, ))
    for k in range(L):
        Z = window(t[k] - t, scale)
        optwp[k] = np.sum(optwv * Z) / np.sum(Z)

    # speed-optimized balloon estimator (only non-empty bins contribute)
    nz = y_hist.nonzero()
    y_hist_nz = y_hist[nz]
    t_nz = t[nz]
    yv = np.zeros((L, ))
    for k in range(L):
        yv[k] = np.sum(y_hist_nz * dt * Gauss(t[k] - t_nz, optwp[k]))
    yv = yv * N / np.sum(yv * dt)

    # cost function of estimated kernel
    cg = yv**2 - 2 * yv * y_hist + 2 / (2 * np.pi)**0.5 / optwp * y_hist
    Cg = np.sum(cg * dt)

    return Cg, yv, optwp


def CostFunction(y_hist, N, *args):
    """Legacy entry point for both cost functions (positional calls only).

    CostFunction(y_hist, N, w, dt) evaluates the fixed-bandwidth cost and
    CostFunction(y_hist, N, t, dt, optws, WIN, WinFunc, g) the
    variable-bandwidth cost.
    """
    if len(args) == 2:
        return _fixed_bandwidth_cost(y_hist, N, *args)
    if len(args) == 6:
        return _variable_bandwidth_cost(y_hist, N, *args)
    raise TypeError("CostFunction expects 4 or 8 positional arguments, got "
                    "%d" % (len(args) + 2))


def _padded_frequencies(L, w):
    """Return (n, f): the power-of-two FFT length that pads a length-L signal
    by at least 3*w samples, and the matching signed frequency axis."""
    n = int(2 ** np.ceil(np.log2(L + 3 * w)))
    f = np.arange(n) / n
    f = np.concatenate((-f[0: n // 2 + 1], f[1: n // 2][::-1]))
    return n, f


def fftkernel(x, w):
    """Smooth 'x' with a Gaussian of standard deviation 'w' (in samples)
    using a zero-padded FFT; returns an array of the same length as x."""
    L = x.size
    n, f = _padded_frequencies(L, w)

    # forward padded transform
    X = np.fft.fft(x, n)

    # evaluate kernel in the frequency domain
    K = np.exp(-0.5 * (w * 2 * np.pi * f) ** 2)

    # convolve and transform back from frequency domain
    y = np.real(np.fft.ifft(X * K, n))
    return y[0:L]


def fftkernelWin(x, w, WinFunc):
    """Smooth 'x' with the window 'WinFunc' of scale 'w' (in samples) using a
    zero-padded FFT. Unknown window names are treated as 'Gauss'."""
    L = x.size
    n, f = _padded_frequencies(L, w)

    # forward padded transform
    X = np.fft.fft(x, n)
    t = 2 * np.pi * f

    # determine window function - evaluate kernel
    if WinFunc == 'Boxcar':
        a = 12**0.5 * w
        K = np.zeros(len(t))
        K[0] = 1     # limit of the sinc expression at zero frequency
        K[1:] = 2 * np.sin(a * t[1:] / 2) / (a * t[1:])
    elif WinFunc == 'Laplace':
        K = 1 / (1 + (w * 2 * np.pi * f)**2 / 2)
    elif WinFunc == 'Cauchy':
        K = np.exp(-w * np.abs(2 * np.pi * f))
    else:  # WinFunc == 'Gauss'
        K = np.exp(-0.5 * (w * 2 * np.pi * f)**2)

    # convolve and transform back from frequency domain
    y = np.real(np.fft.ifft(X * K, n))
    return y[0:L]


def Gauss(x, w):
    """Gaussian density with standard deviation 'w' evaluated at 'x'."""
    y = 1 / (2 * np.pi)**0.5 / w * np.exp(-x**2 / 2 / w**2)
    return y


def Laplace(x, w):
    """Laplace window of scale 'w' evaluated at 'x'."""
    y = 1 / 2**0.5 / w * np.exp(-(2**0.5) / w * np.abs(x))
    return y


def Cauchy(x, w):
    """Cauchy window of scale 'w' evaluated at 'x'."""
    y = 1 / (np.pi * w * (1 + (x / w)**2))
    return y


def Boxcar(x, w):
    """Boxcar window of total width sqrt(12) * w evaluated at 'x'.

    Works for scalar or array 'w' (broadcast against 'x').
    """
    a = 12**0.5 * np.asarray(w, dtype=float)
    return np.where(np.abs(x) > a / 2, 0.0, 1 / a)


def logexp(x):
    """Return log(1 + exp(x)); values of x >= 100 are returned unchanged.

    Accepts scalars (returns a scalar) and arrays (returns an array).
    """
    x = np.asarray(x, dtype=float)
    small = x < 1e2
    # the inner where keeps exp() away from the large entries
    y = np.where(small, np.log(1 + np.exp(np.where(small, x, 0.0))), x)
    return y[()]


def ilogexp(x):
    """Return log(exp(x) - 1), the inverse of logexp; values of x >= 100 are
    returned unchanged.

    Accepts scalars (returns a scalar) and arrays (returns an array).
    """
    x = np.asarray(x, dtype=float)
    small = x < 1e2
    # the inner where keeps exp() away from the large entries
    y = np.where(small, np.log(np.exp(np.where(small, x, 1.0)) - 1), x)
    return y[()]


# ===========================================================================
# /LICENSE.txt (beginning; the text continues after this block)
# ===========================================================================
#                                  Apache License
#                            Version 2.0, January 2004
#                         http://www.apache.org/licenses/
#
#    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
#
#    1. Definitions.
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2016 Hideaki Shimazaki 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. --------------------------------------------------------------------------------