├── .gitignore ├── README.md ├── examples ├── __init__.py ├── bayesianModel.ipynb ├── gaussNewton_chemical.py ├── registerGrowthData.ipynb ├── registration.py └── registrationByLocalRegression.ipynb ├── pyFDA ├── __init__.py ├── bspline.py ├── function.py ├── gaussNewton.py ├── register.py └── register │ ├── __init__.py │ └── localRegression.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyFDA 2 | 3 | Functional data analysis in python. 4 | 5 | ## References 6 | * Functional Data Analysis, Ramsay and Silverman, 2005 7 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ptonner/pyFDA/c8769fd6958d0f41f98f3d4ccd0384ecf873804b/examples/__init__.py -------------------------------------------------------------------------------- /examples/gaussNewton_chemical.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example of Gauss Newton method on chemical production rate equation. 3 | 4 | Data and details are from the Gauss Netwon wikipedia page. 5 | 6 | Author: Peter Tonner 7 | """ 8 | 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | from pyFDA.gaussNewton import GaussNewton 12 | 13 | S = np.array([0.038,0.194,.425,.626,1.253,2.500,3.740])[:,None] 14 | rate = np.array([0.050,0.127,0.094,0.2122,0.2729,0.2665,0.3317]) 15 | 16 | def f(x,theta): 17 | return (theta[0]*x)/(x+theta[1]) 18 | 19 | def resid(y,x,theta): 20 | return y - theta[0]*x/(x+theta[1]) 21 | 22 | def partial1(x,theta): 23 | return x/(theta[1]+x) 24 | 25 | def partial2(x,theta): 26 | return -theta[0]*x/(theta[1]+x)**2 27 | 28 | thetaInit = np.array([.1,.6]) 29 | xpred = np.linspace(.1,4) 30 | yhatInit = f(xpred,thetaInit) 31 | 32 | gn = GaussNewton(rate,S,thetaInit,resid,[partial1,partial2]) 33 | print "Initial error: %.5lf" % gn.SRS() 34 | 35 | for i in range(5): 36 | gn.run() 37 | print "Iteration %d error: %.5lf" % (i,gn.SRS()) 38 | 39 | yhatFinal = f(xpred,gn.thetaCurrent) 40 | print gn.thetaCurrent 41 | 42 | plt.plot(xpred,yhatInit,label="initial estimate") 43 | plt.plot(xpred,yhatFinal,label="final estimate") 44 | plt.scatter(S,rate,label="raw data") 45 | 46 | plt.legend(loc="best") 47 | plt.show() -------------------------------------------------------------------------------- /examples/registration.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example of function registration using a local regression framework 3 | 4 | Method adapted from "Curve registration by local regression", Kneip et al., 2000 5 | 6 | Author: Peter Tonner 7 | """ 8 | 9 | import numpy as np 10 | import scipy.interpolate 11 | from pyFDA import bspline 12 | 13 | def binarySearch_inverseMonotonic(t0,t1,f,ft,i): 14 | """Binary search on a monotone increasing function for the inverse of f at point t""" 15 | tm = (t0+t1)/2 16 | if i > 100: 17 | return tm 18 | f0 = f(t0) #scipy.interpolate.splev(t0,f) 19 | f1 = f(t1) #scipy.interpolate.splev(t1,f) 20 | if np.abs(f0-ft) < 1e-3: 21 | return t0 22 | if np.abs(f1-ft) < 1e-3: 23 | return t1 24 | fm = f(tm)#scipy.interpolate.splev(tm,f) 25 | if fm < ft: 26 | return binarySearch_inverseMonotonic(tm,t1,f,ft,i+1) 27 | else: 28 | return binarySearch_inverseMonotonic(t0,tm,f,ft,i+1) 29 | 30 | def g(t,theta): 31 | return np.exp(theta[0]) *(t+theta[1]) 32 | 33 | def resid(y,t,theta): 34 | return y - np.exp(theta[2]) * xspline(g(t,theta)) 35 | 36 | def partial1(t,theta): 37 | return np.exp(theta[2]) * xspline(g(t,theta),deriv=1) * np.exp(theta[0]) * (t+theta[1]) 38 | 39 | def partial2(t,theta): 40 | return np.exp(theta[2]) * xspline(g(t,theta),deriv=1) * np.exp(theta[0]) 41 | 42 | def partial3(t,theta): 43 | return np.exp(theta[2]) * xspline(g(t,theta),deriv=0) 44 | 45 | 46 | n = 200 47 | t = np.linspace(0,6,200) 48 | y = np.cos(t**3/(np.pi**2)) 49 | 50 | # define the registration for curve x 51 | # hpoints = np.array([(0,0),(2,1),(4,3.75),(6,6)]) 52 | # hspline = bspline.Bspline(hpoints[:,0],hpoints[:,1]) 53 | # h = hspline(t) 54 | 55 | theta1points = np.array([(0,0),(2,-.05),(4,-.03),(6,0)]) 56 | theta1 = bspline.Bspline(theta1points[:,0],theta1points[:,1]) 57 | 58 | theta2points = np.array([(0,0),(1,.2),(5,-.2),(6,0)]) 59 | theta2 = bspline.Bspline(theta2points[:,0],theta2points[:,1]) 60 | 61 | h = g(t,[theta1(t),0,0]) 62 | hspline = bspline.Bspline(t,h) 63 | hinv = np.array([binarySearch_inverseMonotonic(0.,6.,hspline,z,0) for z in t]) 64 | hinvspline = bspline.Bspline(t,hinv) 65 | 66 | # x = np.cos(hinv**3/(np.pi**2)) 67 | x = np.cos(hinvspline(t)**3/(np.pi**2)) 68 | 69 | # add some amplitude variation 70 | ampPoints = np.array([(0,0),(1,.05),(2,-.2),(4,-.05),(5,0.05)]) 71 | amp = scipy.interpolate.splrep(ampPoints[:,0],ampPoints[:,1]) 72 | 73 | x = (1+scipy.interpolate.splev(t,amp)) * x 74 | 75 | theta1points = .05*((t-3)**2 - 9) 76 | theta1 = bspline.Bspline(t,theta1points) 77 | h = g(t,[theta1(t),0,0]) 78 | hspline = bspline.Bspline(t,h) 79 | 80 | x = np.cos(t**3/(np.pi**2)) 81 | y = np.cos(hspline(t)**3/(np.pi**2)) 82 | 83 | y = (t)/(1+t) 84 | x = (g(t,[.1,0,0]))/(1+g(t,[.1,0,0])) 85 | 86 | 87 | 88 | bandwidth = np.pi/2 89 | variance = 1 90 | 91 | xspline = bspline.Bspline(t,x) 92 | xhats = [xspline] 93 | ghats = [] 94 | thetas = [] 95 | for i in range(3): 96 | ghat = [] 97 | thetas.append([]) 98 | for j in range(n): 99 | decay = 2**(-i) 100 | w = 1 - variance * ((t - t[j])**2)/((bandwidth*decay)**2) 101 | w = np.max((w,np.zeros(x.shape[0])),0) 102 | 103 | # gn = gaussNewton.GaussNewton(y,x[:,None],np.array([0,0,0]),resid,[partial1,partial2,partial3],w,n/2) 104 | # gn = gaussNewton.GaussNewton(y,x[:,None],np.array([0,0,0]),resid,[partial1,partial2],w,n) 105 | gn = gaussNewton.GaussNewton(y,x[:,None],np.array([0,0,0]),resid,[partial1],w,1) 106 | # gn = gaussNewton.GaussNewton(y,x[:,None],np.array([0,0,0]),resid,[partial1],w) 107 | gn.run() 108 | thetas[-1].append(gn.thetaCurrent) 109 | 110 | ghat.append(g(t[j],gn.thetaCurrent)) 111 | 112 | gspline = bspline.Bspline(t,ghat) 113 | ginv = np.array([binarySearch_inverseMonotonic(0.,6.,gspline,z,0) for z in t]) 114 | ginvspline = bspline.Bspline(t,ginv) 115 | 116 | xhat = xspline(gspline(t)) 117 | # xhat = xspline(ginvspline(t)) 118 | 119 | xspline = bspline.Bspline(t,xhat) 120 | 121 | xhats.append(xspline) 122 | ghats.append(gspline) 123 | thetas = np.array(thetas) 124 | # plt.plot(t+t[j],g(t,gn.thetaCurrent)) 125 | # plt.show() -------------------------------------------------------------------------------- /pyFDA/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ptonner/pyFDA/c8769fd6958d0f41f98f3d4ccd0384ecf873804b/pyFDA/__init__.py -------------------------------------------------------------------------------- /pyFDA/bspline.py: -------------------------------------------------------------------------------- 1 | from function import Function 2 | from scipy import interpolate 3 | 4 | class Bspline(Function): 5 | 6 | def __init__(self,x,y,*args,**kwargs): 7 | Function.__init__(self,"B-spline",x,y,*args,**kwargs) 8 | 9 | def _fit(self,x,y,*args,**kwargs): 10 | self.knots,self.coeff,self.degree = interpolate.splrep(x,y,*args,**kwargs) 11 | 12 | def predict(self,x,deriv=None,*args,**kwargs): 13 | if deriv is None: 14 | deriv = 0 15 | if deriv == -1: 16 | return interpolate.splev(x,interpolate.splantider((self.knots,self.coeff,self.degree))) 17 | return interpolate.splev(x,(self.knots,self.coeff,self.degree),der=deriv) -------------------------------------------------------------------------------- /pyFDA/function.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Function(object): 4 | """A wrapper for function representations in functional data analysis""" 5 | 6 | def __init__(self,name=None,x=None,y=None,*args,**kwargs): 7 | self.name = name 8 | self.fit(x,y,*args,**kwargs) 9 | 10 | def fit(self,x=None,y=None,*args,**kwargs): 11 | if x is None or y is None: 12 | return 13 | self._fit(x,y,*args,**kwargs) 14 | 15 | def _fit(self,x,y,*args,**kwargs): 16 | raise NotImplementedError("") 17 | 18 | def predict(self,x,*args,**kwargs): 19 | raise NotImplementedError("") 20 | 21 | def __call__(self,x,*args,**kwargs): 22 | return self.predict(x,*args,**kwargs) -------------------------------------------------------------------------------- /pyFDA/gaussNewton.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class GaussNewton(object): 4 | 5 | def __init__(self,y,x,thetaInit,residual,partials,weights=None,ridge=None): 6 | """Minimized the residual sum of squares for a function f using the Gauss Newton method. 7 | 8 | Parameters: 9 | ----------- 10 | y : array_like 11 | array of functions values, shape (n,1) 12 | x : array_like 13 | array of independent values, shape (n,k) 14 | thetaInit : array_like 15 | array of initial parameter estimates (p,1) 16 | residual : function 17 | function defining the residual, r(y,x,theta), should be of the form y - f(x,theta) 18 | partials: list of functions 19 | functions defining the partial derivative of the residual for each parameter, p(x,theta). len(partials) = p""" 20 | self.y = y 21 | self.x = x 22 | self.n = y.shape[0] 23 | assert x.shape[0] == self.n, "x and y must be same size!" 24 | 25 | self.iteration = 0 26 | self.thetaInit = self.thetaCurrent = thetaInit 27 | self.thetaHistory = [self.thetaInit] 28 | self.p = len(partials) 29 | self.residual = residual 30 | self.partials = partials 31 | self.deltas = [] 32 | # assert len(partials) == self.p, "must provide partial function for each parameter!" 33 | 34 | self.weights = weights 35 | self.ridge = ridge 36 | 37 | def jacobian(self,): 38 | return np.array([[self.partials[j](self.x[i,:],self.thetaCurrent)[0] for j in range(self.p)] for i in range(self.n)]) 39 | 40 | def resid(self,): 41 | return np.array([self.residual(self.y[i],self.x[i,:],self.thetaCurrent)[0] for i in range(self.n)]) 42 | 43 | def SRS(self,): 44 | return np.sum(self.resid()**2) 45 | 46 | def run(self,iterations = None): 47 | if iterations is None: 48 | iterations = 1 49 | for i in range(iterations): 50 | self._iteration() 51 | 52 | def w(self): 53 | w = np.eye(self.n) 54 | if not self.weights is None: 55 | w = np.diag(self.weights) 56 | 57 | return w 58 | 59 | def _iteration(self,): 60 | 61 | w = np.eye(self.n) 62 | if not self.weights is None: 63 | w = np.diag(self.weights) 64 | 65 | ridge = 0 66 | if not self.ridge is None: 67 | ridge = self.ridge 68 | ridge = np.eye(self.p)*ridge 69 | 70 | J = self.jacobian() 71 | proj = np.dot(np.linalg.inv(np.dot(J.T,J)+ridge),J.T) 72 | r = np.dot(w,self.resid()) 73 | 74 | temp = np.zeros(self.thetaCurrent.shape[0]) 75 | temp[:self.p] = temp[:self.p] + self.thetaCurrent[:self.p] 76 | temp[:self.p] = temp[:self.p] + np.dot(proj,r) 77 | self.thetaCurrent = temp 78 | # print self.thetaCurrent, temp, np.dot(proj,r) 79 | # self.thetaCurrent = self.thetaCurrent + np.dot(proj,r) 80 | 81 | self.deltas.append(np.dot(proj,r)) 82 | 83 | self.thetaHistory.append(self.thetaCurrent) 84 | self.iteration += 1 -------------------------------------------------------------------------------- /pyFDA/register.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Register(object): 4 | 5 | def __init__(self,x,funcs,functionalModel,meanFunction): 6 | self.n = len(funcs) 7 | self.delta = np.zeros(n) 8 | self.funcs = funcs 9 | self.functionalModel = functionalModel 10 | self.meanFunction = meanFunction 11 | 12 | def compute(self,alpha=.1,tol=1e-1,maxiter=1000): 13 | z = 0 14 | diff = 1e10 15 | lastErr = 1e10 16 | mu = meanFunction(self.x,self.funcs,self.delta) 17 | 18 | while diff > tol: 19 | 20 | for i in range(self.n): 21 | mu_pred = mu.predict(self.x) 22 | x_shift = self.func[i].predict(self.x-self.delta[i]) 23 | mu_deriv = mu.predict(self.x,der=1) 24 | 25 | delta_d1 = np.sum((mu_pred-x_shift)*mu_deriv) 26 | delta_d2 = np.sum(mu_deriv**2) 27 | 28 | self.delta[i] -= alpha * delta_d1/delta_d2 29 | 30 | mu = meanFunction(time,self.func,self.delta) 31 | 32 | mu_pred = mu.predict(self.x) 33 | err = 0 34 | for i in range(self.n): 35 | x_shift = self.func[i].predict(self.x-self.delta[i]) 36 | err += np.sum((mu_pred-x_shift)**2) 37 | diff = err - lastErr 38 | lastErr = err 39 | 40 | z+=1 41 | if z > maxiter: 42 | break 43 | 44 | return err 45 | 46 | -------------------------------------------------------------------------------- /pyFDA/register/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ptonner/pyFDA/c8769fd6958d0f41f98f3d4ccd0384ecf873804b/pyFDA/register/__init__.py -------------------------------------------------------------------------------- /pyFDA/register/localRegression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .. import bspline, gaussNewton 3 | from ..util import binarySearch_inverseMonotonic 4 | 5 | def g(t,theta): 6 | return np.exp(theta[0]) *(t+theta[1]) 7 | 8 | class RegisterLocalRegression(object): 9 | 10 | def __init__(self,x,y,t,bandwidth=None,ridge=None,decay=True,variance=None): 11 | self.x = x 12 | self.xspline = bspline.Bspline(t,x) 13 | self.y = y 14 | self.yspline = bspline.Bspline(t,y) 15 | self.n = x.shape[0] 16 | self.t = t 17 | 18 | self.bandwidth = bandwidth 19 | self.ridge = ridge 20 | self.decay = decay 21 | self.variance = variance 22 | 23 | self.xhats = [self.xspline] 24 | self.ghats = [] 25 | self._thetas = [] 26 | self.error = [self.SRS()] 27 | 28 | def thetas(self,): 29 | return np.array(self._thetas) 30 | 31 | def partials(self,): 32 | return [self.partial1,self.partial2,self.partial3] 33 | 34 | def gaussNewton(self,timeInd,decay=None): 35 | partials = [self.partial1,self.partial2,self.partial3] 36 | 37 | variance = self.variance 38 | if variance is None: 39 | variance = 1 40 | 41 | if self.bandwidth is None: 42 | w = None 43 | else: 44 | if decay is None: 45 | decay = 1 46 | 47 | # w = 1 - variance * ((self.t - self.t[j])**2)/((self.bandwidth*decay)**2) 48 | w = variance * np.exp((-(self.t - self.t[timeInd])**2)/((self.bandwidth*decay))) 49 | # w = np.max((w,np.zeros(self.n)),0) 50 | 51 | # gn = gaussNewton.GaussNewton(self.y,self.xspline(self.t)[:,None],np.array([0,0,0]),self.residual,self.partials(),w,self.ridge) 52 | gn = gaussNewton.GaussNewton(self.y,self.t[:,None],np.array([0,0,0]),self.residual,self.partials(),w,self.ridge) 53 | 54 | return gn 55 | 56 | def run(self,iter=None): 57 | if iter is None: 58 | iter = 1 59 | 60 | partials = [self.partial1,self.partial2,self.partial3] 61 | xhat = self.xspline(self.t) 62 | self.deltas = [] 63 | 64 | for i in range(iter): 65 | ghat = [] 66 | self._thetas.append([]) 67 | self.deltas.append([]) 68 | for j in range(self.n): 69 | 70 | variance = self.variance 71 | if variance is None: 72 | variance = 1 73 | 74 | if self.bandwidth is None: 75 | w = None 76 | else: 77 | if self.decay: 78 | decay = 2**(-i) 79 | else: 80 | decay = 1 81 | 82 | # w = 1 - variance * ((self.t - self.t[j])**2)/((self.bandwidth*decay)**2) 83 | w = variance * np.exp((-(self.t - self.t[j])**2)/((self.bandwidth*decay))) 84 | # w = np.max((w,np.zeros(self.n)),0) 85 | 86 | # TEMP REMOVE! 87 | # w = 1 - ((self.t - self.t[j])**2)/((self.bandwidth)**2) 88 | 89 | # gn = gaussNewton.GaussNewton(self.y,xhat[:,None],np.array([0,0,0]),self.residual,partials,w,self.ridge) 90 | gn = gaussNewton.GaussNewton(self.y,self.xspline(self.t)[:,None],np.array([0,0,0]),self.residual,partials,w,self.ridge) 91 | 92 | gn = self.gaussNewton(j,decay=decay) 93 | gn.run() 94 | 95 | self._thetas[-1].append(gn.thetaCurrent) 96 | ghat.append(self.g(self.t[j],gn.thetaCurrent)) 97 | self.deltas[-1].append(gn.deltas[-1]) 98 | 99 | gspline = bspline.Bspline(self.t,ghat) 100 | xhat = self.xspline(gspline(self.t)) 101 | self.xspline = bspline.Bspline(self.t,xhat) 102 | 103 | # uncomment to update x with the inverse of g, as described in the paper (is this correct?) 104 | # ginv = bspline.Bspline(ghat,self.t) 105 | # xhat = self.xspline(ginv(self.t)) 106 | # self.xspline = bspline.Bspline(self.t,xhat) 107 | 108 | self.xhats.append(self.xspline) 109 | self.ghats.append(gspline) 110 | 111 | self.error.append(self.SRS()) 112 | 113 | def h(self,): 114 | htemp = self.ghats[0](self.t) 115 | for i in range(1,len(self.ghats)): 116 | htemp = self.ghats[i](htemp) 117 | 118 | return bspline.Bspline(self.t,htemp) 119 | 120 | def SRS(self,): 121 | if len(self._thetas) > 0: 122 | return np.sum((self.y-np.exp(self.thetas()[-1,:,2]) * self.xspline(self.t))**2) 123 | else: 124 | return np.sum((self.y- self.xspline(self.t))**2) 125 | 126 | def g(self,t,theta): 127 | return np.exp(theta[0]) *(t+theta[1]) 128 | 129 | def residual(self,y,t,theta): 130 | return y - np.exp(theta[2]) * self.xspline(self.g(t,theta)) 131 | 132 | def partial1(self,t,theta): 133 | return np.exp(theta[2]) * self.xspline(g(t,theta),deriv=1) * np.exp(theta[0]) * (t+theta[1]) 134 | 135 | def partial2(self,t,theta): 136 | return np.exp(theta[2]) * self.xspline(g(t,theta),deriv=1) * np.exp(theta[0]) 137 | 138 | def partial3(self,t,theta): 139 | return np.exp(theta[2]) * self.xspline(g(t,theta),deriv=0) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | alabaster==0.7.6 2 | Babel==2.1.1 3 | backports.ssl-match-hostname==3.4.0.2 4 | certifi==2015.9.6.2 5 | decorator==4.0.4 6 | docutils==0.12 7 | funcsigs==0.4 8 | functools32==3.2.3.post2 9 | ipykernel==4.1.0 10 | ipyparallel==4.0.2 11 | ipython==4.0.0 12 | ipython-genutils==0.1.0 13 | Jinja2==2.8 14 | jsonschema==2.5.1 15 | jupyter-client==4.1.1 16 | jupyter-core==4.0.6 17 | MarkupSafe==0.23 18 | matplotlib==1.4.3 19 | mistune==0.7.1 20 | mock==1.3.0 21 | nbconvert==4.0.0 22 | nbformat==4.0.1 23 | nose==1.3.7 24 | notebook==4.0.6 25 | numpy==1.10.1 26 | numpydoc==0.5 27 | pandas==0.17.0 28 | path.py==8.1.2 29 | pbr==1.8.1 30 | pexpect==4.0.1 31 | pickleshare==0.5 32 | ptyprocess==0.5 33 | Pygments==2.0.2 34 | pyparsing==2.0.3 35 | pyreadline==2.1 36 | python-dateutil==2.4.2 37 | pytz==2015.6 38 | pyzmq==14.7.0 39 | qtconsole==4.1.0 40 | requests==2.8.1 41 | scipy==0.16.0 42 | seaborn==0.6.0 43 | simplegeneric==0.8.1 44 | six==1.10.0 45 | snowballstemmer==1.2.0 46 | Sphinx==1.3.1 47 | sphinx-rtd-theme==0.1.9 48 | terminado==0.5 49 | testpath==0.2 50 | tornado==4.2.1 51 | traitlets==4.0.0 52 | xlrd==0.9.4 53 | --------------------------------------------------------------------------------