├── .gitignore ├── ODE ├── final │ ├── paper2_F.pdf │ ├── paper2_F.py │ ├── paper2.py │ ├── exam3.py │ ├── exam1.py │ └── exam2.py ├── figures │ ├── Figure1e_SR_curve.pdf │ ├── figure1A_SR_curve.pdf │ ├── figure1B_SR_curve.pdf │ ├── figure1C_SR_curve.pdf │ ├── figure1f_SR_curve.pdf │ ├── figure1g_SR_curve.pdf │ ├── figure1A_rate_curve.pdf │ ├── figure1B_rate_curve.pdf │ ├── figure1C_rate_curve.pdf │ ├── figure1d_rate_curve.pdf │ ├── figure1e_rate_curve.pdf │ ├── figure1f_rate_curve.pdf │ ├── figure1g_rate_curve.pdf │ ├── figure1A_solution_curve.pdf │ ├── figure1B_solution_curve.pdf │ ├── figure1C_solution_curve.pdf │ └── figure1d_solution_curve.pdf ├── submission │ └── Frank_system_biology_hw.pdf ├── README.md └── codes │ └── script.py ├── 1_numpy_ndarray.py ├── LICENSE ├── tricks ├── 9_LaTex.md ├── 7_np_where.ipynb ├── 1_tuple_index.ipynb ├── 2_nan.ipynb ├── 6_sorting.ipynb ├── 4_arg_sort.ipynb ├── 8_parallelization.ipynb ├── 5_random.ipynb └── 3_compute_dist_NA.ipynb ├── 2_linear_algebra.py ├── README.md ├── prob └── dist_intuit.md └── 3_probabilistic_model.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/.DS_Store 2 | **/.ipynb_checkpoints -------------------------------------------------------------------------------- /ODE/final/paper2_F.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/final/paper2_F.pdf -------------------------------------------------------------------------------- /ODE/figures/Figure1e_SR_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/Figure1e_SR_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1A_SR_curve.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1A_SR_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1B_SR_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1B_SR_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1C_SR_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1C_SR_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1f_SR_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1f_SR_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1g_SR_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1g_SR_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1A_rate_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1A_rate_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1B_rate_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1B_rate_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1C_rate_curve.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1C_rate_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1d_rate_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1d_rate_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1e_rate_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1e_rate_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1f_rate_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1f_rate_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1g_rate_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1g_rate_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1A_solution_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1A_solution_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1B_solution_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1B_solution_curve.pdf 
-------------------------------------------------------------------------------- /ODE/figures/figure1C_solution_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1C_solution_curve.pdf -------------------------------------------------------------------------------- /ODE/figures/figure1d_solution_curve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/figures/figure1d_solution_curve.pdf -------------------------------------------------------------------------------- /ODE/submission/Frank_system_biology_hw.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frankligy/ScPyT/HEAD/ODE/submission/Frank_system_biology_hw.pdf -------------------------------------------------------------------------------- /1_numpy_ndarray.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # understanding numpy array 4 | a = np.array([[1,2,3],[4,5,6]]) 5 | a.strides 6 | a.dtype 7 | 8 | 9 | # slicing and indexing 10 | b = np.array([[1,2,3,4,5,6,7,8,9,10], 11 | [4,5,6,7,8,9,20,11,12,13], 12 | [1,2,3,4,5,6,7,8,9,9]]) 13 | 14 | # what is a slice object 15 | test = slice(1,10,2) 16 | 17 | # basic indexing only return a "view", it will change the original array content 18 | b0 = b[1:3,4:7] 19 | b0[0,1] = 99 20 | b1 = b[:,(4,7)] 21 | b1[0,1] = 99 22 | 23 | 24 | # understand dtype 25 | d_type = np.dtype(''}) 26 | ax.annotate(s='SN2',xy=(0.37,80),xytext=(0.2,90),arrowprops={'arrowstyle':'->'}) 27 | plt.savefig('/Users/ligk2e/Desktop/github/exercise_codes/ODE/final/paper2_F.pdf',bbox_inches='tight') 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /tricks/9_LaTex.md: 
-------------------------------------------------------------------------------- 1 | ## In scientific computing, it is very important to know how to write succinct math in LaTeX. I use Overleaf. 2 | 3 | ### Official pages for reference: 4 | 5 | 1. [Mathematical expression (inline and block)](https://www.overleaf.com/learn/latex/Mathematical_expressions) 6 | 2. [Greek letter and math symbols (main)](https://www.overleaf.com/learn/latex/List_of_Greek_letters_and_math_symbols) 7 | 8 | 9 | 10 | ### Additional notes: 11 | 12 | 1. paragraph indent and paragraph skip 13 | 14 | ``` 15 | \setlength{\parindent}{0em} 16 | \setlength{\parskip}{1em} 17 | ``` 18 | 19 | 20 | 2. bold math and upright (not tilted) 21 | 22 | ``` 23 | \mathbf{} 24 | \mathsf{} 25 | ``` 26 | 27 | 3. notes 28 | 29 | ``` 30 | \underset{}{} # underset, the first bracket represents the stuff placed under the second bracket 31 | \lfloor \rfloor # floor (use \lceil \rceil for ceil) 32 | \hat # hat 33 | \Bar # bar 34 | \left\| \right\| # norm 35 | \sim # ~ 36 | \arg # arg 37 | \mathbbm{1} # indicator function # make sure to usepackage{bbm} 38 | \cdot # a dot that is vertically aligned in the center 39 | \backslash # \ 40 | \propto # proportional to 41 | \circ # element wise multiplication 42 | ``` 43 | 44 | 4. 
[space](https://www.overleaf.com/learn/latex/Spacing_in_math_mode) in math mode 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /ODE/final/paper2.py: -------------------------------------------------------------------------------- 1 | from scipy.integrate import solve_ivp 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import matplotlib.lines as mlines 5 | import matplotlib as mpl 6 | mpl.rcParams['text.usetex'] = False 7 | 8 | # parameter 9 | BCL2_T = 80 10 | BAX_T = 100 11 | Stress = 0.6 12 | k_f1 = 1 13 | k_f2 = 3 14 | k_b = 2 15 | k_asBAXmBCL2 = 90 16 | k_dsBAXmBCL2 = 0.05 17 | k_asBH3BCL2 = 10 18 | k_dsBH3BCL2 = 0.01 19 | k_prime_sBH3 = 0.1 20 | k_prime2_sBH3 = 0.6 21 | k_dBH3 = 0.01 22 | 23 | # process 24 | def process(t,y): 25 | ''' 26 | y[0] is BAX 27 | y[1] is BAXm/BCL2 28 | y[2] is BH3_F 29 | y[3] is BH3/BCL2 30 | ''' 31 | BAXm_F = BAX_T-y[0]-y[1] 32 | BCL2_F = BCL2_T-y[3]-y[1] 33 | y0d = -(k_f1 + k_f2 * y[2]) * y[0] + k_b * BAXm_F + k_b * y[1] 34 | y1d = k_asBAXmBCL2 * BAXm_F * BCL2_F - k_dsBAXmBCL2 * y[1] - k_b * y[1] 35 | y2d = k_prime_sBH3+k_prime2_sBH3*Stress-k_dBH3*y[2]-k_asBH3BCL2*y[2]*BCL2_F+k_dsBH3BCL2*y[3] 36 | y3d = k_asBH3BCL2*y[2]*BCL2_F-k_dsBH3BCL2*y[3]-k_dBH3*y[3] 37 | return [y0d,y1d,y2d,y3d] 38 | 39 | sol = solve_ivp(process,t_span=(0,400),y0=[66.6,33.4,0,16]) 40 | tmp = sol.y 41 | 42 | BH3_T = np.sum(tmp[2:,:],axis=0) 43 | BAXm_T = BAX_T - tmp[0,:] 44 | BAXm_F = BAX_T - np.sum(tmp[0:2,:],axis=0) 45 | BH3_F = tmp[2,:] 46 | 47 | fig,ax = plt.subplots() 48 | x = np.arange(tmp.shape[1]) 49 | ax.plot(x,BH3_T,linestyle='--',color='k',label=r'$[BH3]_{T}$') 50 | ax.plot(x,BAXm_T,linestyle='-',color='k',label=r'$[BAXm]_{T}$') 51 | ax.plot(x,BAXm_F,linestyle='-',color='grey',label=r'$[BAXm]_{F}$') 52 | ax.plot(x,BH3_F,linestyle='--',color='grey',label=r'$[BH3]_{F}$') 53 | ax.legend() 54 | ax.set_xticks([]) 55 | ax.set_xlabel('t,min') 56 | ax.set_ylabel('Levels') 57 | 
plt.savefig('/Users/ligk2e/Desktop/ODE/final/paper2.pdf',bbox_inches='tight') 58 | plt.close() 59 | 60 | -------------------------------------------------------------------------------- /2_linear_algebra.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.linalg as sl 3 | 4 | # vector space part 5 | a = np.random.randn(2,3) 6 | a.T 7 | 8 | b = np.random.randn(3,2) 9 | a @ b 10 | 11 | # matrix_rank function 12 | x = np.random.rand(2,3) 13 | np.linalg.matrix_rank(x) 14 | 15 | # determinant 16 | x = np.random.rand(4,4) 17 | np.linalg.det(x) 18 | 19 | # inner product, outer product, hadamard product of two 1D array 20 | a = np.array([4,5,6]) 21 | b = np.array([7,8,9]) 22 | np.inner(a,b) 23 | np.outer(a,b) 24 | a * b 25 | 26 | # how to project a to b 27 | a = np.array([4,5,6]) 28 | b = np.array([7,8,9]) 29 | proj_b_a = np.inner(a,b) / np.inner(b,b) * b 30 | 31 | # LU decomposition 32 | a = np.random.randn(3,4) 33 | p,l,u = sl.lu(a) 34 | 35 | # QR decomposition 36 | a = np.random.randn(3,4) 37 | q,r = np.linalg.qr(a) 38 | 39 | # eigendecompostion 40 | ei = np.random.randn(4,4) 41 | w,v = np.linalg.eig(ei) 42 | 43 | # characteristic polynomial 44 | a = np.random.randn(5,5) 45 | np.linalg.det(np.trace(a)*np.identity(5)-a) 46 | 47 | # SVD 48 | svd = np.random.rand(4,5) 49 | u,s,vh = np.linalg.svd(svd) 50 | 51 | # norm 52 | a = np.array([4,5,6]) 53 | np.linalg.norm(a,ord=3) 54 | 55 | x = np.random.rand(2,3) 56 | np.linalg.norm(x,ord='fro') 57 | 58 | # einsum 59 | x = np.random.rand(2,3) 60 | np.einsum('ij -> ji',x) # transpose 61 | np.einsum('ij ->',x) # sum 62 | np.einsum('ij -> i',x) # column sum 63 | np.einsum('ij -> j',x) # row sum 64 | 65 | x = np.random.rand(2,3) 66 | y = np.random.rand(5,3) 67 | np.einsum('ij,kj -> ik',x,y) # matrix multiplication 68 | 69 | a = np.array([4,5,6]) 70 | b = np.array([7,8,9]) 71 | np.einsum('i,i ->',a,b) # inner product 72 | np.einsum('i,j ->ij',a,b) # outer product 
73 | np.einsum('i,i ->i',a,b) # hadamard product 74 | 75 | y = np.random.rand(5,3) 76 | np.einsum('ij -> j',y) # per-column sum, NOT the diagonal (a diagonal needs a square matrix and 'ii -> i') 77 | np.einsum('ij ->',y) # sum of all entries, NOT the trace (a trace needs a square matrix and 'ii ->') 78 | 79 | svd = np.random.rand(4,5) 80 | u,s,vh = np.linalg.svd(svd) 81 | 82 | ei = np.random.randn(4,4) 83 | w,v = np.linalg.eig(ei) 84 | 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ScPyT 2 | Scientific Computing in Python, a set of tutorials and useful examples. All come from my own experience. 3 | 4 | # What is this repository? 5 | 6 | Scientific Computing spans a wide spectrum including basic numeric programming, linear algebra, all kinds of distributions, and realizing Maximum Likelihood Estimation (MLE), Expectation-Maximization (EM) algorithm, Markov Chain Monte Carlo (MCMC) sampling. This tutorial aims to help you to master the basic skills to implement relevant algorithms in python. 7 | 8 | In the second part, titled [Practical tricks](#practical-tricks), I will share some useful code snippets which relate to some confusing points when using numpy. I got into these traps before, so I hope it can serve as a reminder to me and other readers. 9 | 10 | 11 | 1. [Understanding Numpy and ndarray](https://frankligy.medium.com/revisiting-numpy-and-ndarray-e1e5f9a69791) 12 | 2. [Linear algebra in python](https://frankligy.medium.com/linear-algebra-in-python-b967061e342a) 13 | 3. [Ordinary Differential Equation](https://frankligy.medium.com/ordinal-differential-equation-ode-in-python-8dc1de21323b) 14 | 4. [Bayesian Probabilistic model](https://medium.com/@frankligy/probabilistic-modeling-in-python-not-finished-yet-8af06aef92c7) 15 | 5. Frequentist statistical model (MLE, optimization methods, EM, factor analysis, etc) 16 | 6. stay tuned... 
17 | 18 | 19 | # Practical tricks 20 | 21 | In this Section, I want to share some caveats that numpy users may benefit from: 22 | 23 | 1. [Tuple Index in Numpy](https://github.com/frankligy/ScPyT/blob/main/tricks/1_tuple_index.ipynb) 24 | 2. [Missing value in Python](https://github.com/frankligy/ScPyT/blob/main/tricks/2_nan.ipynb) 25 | 3. [Pairwise distance when having missing value](https://github.com/frankligy/ScPyT/blob/main/tricks/3_compute_dist_NA.ipynb) 26 | 4. [argsort, argwhere, argmin, argmax](https://github.com/frankligy/ScPyT/blob/main/tricks/4_arg_sort.ipynb) 27 | 5. [random package and np.random()](https://github.com/frankligy/ScPyT/blob/main/tricks/5_random.ipynb) 28 | 6. [sorted array, index array, rank array, inverse index array](https://github.com/frankligy/ScPyT/blob/main/tricks/6_sorting.ipynb) 29 | 7. [np where function](https://github.com/frankligy/ScPyT/blob/main/tricks/7_np_where.ipynb) 30 | 8. [Parallelization in python scientific computing](https://github.com/frankligy/ScPyT/blob/main/tricks/8_parallelization.ipynb) 31 | 9. [LaTex notes](https://github.com/frankligy/ScPyT/blob/main/tricks/9_LaTex.md) 32 | -------------------------------------------------------------------------------- /prob/dist_intuit.md: -------------------------------------------------------------------------------- 1 | 2 | **Simple distribution** 3 | 4 | # Beta 5 | 6 | Anything that is intrinsically bounded by 0 and 1. The emphasis on "intrinsically" sets it apart from the `logit_normal` distribution, which is also bounded by 0 and 1, but only because of the logit function; that one is more like an unbounded continuous random variable to which a bound has been added. 7 | 8 | - You can use `Beta (s,s)` to represent a distribution peaked at 0.5, `s` determines how informative this distribution will be 9 | 10 | - You can use `Beta (0.5,0.5)` to represent a bimodal distribution 11 | 12 | - When `a <= 1`, it is left skewed, when `b <= 1`, it is right skewed. 
13 | 14 | # Gamma 15 | 16 | It is very flexible, anything that is positive can be modeled by Gamma, you just need to change the parameter to flexibly adjust the variance and shape. `chi2` distribution is a special case for Gamma. 17 | 18 | - You can use `Gamma (m,1)` to make sure it is peaked at `m` 19 | 20 | - You can use `Gamma (s,s)` to make sure it is peaked at 1, and `s` determines how much regularisation you'd like to add on that when deviating from 1. 21 | 22 | # LogNorm 23 | 24 | It is to measure any positive values, without clear overdispersion. 25 | 26 | # Normal distribution 27 | 28 | Honestly the default, like to model the coefficient of the regression. 29 | 30 | # Student T distribution 31 | 32 | Normal distribution with heavier tails 33 | 34 | # Poisson 35 | 36 | Default for count data, the intuition is: during a certain time interval, how many buses arrive. 37 | 38 | # Exponential 39 | 40 | Related to the Poisson, the intuition is for the same situation in Poisson, the Exponential can measure the waiting time for the buses or the interval to be more general. It can be used for regularization as the probability goes down when the random variable increases. 41 | 42 | # Negative Binomial 43 | 44 | Poisson with overdispersion for counting data like RNA count, the intuition is, when achieving certain success, how many failures have been experienced. 45 | 46 | # Binomial 47 | 48 | Very intuitive, total number of trials each has a probability to succeed. 49 | 50 | # Bernoulli 51 | 52 | Very intuitive, toss the coin, a special case for binomial 53 | 54 | # Categorical 55 | 56 | Very intuitive, toss the dice, a special case for multinomial 57 | 58 | # Multinomial 59 | 60 | Very intuitive, toss n number of dice, the count of each side 61 | 62 | # Dirichlet 63 | 64 | The multi-variate version of Beta, parameterizing Multinomial and categorical distribution 65 | 66 | # Wishart 67 | 68 | The multi-variate version of Gamma distribution. 
69 | 70 | # HyperGeometric 71 | 72 | Very strong intuition: given a certain amount of fake product in the total product, how many fakes you get when sampling a certain amount of product from the total. 73 | 74 | **Compound distribution** 75 | 76 | # Spike-and-Slab (or mixture in general) 77 | 78 | A mixed distribution of a spike and a slab, useful for variable selection. A spike is a distribution that puts all mass into a center, whereas a slab is a distribution that gives mass to all values. 79 | 80 | # Zero-inflated 81 | Add an inflated zero component to a count distribution such as Poisson or Negative Binomial 82 | 83 | # Affine transformation 84 | 85 | The loc and scale parameters 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /3_probabilistic_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import norm,uniform,invgamma 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | 6 | norm.pdf(2,loc=0,scale=1) 7 | norm.cdf(0,loc=0,scale=1) 8 | norm.ppf(0.5,loc=0,scale=1) 9 | norm.sf(0,loc=0,scale=1) 10 | norm.isf(0.5,loc=0,scale=1) 11 | norm.rvs(size=100,loc=0,scale=1,random_state=1) 12 | 13 | # accept-reject method 14 | x = np.linspace(0,1,1000) 15 | f_x = x**1.7*((1-x)/(1+x))**5.3 16 | M = f_x.max() 17 | def f(x): 18 | return x**1.7*((1-x)/(1+x))**5.3 19 | def g(x): 20 | return uniform.pdf(x,loc=0,scale=1) 21 | 22 | n = 2500 23 | u = uniform.rvs(loc=0,scale=1,size=n,random_state=1) 24 | y = uniform.rvs(loc=0,scale=1,size=n,random_state=2) 25 | f_y = f(y) 26 | g_y = g(y) 27 | acc_or_rej = u <= f_y / (M * g_y) 28 | accepted_y = y[acc_or_rej] 29 | sns.histplot(accepted_y) 30 | 31 | # MCMC 32 | def f(x): 33 | import math 34 | return 2*x**2*(1-x)**8*math.cos(4*math.pi*x)**2 35 | def q(x,y): 36 | return norm.pdf(x,loc=y,scale=0.1) 37 | 38 | n = 10000 39 | x = np.zeros(n) 40 | x[0] = norm.rvs(loc=0,scale=0.03,size=1)[0] 41 | for i in range(n-1): 42 | while True: 43 | x_cand = 
norm.rvs(loc=x[i],scale=0.03,size=1)[0] 44 | if x_cand >= 0 and x_cand <= 1: 45 | break 46 | if x_cand >= 0 and x_cand <= 1: 47 | rho = (q(x[i],x_cand)/q(x_cand,x[i]))*(f(x_cand)/f(x[i])) 48 | alpha = min(1,rho) 49 | u = uniform.rvs(loc=0,scale=1,size=1)[0] 50 | if u < alpha: 51 | x[i+1] = x_cand 52 | else: 53 | x[i+1] = x[i] 54 | sns.histplot(x) 55 | fig,ax = plt.subplots() 56 | ax.plot(np.arange(10000),x) 57 | 58 | # Gibbs sampling and Hierarchical bayesian 59 | nruns = 10000 60 | K = 100 # K = 100, K hospitals 61 | n = 1000 # each hospital has 1000 patients 62 | y = np.zeros(K,n) # generated observations 63 | lambda_est = np.zeros(K,nruns) 64 | sigma_est = np.zeros(nruns) 65 | mu_est = np.zeros(nruns) 66 | tau_est = np.zeros(nruns) 67 | 68 | for i in range(K): 69 | loc = uniform.rvs(loc=0,scale=10,size=1)[0] 70 | scale = uniform.rvs(loc=0,scale=0.1,size=1)[0] 71 | lambda_est[i,0] = norm.rvs(loc=loc,scale=scale,size=1)[0] 72 | sigma_est[0] = uniform.rvs(loc=0,scale=0.1,size=1)[0] 73 | mu_est[0] = norm.rvs(loc=uniform.rvs(loc=0,scale=10,size=1)[0],scale=1,size=1)[0] 74 | tau_est[0] = uniform.rvs(loc=0,scale=0.1,size=1)[0] 75 | 76 | for runs in range(1,n_runs-1,1): 77 | # estimate lambda 78 | for i in range(1,K-1): 79 | mean = i/math.sqrt(1/tau_est[runs-1]) + n/sigma_est[runs-1] 80 | std = (mean^2)*(mu_est[runs-1]/(tau_est[runs-1])+y[i,:].mean()*n/sigma_est[runs-1]) 81 | lambda_est[i,runs] = norm.rvs(loc=mean,scale=std,size=1)[0] 82 | # estimate sigma 83 | sigma_sum_term = 0 84 | for i in range(K): 85 | for j in range(n): 86 | sigma_sum_term += (y[i,j]-lambda_est[i,runs])**2 87 | sigma_est[runs] = invgamma(loc=K*n/2,scale=sigma_sum_term/2) 88 | # estimate tau 89 | tau_sum_term = 0 90 | for i in range(K): 91 | tau_sum_term += (lambda_est[i,runs]-mu_est[runs-1])**2 92 | tau_est[runs] = invgamma(loc=K/2,scale=tau_sum_term/2) 93 | # estimate mu 94 | mu_est[runs] = norm.rvs(loc=lambda_est[:,runs-1].mean(),scale=math.sqrt(tau_est[runs]/2)) 95 | 96 | 97 | 
-------------------------------------------------------------------------------- /ODE/final/exam3.py: -------------------------------------------------------------------------------- 1 | from scipy.integrate import solve_ivp 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import matplotlib.lines as mlines 5 | 6 | ''' 7 | Let's set 8 | DNAdamage as y[0] 9 | p53 as y[1] 10 | Mdm2 as y[2] 11 | ''' 12 | 13 | # parameters 14 | k_prime_d53 = 0.005 15 | k_prime2_d53 = 0.1 16 | theta = 0.5 17 | k_prime_d2 = 0.4 18 | k_s53 = 0.0276 19 | k_prime_s2 = 0.01 20 | k_prime2_s2 = 0.5 21 | J_s2 = 0.5 22 | k_repair=0.0021429 23 | J1 = 0.1 24 | J2 = 0.1 25 | 26 | #helper function 27 | def Goldbeter_koshland(u,v,q,r): 28 | a = 2*u*r 29 | b = (v-u+v*q+u*r)**2 30 | c = 4*u*r*(v-u) 31 | d = np.sqrt(b-c) 32 | e = v-u+v*q+u*r+d 33 | return a/e 34 | 35 | def heavisible(x): 36 | if x > 0: 37 | return 1 38 | else: 39 | return 0 40 | 41 | 42 | def process(t,y): 43 | ''' 44 | y[0] is DNAdamage 45 | y[1] is p53 46 | y[2] is Mdm2 47 | ''' 48 | y0d = -k_repair*heavisible(y[0]) 49 | k_d2 = k_prime_d2*(1+y[0]) 50 | k_d53 = k_prime_d53+k_prime2_d53*Goldbeter_koshland(y[2],theta,J1/y[1],J2/y[1]) 51 | y1d = k_s53-k_d53*y[1] 52 | y2d = k_prime_s2+k_prime2_s2*(y[2]**4/(J_s2**4+y[2]**4))-k_d2*y[2] 53 | return [y0d,y1d,y2d] 54 | 55 | sol = solve_ivp(process,t_span=[0,50],y0=[0,0.36,0.71]) 56 | tmp1 = sol.y 57 | sol = solve_ivp(process,t_span=[0,50],y0=[0.07,0.302,1.240]) 58 | tmp2 = sol.y 59 | 60 | result = np.concatenate([tmp1,tmp2],axis=1) 61 | 62 | fig,ax = plt.subplots() 63 | ax.plot(np.arange(result.shape[1]),result[1,:],linestyle='-',color='k') 64 | ax.plot(np.arange(result.shape[1]),result[2,:],linestyle='--',color='k') 65 | ax1 = ax.twinx() 66 | ax1.plot(np.arange(result.shape[1]),result[0,:],linestyle='-',color='grey') # DNAdamage 67 | handles = [] 68 | handles.append(mlines.Line2D([],[],linestyle='-',color='k')) 69 | handles.append(mlines.Line2D([],[],linestyle='--',color='k')) 70 
| handles.append(mlines.Line2D([],[],linestyle='-',color='grey')) 71 | ax.legend(handles=handles,labels=['p53','Mdm2_nuc','DNAdamage']) 72 | ax.set_xlabel('Time',weight='bold') 73 | ax.set_ylabel('Protein concentration',weight='bold') 74 | ax.set_xticks([]) 75 | plt.savefig('/Users/ligk2e/Desktop/ODE/final/exam3_small_damage.pdf',bbox_inches='tight') 76 | plt.close() 77 | 78 | 79 | # larger DNA damage 80 | sol = solve_ivp(process,t_span=[0,50],y0=[0,0.36,0.71]) 81 | tmp1 = sol.y 82 | sol = solve_ivp(process,t_span=[0,50],y0=[0.20,0.302,1.240]) 83 | tmp2 = sol.y 84 | 85 | result = np.concatenate([tmp1,tmp2],axis=1) 86 | 87 | fig,ax = plt.subplots() 88 | ax.plot(np.arange(result.shape[1]),result[1,:],linestyle='-',color='k') 89 | ax.plot(np.arange(result.shape[1]),result[2,:],linestyle='--',color='k') 90 | ax1 = ax.twinx() 91 | ax1.plot(np.arange(result.shape[1]),result[0,:],linestyle='-',color='grey') # DNAdamage 92 | handles = [] 93 | handles.append(mlines.Line2D([],[],linestyle='-',color='k')) 94 | handles.append(mlines.Line2D([],[],linestyle='--',color='k')) 95 | handles.append(mlines.Line2D([],[],linestyle='-',color='grey')) 96 | ax.legend(handles=handles,labels=['p53','Mdm2_nuc','DNAdamage']) 97 | ax.set_xlabel('Time',weight='bold') 98 | ax.set_ylabel('Protein concentration',weight='bold') 99 | ax.set_xticks([]) 100 | plt.savefig('/Users/ligk2e/Desktop/ODE/final/exam3_large_damage.pdf',bbox_inches='tight') 101 | plt.close() 102 | -------------------------------------------------------------------------------- /tricks/7_np_where.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "array([4, 5, 6, 7])" 21 | ] 22 | 
}, 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "# one dimension, short version of np.where\n", 30 | "a = np.array([4,5,6,7])\n", 31 | "a" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/plain": [ 42 | "(array([1]),)" 43 | ] 44 | }, 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "output_type": "execute_result" 48 | } 49 | ], 50 | "source": [ 51 | "np.where(a==5)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 5, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "array([[1, 2],\n", 63 | " [3, 2]])" 64 | ] 65 | }, 66 | "execution_count": 5, 67 | "metadata": {}, 68 | "output_type": "execute_result" 69 | } 70 | ], 71 | "source": [ 72 | "# two dimension, long version of np.where\n", 73 | "b = np.array([[1,2],[3,2]])\n", 74 | "b" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 7, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "(array([0, 1]), array([1, 1]))" 86 | ] 87 | }, 88 | "execution_count": 7, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "np.where(b==2)\n", 95 | "# first array is the index of hits along axis=0, second array is the index of hits along axis=1" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 8, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "array([ 3, 16, 25])" 107 | ] 108 | }, 109 | "execution_count": 8, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "# full version of np.where\n", 116 | "c = np.array([3,4,5])\n", 117 | "np.where(c<4,c,c*c)\n", 118 | "# last two arguments have to be the same length of first array argument" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 
123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [] 127 | } 128 | ], 129 | "metadata": { 130 | "kernelspec": { 131 | "display_name": "Python 3", 132 | "language": "python", 133 | "name": "python3" 134 | }, 135 | "language_info": { 136 | "codemirror_mode": { 137 | "name": "ipython", 138 | "version": 3 139 | }, 140 | "file_extension": ".py", 141 | "mimetype": "text/x-python", 142 | "name": "python", 143 | "nbconvert_exporter": "python", 144 | "pygments_lexer": "ipython3", 145 | "version": "3.6.12" 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 4 150 | } 151 | -------------------------------------------------------------------------------- /ODE/final/exam1.py: -------------------------------------------------------------------------------- 1 | from scipy.integrate import solve_ivp 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import matplotlib.lines as mlines 5 | 6 | ''' 7 | let's set 8 | DNAdamage is y[0] 9 | p53 is y[1] 10 | Mdm2_cyt is y[2] 11 | Mdm2_nuc is y[3] 12 | ''' 13 | 14 | # all the parameters 15 | k_prime_d53 = 0.27 16 | k_prime2_d53 = 8.25 17 | theta = 0.83 18 | k_prime_d2 = 0.05 19 | k_prime_s53 = 0.6 20 | k_prime2_s53 = 2.56 21 | J_s53 = 0.45 22 | k_prime_s2 = 0.15 23 | k_prime2_s2 = 4.23 24 | J_s2 = 0.92 25 | k_i = 0.41 26 | k_o = 0.05 27 | k_prime2_d2 = 0.79 28 | k_repair = 0.08 29 | J1 = 0.1 30 | J2 = 0.1 31 | 32 | def Goldbeter_koshland(u,v,q,r): 33 | a = 2*u*r 34 | b = (v-u+v*q+u*r)**2 35 | c = 4*u*r*(v-u) 36 | d = np.sqrt(b-c) 37 | e = v-u+v*q+u*r+d 38 | return a/e 39 | 40 | def heavisible(x): 41 | if x > 0: 42 | return 1 43 | else: 44 | return 0 45 | 46 | 47 | def process(t,y): 48 | ''' 49 | y[0] DNAdamage 50 | y[1] p53 51 | y[2] Mdm2_cyt 52 | y[3] Mdm2_nuc 53 | ''' 54 | y0d = -k_repair*heavisible(y[0]) 55 | k_d2 = k_prime_d2*(1+y[0]) 56 | k_d53 = k_prime_d53+k_prime2_d53*Goldbeter_koshland(y[3],theta,J1/y[1],J2/y[1]) 57 | y1d = 
k_prime_s53+k_prime2_s53*(y[2]**4/(J_s53**4+y[2]**4))-k_d53*y[1] 58 | y2d = k_prime_s2+k_prime2_s2*(y[1]**4/(J_s2**4+y[1]**4))-k_i*y[2]+k_o*y[3]-k_prime2_d2*y[2] 59 | y3d = k_i*y[2]-k_o*y[3]-k_d2*y[3] 60 | return [y0d,y1d,y2d,y3d] 61 | 62 | 63 | # to reproduce column2, small DNAdamage 64 | sol = solve_ivp(process,t_span=[0,10],y0=[0,0.19,0.19,0.78]) 65 | tmp1 = sol.y 66 | sol = solve_ivp(process,t_span=[0,30],y0=[2.0,0.17,0.158,0.704]) 67 | tmp2 = sol.y 68 | 69 | result = np.concatenate([tmp1,tmp2],axis=1) 70 | 71 | fig,ax = plt.subplots() 72 | ax.set_xlim(0,40) 73 | ax.set_ylim(0,2.0) 74 | ax.set_xticks([0,10,20,30,40]) 75 | ax.set_xticklabels(['40','50','60','70','80']) 76 | ax.plot(np.arange(result.shape[1]),result[1,:],linestyle='-',color='k') 77 | ax.plot(np.arange(result.shape[1]),result[3,:],linestyle='--',color='k') 78 | ax1 = ax.twinx() 79 | ax1.set_ylim(-0.5,2.5) 80 | ax1.plot(np.arange(result.shape[1]),result[0,:],linestyle='-',color='grey') 81 | handles = [] 82 | handles.append(mlines.Line2D([],[],linestyle='-',color='k')) 83 | handles.append(mlines.Line2D([],[],linestyle='--',color='k')) 84 | handles.append(mlines.Line2D([],[],linestyle='-',color='grey')) 85 | ax.legend(handles=handles,labels=['p53','Mdm2_nuc','DNAdamage']) 86 | ax.set_xlabel('Time',weight='bold') 87 | ax.set_ylabel('Protein concentration',weight='bold') 88 | plt.savefig('/Users/ligk2e/Desktop/ODE/final/exam1_small_damage.pdf',bbox_inches='tight') 89 | plt.close() 90 | 91 | # to reproduce larger DNAdamage 92 | sol = solve_ivp(process,t_span=[0,10],y0=[0,0.19,0.19,0.78]) 93 | tmp1 = sol.y 94 | sol = solve_ivp(process,t_span=[0,90],y0=[5.0,0.17,0.158,0.704]) 95 | tmp2 = sol.y 96 | 97 | result = np.concatenate([tmp1,tmp2],axis=1) 98 | 99 | fig,ax = plt.subplots() 100 | ax.set_xlim(0,100) 101 | ax.set_ylim(0,2.0) 102 | ax.set_xticks([0,20,40,60,80,100]) 103 | ax.set_xticklabels(['40','60','80','100','120','140']) 104 | ax.plot(np.arange(result.shape[1]),result[1,:],linestyle='-',color='k') 
105 | ax.plot(np.arange(result.shape[1]),result[3,:],linestyle='--',color='k') 106 | ax1 = ax.twinx() 107 | ax1.set_ylim(-0.5,6) 108 | ax1.plot(np.arange(result.shape[1]),result[0,:],linestyle='-',color='grey') 109 | handles = [] 110 | handles.append(mlines.Line2D([],[],linestyle='-',color='k')) 111 | handles.append(mlines.Line2D([],[],linestyle='--',color='k')) 112 | handles.append(mlines.Line2D([],[],linestyle='-',color='grey')) 113 | ax.legend(handles=handles,labels=['p53','Mdm2_nuc','DNAdamage']) 114 | ax.set_xlabel('Time',weight='bold') 115 | ax.set_ylabel('Protein concentration',weight='bold') 116 | plt.savefig('/Users/ligk2e/Desktop/ODE/final/exam1_large_damage.pdf',bbox_inches='tight') 117 | plt.close() -------------------------------------------------------------------------------- /tricks/1_tuple_index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Tuple index is a specialized way to access certain number of disjoint entries in the 2D array" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "We know there are several ways to index a ndarray:\n", 15 | "1. basic index. (integer or slicing object)\n", 16 | "2. advanced index. (pass an array, either list of ndarray, not tuple)\n", 17 | "3. field index. (for structured and record array)\n", 18 | "4. 
boolean accessing\n", 19 | "\n", 20 | "Now we introduce an convenient way to batch assign/update certain entries in a 2D array" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import numpy as np" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "test = np.random.randn(4,5)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "array([[ 0.27857494, -1.00332675, -1.2273951 , -0.27148046, -0.57063863],\n", 50 | " [ 0.70390151, -0.02991058, 0.99940696, 0.59763233, -0.01773857],\n", 51 | " [-0.47215608, -0.07363405, -0.10947593, -0.71048801, 1.21208119],\n", 52 | " [ 0.88613458, 0.60891323, -0.61609823, 0.5813559 , 1.47086536]])" 53 | ] 54 | }, 55 | "execution_count": 3, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "test" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 4, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "test[(1,2),(2,3)] = (55,55)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 5, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "array([[ 2.78574936e-01, -1.00332675e+00, -1.22739510e+00,\n", 82 | " -2.71480460e-01, -5.70638628e-01],\n", 83 | " [ 7.03901508e-01, -2.99105795e-02, 5.50000000e+01,\n", 84 | " 5.97632331e-01, -1.77385688e-02],\n", 85 | " [-4.72156078e-01, -7.36340527e-02, -1.09475933e-01,\n", 86 | " 5.50000000e+01, 1.21208119e+00],\n", 87 | " [ 8.86134583e-01, 6.08913230e-01, -6.16098230e-01,\n", 88 | " 5.81355901e-01, 1.47086536e+00]])" 89 | ] 90 | }, 91 | "execution_count": 5, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "test" 98 | ] 99 | }, 100 | { 101 | 
"cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "We change the (1,2), which is second row, third column item,\n", 105 | "and (2,3), which is third row, fourth column item to 55" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "This is called tuple index!" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [] 121 | } 122 | ], 123 | "metadata": { 124 | "kernelspec": { 125 | "display_name": "Python 3", 126 | "language": "python", 127 | "name": "python3" 128 | }, 129 | "language_info": { 130 | "codemirror_mode": { 131 | "name": "ipython", 132 | "version": 3 133 | }, 134 | "file_extension": ".py", 135 | "mimetype": "text/x-python", 136 | "name": "python", 137 | "nbconvert_exporter": "python", 138 | "pygments_lexer": "ipython3", 139 | "version": "3.6.12" 140 | } 141 | }, 142 | "nbformat": 4, 143 | "nbformat_minor": 4 144 | } 145 | -------------------------------------------------------------------------------- /tricks/2_nan.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "There are several types of 'nan' value in python, I was very confused about that, and I asked this question on StackOverflow: https://stackoverflow.com/questions/65895670/different-kinds-of-nan-in-python?noredirect=1#comment116508499_65895670 Thanks a lot for people who helps me out!\n", 18 | "\n", 19 | "Now, I want to summarize what I learned:" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 4, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "a = float('nan')\n", 29 | "b = 
np.float64('nan')\n", 30 | "c = np.float32('nan')\n", 31 | "d = np.nan\n", 32 | "e = pd.NA\n" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 5, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "\n", 45 | "\n", 46 | "\n", 47 | "\n", 48 | "\n" 49 | ] 50 | } 51 | ], 52 | "source": [ 53 | "# let's check their types\n", 54 | "print(type(a))\n", 55 | "print(type(b))\n", 56 | "print(type(c))\n", 57 | "print(type(d))\n", 58 | "print(type(e))" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "### np.nan is a reference to float('nan'), it is differnt from np.float64('nan')" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "We can convert np.float64('nan') to np.nan by:" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 6, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/plain": [ 83 | "nan" 84 | ] 85 | }, 86 | "execution_count": 6, 87 | "metadata": {}, 88 | "output_type": "execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "float(np.float('nan'))" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 7, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "data": { 102 | "text/plain": [ 103 | "float" 104 | ] 105 | }, 106 | "execution_count": 7, 107 | "metadata": {}, 108 | "output_type": "execute_result" 109 | } 110 | ], 111 | "source": [ 112 | "type(float(np.float('nan')))" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "# nan != nan" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 8, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/plain": [ 130 | "False" 131 | ] 132 | }, 133 | "execution_count": 8, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "a == d" 140 | ] 
141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [] 148 | } 149 | ], 150 | "metadata": { 151 | "kernelspec": { 152 | "display_name": "Python 3", 153 | "language": "python", 154 | "name": "python3" 155 | }, 156 | "language_info": { 157 | "codemirror_mode": { 158 | "name": "ipython", 159 | "version": 3 160 | }, 161 | "file_extension": ".py", 162 | "mimetype": "text/x-python", 163 | "name": "python", 164 | "nbconvert_exporter": "python", 165 | "pygments_lexer": "ipython3", 166 | "version": "3.6.12" 167 | } 168 | }, 169 | "nbformat": 4, 170 | "nbformat_minor": 4 171 | } 172 | -------------------------------------------------------------------------------- /tricks/6_sorting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "For an array: x = np.array([2,8,3,3.1,5])\n", 17 | "\n", 18 | "sorting, by default, is ascending.\n", 19 | "\n", 20 | "1. sorted array: [2,3,3.1,5,8]\n", 21 | "2. rank array (1-based): [1,5,2,3,4]\n", 22 | "3. index array with which you can get the sorted array, meaning, which one in the original array is the smallest one, second smallest one,...: [0,2,3,4,1]\n", 23 | "4. inverse index array with which you can recover the original array from sorted array, meaning, which one in the sorted array is the first one in original array, which is the second one in original array... : [0,4,1,2,3]" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 8, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/plain": [ 34 | "array([2. , 8. , 3. , 3.1, 5. 
])" 35 | ] 36 | }, 37 | "execution_count": 8, 38 | "metadata": {}, 39 | "output_type": "execute_result" 40 | } 41 | ], 42 | "source": [ 43 | "# original array\n", 44 | "x = np.array([2,8,3,3.1,5])\n", 45 | "x" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 9, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "text/plain": [ 56 | "array([2. , 3. , 3.1, 5. , 8. ])" 57 | ] 58 | }, 59 | "execution_count": 9, 60 | "metadata": {}, 61 | "output_type": "execute_result" 62 | } 63 | ], 64 | "source": [ 65 | "# sorted array\n", 66 | "sorted_x = np.sort(x)\n", 67 | "sorted_x" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 10, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/plain": [ 78 | "array([1, 5, 2, 3, 4])" 79 | ] 80 | }, 81 | "execution_count": 10, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [ 87 | "# rank array\n", 88 | "from scipy.stats import rankdata\n", 89 | "rankdata(x,method='max') # max,min,average, dense, see the documentation" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 7, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "array([0, 2, 3, 4, 1])" 101 | ] 102 | }, 103 | "execution_count": 7, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "# index array\n", 110 | "x_argsort1 = np.argsort(x)\n", 111 | "x_argsort1" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 11, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/plain": [ 122 | "array([0, 4, 1, 2, 3])" 123 | ] 124 | }, 125 | "execution_count": 11, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "# inverse index array\n", 132 | "x_argsort2 = np.argsort(x_argsort1)\n", 133 | "x_argsort2" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | 
"metadata": {}, 139 | "source": [ 140 | "## tricks:\n", 141 | "\n", 142 | "1. inverse index array can connect any sorted-array associated length-matched information back to original array (opposite for index array)\n", 143 | "2. cumsum and diff function can be very handy\n", 144 | "3. remainder and quotien can also be handy sometime" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [] 153 | } 154 | ], 155 | "metadata": { 156 | "kernelspec": { 157 | "display_name": "Python 3", 158 | "language": "python", 159 | "name": "python3" 160 | }, 161 | "language_info": { 162 | "codemirror_mode": { 163 | "name": "ipython", 164 | "version": 3 165 | }, 166 | "file_extension": ".py", 167 | "mimetype": "text/x-python", 168 | "name": "python", 169 | "nbconvert_exporter": "python", 170 | "pygments_lexer": "ipython3", 171 | "version": "3.6.12" 172 | } 173 | }, 174 | "nbformat": 4, 175 | "nbformat_minor": 4 176 | } 177 | -------------------------------------------------------------------------------- /ODE/final/exam2.py: -------------------------------------------------------------------------------- 1 | from scipy.integrate import solve_ivp 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import matplotlib.lines as mlines 5 | 6 | ''' 7 | Let's set 8 | DNAdamage as y[0] 9 | p53_active as y[1] 10 | p53_inactive as y[2] 11 | Mdm2_cyt as y[3] 12 | Mdm2_nuc as y[4] 13 | ''' 14 | 15 | # parameters 16 | k_prime_d53 = 0.3 17 | k_prime2_d53 = 8 18 | theta = 0.8 19 | k_prime_d2 = 0.1 20 | k_prime_s53 = 0.6 21 | k_inactivation = 0.1 22 | k_prime_activation = 0.2 23 | k_prime2_activation = 5 24 | J_activation = 0.2 25 | k_prime_s2 = 0.2 26 | k_prime2_s2 = 3 27 | J_s2 = 0.7 28 | k_i = 0.4 29 | k_o = 0.05 30 | k_prime2_d2 = 0.7 31 | k_repair = 0.05 32 | J1 = 0.1 33 | J2 = 0.1 34 | 35 | #helper function 36 | def Goldbeter_koshland(u,v,q,r): 37 | a = 2*u*r 38 | b = (v-u+v*q+u*r)**2 39 | c = 
4*u*r*(v-u) 40 | d = np.sqrt(b-c) 41 | e = v-u+v*q+u*r+d 42 | return a/e 43 | 44 | def heavisible(x): 45 | if x > 0: 46 | return 1 47 | else: 48 | return 0 49 | 50 | def process(t,y): 51 | ''' 52 | y[0]: DNAdamage 53 | y[1]: p53_active 54 | y[2]: p53_inactive 55 | y[3]: Mdm2_cyt 56 | y[4]: Mdm2_nuc 57 | ''' 58 | p53_total = y[1]+y[2] 59 | k_activation = k_prime_activation+k_prime2_activation*(y[1]**3/(J_activation**3+y[1]**3)) 60 | y0d = -k_repair*heavisible(y[0]) 61 | k_d2 = k_prime_d2*(1+y[0]) 62 | k_d53 = k_prime_d53+k_prime2_d53*Goldbeter_koshland(y[4],theta,J1/p53_total,J2/p53_total) 63 | y1d = k_activation*y[2]-k_inactivation*y[1]-k_d53*y[1] 64 | y2d = k_prime_s53-k_activation*y[2]+k_inactivation*y[1]-k_d53*y[2] 65 | y3d = k_prime_s2+k_prime2_s2*(y[1]**3/(J_s2**3+y[1]**3))-k_i*y[3]+k_o*y[4]-k_prime_d2*y[3] 66 | y4d = k_i*y[3]-k_o*y[4]-k_d2*y[4] 67 | return [y0d,y1d,y2d,y3d,y4d] 68 | 69 | sol = solve_ivp(process,t_span=(0,50),y0=[0,0.01,0.19,0.21,0.55]) 70 | tmp1 = sol.y 71 | sol = solve_ivp(process,t_span=(0,50),y0=[1,0.003,0.097,0.544,1.447]) 72 | tmp2 = sol.y 73 | 74 | result = np.concatenate([tmp1,tmp2],axis=1) 75 | 76 | fig,ax = plt.subplots() 77 | ax.plot(np.arange(result.shape[1]),result[2,:],linestyle='-',color='k') # p53_inactive 78 | ax.plot(np.arange(result.shape[1]),result[4,:],linestyle='--',color='k') # Mdm2_nuc 79 | ax1 = ax.twinx() 80 | ax1.plot(np.arange(result.shape[1]),result[0,:],linestyle='-',color='grey') # DNAdamage 81 | handles = [] 82 | handles.append(mlines.Line2D([],[],linestyle='-',color='k')) 83 | handles.append(mlines.Line2D([],[],linestyle='--',color='k')) 84 | handles.append(mlines.Line2D([],[],linestyle='-',color='grey')) 85 | ax.legend(handles=handles,labels=['p53_inactive','Mdm2_nuc','DNAdamage']) 86 | ax.set_xlabel('Time',weight='bold') 87 | ax.set_ylabel('Protein concentration',weight='bold') 88 | ax.set_xticks([]) 89 | plt.savefig('/Users/ligk2e/Desktop/ODE/final/exam2_small_damage.pdf',bbox_inches='tight') 90 | 
plt.close() 91 | 92 | 93 | # larger damage 94 | sol = solve_ivp(process,t_span=(0,50),y0=[0,0.01,0.19,0.21,0.55]) 95 | tmp1 = sol.y 96 | sol = solve_ivp(process,t_span=(0,50),y0=[2,0.003,0.097,0.544,1.447]) 97 | tmp2 = sol.y 98 | 99 | result = np.concatenate([tmp1,tmp2],axis=1) 100 | 101 | fig,ax = plt.subplots() 102 | ax.plot(np.arange(result.shape[1]),result[2,:],linestyle='-',color='k') # p53_inactive 103 | ax.plot(np.arange(result.shape[1]),result[4,:],linestyle='--',color='k') # Mdm2_nuc 104 | ax1 = ax.twinx() 105 | ax1.plot(np.arange(result.shape[1]),result[0,:],linestyle='-',color='grey') # DNAdamage 106 | handles = [] 107 | handles.append(mlines.Line2D([],[],linestyle='-',color='k')) 108 | handles.append(mlines.Line2D([],[],linestyle='--',color='k')) 109 | handles.append(mlines.Line2D([],[],linestyle='-',color='grey')) 110 | ax.legend(handles=handles,labels=['p53_inactive','Mdm2_nuc','DNAdamage']) 111 | ax.set_xlabel('Time',weight='bold') 112 | ax.set_ylabel('Protein concentration',weight='bold') 113 | ax.set_xticks([]) 114 | plt.savefig('/Users/ligk2e/Desktop/ODE/final/exam2_large_damage.pdf',bbox_inches='tight') 115 | plt.close() 116 | 117 | 118 | 119 | 120 | fig,ax = plt.subplots() 121 | ax.plot(np.arange(result.shape[1]),result[1,:],linestyle='-',color='k') 122 | ax.plot(np.arange(result.shape[1]),result[3,:],linestyle='--',color='k') 123 | ax1 = ax.twinx() 124 | ax1.plot(np.arange(result.shape[1]),result[0,:],linestyle='-',color='grey') 125 | handles = [] 126 | handles.append(mlines.Line2D([],[],linestyle='-',color='k')) 127 | handles.append(mlines.Line2D([],[],linestyle='--',color='k')) 128 | handles.append(mlines.Line2D([],[],linestyle='-',color='grey')) 129 | ax.legend(handles=handles,labels=['p53','Mdm2_nuc','DNAdamage']) 130 | ax.set_xlabel('Time',weight='bold') 131 | ax.set_ylabel('Protein concentration',weight='bold') 132 | plt.savefig('/Users/ligk2e/Desktop/ODE/final/exam1_small_damage.pdf',bbox_inches='tight') 133 | plt.close() 134 | 135 | 
-------------------------------------------------------------------------------- /tricks/4_arg_sort.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# This notebook i to illustrate:\n", 8 | "\n", 9 | "1. argsort\n", 10 | "2. argwhere\n", 11 | "3. argmin\n", 12 | "4. argmax" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import numpy as np" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "a = np.array([3,4,5,6,1,2])\n", 31 | "b = np.array([[3,4,5,6],\n", 32 | " [1,9,4,8]])" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/plain": [ 43 | "array([4, 5, 0, 1, 2, 3])" 44 | ] 45 | }, 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "output_type": "execute_result" 49 | } 50 | ], 51 | "source": [ 52 | "# argsort, the index array should be the same dimension as original array\n", 53 | "np.argsort(a)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 4, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/plain": [ 64 | "array([[0, 1, 2, 3],\n", 65 | " [0, 2, 3, 1]])" 66 | ] 67 | }, 68 | "execution_count": 4, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | } 72 | ], 73 | "source": [ 74 | "np.argsort(b,axis=1)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 6, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "array([[1]])" 86 | ] 87 | }, 88 | "execution_count": 6, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "# argwhere\n", 95 | "np.argwhere(a==4)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 
100 | "execution_count": 8, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "array([[0, 1],\n", 107 | " [1, 2]])" 108 | ] 109 | }, 110 | "execution_count": 8, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "np.argwhere(b==4)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 9, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "4" 128 | ] 129 | }, 130 | "execution_count": 9, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "# argmin\n", 137 | "np.argmin(a)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 10, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "array([0, 0])" 149 | ] 150 | }, 151 | "execution_count": 10, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "np.argmin(b,axis=1)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 11, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "data": { 167 | "text/plain": [ 168 | "3" 169 | ] 170 | }, 171 | "execution_count": 11, 172 | "metadata": {}, 173 | "output_type": "execute_result" 174 | } 175 | ], 176 | "source": [ 177 | "# argmax\n", 178 | "np.argmax(a)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 12, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/plain": [ 189 | "array([3, 1])" 190 | ] 191 | }, 192 | "execution_count": 12, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "np.argmax(b,axis=1)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [] 207 | } 208 | ], 209 | "metadata": { 210 | "kernelspec": { 211 | "display_name": "Python 3", 
212 | "language": "python", 213 | "name": "python3" 214 | }, 215 | "language_info": { 216 | "codemirror_mode": { 217 | "name": "ipython", 218 | "version": 3 219 | }, 220 | "file_extension": ".py", 221 | "mimetype": "text/x-python", 222 | "name": "python", 223 | "nbconvert_exporter": "python", 224 | "pygments_lexer": "ipython3", 225 | "version": "3.6.12" 226 | } 227 | }, 228 | "nbformat": 4, 229 | "nbformat_minor": 4 230 | } 231 | -------------------------------------------------------------------------------- /tricks/8_parallelization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Parallelization in python scientific computing" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import multiprocessing as mp" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "- In Linux system: it forks the sub processes, meaning each sub process will inherit the current state of the parent program, they do have a 4GB argument restriction, but other than that, it is pretty flexible since not much pickling is involved. The inherited parent variables are \"copy-on-modify\"\n", 24 | "\n", 25 | "- In other systems, it spawns sub processes, so a new interpreter gets launched and quite a lot of pickling is involved, so things like file IO and generating figures can not be parallelized, and it is better to have a clear input and output in the enqueue and dequeue." 
26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "def func(x):\n", 35 | " return x+1" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 20, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "querys = [1,2,3,4]" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "1. pool.map will take the iterable, make it a list, divide the list into chunks and finally pickle each chunk and send it to a sub-process (worker).\n", 52 | "2. it runs out of order, but output will preserve the order, faster subprocess will have to wait slower one\n", 53 | "3. result is just a list\n", 54 | "4. result won't immediately return" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 5, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "name": "stdout", 64 | "output_type": "stream", 65 | "text": [ 66 | "[2, 3, 4, 5]\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "pool = mp.Pool(processes=mp.cpu_count())\n", 72 | "r = pool.map(func,querys)\n", 73 | "print(r)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "1. pool.map_async will immediately return AsyncResult object and main program can proceed, but can not actually call AsyncResult.get() until all the subprocesses finish, so again, it preserves the order\n", 81 | "2. you can manually call r.wait() or pool.close() + pool.join() to make sure main program won't go until all workers finish. 
r is the AsyncResult object, but pool is the Pool object, Pool.close() instructs the Pool not to take any new jobs, and Pool.join() instructs the Pool to wait until all subprocesses have finished" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 6, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "name": "stdout", 91 | "output_type": "stream", 92 | "text": [ 93 | "[2, 3, 4, 5]\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "pool = mp.Pool(processes=mp.cpu_count())\n", 99 | "r = pool.map_async(func,querys)\n", 100 | "print(r.get())" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "1. pool.imap will not make them a list and divide to chunk, it will just take one from the queue, send it to a subprocess, one after another\n", 108 | "2. it will preserve the order as well" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 9, 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "name": "stdout", 118 | "output_type": "stream", 119 | "text": [ 120 | "2\n", 121 | "3\n", 122 | "4\n", 123 | "5\n" 124 | ] 125 | } 126 | ], 127 | "source": [ 128 | "pool = mp.Pool(processes=mp.cpu_count())\n", 129 | "r = pool.imap(func,querys)\n", 130 | "for item in r:\n", 131 | " print(item)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "1. 
pool.imap_unordered will not preserve the order, return the result immediately, and you can access it, main program will continue" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 12, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "name": "stdout", 148 | "output_type": "stream", 149 | "text": [ 150 | "2\n", 151 | "3\n", 152 | "4\n", 153 | "5\n" 154 | ] 155 | } 156 | ], 157 | "source": [ 158 | "pool = mp.Pool(processes=mp.cpu_count())\n", 159 | "r = pool.imap_unordered(func,querys)\n", 160 | "for item in r:\n", 161 | " print(item)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "1. pool.apply just can take additional argument\n", 169 | "2. pool.apply_async, the same as pool.map_async" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 14, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "def func_apply(x,y):\n", 179 | " return x+y" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 15, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "[6, 7, 8, 9]\n" 192 | ] 193 | } 194 | ], 195 | "source": [ 196 | "y = 5\n", 197 | "pool = mp.Pool(processes=mp.cpu_count())\n", 198 | "r = [pool.apply(func_apply,args=(x,y)) for x in querys]\n", 199 | "print(r)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 19, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "6\n", 212 | "7\n", 213 | "8\n", 214 | "9\n" 215 | ] 216 | } 217 | ], 218 | "source": [ 219 | "y = 5\n", 220 | "pool = mp.Pool(processes=mp.cpu_count())\n", 221 | "r = [pool.apply_async(func_apply,args=(x,y)) for x in querys]\n", 222 | "for item in r:\n", 223 | " print(item.get())" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | 
"outputs": [], 231 | "source": [] 232 | } 233 | ], 234 | "metadata": { 235 | "kernelspec": { 236 | "display_name": "Python 3", 237 | "language": "python", 238 | "name": "python3" 239 | }, 240 | "language_info": { 241 | "codemirror_mode": { 242 | "name": "ipython", 243 | "version": 3 244 | }, 245 | "file_extension": ".py", 246 | "mimetype": "text/x-python", 247 | "name": "python", 248 | "nbconvert_exporter": "python", 249 | "pygments_lexer": "ipython3", 250 | "version": "3.6.13" 251 | } 252 | }, 253 | "nbformat": 4, 254 | "nbformat_minor": 4 255 | } 256 | -------------------------------------------------------------------------------- /tricks/5_random.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# This notebok is for illustrating random module in numpy and random package\n", 8 | "\n", 9 | "1. only two commands in random package can generate multiple instances, sample and choices, former is without replacement, latter is with replacement. Others are all just generating single value. But still need to know of them, because they are widely used.\n", 10 | "\n", 11 | "2. vanilla random module can still be useful when the sample is just an iterable, instead of a well-organized numpy array.\n", 12 | "\n", 13 | "3. np.random.choice() have argument to control replace or not, every np.random should be able to define size" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "## In numpy.random\n", 21 | "1. np.random.rand()\n", 22 | "2. np.random.randn()\n", 23 | "3. np.random.choice()\n", 24 | "4. np.random.randint()\n", 25 | "5. np.random.seed()\n", 26 | "6. 
np.random.shuffle()" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import numpy as np\n", 36 | "import random" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/plain": [ 47 | "array([[0.21223688, 0.71417717, 0.5689742 ],\n", 48 | " [0.57024937, 0.49583009, 0.62827653]])" 49 | ] 50 | }, 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "output_type": "execute_result" 54 | } 55 | ], 56 | "source": [ 57 | "np.random.rand(2,3) # need a sequence instead of list, different from np.empty" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "array([[ 1.24684682, -1.46491864, 0.67243279],\n", 69 | " [-1.69646683, -0.10200022, 1.01922968]])" 70 | ] 71 | }, 72 | "execution_count": 4, 73 | "metadata": {}, 74 | "output_type": "execute_result" 75 | } 76 | ], 77 | "source": [ 78 | "np.random.randn(2,3)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 6, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "array([4, 5])" 90 | ] 91 | }, 92 | "execution_count": 6, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "np.random.choice([4,5,6,7],size=2,replace=False)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 7, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "array([[2, 2, 1],\n", 110 | " [8, 9, 5]])" 111 | ] 112 | }, 113 | "execution_count": 7, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "np.random.randint(low=1,high=10,size=(2,3))" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 8, 125 | "metadata": {}, 126 | "outputs": 
[], 127 | "source": [ 128 | "np.random.seed(42)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "## In random package\n", 136 | "1. random.seed()\n", 137 | "2. random.randrange()\n", 138 | "3. random.randint()\n", 139 | "4. random.choice()\n", 140 | "5. random.choices()\n", 141 | "6. random.shuffle()\n", 142 | "7. random.sample()\n", 143 | "8. random.random()\n", 144 | "9. random.uniform()\n", 145 | "10. random.gauss()" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 12, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "random.seed(40)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 13, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/plain": [ 165 | "7" 166 | ] 167 | }, 168 | "execution_count": 13, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "random.randrange(1,10,2) # return one number " 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 14, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "10" 186 | ] 187 | }, 188 | "execution_count": 14, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "random.randint(1,10) # return one number" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 15, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/plain": [ 205 | "3" 206 | ] 207 | }, 208 | "execution_count": 15, 209 | "metadata": {}, 210 | "output_type": "execute_result" 211 | } 212 | ], 213 | "source": [ 214 | "random.choice([1,2,3])" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 16, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "data": { 224 | "text/plain": [ 225 | "[4, 5]" 226 | ] 227 | }, 228 | "execution_count": 16, 229 | "metadata": {}, 230 | 
"output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "random.choices([4,5,6,7],k=2) # with replacement" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 18, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "data": { 244 | "text/plain": [ 245 | "[6, 5, 4]" 246 | ] 247 | }, 248 | "execution_count": 18, 249 | "metadata": {}, 250 | "output_type": "execute_result" 251 | } 252 | ], 253 | "source": [ 254 | "a = [4,5,6]\n", 255 | "random.shuffle(a)\n", 256 | "a" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 19, 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "data": { 266 | "text/plain": [ 267 | "[6, 7]" 268 | ] 269 | }, 270 | "execution_count": 19, 271 | "metadata": {}, 272 | "output_type": "execute_result" 273 | } 274 | ], 275 | "source": [ 276 | "random.sample([4,5,6,7],k=2) # without replacement\n" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 20, 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "data": { 286 | "text/plain": [ 287 | "0.4407622204637107" 288 | ] 289 | }, 290 | "execution_count": 20, 291 | "metadata": {}, 292 | "output_type": "execute_result" 293 | } 294 | ], 295 | "source": [ 296 | "random.random() # uniformly distributed in [0,1]" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 21, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "data": { 306 | "text/plain": [ 307 | "5.114528105482348" 308 | ] 309 | }, 310 | "execution_count": 21, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "random.uniform(5,9) # broader uniform" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 22, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "data": { 326 | "text/plain": [ 327 | "3.1676717828001544" 328 | ] 329 | }, 330 | "execution_count": 22, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | 
"source": [ 336 | "random.gauss(mu=3,sigma=0.4) # normal distribution" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [] 345 | } 346 | ], 347 | "metadata": { 348 | "kernelspec": { 349 | "display_name": "Python 3", 350 | "language": "python", 351 | "name": "python3" 352 | }, 353 | "language_info": { 354 | "codemirror_mode": { 355 | "name": "ipython", 356 | "version": 3 357 | }, 358 | "file_extension": ".py", 359 | "mimetype": "text/x-python", 360 | "name": "python", 361 | "nbconvert_exporter": "python", 362 | "pygments_lexer": "ipython3", 363 | "version": "3.6.13" 364 | } 365 | }, 366 | "nbformat": 4, 367 | "nbformat_minor": 4 368 | } 369 | -------------------------------------------------------------------------------- /ODE/codes/script.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.integrate import odeint 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | 6 | ''' 7 | Figure1A linear signal response 8 | dR/dt = k0 + k1*S - k2*R 9 | ''' 10 | 11 | def model(R,t,S): 12 | k0 = 0.01 13 | k1 = 1 14 | k2 = 5 15 | dRdt = k0 + k1*S - k2*R 16 | return dRdt 17 | 18 | R0 = [0,0.3,0.5] 19 | t = np.linspace(0,1,10) 20 | S = 1 21 | result = odeint(model,R0,t,args=(S,)) 22 | 23 | # solution curve 24 | fig,ax = plt.subplots() 25 | ax.plot(t,result[:,0],label='R0=0') 26 | ax.plot(t,result[:,1],label='R0=0.3') 27 | ax.plot(t,result[:,2],label='R0=0.5') 28 | ax.legend() 29 | ax.set_xlabel('t') 30 | ax.set_ylabel('R') 31 | ax.axhline(y=0.202,xmin=0,xmax=1,linestyle='--',c='k') 32 | plt.savefig('/Users/ligk2e/Desktop/ODE/figure1A_solution_curve.pdf',bbox_inches='tight') 33 | plt.close() 34 | 35 | # rate curve 36 | k0 = 0.01 37 | k1 = 1 38 | k2 = 5 39 | fig,ax = plt.subplots() 40 | S_options = [1,2,3] 41 | for S in S_options: 42 | R = np.linspace(0,1,10) 43 | removal_rate = k2 * R 44 | production_rate = [k1 
* S] * len(R) 45 | ax.plot(R,removal_rate,linestyle='-',c='k') 46 | ax.plot(R,production_rate,linestyle='--',c='k') 47 | ax.set_xlim(0,1) 48 | ax.set_ylim(0,6) 49 | ax.set_xlabel('R') 50 | ax.set_ylabel('Rate') 51 | plt.savefig('/Users/ligk2e/Desktop/ODE/figure1A_rate_curve.pdf',bbox_inches='tight') 52 | plt.close() 53 | 54 | # signal-response curve 55 | k0 = 0.01 56 | k1 = 1 57 | k2 = 5 58 | S = np.linspace(0,3,7) 59 | R_ss = (k0 + k1*S) / k2 60 | fig,ax = plt.subplots() 61 | ax.plot(S,R_ss,linestyle='-',c='k') 62 | ax.set_xlim(0,3) 63 | ax.set_ylim(0,0.7) 64 | ax.set_xlabel('Signal(S)') 65 | ax.set_ylabel('Response(R)') 66 | plt.savefig('/Users/ligk2e/Desktop/ODE/figure1A_SR_curve.pdf',bbox_inches='tight') 67 | plt.close() 68 | 69 | ''' 70 | Figure1B, hyperbolic response 71 | dRp/dt = k1*S(Rt-Rp) - k2*Rp 72 | ''' 73 | 74 | def model(y,t,S): 75 | k1 = 1 76 | k2 = 1 77 | Rt = 1 78 | dydt = k1*S*(Rt-y) - k2*y 79 | return dydt 80 | 81 | S = 1 82 | Rp0 = [0,0.5,1] 83 | t = np.linspace(0,1,10) 84 | result = odeint(model,Rp0,t,args=(S,)) 85 | 86 | 87 | # solution curve 88 | fig,ax = plt.subplots() 89 | ax.plot(t,result[:,0],label='R0=0') 90 | ax.plot(t,result[:,1],label='R0=0.5') 91 | ax.plot(t,result[:,2],label='R0=1') 92 | ax.legend() 93 | ax.set_xlabel('t') 94 | ax.set_ylabel('Rp') 95 | ax.axhline(y=0.5,xmin=0,xmax=1,linestyle='--',c='k') 96 | plt.savefig('/Users/ligk2e/Desktop/ODE/figure1B_solution_curve.pdf',bbox_inches='tight') 97 | plt.close() 98 | 99 | # rate curve 100 | k1 = 1 101 | k2 = 1 102 | Rt = 1 103 | fig,ax = plt.subplots() 104 | S_options = [2,4,8] 105 | for S in S_options: 106 | Rp = np.linspace(0,1,10) 107 | removal_rate = k2 * Rp 108 | production_rate = k1 * S * (Rt - Rp) 109 | ax.plot(R,removal_rate,linestyle='-',c='k') 110 | ax.plot(R,production_rate,linestyle='--',c='k') 111 | ax.set_xlim(0,1) 112 | ax.set_ylim(0,6) 113 | ax.set_xlabel('R') 114 | ax.set_ylabel('Rate') 115 | 
plt.savefig('/Users/ligk2e/Desktop/ODE/figure1B_rate_curve.pdf',bbox_inches='tight')
plt.close()

# signal-response curve
# Steady state of the Figure 1B model: Rp_ss = S*Rt / (k2/k1 + S).
k1 = 1
k2 = 1
Rt = 1
S = np.linspace(0,10,100)
Rp_ss = (S * Rt) / (k2/k1 + S)
fig,ax = plt.subplots()
ax.plot(S,Rp_ss,linestyle='-',c='k')
ax.set_xlim(0,10)
ax.set_ylim(0,1.1)
ax.set_xlabel('Signal(S)')
ax.set_ylabel('Response(R)')
plt.savefig('/Users/ligk2e/Desktop/ODE/figure1B_SR_curve.pdf',bbox_inches='tight')
plt.close()

'''
sigmoidal curve
dRp/dt = (k1*S*(Rt-Rp)/(km1+Rt-Rp)) - k2*Rp/(km2+Rp)
'''

def model(Rp,t,S):
    '''
    Right-hand side of the sigmoidal-response ODE, in the callback form
    expected by scipy.integrate.odeint.

    Rp: current value(s) of Rp
    t: time (unused, but required by the odeint callback signature)
    S: signal strength
    returns k1*S*(Rt-Rp)/(km1+Rt-Rp) - k2*Rp/(km2+Rp)
            with k1 = k2 = Rt = 1 and km1 = km2 = 0.05
    '''
    k1 = 1
    k2 = 1
    Rt = 1
    km1 = 0.05
    km2 = 0.05
    dRpdt = (k1*S*(Rt-Rp)/(km1+Rt-Rp)) - k2*Rp/(km2+Rp)
    return dRpdt

# Integrate from three initial values of Rp at once; column i of `result`
# is the trajectory that starts at Rp0[i].
S = 1
Rp0 = [0,0.3,1]
t = np.linspace(0,20,200)
result = odeint(model,Rp0,t,args=(S,))

# solution curve
fig,ax = plt.subplots()
ax.plot(t,result[:,0],label='R0=0')
ax.plot(t,result[:,1],label='R0=0.3')
ax.plot(t,result[:,2],label='R0=1')
ax.legend()
ax.set_xlabel('t')
ax.set_ylabel('Rp')
plt.savefig('/Users/ligk2e/Desktop/ODE/figure1C_solution_curve.pdf',bbox_inches='tight')
plt.close()


# rate curve
k1 = 1
k2 = 1
Rt = 1
km1 = 0.05
km2 = 0.05
Rp = np.linspace(0,1,100)
fig,ax = plt.subplots()
for S in [0.25,0.5,1,1.5,2]:
    # The removal rate does not depend on S, so the same solid curve is
    # redrawn each pass; only the dashed production curve changes.
    removal_rate = k2*Rp/(km2+Rp)
    production_rate = k1*S*(Rt-Rp)/(km1+Rt-Rp)
    ax.plot(Rp,removal_rate,linestyle='-',c='k')
    ax.plot(Rp,production_rate,linestyle='--',c='k')
ax.set_xlim(0,1)
ax.set_xlabel('Rp')
ax.set_ylim(0,2)
ax.set_ylabel('Rate')
plt.savefig('/Users/ligk2e/Desktop/ODE/figure1C_rate_curve.pdf',bbox_inches='tight')
plt.close()

# signal-response curve
S_all = np.linspace(0,3,100)
def equation(Rp,S):
    '''
    Steady-state residual of the sigmoidal model: production minus removal.
    A root in Rp for a given S is a steady state Rp_ss.

    Rp: candidate steady-state value(s)
    S: signal strength
    returns k1*S*(Rt-Rp)/(km1+Rt-Rp) - k2*Rp/(km2+Rp)
            with k1 = k2 = Rt = 1 and km1 = km2 = 0.05
    '''
    k1 = 1
    k2 = 1
    Rt = 1
    km1 = 0.05
    km2 = 0.05
    return k1*S*(Rt-Rp)/(km1+Rt-Rp) - k2*Rp/(km2+Rp)

from scipy.optimize import fsolve
# Solve equation(Rp, S) = 0 for every S, always starting the search from Rp = 1.
store = []
for S in S_all:
    Rp_ss = fsolve(equation,[1],args=(S,))[0]
    store.append(Rp_ss)

fig,ax = plt.subplots()
ax.plot(S_all,store,c='k')
ax.set_xlim(0,3)
ax.set_xlabel('Signal(S)')
ax.set_ylim(0,1.1)
ax.set_ylabel('Response(R_ss)')
plt.savefig('/Users/ligk2e/Desktop/ODE/figure1C_SR_curve.pdf',bbox_inches='tight')
# Close the figure like every other section does; the original left this one open.
plt.close()


'''
Figure1d perfectly adaptation to signal
dR/dt = k1*S - k2*X*R
dX/dt = k3*S - k4*X
'''

# solution curve
from gekko import GEKKO
k1,k2,k3,k4 = 2,2,1,1
m = GEKKO()
m.time = np.linspace(0,20,201)
# Staircase signal 0,1,2,3,4: 40+40+40+40+41 = 201 samples, one per entry of m.time.
S = np.array([0] * 40 + [1] * 40 + [2] * 40 + [3] * 40 + [4] * 41)
S_param = m.Param(value=S)
X_var = m.Var(value=0)
R_var = m.Var(value=0)
m.Equation(X_var.dt()==k3*S_param-k4*X_var)
m.Equation(R_var.dt()==k1*S_param-k2*X_var*R_var)
# NOTE(review): IMODE 4 is presumably GEKKO's dynamic-simulation mode; confirm against the GEKKO docs.
m.options.IMODE = 4
m.solve()

fig,ax1 = plt.subplots()
ax1.plot(m.time,R_var,c='k')
ax1.plot(m.time,X_var,c='g')
ax1.set_xlim(0,20)
ax1.set_xlabel('Time')
# The signal S is drawn on a second y-axis that shares the time axis.
ax2 = ax1.twinx()
ax2.plot(m.time,S_param,c='r')
import matplotlib.lines as mlines
# Proxy line handles so one legend covers curves drawn on both axes.
ax1.legend(handles=[mlines.Line2D([],[],c=i,linestyle='-') for i in 'kgr'],labels=['R','X','S'])
plt.savefig('/Users/ligk2e/Desktop/ODE/figure1d_solution_curve.pdf',bbox_inches='tight')
plt.close()

# rate curve
S_options = [1,2,3]
fig,ax = plt.subplots()
for S in S_options:
    R = np.linspace(0,2,100)
    # X is taken at its steady state for this S (dX/dt = 0 gives X = k3*S/k4).
    X_ss = k3*S/k4
    removal_rate = k2 * X_ss * R
    production_rate = [k1*S] * len(R)
    # One colour per signal level, shared by the removal and production curves.
    tmp_dict = {1:'r',2:'purple',3:'g'}
    ax.plot(R,removal_rate,c=tmp_dict[S],linestyle='-')
# The first statement below is the last line of the body of the Figure 1d
# `for S in S_options:` rate-curve loop, so it keeps the loop-body indentation.
    ax.plot(R,production_rate,c=tmp_dict[S],linestyle='--')
ax.set_xlim(0,2)
ax.set_ylim(0,8)
ax.set_xlabel('R')
ax.set_ylabel('Rate')
plt.savefig('/Users/ligk2e/Desktop/ODE/figure1d_rate_curve.pdf',bbox_inches='tight')
plt.close()

'''
Figure1e mutual activation, one-way switch
'''
# Module-level rate constants read by Goldbeter_Koshland's callers, model() and equation() below.
k0,k1,k2,k3,k4,J3,J4 = 0.4,0.01,1,1,0.2,0.05,0.05
def Goldbeter_Koshland(v1,v2,J1,J2):
    '''
    Return the equilibrium concentration of either the phosphorylated or the
    unphosphorylated form of E.

    v1,J1: incoming
    v2,J2: outgoing
    returns 2*v1*J2 / (B + sqrt(B**2 - 4*(v2-v1)*v1*J2)),
            where B = v2 - v1 + J1*v2 + J2*v1
    The expression uses numpy operations, so v1 may be a scalar or an array.
    '''
    B = v2-v1+J1*v2+J2*v1
    equilibrium = 2*v1*J2/(B+np.sqrt(B**2-4*(v2-v1)*v1*J2))
    return equilibrium

def model(R,t,S):
    '''
    Right-hand side of the Figure 1e ODE: dR/dt = k0*EP + k1*S - k2*R,
    where EP = Goldbeter_Koshland(k3*R, k4, J3, J4).

    R: current value of R
    t: time (unused, but required by the odeint callback signature)
    S: signal strength
    Reads the module-level constants k0, k1, k2, k3, k4, J3, J4.
    '''
    EP = Goldbeter_Koshland(k3*R,k4,J3,J4)
    dRdt = k0*EP + k1*S - k2*R
    return dRdt

# Trajectory from R(0) = 1 with S = 0; `result` is not used within this section.
t = np.linspace(0,10,100)
result = odeint(model,y0=1,t=t,args=(0,))

# solution curve, they don't ask me to do that, skip for now

# rate curve
R = np.linspace(0,0.7,100)
S_options = [0,8,16]
fig,ax = plt.subplots()
for S in S_options:
    # Removal (solid) does not depend on S; production (dashed) gains k1*S per signal level.
    removal_rate = k2*R
    EP = Goldbeter_Koshland(k3*R,k4,J3,J4)
    production_rate = k0*EP+k1*S
    ax.plot(R,removal_rate,c='k',linestyle='-')
    ax.plot(R,production_rate,c='k',linestyle='--')
ax.set_xlim(0,0.7)
ax.set_xlabel('R')
ax.set_ylim(0,0.6)
ax.set_ylabel('Rate')
plt.savefig('/Users/ligk2e/Desktop/ODE/figure1e_rate_curve.pdf',bbox_inches='tight')
plt.close()

# signal-response curve
S_options = np.linspace(0,15,100)
def equation(R,S):
    '''
    Steady-state residual of the Figure 1e model: k0*EP + k1*S - k2*R.
    A root in R for a given S is a steady state.
    '''
    EP = Goldbeter_Koshland(k3*R,k4,J3,J4)
    return k0*EP + k1*S - k2*R
fig,ax = plt.subplots()
store_uplimb,store_downlimb = [],[]
for S in S_options:
    # Two root searches per S from different starting guesses (R = 1 and R = 0).
    # The loop body continues past this point, where both roots are stored.
    R_ss_uplimb = fsolve(func=equation,x0=[1],args=(S,))[0]
    R_ss_downlimb = fsolve(func=equation,x0=[0],args=(S,))[0]
store_uplimb.append(R_ss_uplimb) 311 | store_downlimb.append(R_ss_downlimb) 312 | ax.plot(S_options,store_uplimb) 313 | ax.plot(S_options,store_downlimb) 314 | 315 | 316 | 317 | R = np.linspace(-1,2,10000) 318 | result = equation(R,S=5) 319 | fig,ax = plt.subplots() 320 | ax.plot(R,result) 321 | ax.axhline(0,linestyle='--',c='k') 322 | 323 | fsolve(func=equation,x0=[0.0],args=(5,)) 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | -------------------------------------------------------------------------------- /tricks/3_compute_dist_NA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to compute pairwise distance when having missing value?" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import numpy as np\n", 18 | "from sklearn.metrics import pairwise_distances\n", 19 | "from sklearn.metrics.pairwise import nan_euclidean_distances\n", 20 | "from scipy.spatial.distance import squareform, pdist" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## The easist way, when we are free of NA, I'd like to use pdist function" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "a = np.random.randn(3,5)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 4, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/plain": [ 47 | "array([[ 2.2834488 , -0.74877306, 2.3029904 , -0.33272168, 0.62174965],\n", 48 | " [-0.89000248, 0.4347403 , 1.97113721, -1.15321046, -1.84173417],\n", 49 | " [ 0.32945355, -0.20609533, 0.83525747, -1.11246698, -0.5230511 ]])" 50 | ] 51 | }, 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "output_type": 
"execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "a" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 5, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": [ 69 | "array([4.28060484, 2.86101521, 2.22003405])" 70 | ] 71 | }, 72 | "execution_count": 5, 73 | "metadata": {}, 74 | "output_type": "execute_result" 75 | } 76 | ], 77 | "source": [ 78 | "# pdist will return a dense distance matrix\n", 79 | "pdist(a)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "# you can convert to a square distance matrix\n", 87 | "squareform(pdist(a))" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "# What if we have NA value?" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 12, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# if you want to know more about NA value, refer to trick 2 jupyter notebook in the same folder\n", 104 | "a[1,3] = np.nan" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 13, 110 | "metadata": {}, 111 | "outputs": [ 112 | { 113 | "data": { 114 | "text/plain": [ 115 | "array([[ 2.2834488 , -0.74877306, 2.3029904 , -0.33272168, 0.62174965],\n", 116 | " [-0.89000248, 0.4347403 , 1.97113721, nan, -1.84173417],\n", 117 | " [ 0.32945355, -0.20609533, 0.83525747, -1.11246698, -0.5230511 ]])" 118 | ] 119 | }, 120 | "execution_count": 13, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "a" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 14, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "# np.nan (a float object) will be converted to np.float64" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 19, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "numpy.float64" 147 | ] 148 | }, 
149 | "execution_count": 19, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "type(a[1,3])" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "## Theoretically, sklearn pairwise distance should be able to do that, there is a force_all_finite argument." 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 20, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "ename": "ValueError", 172 | "evalue": "Input contains NaN, infinity or a value too large for dtype('float64').", 173 | "output_type": "error", 174 | "traceback": [ 175 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 176 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 177 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpairwise_distances\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 178 | "\u001b[0;32m~/opt/anaconda3/envs/scanpy/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 70\u001b[0m FutureWarning)\n\u001b[1;32m 71\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0marg\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 72\u001b[0;31m 
\u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 73\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0minner_f\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 179 | "\u001b[0;32m~/opt/anaconda3/envs/scanpy/lib/python3.6/site-packages/sklearn/metrics/pairwise.py\u001b[0m in \u001b[0;36mpairwise_distances\u001b[0;34m(X, Y, metric, n_jobs, force_all_finite, **kwds)\u001b[0m\n\u001b[1;32m 1777\u001b[0m \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpartial\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdistance\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcdist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetric\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmetric\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1778\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1779\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_parallel_pairwise\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1780\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1781\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 180 | "\u001b[0;32m~/opt/anaconda3/envs/scanpy/lib/python3.6/site-packages/sklearn/metrics/pairwise.py\u001b[0m in \u001b[0;36m_parallel_pairwise\u001b[0;34m(X, Y, func, n_jobs, **kwds)\u001b[0m\n\u001b[1;32m 1358\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1359\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0meffective_n_jobs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1360\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1361\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1362\u001b[0m \u001b[0;31m# enforce a threading backend to prevent data communication overhead\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 181 | "\u001b[0;32m~/opt/anaconda3/envs/scanpy/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 70\u001b[0m FutureWarning)\n\u001b[1;32m 71\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0marg\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 72\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 73\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0minner_f\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n", 182 | "\u001b[0;32m~/opt/anaconda3/envs/scanpy/lib/python3.6/site-packages/sklearn/metrics/pairwise.py\u001b[0m in \u001b[0;36meuclidean_distances\u001b[0;34m(X, Y, Y_norm_squared, squared, X_norm_squared)\u001b[0m\n\u001b[1;32m 267\u001b[0m \u001b[0mpaired_distances\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0mdistances\u001b[0m \u001b[0mbetweens\u001b[0m \u001b[0mpairs\u001b[0m \u001b[0mof\u001b[0m \u001b[0melements\u001b[0m \u001b[0mof\u001b[0m \u001b[0mX\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 268\u001b[0m \"\"\"\n\u001b[0;32m--> 269\u001b[0;31m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_pairwise_arrays\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 270\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 271\u001b[0m \u001b[0;31m# If norms are passed as float32, they are unused. 
If arrays are passed as\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 183 | "\u001b[0;32m~/opt/anaconda3/envs/scanpy/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 70\u001b[0m FutureWarning)\n\u001b[1;32m 71\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0marg\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 72\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 73\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0minner_f\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 184 | "\u001b[0;32m~/opt/anaconda3/envs/scanpy/lib/python3.6/site-packages/sklearn/metrics/pairwise.py\u001b[0m in \u001b[0;36mcheck_pairwise_arrays\u001b[0;34m(X, Y, precomputed, dtype, accept_sparse, force_all_finite, copy)\u001b[0m\n\u001b[1;32m 141\u001b[0m X = Y = check_array(X, accept_sparse=accept_sparse, dtype=dtype,\n\u001b[1;32m 142\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mforce_all_finite\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mforce_all_finite\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 143\u001b[0;31m 
estimator=estimator)\n\u001b[0m\u001b[1;32m 144\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 145\u001b[0m X = check_array(X, accept_sparse=accept_sparse, dtype=dtype,\n", 185 | "\u001b[0;32m~/opt/anaconda3/envs/scanpy/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 70\u001b[0m FutureWarning)\n\u001b[1;32m 71\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0marg\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 72\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 73\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0minner_f\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 186 | "\u001b[0;32m~/opt/anaconda3/envs/scanpy/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)\u001b[0m\n\u001b[1;32m 643\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mforce_all_finite\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 644\u001b[0m 
_assert_all_finite(array,\n\u001b[0;32m--> 645\u001b[0;31m allow_nan=force_all_finite == 'allow-nan')\n\u001b[0m\u001b[1;32m 646\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 647\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mensure_min_samples\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 187 | "\u001b[0;32m~/opt/anaconda3/envs/scanpy/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36m_assert_all_finite\u001b[0;34m(X, allow_nan, msg_dtype)\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[0mmsg_err\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 98\u001b[0m (type_err,\n\u001b[0;32m---> 99\u001b[0;31m msg_dtype if msg_dtype is not None else X.dtype)\n\u001b[0m\u001b[1;32m 100\u001b[0m )\n\u001b[1;32m 101\u001b[0m \u001b[0;31m# for object dtype data, we only check for NaNs (GH-13254)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 188 | "\u001b[0;31mValueError\u001b[0m: Input contains NaN, infinity or a value too large for dtype('float64')." 
189 | ] 190 | } 191 | ], 192 | "source": [ 193 | "pairwise_distances(X=a)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "You see, it doesn't work: with the default settings, pairwise_distances raises an error when the input contains np.inf, np.nan or pd.NA" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "# What is the workaround?\n" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 21, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "# first using nan_euclidean_distances to compute \n", 217 | "test = nan_euclidean_distances(X=a,Y=a)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 22, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "data": { 227 | "text/plain": [ 228 | "array([[0. , 4.69712359, 2.86101521],\n", 229 | " [4.69712359, 0. , 2.48165548],\n", 230 | " [2.86101521, 2.48165548, 0. ]])" 231 | ] 232 | }, 233 | "execution_count": 22, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "test" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 23, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "# make sure it is symmetric\n", 249 | "test_sym = np.tril(test) + np.tril(test,k=-1).T" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 24, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "data": { 259 | "text/plain": [ 260 | "array([[0. , 4.69712359, 2.86101521],\n", 261 | " [4.69712359, 0. , 2.48165548],\n", 262 | " [2.86101521, 2.48165548, 0. 
]])" 263 | ] 264 | }, 265 | "execution_count": 24, 266 | "metadata": {}, 267 | "output_type": "execute_result" 268 | } 269 | ], 270 | "source": [ 271 | "test_sym\n" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 25, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "# make sure the main diagonal is 0\n", 281 | "np.fill_diagonal(test_sym,0)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 26, 287 | "metadata": {}, 288 | "outputs": [ 289 | { 290 | "data": { 291 | "text/plain": [ 292 | "array([[0. , 4.69712359, 2.86101521],\n", 293 | " [4.69712359, 0. , 2.48165548],\n", 294 | " [2.86101521, 2.48165548, 0. ]])" 295 | ] 296 | }, 297 | "execution_count": 26, 298 | "metadata": {}, 299 | "output_type": "execute_result" 300 | } 301 | ], 302 | "source": [ 303 | "test_sym" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 27, 309 | "metadata": {}, 310 | "outputs": [ 311 | { 312 | "data": { 313 | "text/plain": [ 314 | "array([4.69712359, 2.86101521, 2.48165548])" 315 | ] 316 | }, 317 | "execution_count": 27, 318 | "metadata": {}, 319 | "output_type": "execute_result" 320 | } 321 | ], 322 | "source": [ 323 | "# convert to dense distance matrix using squareform\n", 324 | "squareform(test_sym)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [] 333 | } 334 | ], 335 | "metadata": { 336 | "kernelspec": { 337 | "display_name": "Python 3", 338 | "language": "python", 339 | "name": "python3" 340 | }, 341 | "language_info": { 342 | "codemirror_mode": { 343 | "name": "ipython", 344 | "version": 3 345 | }, 346 | "file_extension": ".py", 347 | "mimetype": "text/x-python", 348 | "name": "python", 349 | "nbconvert_exporter": "python", 350 | "pygments_lexer": "ipython3", 351 | "version": "3.6.12" 352 | } 353 | }, 354 | "nbformat": 4, 355 | "nbformat_minor": 4 356 | } 357 | 
--------------------------------------------------------------------------------