├── 02_SimpleGraph.py ├── 03_IntegralOfDensity.py ├── 03_RunningProportion.py ├── 04_BayesUpdate.py ├── 05_BernBeta.py ├── 05_BetaPosteriorPredictions.py ├── 06_BernGrid.py ├── 07_BernBetaPyMCFull.py ├── 07_BernMetropolisTemplate.py ├── 08_BernTwoGrid.py ├── 08_BernTwoMetropolis.py ├── 08_BernTwoPyMC.py ├── 09_BernBetaMuKappaPyMC.py ├── 09_BernBetaMuKappaPyMC_TT.py ├── 09_FilconPyMC.py ├── 09_FilconPyMC_ex9.2.A.py ├── 09_FilconPyMC_ex9.2.B.py ├── 10_BernBetaModelCompPyMC.py ├── 10_FilconModelCompPyMC.py ├── 10_ToyModelCompPyMC.py ├── 12_OneOddGroupModelComp.py ├── 13_minNforHDIpower.py ├── 15_SystemsPyMC.py ├── 15_YmetricXsinglePyMC.py ├── 16_SimpleLinearRegressionPyMC.py ├── 16_SimpleRobustLinearRegressionPyMC.py ├── 17_MultiLinRegressHyperPyMC.py ├── 17_MultipleLinearRegressionPyMC.py ├── 18_ANOVAonewayNonhomogvarBrugs.py ├── 18_ANOVAonewayPyMC.py ├── 19_ANOVAtwowayPyMC.py ├── Figures ├── Figure_10.2.png ├── Figure_10.3-4.png ├── Figure_12.5.png ├── Figure_15.9.png ├── Figure_16.2.png ├── Figure_16.4.png ├── Figure_16.5.png ├── Figure_16.6.png ├── Figure_16.8a.png ├── Figure_16.8b.png ├── Figure_16.8c.png ├── Figure_16.8d.png ├── Figure_17.5a.png ├── Figure_17.5b.png ├── Figure_17.Xa.png ├── Figure_17.Xb.png ├── Figure_18.2a.png ├── Figure_18.2b.png ├── Figure_18.3.png ├── Figure_19.4.png ├── Figure_19.5.png ├── Figure_2.2.png ├── Figure_3.1.png ├── Figure_3.3.png ├── Figure_4.1.png ├── Figure_4.2.png ├── Figure_4.3.png ├── Figure_5.2.png ├── Figure_6.1.png ├── Figure_6.2.png ├── Figure_6.3.png ├── Figure_7.3.png ├── Figure_7.4.png ├── Figure_7.5.png ├── Figure_7.6_a.png ├── Figure_7.6_b.png ├── Figure_7.6_c.png ├── Figure_8.1.png ├── Figure_8.2.png ├── Figure_8.3.png ├── Figure_8.3_HDI.png ├── Figure_8.6.png ├── Figure_9.11.png ├── Figure_9.12.png ├── Figure_9.14.png ├── Figure_9.16.png ├── Figure_9.16b.png ├── Figure_9.18_lower.png ├── Figure_9.18_upper.png └── figure_15.3.png ├── Guber1999data.txt ├── HDI_of_grid.py ├── HDIofICDF.py ├── HtWtDataGenerator.py ├── IPython └── Kruschkes_Doing_Bayesian_Data_Analysis_in_PyMC3.ipynb ├── McDonaldSK1991data.txt ├── McIntyre1994data.csv ├── QianS2007SeaweedData.txt ├── README.md ├── Salary.csv ├── SolariLS2008data.txt ├── Systems.txt ├── hpd.py └── plot_post.py /02_SimpleGraph.py: -------------------------------------------------------------------------------- 1 | """ 2 | A simple graph drawn by Python :-) 3 | """ 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | plt.style.use('seaborn-darkgrid') 7 | 8 | x = np.linspace(-2, 2, 40) 9 | y = x**2 10 | 11 | plt.plot(x, y) 12 | plt.savefig('Figure_2.2.png') 13 | plt.show() 14 | -------------------------------------------------------------------------------- /03_IntegralOfDensity.py: -------------------------------------------------------------------------------- 1 | """ 2 | Graph of normal probability density function, with comb of intervals. 3 | """ 4 | import matplotlib.pyplot as plt 5 | plt.style.use('seaborn-darkgrid') 6 | import numpy as np 7 | 8 | meanval = 0.0 # Specify mean of distribution. 9 | sdval = 0.2 # Specify standard deviation of distribution. 10 | xlow = meanval - 3 * sdval # Specify low end of x-axis. 11 | xhigh = meanval + 3 * sdval # Specify high end of x-axis. 12 | dx = 0.02 # Specify interval width on x-axis 13 | # Specify comb points along the x axis: 14 | x = np.arange(xlow, xhigh, dx) 15 | # Compute y values, i.e., probability density at each value of x: 16 | y = (1/(sdval*np.sqrt(2*np.pi))) * np.exp(-.5 * ((x - meanval)/sdval)**2) 17 | # Plot the function. 
"plot" draws the bell curve. "stem" draws the intervals. 18 | plt.plot(x, y) 19 | plt.stem(x, y, markerfmt=' ') 20 | 21 | plt.xlabel('$x$') 22 | plt.ylabel('$p(x)$') 23 | plt.title('Normal Probability Density') 24 | # Approximate the integral as the sum of width * height for each interval. 25 | area = np.sum(dx*y) 26 | # Display info in the graph. 27 | plt.text(-.6, 1.7, '$\mu$ = %s' % meanval) 28 | plt.text(-.6, 1.5, '$\sigma$ = %s' % sdval) 29 | plt.text(.2, 1.7, '$\Delta x$ = %s' % dx) 30 | plt.text(.2, 1.5, '$\sum_{x}$ $\Delta x$ $p(x)$ = %5.3f' % area) 31 | 32 | plt.savefig('Figure_3.3.png') 33 | -------------------------------------------------------------------------------- /03_RunningProportion.py: -------------------------------------------------------------------------------- 1 | """ 2 | Goal: Toss a coin N times and compute the running proportion of heads. 3 | """ 4 | import matplotlib.pyplot as plt 5 | plt.style.use('seaborn-darkgrid') 6 | import numpy as np 7 | 8 | # Specify the total number of flips, denoted N. 9 | N = 500 10 | # Generate a random sample of N flips for a fair coin (heads=1, tails=0); 11 | np.random.seed(47405) 12 | flip_sequence = np.random.choice(a=(0, 1), p=(.5, .5), size=N, replace=True) 13 | # Compute the running proportion of heads: 14 | r = np.cumsum(flip_sequence) 15 | n = np.linspace(1, N, N) # n is a vector. 16 | run_prop = r/n # component by component division. 17 | 18 | # Graph the running proportion: 19 | plt.plot(n, run_prop, '-o', ) 20 | plt.xscale('log') # an alternative to plot() and xscale() is semilogx() 21 | plt.xlim(1, N) 22 | plt.ylim(0, 1) 23 | plt.xlabel('Flip Number') 24 | plt.ylabel('Proportion Heads') 25 | plt.title('Running Proportion of Heads') 26 | # Plot a dotted horizontal line at y=.5, just as a reference line: 27 | plt.axhline(y=.5, ls='dashed') 28 | 29 | # Display the beginning of the flip sequence. 30 | flipletters = '' 31 | for i in flip_sequence[:10]: 32 | if i == 1: 33 | flipletters += 'H' 34 | else: 35 | flipletters += 'T' 36 | 37 | plt.text(10, 0.8, 'Flip Sequence = %s...' % flipletters) 38 | # Display the relative frequency at the end of the sequence. 39 | plt.text(25, 0.2, 'End Proportion = %s' % run_prop[-1]) 40 | 41 | plt.savefig('Figure_3.1.png') 42 | -------------------------------------------------------------------------------- /04_BayesUpdate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Bayesian updating of beliefs about the bias of a coin. The prior and posterior 3 | distributions indicate probability masses at discrete candidate values of theta. 4 | """ 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | plt.style.use('seaborn-darkgrid') 8 | 9 | 10 | 11 | # theta is the vector of candidate values for the parameter theta. 12 | # n_theta_vals is the number of candidate theta values. 13 | # To produce the examples in the book, set n_theta_vals to either 3 or 63. 14 | n_theta_vals = 3. 15 | # Now make the vector of theta values: 16 | theta = np.linspace(1/(n_theta_vals +1), n_theta_vals /(n_theta_vals +1), n_theta_vals ) 17 | 18 | # p_theta is the vector of prior probabilities on the theta values. 19 | p_theta = np.minimum(theta, 1-theta) # Makes a triangular belief distribution. 20 | p_theta = p_theta / np.sum(p_theta) # Makes sure that beliefs sum to 1. 21 | 22 | # Specify the data. 
To produce the examples in the book, use either 23 | # data = np.repeat([1,0], [3, 9]) or data = np.repeat([1,0], [1, 11]) 24 | data = np.repeat([1, 0], [3, 9]) 25 | n_heads = np.sum(data) 26 | n_tails = len(data) - n_heads 27 | 28 | # Compute the likelihood of the data for each value of theta: 29 | p_data_given_theta = theta**n_heads * (1-theta)**n_tails 30 | 31 | # Compute the posterior: 32 | p_data = np.sum(p_data_given_theta * p_theta) 33 | p_theta_given_data = p_data_given_theta * p_theta / p_data # This is Bayes' rule! 34 | 35 | # Plot the results. 36 | plt.figure(figsize=(12, 11)) 37 | plt.subplots_adjust(hspace=0.7) 38 | 39 | # Plot the prior: 40 | plt.subplot(3, 1, 1) 41 | plt.stem(theta, p_theta, markerfmt=' ') 42 | plt.xlim(0, 1) 43 | plt.xlabel('$\\theta$') 44 | plt.ylabel('$P(\\theta)$') 45 | plt.title('Prior') 46 | # Plot the likelihood: 47 | plt.subplot(3, 1, 2) 48 | plt.stem(theta, p_data_given_theta, markerfmt=' ') 49 | plt.xlim(0, 1) 50 | plt.xlabel('$\\theta$') 51 | plt.ylabel('$P(D|\\theta)$') 52 | plt.title('Likelihood') 53 | plt.text(0.6, np.max(p_data_given_theta)/2, 'D = %sH,%sT' % (n_heads, n_tails)) 54 | # Plot the posterior: 55 | plt.subplot(3, 1, 3) 56 | plt.stem(theta, p_theta_given_data, markerfmt=' ') 57 | plt.xlim(0, 1) 58 | plt.xlabel('$\\theta$') 59 | plt.ylabel('$P(\\theta|D)$') 60 | plt.title('Posterior') 61 | plt.text(0.6, np.max(p_theta_given_data)/2, 'P(D) = %g' % p_data) 62 | 63 | plt.savefig('Figure_4.1.png') 64 | -------------------------------------------------------------------------------- /05_BernBeta.py: -------------------------------------------------------------------------------- 1 | """ 2 | Inferring a binomial proportion via exact mathematical analysis. 3 | """ 4 | import sys 5 | import numpy as np 6 | from scipy.stats import beta 7 | from scipy.special import beta as beta_func 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | from HDIofICDF import * 11 | 12 | 13 | def bern_beta(prior_shape, data_vec, cred_mass=0.95): 14 | """Bayesian updating for Bernoulli likelihood and beta prior. 15 | Input arguments: 16 | prior_shape 17 | vector of parameter values for the prior beta distribution. 18 | data_vec 19 | vector of 1's and 0's. 20 | cred_mass 21 | the probability mass of the HDI. 22 | Output: 23 | post_shape 24 | vector of parameter values for the posterior beta distribution. 25 | Graphics: 26 | Creates a three-panel graph of prior, likelihood, and posterior 27 | with highest posterior density interval. 
28 | Example of use: 29 | post_shape = bern_beta(prior_shape=[1,1] , data_vec=[1,0,0,1,1])""" 30 | 31 | # Check for errors in input arguments: 32 | if len(prior_shape) != 2: 33 | sys.exit('prior_shape must have two components.') 34 | if any([i < 0 for i in prior_shape]): 35 | sys.exit('prior_shape components must be positive.') 36 | if any([i != 0 and i != 1 for i in data_vec]): 37 | sys.exit('data_vec must be a vector of 1s and 0s.') 38 | if cred_mass <= 0 or cred_mass >= 1.0: 39 | sys.exit('cred_mass must be between 0 and 1.') 40 | 41 | # Rename the prior shape parameters, for convenience: 42 | a = prior_shape[0] 43 | b = prior_shape[1] 44 | # Create summary values of the data: 45 | z = sum(data_vec[data_vec == 1]) # number of 1's in data_vec 46 | N = len(data_vec) # number of flips in data_vec 47 | # Compute the posterior shape parameters: 48 | post_shape = [a+z, b+N-z] 49 | # Compute the evidence, p(D): 50 | p_data = beta_func(z+a, N-z+b)/beta_func(a, b) 51 | # Construct grid of theta values, used for graphing. 52 | bin_width = 0.005 # Arbitrary small value for comb on theta. 53 | theta = np.arange(bin_width/2, 1-(bin_width/2)+bin_width, bin_width) 54 | # Compute the prior at each value of theta. 55 | p_theta = beta.pdf(theta, a, b) 56 | # Compute the likelihood of the data at each value of theta. 57 | p_data_given_theta = theta**z * (1-theta)**(N-z) 58 | # Compute the posterior at each value of theta. 59 | post_a = a + z 60 | post_b = b+N-z 61 | p_theta_given_data = beta.pdf(theta, a+z, b+N-z) 62 | # Determine the limits of the highest density interval 63 | intervals = HDIofICDF(beta, cred_mass, a=post_shape[0], b=post_shape[1]) 64 | 65 | # Plot the results. 66 | plt.figure(figsize=(12, 12)) 67 | plt.subplots_adjust(hspace=0.7) 68 | 69 | # Plot the prior. 
70 | locx = 0.05 71 | plt.subplot(3, 1, 1) 72 | plt.plot(theta, p_theta) 73 | plt.xlim(0, 1) 74 | plt.ylim(0, np.max(p_theta)*1.2) 75 | plt.xlabel(r'$\theta$') 76 | plt.ylabel(r'$P(\theta)$') 77 | plt.title('Prior') 78 | plt.text(locx, np.max(p_theta)/2, r'beta($\theta$;%s,%s)' % (a, b)) 79 | # Plot the likelihood: 80 | plt.subplot(3, 1, 2) 81 | plt.plot(theta, p_data_given_theta) 82 | plt.xlim(0, 1) 83 | plt.ylim(0, np.max(p_data_given_theta)*1.2) 84 | plt.xlabel(r'$\theta$') 85 | plt.ylabel(r'$P(D|\theta)$') 86 | plt.title('Likelihood') 87 | plt.text(locx, np.max(p_data_given_theta)/2, 'Data: z=%s, N=%s' % (z, N)) 88 | # Plot the posterior: 89 | plt.subplot(3, 1, 3) 90 | plt.plot(theta, p_theta_given_data) 91 | plt.xlim(0, 1) 92 | plt.ylim(0, np.max(p_theta_given_data)*1.2) 93 | plt.xlabel(r'$\theta$') 94 | plt.ylabel(r'$P(\theta|D)$') 95 | plt.title('Posterior') 96 | locy = np.linspace(0, np.max(p_theta_given_data), 5) 97 | plt.text(locx, locy[1], r'beta($\theta$;%s,%s)' % (post_a, post_b)) 98 | plt.text(locx, locy[2], 'P(D) = %g' % p_data) 99 | # Plot the HDI 100 | plt.text(locx, locy[3], 101 | 'Intervals = %.3f - %.3f' % (intervals[0], intervals[1])) 102 | plt.fill_between(theta, 0, p_theta_given_data, 103 | where=np.logical_and(theta > intervals[0], 104 | theta < intervals[1]), 105 | color='blue', alpha=0.3) 106 | return intervals 107 | 108 | data_vec = np.repeat([1, 0], [11, 3]) # 11 heads, 3 tail 109 | intervals = bern_beta(prior_shape=[100, 100], data_vec=data_vec) 110 | plt.savefig('Figure_5.2.png') 111 | plt.show() 112 | 113 | -------------------------------------------------------------------------------- /05_BetaPosteriorPredictions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Posterior predictive check. Examine the veracity of the winning model by 3 | simulating data sampled from the winning model and see if the simulated data 4 | 'look like' the actual data. 5 | """ 6 | import numpy as np 7 | from scipy.stats import beta 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | 11 | # Specify known values of prior and actual data. 12 | prior_a = 100 13 | prior_b = 1 14 | actual_data_Z = 8 15 | actual_data_N = 12 16 | # Compute posterior parameter values. 17 | post_a = prior_a + actual_data_Z 18 | post_b = prior_b + actual_data_N - actual_data_Z 19 | # Number of flips in a simulated sample should match the actual sample size: 20 | sim_sample_size = actual_data_N 21 | # Designate an arbitrarily large number of simulated samples. 22 | n_sim_samples = 1000 23 | # Set aside a vector in which to store the simulation results. 24 | sim_sample_Z_record = np.zeros(n_sim_samples) 25 | # Now generate samples from the posterior. 26 | for sample_idx in range(0, n_sim_samples): 27 | # Generate a theta value for the new sample from the posterior. 28 | sample_theta = beta.rvs(post_a, post_b) 29 | # Generate a sample, using sample_theta. 30 | sample_data = np.random.choice([0, 1], p=[1-sample_theta, sample_theta], 31 | size=sim_sample_size, replace=True) 32 | sim_sample_Z_record[sample_idx] = sum(sample_data) 33 | 34 | 35 | ## Make a histogram of the number of heads in the samples. 36 | plt.hist(sim_sample_Z_record) 37 | plt.show() 38 | -------------------------------------------------------------------------------- /06_BernGrid.py: -------------------------------------------------------------------------------- 1 | """ 2 | Inferring a binomial proportion via grid approximation. 
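Theta is restricted to a dense grid of candidate values; the prior, likelihood, and posterior are all handled as probability masses on that grid, and the HDI is estimated by resampling theta values from the gridded posterior.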
3 | """ 4 | import matplotlib.pyplot as plt 5 | plt.style.use('seaborn-darkgrid') 6 | import numpy as np 7 | from hpd import hpd 8 | 9 | 10 | def bern_grid(theta, p_theta, data, credib=.95): 11 | """ 12 | Bayesian updating for Bernoulli likelihood and prior specified on a grid. 13 | Input arguments: 14 | theta is a vector of theta values, all between 0 and 1. 15 | p_theta is a vector of corresponding probability _masses_. 16 | data is a vector of 1's and 0's, where 1 corresponds to a and 0 to b. 17 | credib is the probability mass of the credible interval, default is 0.95. 18 | Output: 19 | p_theta_given_data is a vector of posterior probability masses over theta. 20 | Also creates a three-panel graph of prior, likelihood, and posterior 21 | probability masses with credible interval. 22 | Example of use: 23 | Create vector of theta values. 24 | bin_width = 1/1000 25 | theta_grid = np.arange(0, 1+bin_width, bin_width) 26 | Specify probability mass at each theta value. 27 | > rel_prob = np.minimum(theta_grid, 1-theta_grid) relative prob at each theta 28 | > prior = rel_prob / sum(rel_prob) probability mass at each theta 29 | Specify the data vector. 30 | data_vec = np.repeat([1, 0], [11, 3]) # 3 heads, 1 tail 31 | Call the function. 32 | > posterior = bern_grid( theta=theta_grid , p_theta=prior , data=data_vec ) 33 | """ 34 | 35 | # Create summary values of data 36 | z = sum(data[data == 1]) # number of 1's in data 37 | N = len(data) # number of flips in data 38 | # Compute the likelihood of the data for each value of theta. 39 | p_data_given_theta = theta**z * (1 - theta)**(N - z) 40 | # Compute the evidence and the posterior. 41 | p_data = sum(p_data_given_theta * p_theta) 42 | p_theta_given_data = p_data_given_theta * p_theta / p_data 43 | # Determine the limits of the highest density interval 44 | x = np.random.choice(theta, size=5000, replace=True, p=p_theta_given_data) 45 | intervals = hpd(x, alpha=1-credib) 46 | 47 | # Plot the results. 48 | plt.figure(figsize=(12, 12)) 49 | plt.subplots_adjust(hspace=0.7) 50 | 51 | # # Plot the prior. 
52 | locx = 0.05 53 | mean_theta = sum(theta * p_theta) # mean of prior, for plotting 54 | plt.subplot(3, 1, 1) 55 | plt.plot(theta, p_theta) 56 | plt.xlim(0, 1) 57 | plt.ylim(0, np.max(p_theta)*1.2) 58 | plt.xlabel(r'$\theta$') 59 | plt.ylabel(r'$P(\theta)$') 60 | plt.title('Prior') 61 | plt.text(locx, np.max(p_theta)/2, r'mean($\theta$;%5.2f)' % mean_theta) 62 | # Plot the likelihood: 63 | plt.subplot(3, 1, 2) 64 | plt.plot(theta, p_data_given_theta) 65 | plt.xlim(0, 1) 66 | plt.ylim(0, np.max(p_data_given_theta)*1.2) 67 | plt.xlabel(r'$\theta$') 68 | plt.ylabel(r'$P(D|\theta)$') 69 | plt.title('Likelihood') 70 | plt.text(locx, np.max(p_data_given_theta)/2, 'data: z=%s, N=%s' % (z, N)) 71 | # Plot the posterior: 72 | mean_theta_given_data = sum(theta * p_theta_given_data) 73 | plt.subplot(3, 1, 3) 74 | plt.plot(theta, p_theta_given_data) 75 | plt.xlim(0, 1) 76 | plt.ylim(0, np.max(p_theta_given_data)*1.2) 77 | plt.xlabel(r'$\theta$') 78 | plt.ylabel(r'$P(\theta|D)$') 79 | plt.title('Posterior') 80 | loc = np.linspace(0, np.max(p_theta_given_data), 5) 81 | plt.text(locx, loc[1], r'mean($\theta$;%5.2f)' % mean_theta_given_data) 82 | plt.text(locx, loc[2], 'P(D) = %g' % p_data) 83 | # Plot the HDI 84 | plt.text(locx, loc[3], 85 | 'Intervals =%s' % ', '.join('%.3f' % x for x in intervals)) 86 | for i in range(0, len(intervals), 2): 87 | plt.fill_between(theta, 0, p_theta_given_data, 88 | where=np.logical_and(theta > intervals[i], 89 | theta < intervals[i+1]), 90 | color='blue', alpha=0.3) 91 | plt.savefig('Figure_6.1.png') 92 | plt.show() 93 | return p_theta_given_data 94 | 95 | 96 | ###Create vector of theta values. 97 | bin_width = 1/1000. 98 | theta_grid = np.arange(0, 1+bin_width, bin_width) 99 | ##Specify probability mass at each theta value. 100 | rel_prob = np.array([0.1] * len(theta_grid)) # uniform prior 101 | rel_prob = np.array([0.1] * len(theta_grid)) # uniform prior 102 | prior = rel_prob / sum(rel_prob) # probability mass at each theta 103 | 104 | 105 | #### figure 6.2 ### 106 | #np.random.seed(123) 107 | #a = [0.1] * 50 108 | #b = np.linspace(0.1, 1, 50) 109 | #c = np.linspace(1, 0.1, 50) 110 | #d = [0.1] * 50 111 | #p_theta = np.concatenate((a, b, c, d)) 112 | #prior = np.where(p_theta != 0 , p_theta / sum(p_theta), 0.) 113 | #width = 1. / len(p_theta) 114 | #theta_grid = np.arange(width/2 , (1-width/2)+width, width) 115 | 116 | ### figure 6.3 ### 117 | #np.random.seed(123) 118 | #a = np.repeat([0], [50]) 119 | #b = np.linspace(0, 1, 50) 120 | #c = (np.linspace(1, 0, 20))**2 121 | #d = np.random.uniform(size=3) 122 | #e = np.repeat([1], [20]) 123 | #p_theta = np.concatenate((a, b, c, d, e)) 124 | #prior = np.where(p_theta != 0 , p_theta / sum(p_theta), 0.) 125 | #width = 1. / len(p_theta) 126 | #theta_grid = np.arange(width/2 , (1-width/2)+width, width) 127 | 128 | ###Specify the data vector. 129 | data_vec = np.repeat([1, 0], [11, 3]) # 3 heads, 1 tail 130 | ###Call the function. 131 | posterior = bern_grid(theta=theta_grid, p_theta=prior, data=data_vec) 132 | -------------------------------------------------------------------------------- /07_BernBetaPyMCFull.py: -------------------------------------------------------------------------------- 1 | """ 2 | Inferring a binomial proportion using PyMC. 
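The coin-flip data (11 heads, 3 tails) get a Beta(1,1) prior and a Bernoulli likelihood, and the posterior for theta is sampled with MCMC rather than computed analytically or on a grid.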
3 | """ 4 | import matplotlib.pyplot as plt 5 | plt.style.use('seaborn-darkgrid') 6 | import numpy as np 7 | import pymc3 as pm 8 | 9 | # Generate the data 10 | y = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]) # 11 heads and 3 tails 11 | 12 | 13 | with pm.Model() as model: 14 | # define the prior 15 | theta = pm.Beta('theta', 1., 1.) # prior 16 | # define the likelihood 17 | y = pm.Bernoulli('y', p=theta, observed=y) 18 | 19 | # Generate a MCMC chain 20 | trace = pm.sample(1000) 21 | 22 | 23 | # create an array with the posterior sample 24 | theta_sample = trace['theta'] 25 | 26 | fig, ax = plt.subplots(1, 2) 27 | ax[0].plot(theta_sample[:500], np.arange(500), marker='o', color='skyblue') 28 | ax[0].set_xlim(0, 1) 29 | ax[0].set_xlabel(r'$\theta$') 30 | ax[0].set_ylabel('Position in Chain') 31 | 32 | pm.plot_posterior(theta_sample, ax=ax[1], color='skyblue'); 33 | ax[1].set_xlabel(r'$\theta$'); 34 | 35 | # Posterior prediction: 36 | # For each step in the chain, use posterior theta to flip a coin: 37 | y_pred = np.zeros(len(theta_sample)) 38 | for i, p_head in enumerate(theta_sample): 39 | y_pred[i] = np.random.choice([0, 1], p=[1 - p_head, p_head]) 40 | 41 | # Jitter the 0,1 y values for plotting purposes: 42 | y_pred_jittered = y_pred + np.random.uniform(-.05, .05, size=len(theta_sample)) 43 | 44 | # Now plot the jittered values: 45 | plt.figure() 46 | plt.plot(theta_sample[:500], y_pred_jittered[:500], 'C1o') 47 | plt.xlim(-.1, 1.1) 48 | plt.ylim(-.1, 1.1) 49 | plt.xlabel(r'$\theta$') 50 | plt.ylabel('y (jittered)') 51 | 52 | mean_y = np.mean(y_pred) 53 | mean_theta = np.mean(theta_sample) 54 | 55 | plt.plot(mean_y, mean_theta, 'k+', markersize=15) 56 | plt.annotate('mean(y) = %.2f\nmean($\\theta$) = %.2f' % 57 | (mean_y, mean_theta), xy=(mean_y, mean_theta)) 58 | plt.plot([0, 1], [0, 1], linestyle='--') 59 | 60 | plt.savefig('BernBetaPyMCPost.png') 61 | plt.show() 62 | -------------------------------------------------------------------------------- /07_BernMetropolisTemplate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Use this program as a template for experimenting with the Metropolis algorithm 3 | applied to a single parameter called theta, defined on the interval [0,1]. 4 | """ 5 | from __future__ import division 6 | import numpy as np 7 | import pymc3 as pm 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | from scipy.stats import beta 11 | 12 | 13 | 14 | # Specify the data, to be used in the likelihood function. 15 | # This is a vector with one component per flip, 16 | # in which 1 means a "head" and 0 means a "tail". 17 | my_data = np.repeat([1, 0], [11, 3]) # 11 heads, 2 tail 18 | 19 | # Define the Bernoulli likelihood function, p(D|theta). 20 | # The argument theta could be a vector, not just a scalar. 21 | def likelihood(theta, data): 22 | theta = np.array(theta) # ensure you have an array 23 | z = sum(data[data == 1]) # number of 1's in Data 24 | N = len(data) # number of flips in Data 25 | # Compute the likelihood of the Data for each value of Theta. 26 | if np.size(theta) == 1: # if theta is an scalar 27 | p_data_given_theta = 0 28 | if theta < 1 and theta > 0: 29 | p_data_given_theta = theta**z * (1-theta)**(N-z) 30 | else: # if theta is an array 31 | p_data_given_theta = theta**z * (1-theta)**(N-z) 32 | # The theta values passed into this function are generated at random, 33 | # and therefore might be inadvertently greater than 1 or less than 0. 
34 | # The likelihood for theta > 1 or for theta < 0 is zero: 35 | p_data_given_theta[(theta > 1) | (theta < 0)] = 0 36 | return p_data_given_theta 37 | 38 | 39 | # Define the prior density function. For purposes of computing p(D), 40 | # at the end of this program, we want this prior to be a proper density. 41 | # The argument theta could be a vector, not just a scalar. 42 | def prior(theta): 43 | theta = np.array(theta) # ensure you have an array 44 | # For kicks, here's a bimodal prior. To try it, uncomment the next 2 lines. 45 | #from scipy.stats import beta 46 | #prior = dbeta(np.minium(2*theta, 2*(1-theta)), 2, 2) 47 | if np.size(theta) == 1: # if theta is an scalar 48 | prior = 0 49 | if theta < 1 and theta > 0: 50 | prior = 1 51 | else: # if theta is an array 52 | prior = np.ones(len(theta)) # uniform density over [0,1] 53 | # The theta values passed into this function are generated at random, 54 | # and therefore might be inadvertently greater than 1 or less than 0. 55 | # The likelihood for theta > 1 or for theta < 0 is zero: 56 | prior[(theta > 1) | (theta < 0)] = 0 57 | return prior 58 | 59 | 60 | 61 | # Define the relative probability of the target distribution, 62 | # as a function of vector theta. For our application, this 63 | # target distribution is the unnormalized posterior distribution. 64 | def target_rel_prob(theta, data): 65 | target_rel_prob = likelihood(theta , data) * prior(theta) 66 | return target_rel_prob 67 | 68 | # Specify the length of the trajectory, i.e., the number of jumps to try: 69 | traj_length = 5000 # arbitrary large number 70 | # Initialize the vector that will store the results: 71 | trajectory = np.zeros(traj_length) 72 | # Specify where to start the trajectory: 73 | trajectory[0] = 0.50 # arbitrary value 74 | # Specify the burn-in period: 75 | burn_in = int(np.ceil(0.1 * traj_length)) # arbitrary number, less than traj_length 76 | # Initialize accepted, rejected counters, just to monitor performance: 77 | n_accepted = 0 78 | n_rejected = 0 79 | # Specify seed to reproduce same random walk: 80 | np.random.seed(4745) 81 | 82 | # Now generate the random walk. The 't' index is time or trial in the walk. 83 | for t in range(traj_length-1): 84 | current_position = trajectory[t] 85 | # Use the proposal distribution to generate a proposed jump. 86 | # The shape and variance of the proposal distribution can be changed 87 | # to whatever you think is appropriate for the target distribution. 88 | proposed_jump = np.random.normal(loc=0 , scale=0.1, size=1) 89 | 90 | # # Compute the probability of accepting the proposed jump. 91 | prob_accept = np.minimum(1, 92 | target_rel_prob(current_position + proposed_jump, my_data) 93 | / target_rel_prob(current_position, my_data)) 94 | # # Generate a random uniform value from the interval [0,1] to 95 | # # decide whether or not to accept the proposed jump. 96 | if np.random.rand() < prob_accept: 97 | # accept the proposed jump 98 | trajectory[t+1] = current_position + proposed_jump 99 | # increment the accepted counter, just to monitor performance 100 | if t > burn_in: 101 | n_accepted += 1 102 | else: 103 | # reject the proposed jump, stay at current position 104 | trajectory[t+1] = current_position 105 | # increment the rejected counter, just to monitor performance 106 | if t > burn_in: 107 | n_rejected += 1 108 | 109 | 110 | # Extract the post-burn_in portion of the trajectory. 111 | accepted_traj = trajectory[burn_in:] 112 | # End of Metropolis algorithm. 113 | 114 | # Display the posterior. 
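# (Added sanity check, not part of the original script: with the uniform prior
# defined above and z = 11 heads in N = 14 flips, the exact posterior is the
# conjugate Beta(z+1, N-z+1) = Beta(12, 4), so the chain mean should be close
# to the analytic mean 12/16 = 0.75.)
print('Sanity check: sample mean = %.3f, exact Beta(12,4) mean = %.3f'
      % (np.mean(accepted_traj), 12/16))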
115 | ROPE = np.array([0.76, 0.8]) 116 | pm.plot_posterior(accepted_traj, ref_val=0.9, rope=ROPE) 117 | plt.xlabel = 'theta' 118 | 119 | 120 | # Display rejected/accepted ratio in the plot. 121 | mean_traj = np.mean(accepted_traj) 122 | std_traj = np.std(accepted_traj) 123 | plt.plot(0, label=r'$N_{pro}=%s$ $\frac{N_{acc}}{N_{pro}} = %.3f$' % (len(accepted_traj), (n_accepted/len(accepted_traj))), alpha=0) 124 | 125 | # Evidence for model, p(D). 126 | 127 | # Compute a,b parameters for beta distribution that has the same mean 128 | # and stdev as the sample from the posterior. This is a useful choice 129 | # when the likelihood function is Bernoulli. 130 | a = mean_traj * ((mean_traj*(1 - mean_traj)/std_traj**2) - 1) 131 | b = (1 - mean_traj) * ((mean_traj*(1 - mean_traj)/std_traj**2) - 1) 132 | 133 | # For every theta value in the posterior sample, compute 134 | # dbeta(theta,a,b) / likelihood(theta)*prior(theta) 135 | # This computation assumes that likelihood and prior are proper densities, 136 | # i.e., not just relative probabilities. This computation also assumes that 137 | # the likelihood and prior functions were defined to accept a vector argument, 138 | # not just a single-component scalar argument. 139 | wtd_evid = beta.pdf(accepted_traj, a, b) / (likelihood(accepted_traj, my_data) * prior(accepted_traj)) 140 | p_data = 1 / np.mean(wtd_evid) 141 | 142 | 143 | # Display p(D) in the graph 144 | plt.plot(0, label='p(D) = %.3e' % p_data, alpha=0) 145 | 146 | 147 | 148 | # Uncomment next line if you want to save the graph. 149 | plt.savefig('BernMetropolisTemplate.png') 150 | plt.show() 151 | -------------------------------------------------------------------------------- /08_BernTwoGrid.py: -------------------------------------------------------------------------------- 1 | """ 2 | Inferring two binomial proportions via grid approximation. 3 | """ 4 | from __future__ import division 5 | import matplotlib.pyplot as plt 6 | plt.style.use('seaborn-darkgrid') 7 | from mpl_toolkits.mplot3d.axes3d import Axes3D 8 | from scipy.stats import beta 9 | from HDI_of_grid import HDI_of_grid 10 | import numpy as np 11 | 12 | 13 | # Specify the grid on theta1,theta2 parameter space. 14 | n_int = 500 # arbitrary number of intervals for grid on theta. 15 | theta1 = np.linspace(0, 1, n_int) 16 | theta2 = theta1 17 | 18 | theta1_grid, theta2_grid = np.meshgrid(theta1, theta2) 19 | 20 | # Specify the prior probability _masses_ on the grid. 21 | prior_name = ("Beta","Ripples","Null","Alt")[0] # or define your own. 
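# (Added note: the tuple index above selects which prior this run uses; index 1,
# for example, would pick "Ripples". Each branch below evaluates its prior on
# the theta1 x theta2 grid and divides by the total, so the grid values sum to
# 1 and are treated as probability masses rather than densities.)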
22 | if prior_name == "Beta": 23 | a1, b1, a2, b2 = 3, 3, 3, 3 24 | prior1 = beta.pdf(theta1_grid, a1, b1) 25 | prior2 = beta.pdf(theta2_grid, a1, b1) 26 | prior = prior1 * prior2 27 | prior = prior / np.sum(prior) 28 | 29 | if prior_name == "Ripples": 30 | m1, m2, k = 0, 1, 0.75 * np.pi 31 | prior = np.sin((k*(theta1_grid-m1))**2 + (k*(theta2_grid-m2))**2)**2 32 | prior = prior / np.sum(prior) 33 | 34 | if prior_name == "Null": 35 | # 1's at theta1=theta2, 0's everywhere else: 36 | prior = np.eye(len(theta1_grid), len(theta2_grid)) 37 | prior = prior / np.sum(prior) 38 | 39 | if prior_name == "Alt": 40 | # # Uniform: 41 | prior = np.ones((len(theta1_grid), len(theta2_grid))) 42 | prior = prior / np.sum(prior) 43 | 44 | # Specify likelihood 45 | z1, N1, z2, N2 = 5, 7, 2, 7 # data are specified here 46 | likelihood = theta1_grid**z1 * (1-theta1_grid)**(N1-z1) * theta2_grid**z2 * (1-theta2_grid)**(N2-z2) 47 | 48 | # Compute posterior from point-by-point multiplication and normalizing: 49 | p_data = np.sum(prior * likelihood) 50 | posterior = (prior * likelihood) / p_data 51 | 52 | # Specify the probability mass for the HDI region 53 | credib = .95 54 | thin = 4 55 | color = 'skyblue' 56 | 57 | fig = plt.figure(figsize=(12,12)) 58 | 59 | # prior 60 | ax = fig.add_subplot(3, 2, 1, projection='3d') 61 | ax.plot_surface(theta1_grid[::thin,::thin], theta2_grid[::thin,::thin], prior[::thin,::thin], color=color) 62 | ax.set_xlabel(r'$\theta1$') 63 | ax.set_ylabel(r'$\theta1$') 64 | ax.set_zlabel(r'$p(t1,t2)$') 65 | ax.set_xticklabels([]) 66 | ax.set_yticklabels([]) 67 | ax.set_zticklabels([]) 68 | 69 | plt.subplot(3, 2, 2) 70 | plt.contour(theta1_grid, theta2_grid, prior, colors=color) 71 | plt.xlabel(r'$\theta1$') 72 | plt.ylabel(r'$\theta1$') 73 | 74 | # likelihood 75 | ax = fig.add_subplot(3, 2, 3, projection='3d') 76 | ax.plot_surface(theta1_grid[::thin,::thin], theta2_grid[::thin,::thin], likelihood[::thin,::thin], color=color) 77 | ax.set_xlabel(r'$\theta1$') 78 | ax.set_ylabel(r'$\theta1$') 79 | ax.set_zlabel(r'$p(D|t1,t2)$') 80 | ax.set_xticklabels([]) 81 | ax.set_yticklabels([]) 82 | ax.set_zticklabels([]) 83 | 84 | plt.subplot(3, 2, 4) 85 | plt.contour(theta1_grid, theta2_grid, likelihood, colors=color) 86 | plt.xlabel(r'$\theta1$') 87 | plt.ylabel(r'$\theta1$') 88 | plt.plot(0, label='z1,N1,z2,N2=%s,%s,%s,%s' % (z1, N1, z2, N2), alpha=0) 89 | plt.legend(loc='upper left') 90 | 91 | # posterior 92 | ax = fig.add_subplot(3, 2, 5, projection='3d') 93 | ax.plot_surface(theta1_grid[::thin,::thin], theta2_grid[::thin,::thin],posterior[::thin,::thin], color=color) 94 | ax.set_xlabel(r'$\theta1$') 95 | ax.set_ylabel(r'$\theta1$') 96 | ax.set_zlabel(r'$p(t1,t2|D)$') 97 | ax.set_xticklabels([]) 98 | ax.set_yticklabels([]) 99 | ax.set_zticklabels([]) 100 | 101 | plt.subplot(3, 2, 6) 102 | plt.contour(theta1_grid, theta2_grid, posterior, colors=color) 103 | plt.xlabel(r'$\theta1$') 104 | plt.ylabel(r'$\theta1$') 105 | plt.plot(0, label='p(D) = %.3e' % p_data, alpha=0) 106 | plt.legend(loc='upper left') 107 | 108 | # Mark the highest posterior density region 109 | HDI_height = HDI_of_grid(posterior)['height'] 110 | plt.contour(theta1_grid, theta2_grid, posterior, levels=[HDI_height], colors='k') 111 | 112 | plt.tight_layout() 113 | plt.savefig('BernTwoGrid_%s.png' % prior_name) 114 | plt.show() 115 | -------------------------------------------------------------------------------- /08_BernTwoMetropolis.py: -------------------------------------------------------------------------------- 1 | """ 2 | Use 
this program as a template for experimenting with the Metropolis algorithm 3 | applied to 2 parameters called theta1,theta2 defined on the domain [0,1]x[0,1]. 4 | """ 5 | from __future__ import division 6 | import numpy as np 7 | from scipy.stats import beta 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | 11 | 12 | # Define the likelihood function. 13 | # The input argument is a vector: theta = [theta1 , theta2] 14 | 15 | def likelihood(theta): 16 | # Data are constants, specified here: 17 | z1, N1, z2, N2 = 5, 7, 2, 7 18 | likelihood = (theta[0]**z1 * (1-theta[0])**(N1-z1) 19 | * theta[1]**z2 * (1-theta[1])**(N2-z2)) 20 | return likelihood 21 | 22 | 23 | # Define the prior density function. 24 | # The input argument is a vector: theta = [theta1 , theta2] 25 | def prior(theta): 26 | # Here's a beta-beta prior: 27 | a1, b1, a2, b2 = 3, 3, 3, 3 28 | prior = beta.pdf(theta[0], a1, b1) * beta.pdf(theta[1], a2, b2) 29 | return prior 30 | 31 | 32 | # Define the relative probability of the target distribution, as a function 33 | # of theta. The input argument is a vector: theta = [theta1 , theta2]. 34 | # For our purposes, the value returned is the UNnormalized posterior prob. 35 | def target_rel_prob(theta): 36 | if ((theta >= 0.0).all() & (theta <= 1.0).all()): 37 | target_rel_probVal = likelihood(theta) * prior(theta) 38 | else: 39 | # This part is important so that the Metropolis algorithm 40 | # never accepts a jump to an invalid parameter value. 41 | target_rel_probVal = 0.0 42 | return target_rel_probVal 43 | # if ( all( theta >= 0.0 ) & all( theta <= 1.0 ) ) { 44 | # target_rel_probVal = likelihood( theta ) * prior( theta ) 45 | 46 | 47 | # Specify the length of the trajectory, i.e., the number of jumps to try: 48 | traj_length = 5000 # arbitrary large number 49 | # Initialize the vector that will store the results. 50 | trajectory = np.zeros((traj_length, 2)) 51 | # Specify where to start the trajectory 52 | trajectory[0, ] = [0.50, 0.50] # arbitrary start values of the two param's 53 | # Specify the burn-in period. 54 | burn_in = int(np.ceil(.1 * traj_length)) # arbitrary number 55 | # Initialize accepted, rejected counters, just to monitor performance. 56 | n_accepted = 0 57 | n_rejected = 0 58 | # Specify the seed, so the trajectory can be reproduced. 59 | np.random.seed(47405) 60 | # Specify the covariance matrix for multivariate normal proposal distribution. 61 | n_dim, sd1, sd2 = 2, 0.2, 0.2 62 | covar_mat = [[sd1**2, 0], [0, sd2**2]] 63 | 64 | # Now generate the random walk. step is the step in the walk. 65 | for step in range(traj_length-1): 66 | current_position = trajectory[step, ] 67 | # Use the proposal distribution to generate a proposed jump. 68 | # The shape and variance of the proposal distribution can be changed 69 | # to whatever you think is appropriate for the target distribution. 70 | proposed_jump = np.random.multivariate_normal(mean=np.zeros((n_dim)), 71 | cov=covar_mat) 72 | # Compute the probability of accepting the proposed jump. 73 | prob_accept = np.minimum(1, target_rel_prob(current_position + proposed_jump) 74 | / target_rel_prob(current_position)) 75 | # Generate a random uniform value from the interval [0,1] to 76 | # decide whether or not to accept the proposed jump. 
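# (Added note: drawing u ~ Uniform(0,1) and accepting whenever u < prob_accept
# accepts the jump with probability exactly prob_accept, i.e. the Metropolis
# rule min(1, target(proposed) / target(current)).)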
77 | if np.random.rand() < prob_accept: 78 | # accept the proposed jump 79 | trajectory[step+1, ] = current_position + proposed_jump 80 | # increment the accepted counter, just to monitor performance 81 | if step > burn_in: 82 | n_accepted += 1 83 | else: 84 | # reject the proposed jump, stay at current position 85 | trajectory[step+1, ] = current_position 86 | # increment the rejected counter, just to monitor performance 87 | if step > burn_in: 88 | n_rejected += 1 89 | 90 | # End of Metropolis algorithm. 91 | 92 | #----------------------------------------------------------------------- 93 | # Begin making inferences by using the sample generated by the 94 | # Metropolis algorithm. 95 | 96 | # Extract just the post-burnIn portion of the trajectory. 97 | accepted_traj = trajectory[burn_in:] 98 | 99 | # Compute the means of the accepted points. 100 | mean_traj = np.mean(accepted_traj, axis=0) 101 | # Compute the standard deviations of the accepted points. 102 | stdTraj = np.std(accepted_traj, axis=0) 103 | 104 | # Plot the trajectory of the last 500 sampled values. 105 | plt.plot(accepted_traj[:,0], accepted_traj[:,1], marker='o', alpha=0.3) 106 | plt.xlim(0, 1) 107 | plt.ylim(0, 1) 108 | plt.xlabel(r'$\theta1$') 109 | plt.ylabel(r'$\theta2$') 110 | 111 | # Display means in plot. 112 | plt.plot(0, label='M = %.3f, %.3f' % (mean_traj[0], mean_traj[1]), alpha=0.0) 113 | # Display rejected/accepted ratio in the plot. 114 | plt.plot(0, label=r'$N_{pro}=%s$ $\frac{N_{acc}}{N_{pro}} = %.3f$' % (len(accepted_traj), (n_accepted/len(accepted_traj))), alpha=0) 115 | 116 | # Evidence for model, p(D). 117 | # Compute a,b parameters for beta distribution that has the same mean 118 | # and stdev as the sample from the posterior. This is a useful choice 119 | # when the likelihood function is binomial. 120 | a = mean_traj * ((mean_traj*(1-mean_traj)/stdTraj**2) - np.ones(n_dim)) 121 | b = (1-mean_traj) * ( (mean_traj*(1-mean_traj)/stdTraj**2) - np.ones(n_dim)) 122 | # For every theta value in the posterior sample, compute 123 | # beta.pdf(theta, a, b) / likelihood(theta) * prior(theta) 124 | # This computation assumes that likelihood and prior are properly normalized, 125 | # i.e., not just relative probabilities. 126 | 127 | wtd_evid = np.zeros(np.shape(accepted_traj)[0]) 128 | for idx in range(np.shape(accepted_traj)[0]): 129 | wtd_evid[idx] = (beta.pdf(accepted_traj[idx,0],a[0],b[0] ) 130 | * beta.pdf(accepted_traj[idx,1],a[1],b[1]) / 131 | (likelihood(accepted_traj[idx,]) * prior(accepted_traj[idx,]))) 132 | 133 | p_data = 1 / np.mean(wtd_evid) 134 | # Display p(D) in the graph 135 | plt.plot(0, label='p(D) = %.3e' % p_data, alpha=0) 136 | plt.legend(loc='upper left') 137 | plt.savefig('Figure_8.3.png') 138 | 139 | # Estimate highest density region by evaluating posterior at each point. 140 | accepted_traj = trajectory[burn_in:] 141 | npts = np.shape(accepted_traj)[0] 142 | post_prob = np.zeros((npts)) 143 | for ptIdx in range(npts): 144 | post_prob[ptIdx] = target_rel_prob(accepted_traj[ptIdx,]) 145 | 146 | # Determine the level at which credmass points are above: 147 | credmass = 0.95 148 | waterline = np.percentile(post_prob, (credmass)) 149 | 150 | HDI_points = accepted_traj[post_prob > waterline, ] 151 | 152 | plt.figure() 153 | plt.plot(HDI_points[:,0], HDI_points[:,1], 'C1o') 154 | plt.xlim(0,1) 155 | plt.ylim(0,1) 156 | plt.xlabel(r'$\theta1$') 157 | plt.ylabel(r'$\theta2$') 158 | 159 | # Display means in plot. 
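# (Added note: plotting a single invisible point with alpha=0 is just a trick
# to get the text of `label` into the legend; it draws nothing on the axes.)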
160 | plt.plot(0, label='M = %.3f, %.3f' % (mean_traj[0], mean_traj[1]), alpha=0.0) 161 | # Display rejected/accepted ratio in the plot. 162 | plt.plot(0, label=r'$N_{pro}=%s$ $\frac{N_{acc}}{N_{pro}} = %.3f$' % (len(accepted_traj), (n_accepted/len(accepted_traj))), alpha=0) 163 | # Display p(D) in the graph 164 | plt.plot(0, label='p(D) = %.3e' % p_data, alpha=0) 165 | plt.legend(loc='upper left') 166 | 167 | plt.savefig('Figure_8.3_HDI.png') 168 | 169 | plt.show() 170 | 171 | -------------------------------------------------------------------------------- /08_BernTwoPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Inferring two binomial proportions using PyMC. 3 | """ 4 | from __future__ import division 5 | import matplotlib.pyplot as plt 6 | plt.style.use('seaborn-darkgrid') 7 | import numpy as np 8 | import pymc3 as pm 9 | 10 | 11 | # Generate the data 12 | y1 = np.array([1, 1, 1, 1, 1, 0, 0]) # 5 heads and 2 tails 13 | y2 = np.array([1, 1, 0, 0, 0, 0, 0]) # 2 heads and 5 tails 14 | 15 | 16 | with pm.Model() as model: 17 | # define the prior 18 | theta1 = pm.Beta('theta1', 3, 3) # prior 19 | theta2 = pm.Beta('theta2', 3, 3) # prior 20 | # define the likelihood 21 | y1 = pm.Bernoulli('y1', p=theta1, observed=y1) 22 | y2 = pm.Bernoulli('y2', p=theta2, observed=y2) 23 | 24 | # Generate a MCMC chain 25 | trace = pm.sample(1000) 26 | 27 | # create an array with the posterior sample 28 | theta1_sample = trace['theta1'] 29 | theta2_sample = trace['theta2'] 30 | 31 | # Plot the trajectory of the last 500 sampled values. 32 | plt.plot(theta1_sample[:-500], theta2_sample[:-500], marker='o', color='skyblue') 33 | plt.xlim(0, 1) 34 | plt.ylim(0, 1) 35 | plt.xlabel(r'$\theta1$') 36 | plt.ylabel(r'$\theta2$') 37 | 38 | # Display means in plot. 39 | plt.plot(0, label='M = %.3f, %.3f' % (np.mean(theta1_sample), np.mean(theta2_sample)), alpha=0.0) 40 | 41 | plt.legend(loc='upper left') 42 | plt.savefig('Figure_8.6.png') 43 | 44 | # Plot a histogram of the posterior differences of theta values. 45 | theta_diff = theta1_sample - theta2_sample 46 | pm.plot_posterior(theta_diff, ref_val=0.0, bins=30, color='skyblue') 47 | plt.xlabel(r'$\theta_1 - \theta_2$') 48 | plt.savefig('Figure_8.8.png') 49 | 50 | # For Exercise 8.5: 51 | # Posterior prediction. For each step in the chain, use the posterior thetas 52 | # to flip the coins. 53 | chain_len = len(theta1_sample) 54 | # Create matrix to hold results of simulated flips: 55 | y_pred = np.zeros((2, chain_len)) 56 | for step_idx in range(chain_len): # step through the chain 57 | # flip the first coin: 58 | p_head1 = theta1_sample[step_idx] 59 | y_pred[0, step_idx] = np.random.choice([0,1], p=[1-p_head1, p_head1]) 60 | # flip the second coin: 61 | p_head2 = theta2_sample[step_idx] 62 | y_pred[1, step_idx] = np.random.choice([0,1], p=[1-p_head2, p_head2]) 63 | 64 | 65 | # Now determine the proportion of times that y1==1 and y2==0 66 | pY1eq1andY2eq0 = sum((y_pred[0] ==1) & (y_pred[1] == 0)) / chain_len 67 | 68 | print(pY1eq1andY2eq0) 69 | plt.show() 70 | 71 | -------------------------------------------------------------------------------- /09_BernBetaMuKappaPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Bernoulli Likelihood with Hierarchical Prior! 
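Each coin's bias theta is drawn from a Beta distribution whose mean mu and concentration kappa are themselves given priors, so the data from all coins jointly inform the group-level parameters.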
3 | """ 4 | import numpy as np 5 | import pymc3 as pm 6 | import sys 7 | from scipy.stats import beta, binom 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | 11 | 12 | # Data for figure 9.11 13 | N = [10, 10, 10] # Number of flips per coin 14 | z = [5, 5, 5] # Number of heads per coin 15 | ## Data for figure 9.12 16 | #N = [10, 10, 10] # Number of flips per coin 17 | #z = [1, 5, 9] # Number of heads per coin 18 | 19 | ## Data for exercise 9.1 20 | #ncoins = 50 21 | #nflipspercoin = 5 22 | #mu_act = .7 23 | #kappa_act = 20 24 | #theta_act = beta.rvs(mu_act*kappa_act+1, (1-mu_act)*kappa_act+1, size=ncoins) 25 | #z = binom.rvs(n=nflipspercoin, p=theta_act, size=ncoins) 26 | #N = [nflipspercoin] * ncoins 27 | 28 | 29 | # Arrange the data into a more convenient way to feed the PyMC model. 30 | coin = [] # list/vector index for each coins (from 0 to number of coins) 31 | y = [] # list/vector with head (1) or tails (0) for each flip. 32 | for i, flips in enumerate(N): 33 | heads = z[i] 34 | if heads > flips: 35 | sys.exit("The number of heads can't be greater than the number of flips") 36 | else: 37 | y = y + [1] * heads + [0] * (flips-heads) 38 | coin = coin + [i] * flips 39 | 40 | 41 | # Specify the model in PyMC 42 | with pm.Model() as model: 43 | # define the hyperparameters 44 | mu = pm.Beta('mu', 2, 2) 45 | kappa = pm.Gamma('kappa', 1, 0.1) 46 | # define the prior 47 | theta = pm.Beta('theta', mu * kappa, (1 - mu) * kappa, shape=len(N)) 48 | # define the likelihood 49 | y = pm.Bernoulli('y', p=theta[coin], observed=y) 50 | 51 | # Generate a MCMC chain 52 | 53 | trace = pm.sample(1000, progressbar=False) 54 | 55 | 56 | ## Check the results. 57 | 58 | ## Print summary for each trace 59 | #pm.df_summary(trace) 60 | #pm.df_summary(trace) 61 | 62 | ## Check for mixing and autocorrelation 63 | pm.autocorrplot(trace, varnames=['mu', 'kappa']) 64 | #pm.autocorrplot(trace, varnames =[mu, kappa]) 65 | 66 | ## Plot KDE and sampled values for each parameter. 
67 | pm.traceplot(trace) 68 | #pm.traceplot(trace) 69 | 70 | # Create arrays with the posterior sample 71 | theta1_sample = trace['theta'][:,0] 72 | theta2_sample = trace['theta'][:,1] 73 | theta3_sample = trace['theta'][:,2] 74 | mu_sample = trace['mu'] 75 | kappa_sample = trace['kappa'] 76 | 77 | 78 | # Scatter plot hyper-parameters 79 | fig, ax = plt.subplots(4, 3, figsize=(12,12)) 80 | ax[0, 0].scatter(mu_sample, kappa_sample, marker='o', color='skyblue') 81 | ax[0, 0].set_xlim(0,1) 82 | ax[0, 0].set_xlabel(r'$\mu$') 83 | ax[0, 0].set_ylabel(r'$\kappa$') 84 | 85 | # Plot mu histogram 86 | #plot_post(mu_sample, xlab=r'$\mu$', show_mode=False, labelsize=9, framealpha=0.5) 87 | 88 | pm.plot_posterior(mu_sample, ax=ax[0, 1], color='skyblue') 89 | ax[0, 1].set_xlabel(r'$\mu$') 90 | ax[0, 1].set_xlim(0,1) 91 | 92 | # Plot kappa histogram 93 | #plot_post(kappa_sample, xlab=r'$\kappa$', show_mode=False, labelsize=9, framealpha=0.5) 94 | pm.plot_posterior(kappa_sample, ax=ax[0, 2], color='skyblue') 95 | ax[0, 2].set_xlabel(r'$\kappa$') 96 | 97 | # Plot theta 1 98 | 99 | #plot_post(theta1_sample, xlab=r'$\theta1$', show_mode=False, labelsize=9, framealpha=0.5) 100 | pm.plot_posterior(theta1_sample, ax=ax[1, 0], color='skyblue') 101 | ax[1, 0].set_xlabel(r'$\theta1$') 102 | ax[1, 0].set_xlim(0,1) 103 | 104 | # Scatter theta 1 vs mu 105 | ax[1, 1].scatter(theta1_sample, mu_sample, marker='o', color='skyblue') 106 | ax[1, 1].set_xlim(0,1) 107 | ax[1, 1].set_ylim(0,1) 108 | ax[1, 1].set_xlabel(r'$\theta1$') 109 | ax[1, 1].set_ylabel(r'$\mu$') 110 | 111 | # Scatter theta 1 vs kappa 112 | ax[1, 2].scatter(theta1_sample, kappa_sample, marker='o', color='skyblue') 113 | ax[1, 2].set_xlim(0,1) 114 | ax[1, 2].set_xlabel(r'$\theta1$') 115 | ax[1, 2].set_ylabel(r'$\kappa$') 116 | 117 | # Plot theta 2 118 | #plot_post(theta2_sample, xlab=r'$\theta2$', show_mode=False, labelsize=9, framealpha=0.5) 119 | pm.plot_posterior(theta2_sample, ax=ax[2, 0], color='skyblue') 120 | ax[2, 0].set_xlabel(r'$\theta2$') 121 | ax[2, 0].set_xlim(0,1) 122 | 123 | # Scatter theta 2 vs mu 124 | ax[2, 1].scatter(theta2_sample, mu_sample, marker='o', color='skyblue') 125 | ax[2, 1].set_xlim(0,1) 126 | ax[2, 1].set_ylim(0,1) 127 | ax[2, 1].set_xlabel(r'$\theta2$') 128 | ax[2, 1].set_ylabel(r'$\mu$') 129 | 130 | # Scatter theta 2 vs kappa 131 | ax[2, 2].scatter(theta2_sample, kappa_sample, marker='o', color='skyblue') 132 | ax[2, 2].set_xlim(0,1) 133 | ax[2, 2].set_xlabel(r'$\theta2$') 134 | ax[2, 2].set_ylabel(r'$\kappa$') 135 | 136 | # Plot theta 3 137 | 138 | #plot_post(theta3_sample, xlab=r'$\theta3$', show_mode=False, labelsize=9, framealpha=0.5) 139 | pm.plot_posterior(theta3_sample, ax=ax[3, 0], color='skyblue') 140 | ax[3, 0].set_xlabel(r'$\theta3$') 141 | ax[3, 0].set_xlim(0,1) 142 | 143 | # Scatter theta 3 vs mu 144 | ax[3, 1].scatter(theta3_sample, mu_sample, marker='o', color='skyblue') 145 | ax[3, 1].set_xlim(0,1) 146 | ax[3, 1].set_ylim(0,1) 147 | ax[3, 1].set_xlabel(r'$\theta3$') 148 | ax[3, 1].set_ylabel(r'$\mu$') 149 | 150 | # Scatter theta 3 vs kappa 151 | ax[3, 2].scatter(theta3_sample, kappa_sample, marker='o', color='skyblue') 152 | ax[3, 2].set_xlim(0,1) 153 | ax[3, 2].set_xlabel(r'$\theta3$') 154 | ax[3, 2].set_ylabel(r'$\kappa$') 155 | 156 | plt.tight_layout() 157 | plt.savefig('Figure_9.11.png') 158 | plt.show() 159 | 160 | -------------------------------------------------------------------------------- /09_BernBetaMuKappaPyMC_TT.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Bernoulli Likelihood with Hierarchical Prior. The Therapeutic Touch example. 3 | """ 4 | import numpy as np 5 | import pymc3 as pm 6 | import sys 7 | import matplotlib.pyplot as plt 8 | plt.style.use('seaborn-darkgrid') 9 | 10 | 11 | ## Therapeutic touch data: 12 | z = [1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 13 | 5, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8] # Number of heads per coin 14 | N = [10] * len(z) # Number of flips per coin 15 | 16 | # rearrange the data to load it PyMC model. 17 | coin = [] # list/vector index for each coins (from 0 to number of coins) 18 | y = [] # list/vector with head (1) or tails (0) for each flip. 19 | for i, flips in enumerate(N): 20 | heads = z[i] 21 | if heads > flips: 22 | sys.exit("The number of heads can't be greater than the number of flips") 23 | else: 24 | y = y + [1] * heads + [0] * (flips-heads) 25 | coin = coin + [i] * flips 26 | 27 | 28 | # Specify the model in PyMC 29 | with pm.Model() as model: 30 | # define the hyperparameters 31 | mu = pm.Beta('mu', 2, 2) 32 | kappa = pm.Gamma('kappa', 1, 0.1) 33 | # define the prior 34 | theta = pm.Beta('theta', mu * kappa, (1 - mu) * kappa, shape=len(N)) 35 | # define the likelihood 36 | y = pm.Bernoulli('y', p=theta[coin], observed=y) 37 | # Generate a MCMC chain 38 | trace = pm.sample(5000, random_seed=123) 39 | 40 | ## Check the results. 41 | 42 | ## Print summary for each trace 43 | #pm.df_summary(trace) 44 | 45 | ## Check for mixing and autocorrelation 46 | pm.autocorrplot(trace, varnames=['mu', 'kappa']) 47 | 48 | ## Plot KDE and sampled values for each parameter. 49 | pm.traceplot(trace) 50 | #pm.traceplot(trace) 51 | 52 | # Create arrays with the posterior sample 53 | theta1_sample = trace['theta'][:,0] 54 | theta28_sample = trace['theta'][:,27] 55 | mu_sample = trace['mu'] 56 | kappa_sample = trace['kappa'] 57 | 58 | # Plot mu histogram 59 | fig, ax = plt.subplots(2, 2, figsize=(12,12)) 60 | pm.plot_posterior(mu_sample, ax=ax[0, 0], color='skyblue') 61 | ax[0, 0].set_xlabel(r'$\mu$') 62 | 63 | # Plot kappa histogram 64 | pm.plot_posterior(kappa_sample, ax=ax[0, 1], color='skyblue') 65 | ax[0, 1].set_xlabel(r'$\kappa$') 66 | 67 | # Plot theta 1 68 | pm.plot_posterior(theta1_sample, ax=ax[1, 0], color='skyblue') 69 | ax[1, 0].set_xlabel(r'$\theta1$') 70 | 71 | # Plot theta 28 72 | pm.plot_posterior(theta1_sample, ax=ax[1, 1], color='skyblue') 73 | ax[1, 1].set_xlabel(r'$\theta28$') 74 | 75 | 76 | plt.tight_layout() 77 | plt.savefig('Figure_9.14.png') 78 | plt.show() 79 | -------------------------------------------------------------------------------- /09_FilconPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | More Hierarchical models. The filtration-condensation experiment. 3 | """ 4 | import numpy as np 5 | import pymc3 as pm 6 | import sys 7 | import matplotlib.pyplot as plt 8 | plt.style.use('seaborn-darkgrid') 9 | 10 | # Data 11 | # For each subject, specify the condition s/he was in, 12 | # the number of trials s/he experienced, and the number correct. 
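# (Added note: 4 conditions x 40 subjects per condition gives 160 rows of data,
# each based on 64 trials; z holds the number correct for each subject and
# `condition` holds each subject's condition index, 0-3.)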
13 | ncond = 4 14 | nSubj = 40 15 | trials = 64 16 | 17 | N = np.repeat([trials], (ncond * nSubj)) 18 | z = np.array([45, 63, 58, 64, 58, 63, 51, 60, 59, 47, 63, 61, 60, 51, 59, 45, 19 | 61, 59, 60, 58, 63, 56, 63, 64, 64, 60, 64, 62, 49, 64, 64, 58, 64, 52, 64, 64, 20 | 64, 62, 64, 61, 59, 59, 55, 62, 51, 58, 55, 54, 59, 57, 58, 60, 54, 42, 59, 57, 21 | 59, 53, 53, 42, 59, 57, 29, 36, 51, 64, 60, 54, 54, 38, 61, 60, 61, 60, 62, 55, 22 | 38, 43, 58, 60, 44, 44, 32, 56, 43, 36, 38, 48, 32, 40, 40, 34, 45, 42, 41, 32, 23 | 48, 36, 29, 37, 53, 55, 50, 47, 46, 44, 50, 56, 58, 42, 58, 54, 57, 54, 51, 49, 24 | 52, 51, 49, 51, 46, 46, 42, 49, 46, 56, 42, 53, 55, 51, 55, 49, 53, 55, 40, 46, 25 | 56, 47, 54, 54, 42, 34, 35, 41, 48, 46, 39, 55, 30, 49, 27, 51, 41, 36, 45, 41, 26 | 53, 32, 43, 33]) 27 | condition = np.repeat([0,1,2,3], nSubj) 28 | 29 | # Specify the model in PyMC 30 | with pm.Model() as model: 31 | kappa = pm.Gamma('kappa', 1, 0.1, shape=ncond) 32 | mu = pm.Beta('mu', 1, 1, shape=ncond) 33 | theta = pm.Beta('theta', mu[condition] * kappa[condition], (1 - mu[condition]) * kappa[condition], shape=len(z)) 34 | y = pm.Binomial('y', p=theta, n=N, observed=z) 35 | 36 | trace = pm.sample(1000) 37 | 38 | ## Check the results. 39 | 40 | ## Print summary for each trace 41 | #pm.df_summary(trace) 42 | #pm.df_summary(trace) 43 | 44 | ## Check for mixing and autocorrelation 45 | #pm.autocorrplot(trace, varnames=['mu', 'kappa']) 46 | 47 | ## Plot KDE and sampled values for each parameter. 48 | #pm.traceplot(trace) 49 | pm.traceplot(trace) 50 | 51 | 52 | # Create arrays with the posterior sample 53 | mu1_sample = trace['mu'][:,0] 54 | mu2_sample = trace['mu'][:,1] 55 | mu3_sample = trace['mu'][:,2] 56 | mu4_sample = trace['mu'][:,3] 57 | 58 | 59 | # Plot differences among filtrations experiments 60 | fig, ax = plt.subplots(1, 3, figsize=(15, 6)) 61 | pm.plot_posterior(mu1_sample-mu2_sample, ax=ax[0], color='skyblue') 62 | ax[0].set_xlabel(r'$\mu1-\mu2$') 63 | 64 | # Plot differences among condensation experiments 65 | pm.plot_posterior(mu3_sample-mu4_sample, ax=ax[1], color='skyblue') 66 | ax[1].set_xlabel(r'$\mu3-\mu4$') 67 | 68 | # Plot differences between filtration and condensation experiments 69 | a = (mu1_sample+mu2_sample)/2 - (mu3_sample+mu4_sample)/2 70 | pm.plot_posterior(a, ax=ax[2], color='skyblue') 71 | ax[2].set_xlabel(r'$(\mu1+\mu2)/2 - (\mu3+\mu4)/2$') 72 | 73 | plt.tight_layout() 74 | plt.savefig('Figure_9.16.png') 75 | plt.show() 76 | -------------------------------------------------------------------------------- /09_FilconPyMC_ex9.2.A.py: -------------------------------------------------------------------------------- 1 | """ 2 | More Hierarchical models. The filtration-condensation experiment. A single kappa 3 | for all conditions. 4 | """ 5 | import numpy as np 6 | import pymc3 as pm 7 | import sys 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | 11 | 12 | # Data 13 | # For each subject, specify the condition s/he was in, 14 | # the number of trials s/he experienced, and the number correct. 
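# (Added note: the data below are identical to 09_FilconPyMC.py; the only change
# in this exercise is the model, which uses a single kappa shared by all four
# conditions instead of one kappa per condition.)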
15 | ncond = 4 16 | nSubj = 40 17 | trials = 64 18 | 19 | N = np.repeat([trials], (ncond * nSubj)) 20 | z = np.array([45, 63, 58, 64, 58, 63, 51, 60, 59, 47, 63, 61, 60, 51, 59, 45, 21 | 61, 59, 60, 58, 63, 56, 63, 64, 64, 60, 64, 62, 49, 64, 64, 58, 64, 52, 64, 64, 22 | 64, 62, 64, 61, 59, 59, 55, 62, 51, 58, 55, 54, 59, 57, 58, 60, 54, 42, 59, 57, 23 | 59, 53, 53, 42, 59, 57, 29, 36, 51, 64, 60, 54, 54, 38, 61, 60, 61, 60, 62, 55, 24 | 38, 43, 58, 60, 44, 44, 32, 56, 43, 36, 38, 48, 32, 40, 40, 34, 45, 42, 41, 32, 25 | 48, 36, 29, 37, 53, 55, 50, 47, 46, 44, 50, 56, 58, 42, 58, 54, 57, 54, 51, 49, 26 | 52, 51, 49, 51, 46, 46, 42, 49, 46, 56, 42, 53, 55, 51, 55, 49, 53, 55, 40, 46, 27 | 56, 47, 54, 54, 42, 34, 35, 41, 48, 46, 39, 55, 30, 49, 27, 51, 41, 36, 45, 41, 28 | 53, 32, 43, 33]) 29 | condition = np.repeat([0,1,2,3], nSubj) 30 | 31 | # Specify the model in PyMC 32 | with pm.Model() as model: 33 | # define the hyperparameters 34 | kappa = pm.Gamma('kappa', 1, 0.1) 35 | mu = pm.Beta('mu', 1, 1, shape=ncond) 36 | # define the prior 37 | theta = pm.Beta('theta', mu[condition] * kappa, (1 - mu[condition]) * kappa, shape=len(z)) 38 | # define the likelihood 39 | y = pm.Binomial('y', p=theta, n=N, observed=z) 40 | trace = pm.sample(1000) 41 | 42 | ## Check the results. 43 | 44 | ## Print summary for each trace 45 | #pm.df_summary(trace) 46 | 47 | ## Check for mixing and autocorrelation 48 | #pm.autocorrplot(trace, varnames=['mu', 'kappa']) 49 | 50 | ## Plot KDE and sampled values for each parameter. 51 | pm.traceplot(trace) 52 | 53 | # Create arrays with the posterior sample 54 | mu1_sample = trace['mu'][:,0] 55 | mu2_sample = trace['mu'][:,1] 56 | mu3_sample = trace['mu'][:,2] 57 | mu4_sample = trace['mu'][:,3] 58 | 59 | 60 | # Plot differences among filtrations experiments 61 | fig, ax = plt.subplots(1, 3, figsize=(15, 6)) 62 | pm.plot_posterior((mu1_sample-mu2_sample), ax=ax[0], ref_val=0, color='skyblue') 63 | ax[0].set_xlabel(r'$\mu1-\mu2$') 64 | 65 | # Plot differences among condensation experiments 66 | pm.plot_posterior((mu3_sample-mu4_sample), ax=ax[1], ref_val=0, color='skyblue') 67 | ax[1].set_xlabel(r'$\mu3-\mu4$') 68 | 69 | # Plot differences between filtration and condensation experiments 70 | a = (mu1_sample+mu2_sample)/2 - (mu3_sample+mu4_sample)/2 71 | pm.plot_posterior(a, ax=ax[2], ref_val=0, color='skyblue') 72 | ax[2].set_xlabel(r'$(\mu1+\mu2)/2 - (\mu3+\mu4)/2$') 73 | 74 | plt.tight_layout() 75 | plt.savefig('Figure_9.18_upper.png') 76 | plt.show() 77 | -------------------------------------------------------------------------------- /09_FilconPyMC_ex9.2.B.py: -------------------------------------------------------------------------------- 1 | """ 2 | More Hierarchical models. The filtration-condensation experiment. 3 | Hyperparameters for kappafor all conditions. 4 | """ 5 | import numpy as np 6 | import pymc3 as pm 7 | import sys 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | 11 | # Data 12 | # For each subject, specify the condition s/he was in, 13 | # the number of trials s/he experienced, and the number correct. 
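# (Added note: in the model below, kappa gets its own hyperprior: a mean and sd
# for a gamma distribution are drawn from uniforms and converted to shape and
# rate via shape = mean**2 / sd**2 and rate = mean / sd**2, because a
# Gamma(shape, rate) distribution has mean shape/rate and sd sqrt(shape)/rate.)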
14 | ncond = 4 15 | nSubj = 40 16 | trials = 64 17 | 18 | N = np.repeat([trials], (ncond * nSubj)) 19 | z = np.array([45, 63, 58, 64, 58, 63, 51, 60, 59, 47, 63, 61, 60, 51, 59, 45, 20 | 61, 59, 60, 58, 63, 56, 63, 64, 64, 60, 64, 62, 49, 64, 64, 58, 64, 52, 64, 64, 21 | 64, 62, 64, 61, 59, 59, 55, 62, 51, 58, 55, 54, 59, 57, 58, 60, 54, 42, 59, 57, 22 | 59, 53, 53, 42, 59, 57, 29, 36, 51, 64, 60, 54, 54, 38, 61, 60, 61, 60, 62, 55, 23 | 38, 43, 58, 60, 44, 44, 32, 56, 43, 36, 38, 48, 32, 40, 40, 34, 45, 42, 41, 32, 24 | 48, 36, 29, 37, 53, 55, 50, 47, 46, 44, 50, 56, 58, 42, 58, 54, 57, 54, 51, 49, 25 | 52, 51, 49, 51, 46, 46, 42, 49, 46, 56, 42, 53, 55, 51, 55, 49, 53, 55, 40, 46, 26 | 56, 47, 54, 54, 42, 34, 35, 41, 48, 46, 39, 55, 30, 49, 27, 51, 41, 36, 45, 41, 27 | 53, 32, 43, 33]) 28 | condition = np.repeat([0,1,2,3], nSubj) 29 | 30 | # Specify the model in PyMC 31 | with pm.Model() as model: 32 | # define the hyper-hyperparameters for kappa 33 | mean_gamma = pm.Uniform('mean_gamma', 0, 30) 34 | sd_gamma = pm.Uniform('sd_gamma', 0, 30) 35 | s_kappa = mean_gamma**2/sd_gamma**2 36 | r_kappa = mean_gamma/sd_gamma**2 37 | # define the hyperparameters 38 | kappa = pm.Gamma('kappa', s_kappa, r_kappa) 39 | mu = pm.Beta('mu', 1, 1, shape=ncond) 40 | # define the prior 41 | theta = pm.Beta('theta', mu[condition] * kappa, (1 - mu[condition]) * kappa, shape=len(z)) 42 | # define the likelihood 43 | y = pm.Binomial('y', p=theta, n=N, observed=z) 44 | trace = pm.sample(2000, tune=1000) 45 | 46 | ## Check the results. 47 | burnin = 0 # posterior samples to discard 48 | 49 | ## Print summary for each trace 50 | #pm.df_summary(trace[burnin:]) 51 | #pm.df_summary(trace) 52 | 53 | ## Check for mixing and autocorrelation 54 | #pm.autocorrplot(trace, varnames=['mu', 'kappa']) 55 | 56 | ## Plot KDE and sampled values for each parameter. 57 | #pm.traceplot(trace[burnin:]) 58 | pm.traceplot(trace) 59 | 60 | # Create arrays with the posterior sample 61 | mu1_sample = trace['mu'][:,0][burnin:] 62 | mu2_sample = trace['mu'][:,1][burnin:] 63 | mu3_sample = trace['mu'][:,2][burnin:] 64 | mu4_sample = trace['mu'][:,3][burnin:] 65 | 66 | # Plot differences among filtrations experiments 67 | fig, ax = plt.subplots(1, 3, figsize=(15, 6)) 68 | pm.plot_posterior((mu1_sample-mu2_sample), ax=ax[0], ref_val=0, color='skyblue') 69 | ax[0].set_xlabel(r'$\mu1-\mu2$') 70 | 71 | # Plot differences among condensation experiments 72 | pm.plot_posterior((mu3_sample-mu4_sample), ax=ax[1], ref_val=0, color='skyblue') 73 | ax[1].set_xlabel(r'$\mu3-\mu4$') 74 | 75 | # Plot differences between filtration and condensation experiments 76 | a = (mu1_sample+mu2_sample)/2 - (mu3_sample+mu4_sample)/2 77 | pm.plot_posterior(a, ax=ax[2], ref_val=0, color='skyblue') 78 | ax[2].set_xlabel(r'$(\mu1+\mu2)/2 - (\mu3+\mu4)/2$') 79 | 80 | plt.tight_layout() 81 | plt.savefig('Figure_9.18_lower.png') 82 | plt.show() 83 | -------------------------------------------------------------------------------- /10_BernBetaModelCompPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Comparing models using Hierarchical modelling. 
3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import matplotlib.pyplot as plt 8 | plt.style.use('seaborn-darkgrid') 9 | 10 | ## specify the Data 11 | y = np.repeat([0, 1], [3, 6]) # 3 tails 6 heads 12 | 13 | with pm.Model() as model: 14 | # Hyperhyperprior: 15 | model_index = pm.DiscreteUniform('model_index', lower=0, upper=1) 16 | # Hyperprior: 17 | kappa_theta = 12 18 | mu_theta = pm.math.switch(pm.math.eq(model_index, 1), 0.25, 0.75) 19 | # Prior distribution: 20 | a_theta = mu_theta * kappa_theta 21 | b_theta = (1 - mu_theta) * kappa_theta 22 | theta = pm.Beta('theta', a_theta, b_theta) # theta distributed as beta density 23 | #likelihood 24 | y = pm.Bernoulli('y', theta, observed=y) 25 | trace = pm.sample(5000) 26 | 27 | 28 | ## Check the results. 29 | 30 | ## Print summary for each trace 31 | #pm.summary(trace) 32 | 33 | ## Check for mixing and autocorrelation 34 | #pm.autocorrplot(trace) 35 | 36 | ## Plot KDE and sampled values for each parameter. 37 | pm.traceplot(trace) 38 | 39 | ## Get the posterior sample of model_index: 40 | model_idx_sample = trace['model_index'] 41 | ## Compute the proportion of model_index at each value: 42 | p_M1 = sum(model_idx_sample == 1) / len(model_idx_sample) 43 | p_M2 = 1 - p_M1 44 | 45 | 46 | ## Get the posterior sample of theta: 47 | theta_sample = trace['theta'] 48 | ## Extract theta values when model_index is 1: 49 | theta_sample_M1 = theta_sample[model_idx_sample == 1] 50 | ## Extract theta values when model_index is 2: 51 | theta_sample_M2 = theta_sample[model_idx_sample == 0] 52 | 53 | ## Plot histograms of sampled theta values for each model, 54 | plt.figure() 55 | plt.subplot(1, 2, 1) 56 | plt.hist(theta_sample_M1, label='p(M1|D) = {:.3f}'.format(p_M1)) 57 | plt.xlabel(r'$\theta$') 58 | plt.ylabel(r'$p(\theta|\mu=0.25,D)$') 59 | plt.xlim(0, 1) 60 | plt.legend(loc='upper right', framealpha=0.5) 61 | 62 | plt.subplot(1, 2, 2) 63 | plt.hist(theta_sample_M2, label='p(M2|D) = {:.3f}'.format(p_M2)) 64 | plt.xlabel(r'$\theta$') 65 | plt.ylabel(r'$p(\theta|\mu=0.75,D)$') 66 | plt.xlim(0, 1) 67 | plt.legend(loc='upper right', framealpha=0.5) 68 | 69 | plt.savefig('Figure_10.2.png') 70 | plt.show() 71 | -------------------------------------------------------------------------------- /10_FilconModelCompPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Comparing models using Hierarchical modelling. 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import matplotlib.pyplot as plt 8 | plt.style.use('seaborn-darkgrid') 9 | 10 | # THE DATA. 11 | # For each subject, specify the condition s/he was in, 12 | # the number of trials s/he experienced, and the number correct. 
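# Aside on the model-index trick used by the comparison scripts in this chapter:
# with a uniform prior on model_index (the DiscreteUniform(0, 1) hyperprior), the
# posterior odds estimated from the chain equal the Bayes factor,
#     BF_12 = p(M1|D) / p(M2|D)   when the prior odds are 1.
# Illustrative arithmetic only; the numbers below are hypothetical, not results:
p_M1_example, p_M2_example = 0.8, 0.2
BF_12_example = p_M1_example / p_M2_example  # posterior odds = Bayes factor here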
13 | 14 | cond_of_subj = np.repeat([0,1,2,3], 40) 15 | 16 | n_trl_of_subj = np.array([64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64, 17 | 64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64, 18 | 64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64, 19 | 64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64, 20 | 64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64, 21 | 64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64, 22 | 64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]) 23 | 24 | n_corr_of_subj = np.array([45,63,58,64,58,63,51,60,59,47,63,61,60,51,59,45,61, 25 | 59,60,58,63,56,63,64,64,60,64,62,49,64,64,58,64,52,64,64,64,62,64,61,59,59, 26 | 55,62,51,58,55,54,59,57,58,60,54,42,59,57,59,53,53,42,59,57,29,36,51,64,60, 27 | 54,54,38,61,60,61,60,62,55,38,43,58,60,44,44,32,56,43,36,38,48,32,40,40,34, 28 | 45,42,41,32,48,36,29,37,53,55,50,47,46,44,50,56,58,42,58,54,57,54,51,49,52, 29 | 51,49,51,46,46,42,49,46,56,42,53,55,51,55,49,53,55,40,46,56,47,54,54,42,34, 30 | 35,41,48,46,39,55,30,49,27,51,41,36,45,41,53,32,43,33]) 31 | 32 | n_subj = len(cond_of_subj) 33 | n_cond = len(set(cond_of_subj)) 34 | 35 | 36 | # THE MODEL. 37 | with pm.Model() as model: 38 | # Hyperprior on model index: 39 | model_index = pm.DiscreteUniform('model_index', lower=0, upper=1) 40 | # Constants for hyperprior: 41 | shape_Gamma = 1.0 42 | rate_Gamma = 0.1 43 | # Hyperprior on mu and kappa: 44 | mu = pm.Beta('mu', 1, 1, shape=n_cond) 45 | 46 | kappa0 = pm.Gamma('kappa0', alpha=shape_Gamma, beta=rate_Gamma) 47 | a_Beta0 = mu[cond_of_subj] * kappa0 48 | b_Beta0 = (1 - mu[cond_of_subj]) * kappa0 49 | 50 | kappa1 = pm.Gamma('kappa1', alpha=shape_Gamma, beta=rate_Gamma, shape=n_cond) 51 | a_Beta1 = mu[cond_of_subj] * kappa1[cond_of_subj] 52 | b_Beta1 = (1 - mu[cond_of_subj]) * kappa1[cond_of_subj] 53 | 54 | #Prior on theta 55 | theta0 = pm.Beta('theta0', a_Beta0, b_Beta0, shape=n_subj) 56 | theta1 = pm.Beta('theta1', a_Beta1, b_Beta1, shape=n_subj) 57 | # if model_index == 0 then sample from theta1 else sample from theta0 58 | theta = pm.math.switch(pm.math.eq(model_index, 0), theta1, theta0) 59 | 60 | # Likelihood: 61 | y = pm.Binomial('y', p=theta, n=n_trl_of_subj, observed=n_corr_of_subj) 62 | 63 | # Sampling 64 | step = pm.ElemwiseCategorical(vars=[model_index],values=[0,1]) 65 | trace = pm.sample(5000, step=step, progressbar=False) 66 | 67 | 68 | # EXAMINE THE RESULTS. 69 | burnin = 500 70 | pm.traceplot(trace) 71 | 72 | model_idx_sample = trace['model_index'][burnin:] 73 | 74 | pM1 = sum(model_idx_sample == 0) / len(model_idx_sample) 75 | pM2 = 1 - pM1 76 | 77 | plt.figure(figsize=(15, 15)) 78 | plt.subplot2grid((5,4), (0,0), colspan=4) 79 | plt.plot(model_idx_sample, label='p(M1|D) = {:.3f} ; p(M2|D) = {:.3f}'.format(pM1, pM2)); 80 | plt.xlabel('Steps in Markov Chain') 81 | plt.legend(loc='upper right', framealpha=0.75) 82 | 83 | for m in range(0, 2): 84 | kappa0_sample = trace['kappa0'][burnin:][model_idx_sample == m] 85 | plt.subplot2grid((5,4), (3+m, 1), colspan=2) 86 | plt.hist(kappa0_sample, bins=30) 87 | plt.title(r'Post. $\kappa_0$ for M={}'.format(m+1), fontsize=14) 88 | plt.xlabel(r'$\kappa_0$') 89 | plt.xlim(0, 30) 90 | for i in range(0, 4): 91 | kappa1_sample = trace['kappa1'][:,i][burnin:][model_idx_sample == m] 92 | plt.subplot2grid((5,4), (m+1, i)) 93 | plt.hist(kappa1_sample, bins=30) 94 | plt.title(r'Post. 
$\kappa_{}$ for M={}'.format(i+1, m+1), fontsize=14) 95 | plt.xlabel(r'$\kappa_%s$' % (i+1)) 96 | plt.xlim(0, 30) 97 | 98 | plt.tight_layout() 99 | plt.savefig('Figure_10.3-4.png') 100 | plt.show() 101 | -------------------------------------------------------------------------------- /10_ToyModelCompPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Comparing models using Hierarchical modelling. Toy Model. 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import matplotlib.pyplot as plt 8 | plt.style.use('seaborn-darkgrid') 9 | 10 | # THE DATA. 11 | N = 30 12 | z = 8 13 | y = np.repeat([1, 0], [z, N-z]) 14 | 15 | # THE MODEL. 16 | with pm.Model() as model: 17 | # Hyperprior on model index: 18 | model_index = pm.DiscreteUniform('model_index', lower=0, upper=1) 19 | # Prior 20 | nu = pm.Normal('nu', mu=0, tau=0.1) # it is posible to use tau or sd 21 | eta = pm.Gamma('eta', .1, .1) 22 | theta0 = 1 / (1 + pm.math.exp(-nu)) # theta from model index 0 23 | theta1 = pm.math.exp(-eta) # theta from model index 1 24 | theta = pm.math.switch(pm.math.eq(model_index, 0), theta0, theta1) 25 | # Likelihood 26 | y = pm.Bernoulli('y', p=theta, observed=y) 27 | # Sampling 28 | trace = pm.sample(1000) 29 | 30 | 31 | # EXAMINE THE RESULTS. 32 | ## Print summary for each trace 33 | 34 | #pm.summary(trace) 35 | 36 | ## Check for mixing and autocorrelation 37 | #pm.autocorrplot(trace, vars =[nu, eta]) 38 | 39 | ## Plot KDE and sampled values for each parameter. 40 | #pm.traceplot(trace) 41 | 42 | 43 | model_idx_sample = trace['model_index'] 44 | pM1 = sum(model_idx_sample == 0) / len(model_idx_sample) 45 | pM2 = 1 - pM1 46 | 47 | 48 | nu_sample_M1 = trace['nu'][model_idx_sample == 0] 49 | eta_sample_M2 = trace['eta'][model_idx_sample == 1] 50 | 51 | plt.figure() 52 | plt.subplot(2, 1, 1) 53 | pm.plot_posterior(nu_sample_M1) 54 | plt.xlabel(r'$\nu$') 55 | plt.ylabel('frequency') 56 | plt.title(r'p($\nu$|D,M2), with p(M2|D)={:.3}f'.format(pM1), fontsize=14) 57 | plt.xlim(-8, 8) 58 | 59 | plt.subplot(2, 1, 2) 60 | pm.plot_posterior(eta_sample_M2) 61 | plt.xlabel(r'$\eta$') 62 | plt.ylabel('frequency') 63 | plt.title(r'p($\eta$|D,M2), with p(M2|D)={:.3f}'.format(pM2), fontsize=14) 64 | plt.xlim(0, 8) 65 | plt.savefig('figure_ex_10.2_a.png') 66 | plt.show() 67 | -------------------------------------------------------------------------------- /12_OneOddGroupModelComp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Testing a point ('Null') Hypothesis (not using pseudopriors) 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | from scipy.stats import binom 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | 11 | # THE DATA. 12 | # For each subject, specify the condition s/he was in, 13 | # the number of trials s/he experienced, and the number correct. 14 | # (Randomly generated fictitious data.) 
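# Version note (an assumption about later PyMC3 releases, not from the original
# code): pm.ElemwiseCategorical, used below to sample the discrete model_index,
# was deprecated in newer PyMC3 versions; a commonly suggested replacement is the
# categorical Gibbs step, e.g.
#     step = pm.CategoricalGibbsMetropolis(vars=[model_index])
# or simply letting pm.sample() assign a step method to the discrete variable.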
15 | npg = 20 # number of subjects per group 16 | ntrl = 20 # number of trials per subject 17 | cond_of_subj = np.repeat([0, 1, 2, 3], npg) 18 | n_trl_of_subj = np.repeat([ntrl], 4*npg) 19 | np.random.seed(47401) 20 | 21 | n_corr_of_subj = np.concatenate((binom.rvs(n=ntrl, p=.61, size=npg), 22 | binom.rvs(n=ntrl, p=.50, size=npg), 23 | binom.rvs(n=ntrl, p=.49, size=npg), 24 | binom.rvs(n=ntrl, p=.51, size=npg))) 25 | 26 | n_subj = len(cond_of_subj) 27 | n_cond = len(set(cond_of_subj)) 28 | 29 | 30 | # THE MODEL 31 | with pm.Model() as model: 32 | # Hyperprior on model index: 33 | model_index = pm.DiscreteUniform('model_index', lower=0, upper=1) 34 | # Constants for hyperprior: 35 | shape_Gamma = 1.0 36 | rate_Gamma = 0.1 37 | # Hyperprior on mu and kappa: 38 | kappa = pm.Gamma('kappa', shape_Gamma, rate_Gamma, shape=n_cond) 39 | 40 | mu0 = pm.Beta('mu0', 1, 1) 41 | a_Beta0 = mu0 * kappa[cond_of_subj] 42 | b_Beta0 = (1 - mu0) * kappa[cond_of_subj] 43 | 44 | mu1 = pm.Beta('mu1', 1, 1, shape=n_cond) 45 | a_Beta1 = mu1[cond_of_subj] * kappa[cond_of_subj] 46 | b_Beta1 = (1 - mu1[cond_of_subj]) * kappa[cond_of_subj] 47 | 48 | #Prior on theta 49 | theta0 = pm.Beta('theta0', a_Beta0, b_Beta0, shape=n_subj) 50 | theta1 = pm.Beta('theta1', a_Beta1, b_Beta1, shape=n_subj) 51 | # if model_index == 0 then sample from theta1 else sample from theta0 52 | theta = pm.math.switch(pm.math.eq(model_index, 0), theta1, theta0) 53 | 54 | # Likelihood: 55 | y = pm.Binomial('y', p=theta, n=n_trl_of_subj, observed=n_corr_of_subj) 56 | 57 | # Sampling 58 | step = pm.ElemwiseCategorical(vars=[model_index],values=[0,1]) 59 | trace = pm.sample(10000, step) 60 | 61 | # EXAMINE THE RESULTS. 62 | 63 | ## Print summary for each trace 64 | #pm.summary(trace) 65 | 66 | ## Check for mixing and autocorrelation 67 | #pm.autocorrplot(trace, vars =[mu, kappa]) 68 | 69 | ## Plot KDE and sampled values for each parameter. 70 | #pm.traceplot(trace) 71 | 72 | model_idx_sample = trace['model_index'] 73 | pM1 = sum(model_idx_sample == 0) / len(model_idx_sample) 74 | pM2 = 1 - pM1 75 | 76 | plt.figure(figsize=(15, 15)) 77 | plt.subplot2grid((3,3), (0,0), colspan=3) 78 | plt.plot(model_idx_sample, label='p(DiffMu|D) = %.3f ; p(SameMu|D) = {:.3f}'.format(pM1, pM2)); 79 | plt.xlabel('Step in Markov Chain') 80 | plt.legend(loc='upper right', framealpha=0.75) 81 | 82 | count = 0 83 | position = [(1,0), (1,1), (1,2), (2,0), (2,1), (2,2)] 84 | for i in range(0, 4): 85 | mui_sample = trace['mu1'][:,i][model_idx_sample == 0] 86 | for j in range(i+1, 4): 87 | muj_sample = trace['mu1'][:,j][model_idx_sample == 0] 88 | ax = plt.subplot2grid((3,3), position[count]) 89 | pm.plot_posterior(mui_sample-muj_sample, 90 | ref_val=0, ax=ax) 91 | plt.title(r'$\mu_{} - \mu_{}$'.format(i+1, j+1)) 92 | plt.xlim(-0.3, 0.3) 93 | count += 1 94 | 95 | 96 | plt.tight_layout() 97 | plt.savefig('Figure_12.5.png') 98 | plt.show() 99 | -------------------------------------------------------------------------------- /13_minNforHDIpower.py: -------------------------------------------------------------------------------- 1 | """ 2 | The program described in this section was used to generate Tables 13.1 and 13.2. 3 | The program determines the minimal sample size needed to achieve a specified 4 | goal with a specified power, when flipping a single coin. 
5 | """ 6 | import numpy as np 7 | from HDIofICDF import * 8 | from scipy.special import binom, betaln 9 | 10 | 11 | def minNforHDIpower(genPriorMean, genPriorN, HDImaxwid=None, nullVal=None, 12 | ROPE=None, desiredPower=0.8, audPriorMean=0.5, 13 | audPriorN=2, HDImass=0.95, initSampSize=1, verbose=True): 14 | if HDImaxwid != None and nullVal != None: 15 | sys.exit('One and only one of HDImaxwid and nullVal must be specified') 16 | if ROPE == None: 17 | ROPE = [nullVal, nullVal] 18 | # Convert prior mean and N to a, b parameter values of beta distribution. 19 | genPriorA = genPriorMean * genPriorN 20 | genPriorB = (1.0 - genPriorMean) * genPriorN 21 | audPriorA = audPriorMean * audPriorN 22 | audPriorB = (1.0 - audPriorMean) * audPriorN 23 | # Initialize loop for incrementing sampleSize 24 | sampleSize = initSampSize 25 | # Increment sampleSize until desired power is achieved. 26 | while True: 27 | zvec = np.arange(0, sampleSize+1) # All possible z values for N flips. 28 | # Compute probability of each z value for data-generating prior. 29 | pzvec = np.exp(np.log(binom(sampleSize, zvec)) 30 | + betaln(zvec + genPriorA, sampleSize - zvec + genPriorB) 31 | - betaln(genPriorA, genPriorB)) 32 | # For each z value, compute HDI. hdiMat is min, max of HDI for each z. 33 | hdiMat = np.zeros((len(zvec), 2)) 34 | for zIdx in range(0, len(zvec)): 35 | z = zvec[zIdx] 36 | # Determine the limits of the highest density interval 37 | # hdp is a function from PyMC package and takes a sample vector as 38 | # input, not a function. 39 | hdiMat[zIdx] = HDIofICDF(beta, credMass=HDImass, a=(z + audPriorA), 40 | b=(sampleSize - z + audPriorB)) 41 | if HDImaxwid != None: 42 | hdiWid = hdiMat[:,1] - hdiMat[:,0] 43 | powerHDI = np.sum(pzvec[hdiWid < HDImaxwid]) 44 | if nullVal != None: 45 | powerHDI = np.sum(pzvec[(hdiMat[:,0] > ROPE[1]) | 46 | (hdiMat[:,1] < ROPE[0])]) 47 | if verbose: 48 | print(" For sample size = %s\npower = %s\n" % (sampleSize, powerHDI)) 49 | 50 | if powerHDI > desiredPower: 51 | break 52 | else: 53 | sampleSize += 1 54 | return sampleSize 55 | 56 | print(minNforHDIpower(genPriorMean=.85 , genPriorN=2000 , nullVal=0.5, verbose=False)) 57 | #print(minNforHDIpower(genPriorMean=.85 , genPriorN=10 , HDImaxwid=0.2, verbose=False)) 58 | 59 | -------------------------------------------------------------------------------- /15_SystemsPyMC.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Estimating the mean and standard deviation of a Gaussian likelihood with a 3 | hierarchical model. 4 | ''' 5 | from __future__ import division 6 | import numpy as np 7 | import pymc3 as pm 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | 11 | # THE DATA. 
12 | # Load the aircraft data: 13 | data = np.genfromtxt('Systems.txt', skip_header=True) 14 | 15 | n_subj = len(set(data[:,0])) 16 | # Put it into generic variables so easier to change data in other applications: 17 | y = data[:,3] 18 | subj = data[:,0].astype(int) 19 | 20 | 21 | 22 | ## Specify the model in PyMC 23 | with pm.Model() as model: 24 | # define the HyperPriors 25 | muG = pm.Normal('muG', mu=2.3, tau=0.1) 26 | tauG = pm.Gamma('tauG', 1, .5) 27 | m = pm.Gamma('m', 1, .25) 28 | d = pm.Gamma('d', 1, .5) 29 | sG = m**2 / d**2 30 | rG = m / d**2 31 | # define the priors 32 | tau = pm.Gamma('tau', sG, rG, shape=n_subj) 33 | mu = pm.Normal('mu', mu=muG, tau=tauG, shape=n_subj) 34 | # define the likelihood 35 | y = pm.Normal('y', mu=mu[subj-1], tau=tau[subj-1], observed=y) 36 | # Generate a MCMC chain 37 | trace = pm.sample(2000) 38 | 39 | 40 | # EXAMINE THE RESULTS 41 | 42 | 43 | ## Print summary for each trace 44 | #pm.summary(trace) 45 | 46 | ## Check for mixing and autocorrelation 47 | #pm.autocorrplot(trace, vars =[mu, tau]) 48 | 49 | ## Plot KDE and sampled values for each parameter. 50 | #pm.traceplot(trace) 51 | 52 | 53 | ## Extract chains 54 | muG_sample = trace['muG'] 55 | tauG_sample = trace['tauG'] 56 | m_sample = trace['m'] 57 | d_sample = trace['d'] 58 | 59 | # Plot the hyperdistributions: 60 | _, ax = plt.subplots(1, 4, figsize=(20, 5)) 61 | pm.plot_posterior(muG_sample, bins=30, ax=ax[0]) 62 | ax[0].set_xlabel(r'$\mu_g$', fontsize=16) 63 | pm.plot_posterior(tauG_sample, bins=30 ,ax=ax[1]) 64 | ax[1].set_xlabel(r'$\tau_g$', fontsize=16) 65 | pm.plot_posterior(m_sample, bins=30, ax=ax[2]) 66 | ax[2].set_xlabel('m', fontsize=16) 67 | pm.plot_posterior(d_sample, bins=30, ax=ax[3]) 68 | ax[3].set_xlabel('d', fontsize=16) 69 | 70 | plt.tight_layout() 71 | plt.savefig('Figure_15.9.png') 72 | plt.show() 73 | -------------------------------------------------------------------------------- /15_YmetricXsinglePyMC.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Estimating the mean and standard deviation of a Gaussian likelihood. 3 | ''' 4 | import numpy as np 5 | import pymc3 as pm 6 | from scipy.stats import norm 7 | import matplotlib.pyplot as plt 8 | 9 | 10 | # THE DATA. 11 | 12 | # Generate random data from known parameter values: 13 | np.random.seed(4745) 14 | true_mu = 100 15 | true_std = 15 16 | y = norm.rvs(true_mu, true_std, 500) 17 | 18 | 19 | # Specify the model in PyMC 20 | with pm.Model() as model: 21 | # define the priors 22 | sd = pm.HalfNormal('sd', 25) 23 | mu = pm.Normal('mu', mu=0, sd=100) # PyMC support precision and std 24 | #define the likelihood 25 | yl = pm.Normal('yl', mu, sd, observed=y) 26 | # Generate a MCMC chain 27 | trace = pm.sample(5000) 28 | 29 | 30 | # EXAMINE THE RESULTS 31 | 32 | ## Print summary for each trace 33 | #pm.summary(trace) 34 | 35 | ## Check for mixing and autocorrelation 36 | #pm.autocorrplot(trace, vars =[mu, tau]) 37 | 38 | ## Plot KDE and sampled values for each parameter. 
39 | #pm.traceplot(trace) 40 | 41 | mu_sample = trace['mu'] 42 | sigma_sample = trace['sd'] 43 | 44 | 45 | 46 | plt.figure(figsize=(10, 6)) 47 | ax = plt.subplot(1, 2, 1) 48 | pm.plot_posterior(mu_sample, bins=30, ax=ax) 49 | ax.set_xlabel('mu') 50 | ax.set_title = 'Posterior' 51 | ax.set_xlim(98, 102) 52 | 53 | plt.subplot(1, 2, 2) 54 | 55 | mu_mean = np.mean(mu_sample) 56 | sigma_mean = np.mean(sigma_sample) 57 | 58 | plt.scatter(mu_sample, sigma_sample , c='gray') 59 | plt.plot(mu_mean, sigma_mean, 'C1*', 60 | label=r'$\mu$ = %.1f, $\sigma$ = %.1f' % (mu_mean, sigma_mean)) 61 | plt.xlabel('mu') 62 | plt.ylabel('sigma') 63 | plt.title('Posterior') 64 | plt.legend(loc=0) 65 | plt.savefig('figure_15.3.png') 66 | plt.show() 67 | 68 | -------------------------------------------------------------------------------- /16_SimpleLinearRegressionPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Estimating the mean and standard deviation of a Gaussian likelihood with a 3 | hierarchical model. 4 | """ 5 | from __future__ import division 6 | import numpy as np 7 | import pymc3 as pm 8 | from scipy.stats import norm 9 | from scipy.interpolate import spline 10 | import matplotlib.pyplot as plt 11 | from hpd import * 12 | from HtWtDataGenerator import * 13 | plt.style.use('seaborn-darkgrid') 14 | 15 | # THE DATA. 16 | # Simulated height and weight data: 17 | n_subj = 30 18 | HtWtData = HtWtDataGenerator(n_subj, rndsd=5678) 19 | x = HtWtData[:,1] 20 | y = HtWtData[:,2] 21 | 22 | # Re-center data at mean, to reduce autocorrelation in MCMC sampling. 23 | # Standardize (divide by SD) to make initialization easier. 24 | x_m = np.mean(x) 25 | x_sd = np.std(x) 26 | y_m = np.mean(y) 27 | y_sd = np.std(y) 28 | zx = (x - x_m) / x_sd 29 | zy = (y - y_m) / y_sd 30 | 31 | 32 | # THE MODEL 33 | with pm.Model() as model: 34 | # define the priors 35 | sd = pm.HalfNormal('sd', 25) 36 | beta0 = pm.Normal('beta0', mu=0, sd=100) 37 | beta1 = pm.Normal('beta1', mu=0, sd=100) 38 | mu = beta0 + beta1 * zx 39 | # define the likelihood 40 | yl = pm.Normal('yl', mu=mu, sd=sd, observed=zy) 41 | # Generate a MCMC chain 42 | trace = pm.sample(1000) 43 | 44 | 45 | # EXAMINE THE RESULTS 46 | 47 | ## Print summary for each trace 48 | #pm.summary(trace) 49 | 50 | ## Check for mixing and autocorrelation 51 | #pm.autocorrplot(trace, vars =[tau]) 52 | 53 | 54 | ## Plot KDE and sampled values for each parameter. 55 | pm.traceplot(trace) 56 | 57 | 58 | ## Extract chain values: 59 | z0 = trace['beta0'] 60 | z1 = trace['beta1'] 61 | z_sigma = trace['sd'] 62 | 63 | 64 | # Convert to original scale: 65 | b1 = z1 * y_sd / x_sd 66 | b0 = (z0 * y_sd + y_m - z1 * y_sd * x_m / x_sd) 67 | sigma = z_sigma * y_sd 68 | 69 | 70 | # Posterior prediction: 71 | # Specify x values for which predicted y's are needed: 72 | x_post_pred = np.arange(55, 81) 73 | # Define matrix for recording posterior predicted y values at each x value. 74 | # One row per x value, with each row holding random predicted y values. 75 | post_samp_size = len(b1) 76 | y_post_pred = np.zeros((len(x_post_pred), post_samp_size)) 77 | # Define matrix for recording HDI limits of posterior predicted y values: 78 | y_HDI_lim = np.zeros((len(x_post_pred), 2)) 79 | # Generate posterior predicted y values. 80 | # This gets only one y value, at each x, for each step in the chain. 
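# A vectorised alternative sketch (illustrative, equivalent up to random draws) to
# the per-step loop below: norm.rvs broadcasts, so the whole posterior-predictive
# matrix can be drawn in one call.
#     mu_grid = b0 + np.outer(x_post_pred, b1)          # shape (n_x, n_samples)
#     y_post_pred = norm.rvs(loc=mu_grid, scale=sigma)  # sigma broadcasts along the sample axis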
81 | for chain_idx in range(post_samp_size): 82 | y_post_pred[:,chain_idx] = norm.rvs(loc=b0[chain_idx] + b1[chain_idx] * x_post_pred , 83 | scale = np.repeat([sigma[chain_idx]], [len(x_post_pred)]), size=len(x_post_pred)) 84 | 85 | for x_idx in range(len(x_post_pred)): 86 | y_HDI_lim[x_idx] = hpd(y_post_pred[x_idx]) 87 | 88 | ## Display believable beta0 and b1 values 89 | plt.figure() 90 | plt.subplot(1, 2, 1) 91 | thin_idx = 50 92 | plt.plot(z1[::thin_idx], z0[::thin_idx], 'b.', alpha=0.7) 93 | plt.ylabel('Standardized Intercept') 94 | plt.xlabel('Standardized Slope') 95 | plt.subplot(1, 2, 2) 96 | plt.plot(b1[::thin_idx], b0[::thin_idx], 'b.', alpha=0.7) 97 | plt.ylabel('Intercept (ht when wt=0)') 98 | plt.xlabel('Slope (pounds per inch)') 99 | plt.tight_layout() 100 | plt.savefig('Figure_16.4.png') 101 | 102 | # Display the posterior of the b1: 103 | plt.figure(figsize=(8, 5)) 104 | ax = plt.subplot(1, 2, 1) 105 | pm.plot_posterior(z1, ref_val=0.0, bins=30, ax=ax) 106 | ax.set_xlabel('Standardized slope') 107 | ax = plt.subplot(1, 2, 2) 108 | pm.plot_posterior(b1, ref_val=0.0, bins=30, ax=ax) 109 | ax.set_xlabel('Slope (pounds per inch)') 110 | plt.tight_layout() 111 | plt.savefig('Figure_16.5.png') 112 | 113 | # Display data with believable regression lines and posterior predictions. 114 | plt.figure() 115 | # Plot data values: 116 | x_rang = np.max(x) - np.min(x) 117 | y_rang = np.max(y) - np.min(y) 118 | lim_mult = 0.25 119 | x_lim = [np.min(x)-lim_mult*x_rang, np.max(x)+lim_mult*x_rang] 120 | y_lim = [np.min(y)-lim_mult*y_rang, np.max(y)+lim_mult*y_rang] 121 | plt.plot(x, y, 'k.') 122 | plt.title('Data with credible regression lines') 123 | plt.xlabel('X (height in inches)') 124 | plt.ylabel('Y (weight in pounds)') 125 | plt.xlim(x_lim) 126 | plt.ylim(y_lim) 127 | # Superimpose a smattering of believable regression lines: 128 | for i in range(0, len(b0), 100): 129 | plt.plot(x, b0[i] + b1[i]*x , c='k', alpha=0.05 ) 130 | plt.savefig('Figure_16.2.png') 131 | 132 | # Display data with HDIs of posterior predictions. 
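# Compatibility aside (an assumption about newer SciPy, not part of the original
# script): scipy.interpolate.spline, used for the smoothed HDI band below and in
# the robust-regression script that follows, was removed in later SciPy releases;
# a rough equivalent is the BSpline interface, e.g.
#     from scipy.interpolate import make_interp_spline
#     y_HDI_lim_smooth = make_interp_spline(x_post_pred, y_HDI_lim)(x_new)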
133 | 134 | plt.figure() 135 | # Plot data values: 136 | y_lim = [np.min(y_HDI_lim), np.max(y_HDI_lim)] 137 | plt.plot(x, y, 'k.') 138 | plt.xlim(x_lim) 139 | plt.ylim(y_lim) 140 | plt.xlabel('X (height in inches)') 141 | plt.ylabel('Y (weight in pounds)') 142 | plt.title('Data with 95% HDI & Mean of Posterior Predictions') 143 | # Superimpose posterior predicted 95% HDIs: 144 | y_post_pred_ave = np.average(y_post_pred, axis=1) 145 | #Book version of the HDI representation 146 | #plt.errorbar(x_post_pred,y_post_pred_ave, 147 | # yerr=[abs(y_HDI_lim[:,0]-y_post_pred_ave), 148 | # abs(y_HDI_lim[:,1]-y_post_pred_ave)], fmt='.') 149 | 150 | #Smoothed version of the HDI representation 151 | x_new = np.linspace(x_post_pred.min(), x_post_pred.max(), 200) 152 | y_HDI_lim_smooth = spline(x_post_pred, y_HDI_lim, x_new) 153 | plt.plot(x_post_pred, y_post_pred_ave) 154 | plt.fill_between(x_new, y_HDI_lim_smooth[:,0], y_HDI_lim_smooth[:,1], alpha=0.3) 155 | 156 | plt.savefig('Figure_16.6.png') 157 | 158 | plt.show() 159 | -------------------------------------------------------------------------------- /16_SimpleRobustLinearRegressionPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple Robust Linear Regression 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | from scipy.stats import t, norm 8 | from scipy.interpolate import spline 9 | import matplotlib.pyplot as plt 10 | plt.style.use('seaborn-darkgrid') 11 | from hpd import * 12 | from HtWtDataGenerator import * 13 | 14 | 15 | # THE DATA. 16 | 17 | cig_data = np.genfromtxt("McIntyre1994data.csv", skip_header=True, delimiter=",") 18 | n_subj = len(cig_data) 19 | x = cig_data[:,3] 20 | y = cig_data[:,1] 21 | 22 | 23 | # Re-center data at mean, to reduce autocorrelation in MCMC sampling. 24 | # Standardize (divide by SD) to make initialization easier. 25 | x_m = np.mean(x) 26 | x_sd = np.std(x) 27 | y_m = np.mean(y) 28 | y_sd = np.std(y) 29 | zx = (x - x_m) / x_sd 30 | zy = (y - y_m) / y_sd 31 | 32 | tdf_gain = 1 # 1 for low-biased tdf, 100 for high-biased tdf 33 | 34 | # THE MODEL 35 | with pm.Model() as model: 36 | # define the priors 37 | tdf = pm.Exponential('tdf', 1/30.) 38 | sd = pm.HalfNormal('sd', 25) 39 | beta0 = pm.Normal('beta0', mu=0, sd=100) 40 | beta1 = pm.Normal('beta1', mu=0, sd=100) 41 | mu = beta0 + beta1 * zx 42 | # define the likelihood 43 | yl = pm.StudentT('yl', mu=mu, sd=sd, nu=tdf, observed=zy) 44 | # Generate a MCMC chain 45 | trace = pm.sample(2000) 46 | 47 | 48 | # EXAMINE THE RESULTS 49 | 50 | ## Print summary for each trace 51 | #pm.summary(trace) 52 | 53 | ## Check for mixing and autocorrelation 54 | #pm.autocorrplot(trace, vars =[tau]) 55 | 56 | ## Plot KDE and sampled values for each parameter. 
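# Aside on the normality prior above: pm.Exponential('tdf', 1/30.) sets the rate
# lam = 1/30 for the Student-t degrees-of-freedom parameter, so its prior mean is
# 1/lam = 30; tdf values near 1 give heavy tails (a robust fit), while large
# values make the likelihood approach an ordinary normal. Illustrative arithmetic:
lam_example = 1 / 30.
prior_mean_tdf_example = 1 / lam_example  # approximately 30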
57 | #pm.traceplot(trace) 58 | 59 | 60 | # Extract chain values: 61 | 62 | tdf_samp = trace['tdf'] 63 | tdf_m = np.mean(tdf_samp) 64 | z0 = trace["beta0"] 65 | z1 = trace["beta1"] 66 | z_sigma = trace["sd"] 67 | 68 | # Convert to original scale: 69 | b1 = z1 * y_sd / x_sd 70 | b0 = (z0 * y_sd + y_m - z1 * y_sd * x_m / x_sd) 71 | sigma = z_sigma * y_sd 72 | 73 | # Posterior prediction: 74 | # Specify x values for which predicted y's are needed: 75 | x_rang = np.max(x) - np.min(x) 76 | y_rang = np.max(y) - np.min(y) 77 | lim_mult = 0.25 78 | x_lim = [np.min(x) - lim_mult * x_rang, np.max(x) + lim_mult * x_rang] 79 | #y_lim = [np.min(y) - lim_mult*y_rang, np.max(y) + lim_mult*y_rang] 80 | y_lim = [-10, 40] 81 | x_post_pred = np.linspace(x_lim[0], x_lim[1], 20) 82 | # Define matrix for recording posterior predicted y values at each x value. 83 | # One row per x value, with each row holding random predicted y values. 84 | post_samp_size = len(b1) 85 | y_post_pred = np.zeros((len(x_post_pred), post_samp_size)) 86 | # Define matrix for recording HDI limits of posterior predicted y values: 87 | y_HDI_lim = np.zeros((len(x_post_pred), 2)) 88 | # Generate posterior predicted y values. 89 | # This gets only one y value, at each x, for each step in the chain. 90 | for chain_idx in range(post_samp_size): 91 | y_post_pred[:,chain_idx] = t.rvs(df=np.repeat([tdf_samp[chain_idx]], [len(x_post_pred)]), 92 | loc = b0[chain_idx] + b1[chain_idx] * x_post_pred, 93 | scale = np.repeat([sigma[chain_idx]], [len(x_post_pred)])) 94 | 95 | for x_idx in range(len(x_post_pred)): 96 | y_HDI_lim[x_idx] = hpd(y_post_pred[x_idx]) 97 | 98 | 99 | # Display believable beta0 and b1 values 100 | plt.figure() 101 | thin_idx = 5 102 | plt.plot(b1[::thin_idx], b0[::thin_idx], '.') 103 | plt.ylabel("Intercept") 104 | plt.xlabel("Slope") 105 | plt.savefig('Figure_16.x0.png') 106 | 107 | # Display the posterior of the b1: 108 | ax = pm.plot_posterior(b1, ref_val=0.0, bins=30) 109 | ax.set_xlabel(r'Slope ($\Delta$ tar / $\Delta$ weight)') 110 | plt.title('Mean tdf = %.2f' % tdf_m) 111 | plt.savefig('Figure_16.8b.png') 112 | 113 | # Display data with believable regression lines and posterior predictions. 
114 | plt.figure() 115 | plt.plot(x, y, 'k.') 116 | plt.title('Data with credible regression lines') 117 | plt.xlabel('weight') 118 | plt.ylabel('tar') 119 | plt.xlim(x_lim) 120 | plt.ylim(y_lim) 121 | # Superimpose a smattering of believable regression lines: 122 | for i in range(0, len(b0), 5): 123 | plt.plot(x, b0[i] + b1[i]*x , c='k', alpha=0.05 ) 124 | plt.savefig('Figure_16.8x1.png') 125 | 126 | 127 | plt.figure() 128 | # Plot data values: 129 | plt.plot(x, y, 'k.') 130 | plt.xlim(x_lim) 131 | plt.ylim(y_lim) 132 | plt.xlabel('weight') 133 | plt.ylabel('tar') 134 | plt.title('Data with 95% HDI & Mean of Posterior Predictions') 135 | # Superimpose posterior predicted 95% HDIs: 136 | y_post_pred_med = np.median(y_post_pred, axis=1) 137 | 138 | #Book version of the HDI representation 139 | #plt.errorbar(x_post_pred, y_post_pred_med, 140 | # yerr=[abs(y_HDI_lim[:,0]-y_post_pred_med), 141 | # abs(y_HDI_lim[:,1]-y_post_pred_med)], fmt='.') 142 | 143 | #Smoothed version of the HDI representation 144 | x_new = np.linspace(x_post_pred.min(), x_post_pred.max(), 200) 145 | y_HDI_lim_smooth = spline(x_post_pred, y_HDI_lim, x_new) 146 | plt.plot(x_post_pred, y_post_pred_med) 147 | plt.fill_between(x_new, y_HDI_lim_smooth[:,0], y_HDI_lim_smooth[:,1], alpha=0.3) 148 | 149 | plt.savefig('Figure_16.8d.png') 150 | 151 | plt.show() 152 | 153 | 154 | -------------------------------------------------------------------------------- /17_MultiLinRegressHyperPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multiple linear regression with hyperpriors. 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import pandas as pd 8 | from scipy.stats import norm 9 | import matplotlib.pyplot as plt 10 | plt.style.use('seaborn-darkgrid') 11 | import seaborn as sns 12 | from hpd import * 13 | 14 | 15 | 16 | # THE DATA. 17 | 18 | tdfBgain = 1 19 | 20 | dataSource = ["Guber1999", "McIntyre1994", "random"][0] 21 | 22 | if dataSource == "Guber1999": 23 | fname = "Guber1999" # file name for saved graphs 24 | data = pd.read_csv('Guber1999data.txt', sep='\s+', 25 | names = ["State","Spend","StuTchRat","Salary", "PrcntTake","SATV","SATM","SATT"]) 26 | # Specify variables to be used in BUGS analysis: 27 | predicted_name = "SATT" 28 | predictor_names = ["Spend" , "PrcntTake"] 29 | n_data = len(data) 30 | y = data[predicted_name] 31 | x = data[predictor_names] 32 | n_predictors = len(x.columns) 33 | 34 | 35 | if dataSource == "McIntyre1994": 36 | fname = "McIntyre1994" # file name for saved graphs 37 | data = pd.read_csv('McIntyre1994data.csv') 38 | predicted_name = "CO" 39 | predictor_names = ["Tar","Nic","Wt"] 40 | n_data = len(data) 41 | y = data[predicted_name] 42 | x = data[predictor_names] 43 | n_data = len(data) 44 | 45 | 46 | if dataSource == "random": 47 | fname = "Random" # file name for saved graphs 48 | # Generate random data. 
49 | # True parameter values: 50 | beta_true = np.repeat(0, 21) 51 | beta_true = np.insert(beta_true, [0,0,0], [100, 1, 2]) # beta0 is first component 52 | n_predictors = len(beta_true) - 1 53 | sd_true = 2 54 | tau_true = 1/sd_true**2 55 | # Random X values: 56 | np.random.seed(47405) 57 | xM = 5 58 | xSD = 2 59 | n_data = 100 60 | x = norm.rvs(xM, xSD, n_predictors*n_data).reshape(100, -1) 61 | x = pd.DataFrame(x, columns=['X%s' % i for i in range(0, n_predictors)]) 62 | # Random Y values generated from linear model with true parameter values: 63 | y = np.sum(x * beta_true[1:].T, axis=1) + beta_true[0] + norm.rvs(0, sd_true, n_data) 64 | # Select which predictors to include 65 | include_only = range(0, n_predictors) # default is to include all 66 | #x = x.iloc[include_only] 67 | predictor_names = x.columns 68 | n_predictors = len(predictor_names) 69 | 70 | 71 | # THE MODEL 72 | with pm.Model() as model: 73 | # define hyperpriors 74 | muB = pm.Normal('muB', 0, 100) 75 | tauB = pm.Gamma('tauB', .01, .01) 76 | udfB = pm.Uniform('udfB', 0, 1) 77 | tdfB = 1 + tdfBgain * (-pm.math.log(1 - udfB)) 78 | # define the priors 79 | tau = pm.Gamma('tau', 0.01, 0.01) 80 | beta0 = pm.Normal('beta0', mu=0, tau=1.0E-12) 81 | beta1 = pm.StudentT('beta1', mu=muB, lam=tauB, nu=tdfB, shape=n_predictors) 82 | mu = beta0 + pm.math.dot(beta1, x.values.T) 83 | # define the likelihood 84 | #mu = beta0 + beta1[0] * x.values[:,0] + beta1[1] * x.values[:,1] 85 | yl = pm.Normal('yl', mu=mu, tau=tau, observed=y) 86 | # Generate a MCMC chain 87 | trace = pm.sample(1000) 88 | 89 | 90 | # EXAMINE THE RESULTS 91 | 92 | # Print summary for each trace 93 | #pm.summary(trace) 94 | 95 | # Check for mixing and autocorrelation 96 | #pm.autocorrplot(trace, vars =[beta0]) 97 | 98 | ## Plot KDE and sampled values for each parameter. 99 | #pm.traceplot(trace) 100 | 101 | 102 | # Extract chain values: 103 | b0_samp = trace['beta0'] 104 | b_samp = trace['beta1'] 105 | tau_samp = trace['tau'] 106 | sigma_samp = 1 / np.sqrt(tau_samp) # Convert precision to SD 107 | chain_length = len(tau_samp) 108 | 109 | if n_predictors >= 6: # don't display if too many predictors 110 | n_predictors == 6 111 | 112 | columns = ['Sigma y', 'Intercept'] 113 | [columns.append('Slope_%s' % i) for i in predictor_names[:n_predictors]] 114 | traces = np.array([sigma_samp, b0_samp, b_samp[:,0], b_samp[:,1]]).T 115 | df = pd.DataFrame(traces, columns=columns) 116 | g = sns.PairGrid(df) 117 | g.map(plt.scatter) 118 | plt.savefig('Figure_17.Xa.png') 119 | 120 | ## Display the posterior: 121 | 122 | plt.figure(figsize=(16,4)) 123 | ax = plt.subplot(1, n_predictors+2, 1) 124 | pm.plot_posterior(sigma_samp, ax=ax) 125 | ax.set_xlabel(r'$\sigma y$') 126 | ax = plt.subplot(1, n_predictors+2, 2) 127 | pm.plot_posterior(b0_samp, ax=ax) 128 | ax.set_xlabel('Intercept') 129 | 130 | for i in range(0, n_predictors): 131 | ax = plt.subplot(1, n_predictors+2, 3+i) 132 | pm.plot_posterior(b_samp[:,i], ref_val=0, ax=ax) 133 | ax.set_xlabel('Slope_%s' % predictor_names[i]) 134 | plt.tight_layout() 135 | plt.savefig('Figure_17.Xb.png') 136 | 137 | # Posterior prediction: 138 | # Define matrix for recording posterior predicted y values for each xPostPred. 139 | # One row per xPostPred value, with each row holding random predicted y values. 140 | y_post_pred = np.zeros((len(x), chain_length)) 141 | # Define matrix for recording HDI limits of posterior predicted y values: 142 | y_HDI_lim = np.zeros((len(x), 2)) 143 | # Generate posterior predicted y values. 
144 | # This gets only one y value, at each x, for each step in the chain. 145 | #or chain_idx in range(chain_length): 146 | for chain_idx in range(chain_length): 147 | y_post_pred[:,chain_idx] = norm.rvs(loc = b0_samp[chain_idx] + np.dot(b_samp[chain_idx], x.values.T), 148 | scale = np.repeat([sigma_samp[chain_idx]], [len(x)])) 149 | 150 | for x_idx in range(len(x)): 151 | y_HDI_lim[x_idx] = hpd(y_post_pred[x_idx]) 152 | 153 | for i in range(len(x)): 154 | print(np.mean(y_post_pred, axis=1)[i], y_HDI_lim[i]) 155 | 156 | plt.show() 157 | -------------------------------------------------------------------------------- /17_MultipleLinearRegressionPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multiple linear regression 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import pandas as pd 8 | from scipy.stats import norm 9 | import matplotlib.pyplot as plt 10 | plt.style.use('seaborn-darkgrid') 11 | from hpd import * 12 | import seaborn as sns 13 | 14 | 15 | # THE DATA. 16 | dataSource = ["Guber1999", "McIntyre1994", "random"][0] 17 | 18 | if dataSource == "Guber1999": 19 | fname = "Guber1999" # file name for saved graphs 20 | data = pd.read_csv('Guber1999data.txt', sep='\s+', 21 | names = ["State","Spend","StuTchRat","Salary", "PrcntTake","SATV","SATM","SATT"]) 22 | # Specify variables to be used in BUGS analysis: 23 | predictedName = "SATT" 24 | predictorNames = ["Spend" , "PrcntTake"] 25 | nData = len(data) 26 | y = data[predictedName] 27 | x = data[predictorNames] 28 | n_predictors = len(x.columns) 29 | 30 | 31 | if dataSource == "McIntyre1994": 32 | fname = "McIntyre1994" # file name for saved graphs 33 | data = pd.read_csv('McIntyre1994data.csv') 34 | predictedName = "CO" 35 | predictorNames = ["Tar","Nic","Wt"] 36 | nData = len(data) 37 | y = data[predictedName] 38 | x = data[predictorNames] 39 | nData = len(data) 40 | 41 | 42 | if dataSource == "random": 43 | fname = "Random" # file name for saved graphs 44 | # Generate random data. 
45 | # True parameter values: 46 | beta_true = np.repeat(0, 21) 47 | beta_true = np.insert(beta_true, [0,0,0], [100, 1, 2]) # beta0 is first component 48 | n_predictors = len(beta_true) - 1 49 | sd_true = 2 50 | tau_true = 1/sd_true**2 51 | # Random X values: 52 | np.random.seed(47405) 53 | xM = 5 54 | xSD = 2 55 | nData = 100 56 | x = norm.rvs(xM, xSD, n_predictors*nData).reshape(100, -1) 57 | x = pd.DataFrame(x, columns=['X%s' % i for i in range(0, n_predictors)]) 58 | # Random Y values generated from linear model with true parameter values: 59 | y = np.sum(x * beta_true[1:].T, axis=1) + beta_true[0] + norm.rvs(0, sd_true, nData) 60 | # Select which predictors to include 61 | includeOnly = range(0, n_predictors) # default is to include all 62 | #x = x.iloc[includeOnly] 63 | predictorNames = x.columns 64 | n_predictors = len(predictorNames) 65 | 66 | 67 | 68 | # THE MODEL 69 | with pm.Model() as model: 70 | # define the priors 71 | beta0 = pm.Normal('beta0', mu=0, sd=100) 72 | beta1 = pm.Normal('beta1', mu= 0, sd=100, shape=n_predictors) 73 | sd = pm.HalfNormal('sd', 25) 74 | mu = beta0 + pm.math.dot(beta1, x.values.T) 75 | # define the likelihood 76 | yl = pm.Normal('yl', mu, sd, observed=y) 77 | # Generate a MCMC chain 78 | trace = pm.sample(1000) 79 | 80 | # EXAMINE THE RESULTS 81 | 82 | # Print summary for each trace 83 | #pm.summary(trace) 84 | 85 | # Check for mixing and autocorrelation 86 | #pm.autocorrplot(trace, vars =[beta0]) 87 | 88 | ## Plot KDE and sampled values for each parameter. 89 | #pm.traceplot(trace) 90 | 91 | 92 | # Extract chain values: 93 | b0_samp = trace['beta0'] 94 | b_samp = trace['beta1'] 95 | 96 | Sigma_samp = trace['sd'] 97 | chain_length = len(Sigma_samp) 98 | 99 | if n_predictors >= 6: # don't display if too many predictors 100 | n_predictors == 6 101 | 102 | columns = ['Sigma y', 'Intercept'] 103 | [columns.append('Slope_%s' % i) for i in predictorNames[:n_predictors]] 104 | traces = np.array([Sigma_samp, b0_samp, b_samp[:,0], b_samp[:,1]]).T 105 | df = pd.DataFrame(traces, columns=columns) 106 | sns.set_style('dark') 107 | g = sns.PairGrid(df) 108 | g.map(plt.scatter) 109 | plt.savefig('Figure_17.5b.png') 110 | 111 | ## Display the posterior: 112 | sns.set_style('darkgrid') 113 | 114 | plt.figure(figsize=(16,4)) 115 | ax = plt.subplot(1, n_predictors+2, 1) 116 | pm.plot_posterior(Sigma_samp, ax=ax) 117 | ax.set_xlabel(r'$\sigma y$') 118 | ax = plt.subplot(1, n_predictors+2, 2) 119 | ax = pm.plot_posterior(b0_samp, ax=ax) 120 | ax.set_xlabel('Intercept') 121 | 122 | for i in range(0, n_predictors): 123 | ax = plt.subplot(1, n_predictors+2, 3+i) 124 | pm.plot_posterior(b_samp[:,i], ref_val=0, ax=ax) 125 | ax.set_xlabel('Slope_{}'.format(predictorNames[i])) 126 | plt.tight_layout() 127 | plt.savefig('Figure_17.5a.png') 128 | 129 | 130 | # Posterior prediction: 131 | # Define matrix for recording posterior predicted y values for each xPostPred. 132 | # One row per xPostPred value, with each row holding random predicted y values. 133 | y_post_pred = np.zeros((len(x), chain_length)) 134 | # Define matrix for recording HDI limits of posterior predicted y values: 135 | y_HDI_lim = np.zeros((len(x), 2)) 136 | # Generate posterior predicted y values. 137 | # This gets only one y value, at each x, for each step in the chain. 
138 | #or chain_idx in range(chain_length): 139 | for chain_idx in range(chain_length): 140 | y_post_pred[:,chain_idx] = norm.rvs(loc = b0_samp[chain_idx] + np.dot(b_samp[chain_idx], x.values.T), 141 | scale = np.repeat([Sigma_samp[chain_idx]], [len(x)])) 142 | 143 | for x_idx in range(len(x)): 144 | y_HDI_lim[x_idx] = hpd(y_post_pred[x_idx]) 145 | 146 | for i in range(len(x)): 147 | print(np.mean(y_post_pred, axis=1)[i], y_HDI_lim[i]) 148 | 149 | plt.show() 150 | -------------------------------------------------------------------------------- /18_ANOVAonewayNonhomogvarBrugs.py: -------------------------------------------------------------------------------- 1 | """ 2 | One way BANOVA Non Homogeneous Variance 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import pandas as pd 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | from scipy.stats import norm 11 | from hpd import * 12 | from theano import tensor as tt 13 | 14 | # THE DATA. 15 | # Specify data source: 16 | dataSource = ["McDonaldSK1991" , "SolariLS2008" , "Random"][0] 17 | 18 | # Load the data: 19 | if dataSource == "McDonaldSK1991": 20 | datarecord = pd.read_csv("McDonaldSK1991data.txt", sep='\s+', skiprows=18, skipfooter=25) 21 | y = datarecord['Size'] 22 | Ntotal = len(y) 23 | x = (datarecord['Group'] - 1).values 24 | xnames = pd.unique(datarecord['Site']) 25 | NxLvl = len(xnames) 26 | contrast_dict = {'BIGvSMALL':[-1/3,-1/3,1/2,-1/3,1/2], 27 | 'ORE1vORE2': [1,-1,0,0,0], 28 | 'ALAvORE':[-1/2,-1/2,1,0,0], 29 | 'NPACvORE':[-1/2,-1/2,1/2,1/2,0], 30 | 'USAvRUS':[1/3,1/3,1/3,-1,0], 31 | 'FINvPAC':[-1/4,-1/4,-1/4,-1/4,1], 32 | 'ENGvOTH':[1/3,1/3,1/3,-1/2,-1/2], 33 | 'FINvRUS':[0,0,0,-1,1]} 34 | 35 | 36 | if dataSource == "SolariLS2008": 37 | datarecord = pd.read_csv("SolariLS2008data.txt", sep='\s+', skiprows=21) 38 | y = datarecord['Acid'] 39 | Ntotal = len(y) 40 | x = (datarecord['Type'] - 1).values 41 | xnames = pd.unique(x) 42 | NxLvl = len(xnames) 43 | contrast_dict = {'G3vOTHER':[-1/8,-1/8,1,-1/8,-1/8,-1/8,-1/8,-1/8,-1/8]} 44 | 45 | 46 | if dataSource == "Random": 47 | np.random.seed(47405) 48 | ysdtrue = 4.0 49 | a0true = 100 50 | atrue = [2, -2] # sum to zero 51 | npercell = 8 52 | x = [] 53 | y = [] 54 | for xidx in range(len(atrue)): 55 | for subjidx in range(npercell): 56 | x.append(xidx) 57 | y.append(a0true + atrue[xidx] + norm.rvs(1, ysdtrue)) 58 | Ntotal = len(y) 59 | NxLvl = len(set(x)) 60 | # # Construct list of all pairwise comparisons, to compare with NHST TukeyHSD: 61 | contrast_dict = None 62 | for g1idx in range(NxLvl): 63 | for g2idx in range(g1idx+1, NxLvl): 64 | cmpVec = np.repeat(0, NxLvl) 65 | cmpVec[g1idx] = -1 66 | cmpVec[g2idx] = 1 67 | contrast_dict = (contrast_dict, cmpVec) 68 | 69 | 70 | z = (y - np.mean(y))/np.std(y) 71 | 72 | 73 | ## THE MODEL. 
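# Aside on the BANOVA parameterisation in the model below: the deflections a get a
# folded-t prior on their standard deviation (abs(StudentT) + 0.1) and are then
# re-expressed as sum-to-zero deflections via b = a - mean(a). A tiny
# self-contained numpy illustration (variable names are hypothetical):
import numpy as np
a_example = np.array([2.0, 0.5, -1.0])    # unconstrained deflections
b_example = a_example - a_example.mean()  # re-expressed to sum to zero
assert abs(b_example.sum()) < 1e-12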
74 | with pm.Model() as model: 75 | # define the hyperpriors 76 | a_SD_unabs = pm.StudentT('a_SD_unabs', mu=0, lam=0.001, nu=1) 77 | a_SD = abs(a_SD_unabs) + 0.1 78 | atau = 1 / a_SD**2 79 | m = pm.Gamma('m', 1, 1) 80 | d = pm.Gamma('d', 1, 1) 81 | sG = m**2 / d**2 82 | rG = m / d**2 83 | # define the priors 84 | tau = pm.Gamma('tau', sG, rG) 85 | a0 = pm.Normal('a0', mu=0, tau=0.001) # y values are assumed to be standardized 86 | a = pm.Normal('a', mu=0 , tau=atau, shape=NxLvl) 87 | 88 | b = pm.Deterministic('b', a - tt.mean(a)) 89 | mu = a0 + b[x] 90 | # define the likelihood 91 | yl = pm.Normal('yl', mu=mu, tau=tau, observed=z) 92 | # Generate a MCMC chain 93 | trace = pm.sample(2000) 94 | 95 | 96 | # EXAMINE THE RESULTS 97 | 98 | # Print summary for each trace 99 | #pm.summary(trace) 100 | 101 | # Check for mixing and autocorrelation 102 | #pm.autocorrplot(trace, vars=model.unobserved_RVs[:-1]) 103 | 104 | ## Plot KDE and sampled values for each parameter. 105 | pm.traceplot(trace) 106 | 107 | 108 | a0_sample = trace['a0'] 109 | b_sample = trace['b'] 110 | b0_sample = a0_sample * np.std(y) + np.mean(y) 111 | b_sample = b_sample * np.std(y) 112 | 113 | 114 | plt.figure(figsize=(20, 4)) 115 | for i in range(5): 116 | ax = plt.subplot(1, 5, i+1) 117 | pm.plot_posterior(b_sample[:,i], bins=50, ax=ax) 118 | ax.set_xlabel=r'$\beta1_{}$'.format(i) 119 | ax.set_title='x:{}'.format(i) 120 | plt.tight_layout() 121 | plt.savefig('Figure_18.xa.png') 122 | 123 | 124 | nContrasts = len(contrast_dict) 125 | if nContrasts > 0: 126 | plt.figure(figsize=(20, 8)) 127 | count = 1 128 | for key, value in contrast_dict.items(): 129 | contrast = np.dot(b_sample, value) 130 | ax = plt.subplot(2, 4, count) 131 | pm.plot_posterior(contrast, ref_val=0.0, bins=50, ax=ax) 132 | ax.set_title('Contrast {}'.format(key)) 133 | count += 1 134 | plt.tight_layout() 135 | plt.savefig('Figure_18.xa.png') 136 | 137 | plt.show() 138 | -------------------------------------------------------------------------------- /18_ANOVAonewayPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | One way BANOVA 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import pandas as pd 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | from scipy.stats import norm 11 | from hpd import * 12 | from theano import tensor as T 13 | 14 | 15 | # THE DATA. 
16 | # Specify data source: 17 | dataSource = ["McDonaldSK1991" , "SolariLS2008" , "Random"][0] 18 | 19 | # Load the data: 20 | if dataSource == "McDonaldSK1991": 21 | datarecord = pd.read_csv("McDonaldSK1991data.txt", sep='\s+', skiprows=18, skipfooter=25) 22 | y = datarecord['Size'] 23 | Ntotal = len(y) 24 | x = (datarecord['Group'] - 1).values 25 | xnames = pd.unique(datarecord['Site']) 26 | NxLvl = len(xnames) 27 | contrast_dict = {'BIGvSMALL':[-1/3,-1/3,1/2,-1/3,1/2], 28 | 'ORE1vORE2': [1,-1,0,0,0], 29 | 'ALAvORE':[-1/2,-1/2,1,0,0], 30 | 'NPACvORE':[-1/2,-1/2,1/2,1/2,0], 31 | 'USAvRUS':[1/3,1/3,1/3,-1,0], 32 | 'FINvPAC':[-1/4,-1/4,-1/4,-1/4,1], 33 | 'ENGvOTH':[1/3,1/3,1/3,-1/2,-1/2], 34 | 'FINvRUS':[0,0,0,-1,1]} 35 | 36 | 37 | if dataSource == "SolariLS2008": 38 | datarecord = pd.read_csv("SolariLS2008data.txt", sep='\s+', skiprows=21) 39 | y = datarecord['Acid'] 40 | Ntotal = len(y) 41 | x = (datarecord['Type'] - 1).values 42 | xnames = pd.unique(x) 43 | NxLvl = len(xnames) 44 | contrast_dict = {'G3vOTHER':[-1/8,-1/8,1,-1/8,-1/8,-1/8,-1/8,-1/8,-1/8]} 45 | 46 | 47 | if dataSource == "Random": 48 | np.random.seed(47405) 49 | ysdtrue = 4.0 50 | a0true = 100 51 | atrue = [2, -2] # sum to zero 52 | npercell = 8 53 | x = [] 54 | y = [] 55 | for xidx in range(len(atrue)): 56 | for subjidx in range(npercell): 57 | x.append(xidx) 58 | y.append(a0true + atrue[xidx] + norm.rvs(1, ysdtrue)) 59 | Ntotal = len(y) 60 | NxLvl = len(set(x)) 61 | # # Construct list of all pairwise comparisons, to compare with NHST TukeyHSD: 62 | contrast_dict = None 63 | for g1idx in range(NxLvl): 64 | for g2idx in range(g1idx+1, NxLvl): 65 | cmpVec = np.repeat(0, NxLvl) 66 | cmpVec[g1idx] = -1 67 | cmpVec[g2idx] = 1 68 | contrast_dict = (contrast_dict, cmpVec) 69 | 70 | 71 | z = (y - np.mean(y))/np.std(y) 72 | 73 | 74 | ## THE MODEL. 75 | with pm.Model() as model: 76 | # define the hyperpriors 77 | a_SD_unabs = pm.StudentT('a_SD_unabs', mu=0, lam=0.001, nu=1) 78 | a_SD = abs(a_SD_unabs) + 0.1 79 | atau = 1 / a_SD**2 80 | # define the priors 81 | sigma = pm.Uniform('sigma', 0, 10) # y values are assumed to be standardized 82 | tau = 1 / sigma**2 83 | a0 = pm.Normal('a0', mu=0, tau=0.001) # y values are assumed to be standardized 84 | a = pm.Normal('a', mu=0 , tau=atau, shape=NxLvl) 85 | 86 | b = pm.Deterministic('b', a - T.mean(a)) 87 | mu = a0 + b[x] 88 | # define the likelihood 89 | yl = pm.Normal('yl', mu, tau=tau, observed=z) 90 | # Generate a MCMC chain 91 | trace = pm.sample(2000, progressbar=False) 92 | 93 | 94 | # EXAMINE THE RESULTS 95 | burnin = 1000 96 | thin = 10 97 | 98 | # Print summary for each trace 99 | #pm.summary(trace[burnin::thin]) 100 | #pm.summary(trace) 101 | 102 | # Check for mixing and autocorrelation 103 | #pm.autocorrplot(trace[burnin::thin], vars=model.unobserved_RVs[:-1]) 104 | 105 | ## Plot KDE and sampled values for each parameter. 
106 | #pm.traceplot(trace[burnin::thin]) 107 | pm.traceplot(trace) 108 | 109 | a0_sample = trace['a0'][burnin::thin] 110 | b_sample = trace['b'][burnin::thin] 111 | b0_sample = a0_sample * np.std(y) + np.mean(y) 112 | b_sample = b_sample * np.std(y) 113 | 114 | 115 | plt.figure(figsize=(20, 4)) 116 | for i in range(5): 117 | ax = plt.subplot(1, 5, i+1) 118 | pm.plot_posterior(b_sample[:,i], bins=50, ax=ax) 119 | ax.set_xlabel(r'$\beta1_{}$'.format(i)) 120 | ax.set_title('x:{}'.format(i)) 121 | plt.tight_layout() 122 | plt.savefig('Figure_18.2a.png') 123 | 124 | 125 | nContrasts = len(contrast_dict) 126 | if nContrasts > 0: 127 | plt.figure(figsize=(20, 8)) 128 | count = 1 129 | for key, value in contrast_dict.items(): 130 | contrast = np.dot(b_sample, value) 131 | ax = plt.subplot(2, 4, count) 132 | pm.plot_posterior(contrast, ref_val=0.0, bins=50, ax=ax) 133 | ax.set_title('Contrast {}'.format(key)) 134 | count += 1 135 | plt.tight_layout() 136 | plt.savefig('Figure_18.2b.png') 137 | 138 | plt.show() 139 | -------------------------------------------------------------------------------- /19_ANOVAtwowayPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Two way BANOVA 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import pandas as pd 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | from scipy.stats import norm 11 | from theano import tensor as tt 12 | 13 | 14 | # THE DATA. 15 | # Specify data source: 16 | data_source = ["QianS2007" , "Salary" , "Random" , "Ex19.3"][1] 17 | 18 | # Load the data: 19 | if data_source == "QianS2007": 20 | data_record = pd.read_csv("QianS2007SeaweedData.txt") 21 | # Logistic transform the COVER value: 22 | # Used by Appendix 3 of QianS2007 to replicate Ramsey and Schafer (2002). 
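# Aside: the transform applied just below is the logit of the proportional cover
# p = COVER/100, since -log(100/c - 1) = log((c/100) / (1 - c/100)). A quick
# self-contained check with hypothetical cover percentages:
import numpy as np
c_example = np.array([10.0, 50.0, 90.0])
p_example = c_example / 100
assert np.allclose(-np.log(100 / c_example - 1), np.log(p_example / (1 - p_example)))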
23 | data_record['COVER'] = -np.log((100/data_record['COVER']) -1) 24 | 25 | y = data_record['COVER'].values 26 | x1 = pd.Categorical(data_record['TREAT']).codes 27 | x1names = data_record['TREAT'].values 28 | x2 = pd.Categorical(data_record['BLOCK']).codes 29 | x2names = data_record['BLOCK'].values 30 | Ntotal = len(y) 31 | Nx1Lvl = len(set(x1)) 32 | Nx2Lvl = len(set(x2)) 33 | x1contrastDict = {'f_Effect':[1/2, -1/2, 0, 1/2, -1/2, 0], 34 | 'F_Effect':[0, 1/2, -1/2, 0, 1/2, -1/2], 35 | 'L_Effect':[1/3, 1/3, 1/3, -1/3, -1/3, -1/3 ]} 36 | x2contrastDict = None # np.zeros(Nx2Lvl) 37 | x1x2contrastDict = None # np.zeros(Nx1Lvl*Nx2Lvl, Nx1Lvl) 38 | 39 | if data_source == "Salary": 40 | data_record = pd.read_csv("Salary.csv") 41 | y = data_record['Salary'] 42 | x1 = pd.Categorical(data_record['Org']).codes 43 | x1names = data_record['Org'].unique() 44 | x1names.sort() 45 | x2 = pd.Categorical(data_record['Post']).codes 46 | x2names = data_record['Post'].unique() 47 | x2names.sort() 48 | Ntotal = len(y) 49 | Nx1Lvl = len(set(x1)) 50 | Nx2Lvl = len(set(x2)) 51 | 52 | x1contrastDict = {'BFINvCEDP':[1, -1, 0, 0], 53 | 'CEDPvTHTR':[0, 1, 0, -1]} 54 | x2contrastDict = {'FT1vFT2':[1, -1, 0], 55 | 'FT2vFT3':[0,1,-1]} 56 | x1x2contrastDict = {'CHEMvTHTRxFT1vFT3':np.outer([0, 0, 1, -1], [1,0,-1]), 57 | 'BFINvOTHxFT1vOTH':np.outer([1, -1/3, -1/3, -1/3], [1, -1/2, -1/2])} 58 | 59 | if data_source == "Random": 60 | np.random.seed(47405) 61 | ysdtrue = 3 62 | a0true = 100 63 | a1true = np.array([2, 0, -2]) # sum to zero 64 | a2true = np.array([3, 1, -1, -3]) # sum to zero 65 | a1a2true = np.array([[1,-1,0, 0], [-1,1,0,0], [0,0,0,0]]) 66 | 67 | npercell = 8 68 | index = np.arange(len(a1true)*len(a2true)*npercell) 69 | data_record = pd.DataFrame(index=index, columns=["y","x1","x2"]) 70 | 71 | rowidx = 0 72 | for x1idx in range(0, len(a1true)): 73 | for x2idx in range(0, len(a2true)): 74 | for subjidx in range(0, npercell): 75 | data_record['x1'][rowidx] = x1idx 76 | data_record['x2'][rowidx] = x2idx 77 | data_record['y'][rowidx] = float(a0true + a1true[x1idx] + a2true[x2idx] 78 | + a1a2true[x1idx, x2idx] + norm.rvs(loc=0, scale=ysdtrue, size=1)[0]) 79 | rowidx += 1 80 | 81 | y = data_record['y'] 82 | x1 = pd.Categorical(data_record['x1']).codes 83 | x1names = data_record['x1'].unique() 84 | x2 = pd.Categorical(data_record['x2']).codes 85 | x2names = data_record['x2'].unique() 86 | Ntotal = len(y) 87 | Nx1Lvl = len(set(x1)) 88 | Nx2Lvl = len(set(x2)) 89 | x1contrast_dict = {'X1_1v3': [1, 0, -1]} # 90 | x2contrast_dict = {'X2_12v34':[1/2, 1/2, -1/2, -1/2]} # 91 | x1x2contrast_dict = {'IC_11v22': np.outer([1, -1, 0], [1, -1, 0, 0]), 92 | 'IC_23v34': np.outer([0, 1, -1], [0, 0, 1, -1])} 93 | 94 | if data_source == 'Ex19.3': 95 | y = [101,102,103,105,104, 104,105,107,106,108, 105,107,106,108,109, 109,108,110,111,112] 96 | x1 = [0,0,0,0,0, 0,0,0,0,0, 1,1,1,1,1, 1,1,1,1,1] 97 | x2 = [0,0,0,0,0, 1,1,1,1,1, 0,0,0,0,0, 1,1,1,1,1] 98 | S = [0,1,2,3,4, 0,1,2,3,4, 0,1,2,3,4, 0,1,2,3,4] 99 | x1names = ['x1.1' ,'x1.2'] 100 | x2names = ['x2.1', "x2.2"] 101 | Snames = ['S1', 'S2', 'S3', 'S4', 'S5'] 102 | Ntotal = len(y) 103 | Nx1Lvl = len(set(x1)) 104 | Nx2Lvl = len(set(x2)) 105 | NSLvl = len(set(S)) 106 | x1contrast_dict = {'X1.2vX1.1':[-1 , 1]} 107 | x2contrast_dict = {'X2.2vX2.1':[-1 , 1]} 108 | x1x2contrast_dict = None #np.arange(0, Nx1Lvl*Nx2Lvl).reshape(Nx1Lvl, -1).T 109 | 110 | z = (y - np.mean(y))/np.std(y) 111 | 112 | z = (y - np.mean(y))/np.std(y) 113 | 114 | # THE MODEL. 
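# Aside on the interaction contrasts defined above (e.g. via np.outer in the
# Salary branch): the contrast matrix is the outer product of a row (x1) contrast
# and a column (x2) contrast, and it is applied to the posterior interaction
# sample at the end of this script with np.tensordot, which sums over the last
# two axes. Self-contained numpy sketch with hypothetical shapes:
import numpy as np
row_c = np.array([1.0, -1.0, 0.0])                      # x1 contrast (3 levels)
col_c = np.array([1.0, 0.0, -1.0, 0.0])                 # x2 contrast (4 levels)
inter_c = np.outer(row_c, col_c)                        # (3, 4) interaction contrast
b1b2_example = np.random.randn(100, 3, 4)               # fake posterior sample
contrast_example = np.tensordot(b1b2_example, inter_c)  # -> shape (100,)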
115 | 
116 | with pm.Model() as model:
117 |     # define the hyperpriors
118 |     a1_SD_unabs = pm.StudentT('a1_SD_unabs', mu=0, lam=0.001, nu=1)
119 |     a1_SD = abs(a1_SD_unabs) + 0.1
120 |     a1tau = 1 / a1_SD**2
121 | 
122 |     a2_SD_unabs = pm.StudentT('a2_SD_unabs', mu=0, lam=0.001, nu=1)
123 |     a2_SD = abs(a2_SD_unabs) + 0.1
124 |     a2tau = 1 / a2_SD**2
125 | 
126 |     a1a2_SD_unabs = pm.StudentT('a1a2_SD_unabs', mu=0, lam=0.001, nu=1)
127 |     a1a2_SD = abs(a1a2_SD_unabs) + 0.1
128 |     a1a2tau = 1 / a1a2_SD**2
129 | 
130 | 
131 |     # define the priors
132 |     sigma = pm.Uniform('sigma', 0, 10)  # y values are assumed to be standardized
133 |     tau = 1 / sigma**2
134 | 
135 |     a0 = pm.Normal('a0', mu=0, tau=0.001)  # y values are assumed to be standardized
136 | 
137 |     a1 = pm.Normal('a1', mu=0, tau=a1tau, shape=Nx1Lvl)
138 |     a2 = pm.Normal('a2', mu=0, tau=a2tau, shape=Nx2Lvl)
139 |     a1a2 = pm.Normal('a1a2', mu=0, tau=a1a2tau, shape=[Nx1Lvl, Nx2Lvl])
140 | 
141 |     b1 = pm.Deterministic('b1', a1 - tt.mean(a1))
142 |     b2 = pm.Deterministic('b2', a2 - tt.mean(a2))
143 |     b1b2 = pm.Deterministic('b1b2', a1a2 - tt.mean(a1a2))
144 | 
145 |     mu = a0 + b1[x1] + b2[x2] + b1b2[x1, x2]
146 | 
147 |     # define the likelihood
148 |     yl = pm.Normal('yl', mu=mu, tau=tau, observed=z)
149 | 
150 |     # Generate a MCMC chain
151 |     trace = pm.sample(2000)
152 | 
153 | # EXAMINE THE RESULTS
154 | 
155 | # Print summary for each trace
156 | #pm.summary(trace)
157 | 
158 | # Check for mixing and autocorrelation
159 | #pm.autocorrplot(trace, vars=model.unobserved_RVs[:-1])
160 | 
161 | ## Plot KDE and sampled values for each parameter.
162 | pm.traceplot(trace)
163 | 
164 | 
165 | # Extract values of 'a'
166 | a0_sample = trace['a0']
167 | b1_sample = trace['b1']
168 | b2_sample = trace['b2']
169 | b1b2_sample = trace['b1b2']
170 | 
171 | b0_sample = a0_sample * np.std(y) + np.mean(y)
172 | b1_sample = b1_sample * np.std(y)
173 | b2_sample = b2_sample * np.std(y)
174 | b1b2_sample = b1b2_sample * np.std(y)
175 | 
176 | 
177 | plt.figure(figsize=(25, 20))
178 | ax = plt.subplot(451)
179 | pm.plot_posterior(b0_sample, bins=50, ax=ax)
180 | ax.set_xlabel(r'$\beta0$')
181 | ax.set_title('Baseline')
182 | plt.xlim(b0_sample.min(), b0_sample.max())
183 | 
184 | count = 2
185 | for i in range(len(b1_sample[0])):
186 |     ax = plt.subplot(4, 5, count)
187 |     pm.plot_posterior(b1_sample[:,i], ax=ax)
188 |     ax.set_xlabel(r'$\beta1_{}$'.format(i))
189 |     ax.set_title('x1: {}'.format(x1names[i]))
190 |     count += 1
191 | 
192 | for i in range(len(b2_sample[0])):
193 |     ax = plt.subplot(4, 5, count)
194 |     pm.plot_posterior(b2_sample[:,i], bins=50, ax=ax)
195 |     ax.set_xlabel(r'$\beta2_{}$'.format(i))
196 |     ax.set_title('x2: {}'.format(x2names[i]))
197 |     count += 1
198 | 
199 | for j in range(len(b1_sample[0])):
200 |     for i in range(len(b2_sample[0])):
201 |         ax = plt.subplot(4, 5, count)
202 |         pm.plot_posterior(b1b2_sample[:,j,i], bins=50, ax=ax)
203 |         ax.set_title('x1: {}, x2: {}'.format(x1names[j], x2names[i]))
204 |         ax.set_xlabel(r'$\beta12_{}{}$'.format(j, i))
205 |         count += 1
206 | 
207 | plt.tight_layout()
208 | plt.savefig('Figure_19.4.png')
209 | 
210 | ## Display contrast analyses
211 | plt.figure(figsize=(10, 12))
212 | count = 1
213 | for key, value in x1contrastDict.items():
214 |     contrast = np.dot(b1_sample, value)
215 |     ax = plt.subplot(3, 2, count)
216 |     pm.plot_posterior(contrast, ref_val=0.0, bins=50, ax=ax)
217 |     ax.set_title('Contrast {}'.format(key))
218 |     count += 1
219 | 
220 | for key, value in x2contrastDict.items():
221 |     contrast = np.dot(b2_sample, value)
222 |     ax = plt.subplot(3, 2, count)
223 |
pm.plot_posterior(contrast, ref_val=0.0, bins=50, ax=ax) 224 | ax.set_title('Contrast {}'.format(key)) 225 | count += 1 226 | 227 | for key, value in x1x2contrastDict.items(): 228 | contrast = np.tensordot(b1b2_sample, value) 229 | ax = plt.subplot(3, 2, count) 230 | pm.plot_posterior(contrast, ref_val=0.0, bins=50, ax=ax) 231 | ax.set_title('Contrast {}'.format(key)) 232 | count += 1 233 | plt.tight_layout() 234 | plt.savefig('Figure_19.5.png') 235 | 236 | plt.show() 237 | -------------------------------------------------------------------------------- /Figures/Figure_10.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_10.2.png -------------------------------------------------------------------------------- /Figures/Figure_10.3-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_10.3-4.png -------------------------------------------------------------------------------- /Figures/Figure_12.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_12.5.png -------------------------------------------------------------------------------- /Figures/Figure_15.9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_15.9.png -------------------------------------------------------------------------------- /Figures/Figure_16.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.2.png -------------------------------------------------------------------------------- /Figures/Figure_16.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.4.png -------------------------------------------------------------------------------- /Figures/Figure_16.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.5.png -------------------------------------------------------------------------------- /Figures/Figure_16.6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.6.png -------------------------------------------------------------------------------- /Figures/Figure_16.8a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.8a.png -------------------------------------------------------------------------------- /Figures/Figure_16.8b.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.8b.png -------------------------------------------------------------------------------- /Figures/Figure_16.8c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.8c.png -------------------------------------------------------------------------------- /Figures/Figure_16.8d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.8d.png -------------------------------------------------------------------------------- /Figures/Figure_17.5a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_17.5a.png -------------------------------------------------------------------------------- /Figures/Figure_17.5b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_17.5b.png -------------------------------------------------------------------------------- /Figures/Figure_17.Xa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_17.Xa.png -------------------------------------------------------------------------------- /Figures/Figure_17.Xb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_17.Xb.png -------------------------------------------------------------------------------- /Figures/Figure_18.2a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_18.2a.png -------------------------------------------------------------------------------- /Figures/Figure_18.2b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_18.2b.png -------------------------------------------------------------------------------- /Figures/Figure_18.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_18.3.png -------------------------------------------------------------------------------- /Figures/Figure_19.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_19.4.png 
-------------------------------------------------------------------------------- /Figures/Figure_19.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_19.5.png -------------------------------------------------------------------------------- /Figures/Figure_2.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_2.2.png -------------------------------------------------------------------------------- /Figures/Figure_3.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_3.1.png -------------------------------------------------------------------------------- /Figures/Figure_3.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_3.3.png -------------------------------------------------------------------------------- /Figures/Figure_4.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_4.1.png -------------------------------------------------------------------------------- /Figures/Figure_4.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_4.2.png -------------------------------------------------------------------------------- /Figures/Figure_4.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_4.3.png -------------------------------------------------------------------------------- /Figures/Figure_5.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_5.2.png -------------------------------------------------------------------------------- /Figures/Figure_6.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_6.1.png -------------------------------------------------------------------------------- /Figures/Figure_6.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_6.2.png -------------------------------------------------------------------------------- /Figures/Figure_6.3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_6.3.png -------------------------------------------------------------------------------- /Figures/Figure_7.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_7.3.png -------------------------------------------------------------------------------- /Figures/Figure_7.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_7.4.png -------------------------------------------------------------------------------- /Figures/Figure_7.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_7.5.png -------------------------------------------------------------------------------- /Figures/Figure_7.6_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_7.6_a.png -------------------------------------------------------------------------------- /Figures/Figure_7.6_b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_7.6_b.png -------------------------------------------------------------------------------- /Figures/Figure_7.6_c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_7.6_c.png -------------------------------------------------------------------------------- /Figures/Figure_8.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_8.1.png -------------------------------------------------------------------------------- /Figures/Figure_8.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_8.2.png -------------------------------------------------------------------------------- /Figures/Figure_8.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_8.3.png -------------------------------------------------------------------------------- /Figures/Figure_8.3_HDI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_8.3_HDI.png -------------------------------------------------------------------------------- /Figures/Figure_8.6.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_8.6.png -------------------------------------------------------------------------------- /Figures/Figure_9.11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_9.11.png -------------------------------------------------------------------------------- /Figures/Figure_9.12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_9.12.png -------------------------------------------------------------------------------- /Figures/Figure_9.14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_9.14.png -------------------------------------------------------------------------------- /Figures/Figure_9.16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_9.16.png -------------------------------------------------------------------------------- /Figures/Figure_9.16b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_9.16b.png -------------------------------------------------------------------------------- /Figures/Figure_9.18_lower.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_9.18_lower.png -------------------------------------------------------------------------------- /Figures/Figure_9.18_upper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_9.18_upper.png -------------------------------------------------------------------------------- /Figures/figure_15.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/figure_15.3.png -------------------------------------------------------------------------------- /Guber1999data.txt: -------------------------------------------------------------------------------- 1 | "Alabama" 4.405 17.2 31.144 8 491 538 1029 2 | "Alaska" 8.963 17.6 47.951 47 445 489 934 3 | "Arizona" 4.778 19.3 32.175 27 448 496 944 4 | "Arkansas" 4.459 17.1 28.934 6 482 523 1005 5 | "California" 4.992 24.0 41.078 45 417 485 902 6 | "Colorado" 5.443 18.4 34.571 29 462 518 980 7 | "Connecticut" 8.817 14.4 50.045 81 431 477 908 8 | "Delaware" 7.030 16.6 39.076 68 429 468 897 9 | "Florida" 5.718 19.1 32.588 48 420 469 889 10 | "Georgia" 5.193 16.3 32.291 65 406 448 854 11 | "Hawaii" 6.078 
17.9 38.518 57 407 482 889 12 | "Idaho" 4.210 19.1 29.783 15 468 511 979 13 | "Illinois" 6.136 17.3 39.431 13 488 560 1048 14 | "Indiana" 5.826 17.5 36.785 58 415 467 882 15 | "Iowa" 5.483 15.8 31.511 5 516 583 1099 16 | "Kansas" 5.817 15.1 34.652 9 503 557 1060 17 | "Kentucky" 5.217 17.0 32.257 11 477 522 999 18 | "Louisiana" 4.761 16.8 26.461 9 486 535 1021 19 | "Maine" 6.428 13.8 31.972 68 427 469 896 20 | "Maryland" 7.245 17.0 40.661 64 430 479 909 21 | "Massachusetts" 7.287 14.8 40.795 80 430 477 907 22 | "Michigan" 6.994 20.1 41.895 11 484 549 1033 23 | "Minnesota" 6.000 17.5 35.948 9 506 579 1085 24 | "Mississippi" 4.080 17.5 26.818 4 496 540 1036 25 | "Missouri" 5.383 15.5 31.189 9 495 550 1045 26 | "Montana" 5.692 16.3 28.785 21 473 536 1009 27 | "Nebraska" 5.935 14.5 30.922 9 494 556 1050 28 | "Nevada" 5.160 18.7 34.836 30 434 483 917 29 | "New Hampshire" 5.859 15.6 34.720 70 444 491 935 30 | "New Jersey" 9.774 13.8 46.087 70 420 478 898 31 | "New Mexico" 4.586 17.2 28.493 11 485 530 1015 32 | "New York" 9.623 15.2 47.612 74 419 473 892 33 | "North Carolina" 5.077 16.2 30.793 60 411 454 865 34 | "North Dakota" 4.775 15.3 26.327 5 515 592 1107 35 | "Ohio" 6.162 16.6 36.802 23 460 515 975 36 | "Oklahoma" 4.845 15.5 28.172 9 491 536 1027 37 | "Oregon" 6.436 19.9 38.555 51 448 499 947 38 | "Pennsylvania" 7.109 17.1 44.510 70 419 461 880 39 | "Rhode Island" 7.469 14.7 40.729 70 425 463 888 40 | "South Carolina" 4.797 16.4 30.279 58 401 443 844 41 | "South Dakota" 4.775 14.4 25.994 5 505 563 1068 42 | "Tennessee" 4.388 18.6 32.477 12 497 543 1040 43 | "Texas" 5.222 15.7 31.223 47 419 474 893 44 | "Utah" 3.656 24.3 29.082 4 513 563 1076 45 | "Vermont" 6.750 13.8 35.406 68 429 472 901 46 | "Virginia" 5.327 14.6 33.987 65 428 468 896 47 | "Washington" 5.906 20.2 36.151 48 443 494 937 48 | "West Virginia" 6.107 14.8 31.944 17 448 484 932 49 | "Wisconsin" 6.930 15.9 37.746 9 501 572 1073 50 | "Wyoming" 6.160 14.9 31.285 10 476 525 1001 51 | -------------------------------------------------------------------------------- /HDI_of_grid.py: -------------------------------------------------------------------------------- 1 | """ 2 | Arguments: 3 | probMassVec is a vector of probability masses at each grid point. 4 | credMass is the desired mass of the HDI region. 
5 | 
6 | Return a dictionary with:
7 | indices is a vector of indices that are in the HDI
8 | mass is the total mass of the included indices
9 | height is the smallest component probability mass in the HDI
10 | """
11 | import numpy as np
12 | 
13 | def HDI_of_grid(probMassVec, credMass=0.95):
14 |     sortedProbMass = np.sort(probMassVec, axis=None)[::-1]
15 |     HDIheightIdx = np.min(np.where(np.cumsum(sortedProbMass) >= credMass))
16 |     HDIheight = sortedProbMass[HDIheightIdx]
17 |     HDImass = np.sum(probMassVec[probMassVec >= HDIheight])
18 |     idx = np.where(probMassVec >= HDIheight)
19 |     return {'indices':idx, 'mass':HDImass, 'height':HDIheight}
20 | 
21 | if __name__ == '__main__':
22 |     from scipy.stats import beta
23 |     theta1 = np.linspace(0, 1, 10)
24 |     theta2 = theta1
25 |     theta1_grid, theta2_grid = np.meshgrid(theta1, theta2)
26 |     probDensityVec = beta.pdf(theta1_grid, 3, 3)
27 |     probMassVec = probDensityVec / np.sum(probDensityVec)
28 |     HDIinfo = HDI_of_grid(probMassVec)
29 |     print(HDIinfo)
30 | 
-------------------------------------------------------------------------------- /HDIofICDF.py: --------------------------------------------------------------------------------
1 | """
2 | This program finds the HDI of a probability density function that is specified
3 | mathematically in Python.
4 | """
5 | from scipy.optimize import fmin
6 | from scipy.stats import *
7 | 
8 | def HDIofICDF(dist_name, credMass=0.95, **args):
9 |     # freeze distribution with given arguments
10 |     distri = dist_name(**args)
11 |     # initial guess for HDIlowTailPr
12 |     incredMass = 1.0 - credMass
13 | 
14 |     def intervalWidth(lowTailPr):
15 |         return distri.ppf(credMass + lowTailPr) - distri.ppf(lowTailPr)
16 | 
17 |     # find lowTailPr that minimizes intervalWidth
18 |     HDIlowTailPr = fmin(intervalWidth, incredMass, ftol=1e-8, disp=False)[0]
19 |     # return interval as array([low, high])
20 |     return distri.ppf([HDIlowTailPr, credMass + HDIlowTailPr])
21 | 
22 | 
-------------------------------------------------------------------------------- /HtWtDataGenerator.py: --------------------------------------------------------------------------------
1 | """
2 | Random height, weight generator for males and females. Uses parameters from
3 | Brainard, J. & Burmaster, D. E. (1992). Bivariate distributions for height and
4 | weight of men and women in the United States. Risk Analysis, 12(2), 267-275.
5 | John K. Kruschke, January 2008.
6 | """
7 | from __future__ import division
8 | from scipy.stats import multivariate_normal
9 | import numpy as np
10 | 
11 | 
12 | def HtWtDataGenerator(nSubj, rndsd=None):
13 |     # Specify parameters of multivariate normal (MVN) distributions.
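    # Heights are in inches and weights in pounds (per Brainard & Burmaster, 1992);
    # the lnWt* parameters describe log-weight, so weights are drawn on the log scale
    # and exponentiated before being returned. Women are modeled as a mixture of two
    # clusters with mixing proportions prop1 and prop2.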
14 | # Men: 15 | HtMmu = 69.18 16 | HtMsd = 2.87 17 | lnWtMmu = 5.14 18 | lnWtMsd = 0.17 19 | Mrho = 0.42 20 | Mmean = np.array([HtMmu , lnWtMmu]) 21 | Msigma = np.array([[HtMsd**2, Mrho * HtMsd * lnWtMsd], 22 | [Mrho * HtMsd * lnWtMsd, lnWtMsd**2]]) 23 | 24 | # Women cluster 1: 25 | HtFmu1 = 63.11 26 | HtFsd1 = 2.76 27 | lnWtFmu1 = 5.06 28 | lnWtFsd1 = 0.24 29 | Frho1 = 0.41 30 | prop1 = 0.46 31 | Fmean1 = np.array([HtFmu1, lnWtFmu1]) 32 | Fsigma1 = np.array([[HtFsd1**2, Frho1 * HtFsd1 * lnWtFsd1], 33 | [Frho1 * HtFsd1 * lnWtFsd1, lnWtFsd1**2]]) 34 | # Women cluster 2: 35 | HtFmu2 = 64.36 36 | HtFsd2 = 2.49 37 | lnWtFmu2 = 4.86 38 | lnWtFsd2 = 0.14 39 | Frho2 = 0.44 40 | prop2 = 1 - prop1 41 | Fmean2 = np.array([HtFmu2, lnWtFmu2]) 42 | Fsigma2 = np.array([[HtFsd2**2 , Frho2 * HtFsd2 * lnWtFsd2], 43 | [Frho2 * HtFsd2 * lnWtFsd2 , lnWtFsd2**2]]) 44 | 45 | # Randomly generate data values from those MVN distributions. 46 | if rndsd is not None: 47 | np.random.seed(rndsd) 48 | datamatrix = np.zeros((nSubj, 3)) 49 | # arbitrary coding values 50 | maleval = 1 51 | femaleval = 0 52 | for i in range(0, nSubj): 53 | # Flip coin to decide sex 54 | sex = np.random.choice([maleval, femaleval], replace=True, p=(.5,.5), size=1) 55 | if sex == maleval: 56 | datum = multivariate_normal.rvs(mean=Mmean, cov=Msigma) 57 | if sex == femaleval: 58 | Fclust = np.random.choice([1, 2], replace=True, p=(prop1, prop2), size=1) 59 | if Fclust == 1: 60 | datum = multivariate_normal.rvs(mean=Fmean1, cov=Fsigma1) 61 | if Fclust == 2: 62 | datum = multivariate_normal.rvs(mean=Fmean2, cov=Fsigma2) 63 | datamatrix[i] = np.concatenate([sex, np.round([datum[0], np.exp(datum[1])], 1)]) 64 | 65 | return datamatrix 66 | -------------------------------------------------------------------------------- /McDonaldSK1991data.txt: -------------------------------------------------------------------------------- 1 | # From http://udel.edu/~mcdonald/statanovasig.html 2 | # "Here are some data on a shell measurement (the length of the anterior 3 | # adductor muscle scar, standardized by dividing by length) in the mussel 4 | # Mytilus trossulus from five locations: Tillamook, Oregon; Newport, Oregon; 5 | # Petersburg, Alaska; Magadan, Russia; and Tvarminne, Finland, 6 | # taken from a much larger data set used in McDonald et al. (1991)." 7 | # 8 | # McDonald, J. H., R. Seed and R. K. Koehn. 1991. 9 | # Allozymes and morphometric characters of three species of Mytilus 10 | # in the Northern and Southern Hemispheres. 11 | # Mar. Biol. 111:323-333. 
12 | # 13 | # Group code: 14 | # 1=Tillamook,Oregon 15 | # 2=Newport,Oregon 16 | # 3=Petersburg,Alaska 17 | # 4=Magadan,Russia 18 | # 5=Tvarminne,Finland 19 | Group Size Site 20 | 1 0.0571 OregonT 21 | 1 0.0813 OregonT 22 | 1 0.0831 OregonT 23 | 1 0.0976 OregonT 24 | 1 0.0817 OregonT 25 | 1 0.0859 OregonT 26 | 1 0.0735 OregonT 27 | 1 0.0659 OregonT 28 | 1 0.0923 OregonT 29 | 1 0.0836 OregonT 30 | 2 0.0873 OregonN 31 | 2 0.0662 OregonN 32 | 2 0.0672 OregonN 33 | 2 0.0819 OregonN 34 | 2 0.0749 OregonN 35 | 2 0.0649 OregonN 36 | 2 0.0835 OregonN 37 | 2 0.0725 OregonN 38 | 3 0.0974 Alaska 39 | 3 0.1352 Alaska 40 | 3 0.0817 Alaska 41 | 3 0.1016 Alaska 42 | 3 0.0968 Alaska 43 | 3 0.1064 Alaska 44 | 3 0.1050 Alaska 45 | 4 0.1033 Russia 46 | 4 0.0915 Russia 47 | 4 0.0781 Russia 48 | 4 0.0685 Russia 49 | 4 0.0677 Russia 50 | 4 0.0697 Russia 51 | 4 0.0764 Russia 52 | 4 0.0689 Russia 53 | 5 0.0703 Finland 54 | 5 0.1026 Finland 55 | 5 0.0956 Finland 56 | 5 0.0973 Finland 57 | 5 0.1039 Finland 58 | 5 0.1045 Finland 59 | # 60 | # http://udel.edu/~mcdonald/statanovaunplanned.html 61 | # shows that Tukey-Kramer method of unplanned comparisons 62 | # groups 63 | # Newport/Magadan/Tillamook (2/4/1), 64 | # Magadan/Tillamook/Tvarminne (4/1/5), 65 | # and Tvarminne/Petersburg (5/3). 66 | # 67 | # From http://udel.edu/~mcdonald/statanovaplanned.html: 68 | # Really important note about planned comparisons 69 | # Planned comparisons must be planned before you look at the data. If you 70 | # look at some data, pick out an interesting comparison, then analyze it as 71 | # if it were a planned comparison, you will be committing scientific fraud. 72 | # For example, if you look at the mean arch heights for the nine sports, see 73 | # that cross-country has the lowest mean and swimming has the highest mean, 74 | # then compare just those two means, your P-value will be much too low. This 75 | # is because there are 36 possible pairwise comparisons in a set of 9 means. 76 | # You expect 5 percent, or 1 out of 20, tests to be "significant" at the 77 | # P<0.05 level, even if all the data really fit the null hypothesis, so 78 | # there's a good chance that the most extreme comparison in a set of 36 79 | # will have a P-value less than 0.05. 80 | # It would be acceptable to run a pilot experiment and plan your planned 81 | # comparisons based on the results of the pilot experiment. However, if you 82 | # do this you could not include the data from the pilot experiment in the 83 | # analysis; you would have to limit your anova to the new data. 
84 | -------------------------------------------------------------------------------- /McIntyre1994data.csv: -------------------------------------------------------------------------------- 1 | Brand,Tar,Nic,Wt,CO 2 | Alpine,14.1,0.86,0.9853,13.6 3 | BensonAndHedges,16.0,1.06,1.0938,16.6 4 | BullDurham,29.8,2.03,1.1650,23.5 5 | CamelLights,8.0,0.67,0.9280,10.2 6 | Carlton,4.1,0.40,0.9462,5.4 7 | Chesterfield,15.0,1.04,0.8885,15.0 8 | GoldenLights,8.8,0.76,1.0267,9.0 9 | Kent,12.4,0.95,0.9225,12.3 10 | Kool,16.6,1.12,0.9372,16.3 11 | LandM,14.9,1.02,0.8858,15.4 12 | LarkLights,13.7,1.01,0.9643,13.0 13 | Marlboro,15.1,0.90,0.9316,14.4 14 | Merit,7.8,0.57,0.9705,10.0 15 | MultiFilter,11.4,0.78,1.1240,10.2 16 | NewportLights,9.0,0.74,0.8517,9.5 17 | Now,1.0,0.13,0.7851,1.5 18 | OldGold,17.0,1.26,0.9186,18.5 19 | PallMallLight,12.8,1.08,1.0395,12.6 20 | Raleigh,15.8,0.96,0.9573,17.5 21 | SalemUltra,4.5,0.42,0.9106,4.9 22 | Tareyton,14.5,1.01,1.0070,15.9 23 | True,7.3,0.61,0.9806,8.5 24 | ViceroyRichLight,8.6,0.69,0.9693,10.6 25 | VirginiaSlims,15.2,1.02,0.9496,13.9 26 | WinstonLights,12.0,0.82,1.1184,14.9 27 | -------------------------------------------------------------------------------- /QianS2007SeaweedData.txt: -------------------------------------------------------------------------------- 1 | COVER,BLOCK,TREAT 2 | 14.00,BLOCK 1,CONTROL 3 | 23.00,BLOCK 1,CONTROL 4 | 22.00,BLOCK 2,CONTROL 5 | 35.00,BLOCK 2,CONTROL 6 | 67.00,BLOCK 3,CONTROL 7 | 82.00,BLOCK 3,CONTROL 8 | 94.00,BLOCK 4,CONTROL 9 | 95.00,BLOCK 4,CONTROL 10 | 34.00,BLOCK 5,CONTROL 11 | 53.00,BLOCK 5,CONTROL 12 | 58.00,BLOCK 6,CONTROL 13 | 75.00,BLOCK 6,CONTROL 14 | 19.00,BLOCK 7,CONTROL 15 | 47.00,BLOCK 7,CONTROL 16 | 53.00,BLOCK 8,CONTROL 17 | 61.00,BLOCK 8,CONTROL 18 | 4.00,BLOCK 1,L 19 | 4.00,BLOCK 1,L 20 | 7.00,BLOCK 2,L 21 | 8.00,BLOCK 2,L 22 | 28.00,BLOCK 3,L 23 | 58.00,BLOCK 3,L 24 | 27.00,BLOCK 4,L 25 | 35.00,BLOCK 4,L 26 | 11.00,BLOCK 5,L 27 | 33.00,BLOCK 5,L 28 | 16.00,BLOCK 6,L 29 | 31.00,BLOCK 6,L 30 | 6.00,BLOCK 7,L 31 | 8.00,BLOCK 7,L 32 | 15.00,BLOCK 8,L 33 | 17.00,BLOCK 8,L 34 | 11.00,BLOCK 1,f 35 | 24.00,BLOCK 1,f 36 | 14.00,BLOCK 2,f 37 | 31.00,BLOCK 2,f 38 | 52.00,BLOCK 3,f 39 | 59.00,BLOCK 3,f 40 | 83.00,BLOCK 4,f 41 | 89.00,BLOCK 4,f 42 | 33.00,BLOCK 5,f 43 | 34.00,BLOCK 5,f 44 | 39.00,BLOCK 6,f 45 | 52.00,BLOCK 6,f 46 | 43.00,BLOCK 7,f 47 | 53.00,BLOCK 7,f 48 | 30.00,BLOCK 8,f 49 | 37.00,BLOCK 8,f 50 | 3.00,BLOCK 1,Lf 51 | 5.00,BLOCK 1,Lf 52 | 3.00,BLOCK 2,Lf 53 | 6.00,BLOCK 2,Lf 54 | 9.00,BLOCK 3,Lf 55 | 31.00,BLOCK 3,Lf 56 | 21.00,BLOCK 4,Lf 57 | 57.00,BLOCK 4,Lf 58 | 5.00,BLOCK 5,Lf 59 | 9.00,BLOCK 5,Lf 60 | 26.00,BLOCK 6,Lf 61 | 43.00,BLOCK 6,Lf 62 | 4.00,BLOCK 7,Lf 63 | 12.00,BLOCK 7,Lf 64 | 12.00,BLOCK 8,Lf 65 | 18.00,BLOCK 8,Lf 66 | 10.00,BLOCK 1,fF 67 | 13.00,BLOCK 1,fF 68 | 10.00,BLOCK 2,fF 69 | 15.00,BLOCK 2,fF 70 | 44.00,BLOCK 3,fF 71 | 50.00,BLOCK 3,fF 72 | 57.00,BLOCK 4,fF 73 | 73.00,BLOCK 4,fF 74 | 26.00,BLOCK 5,fF 75 | 42.00,BLOCK 5,fF 76 | 38.00,BLOCK 6,fF 77 | 42.00,BLOCK 6,fF 78 | 29.00,BLOCK 7,fF 79 | 36.00,BLOCK 7,fF 80 | 11.00,BLOCK 8,fF 81 | 40.00,BLOCK 8,fF 82 | 1.00,BLOCK 1,LfF 83 | 2.00,BLOCK 1,LfF 84 | 3.00,BLOCK 2,LfF 85 | 5.00,BLOCK 2,LfF 86 | 6.00,BLOCK 3,LfF 87 | 9.00,BLOCK 3,LfF 88 | 7.00,BLOCK 4,LfF 89 | 22.00,BLOCK 4,LfF 90 | 5.00,BLOCK 5,LfF 91 | 6.00,BLOCK 5,LfF 92 | 10.00,BLOCK 6,LfF 93 | 17.00,BLOCK 6,LfF 94 | 5.00,BLOCK 7,LfF 95 | 14.00,BLOCK 7,LfF 96 | 5.00,BLOCK 8,LfF 97 | 7.00,BLOCK 8,LfF 98 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Doing_bayesian_data_analysis 2 | ============================ 3 | 4 | [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/aloctavodia/Doing_bayesian_data_analysis?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 5 | 6 | This repository contains the Python version of the R programs described in the great book [Doing bayesian data analysis (first edition)](http://doingbayesiandataanalysis.blogspot.com.ar) by John K. Kruschke (AKA *the puppy book*). 7 | 8 | All the code is adapted from the Kruschke's book, except hpd.py that is taken (without modifications) from the PyMC project. 9 | 10 | The name of the programs are the same used in the book, except they begin with a number indicating the chapter. All programs are written in Python and instead of BUGS/JAGS the [PyMC3](http://pymc-devs.github.io/pymc3) module is used. 11 | 12 | Thanks to [Brian Naughton](https://github.com/hgbrian) the code is also available as an [IPython notebook](http://nbviewer.ipython.org/github/aloctavodia/Doing_bayesian_data_analysis/blob/master/IPython/Kruschkes_Doing_Bayesian_Data_Analysis_in_PyMC3.ipynb) 13 | 14 | ## Second edition 15 | 16 | If you are interested on the PyMC3 code for the second edition of Doing bayesian data analysis, please check this [Repository](https://github.com/JWarmenhoven/DBDA-python). 17 | -------------------------------------------------------------------------------- /Salary.csv: -------------------------------------------------------------------------------- 1 | "Org","Post","Salary" 2 | "CEDP","FT1",89504 3 | "CEDP","FT1",106554 4 | "CHEM","FT1",108158 5 | "CEDP","FT1",92961 6 | "CHEM","FT1",154703 7 | "CEDP","FT1",81840 8 | "BFIN","FT1",238000 9 | "THTR","FT1",86794 10 | "CEDP","FT1",84121 11 | "THTR","FT1",80450 12 | "CEDP","FT1",98434 13 | "CHEM","FT1",194192 14 | "CEDP","FT1",92896 15 | "THTR","FT1",72240 16 | "CEDP","FT1",88374 17 | "BFIN","FT1",234000 18 | "THTR","FT1",81566 19 | "CEDP","FT1",102300 20 | "CHEM","FT1",159753 21 | "CHEM","FT1",121313 22 | "CHEM","FT2",123200 23 | "BFIN","FT2",183000 24 | "THTR","FT2",62611 25 | "BFIN","FT2",222000 26 | "CEDP","FT2",63000 27 | "CHEM","FT2",120000 28 | "BFIN","FT2",200000 29 | "CEDP","FT2",65115 30 | "BFIN","FT2",198000 31 | "CEDP","FT2",83762 32 | "CEDP","FT2",75559 33 | "BFIN","FT2",146000 34 | "CHEM","FT2",83164 35 | "CHEM","FT2",135794 36 | "CHEM","FT2",82762 37 | "BFIN","FT2",134000 38 | "CHEM","FT2",88147 39 | "THTR","FT2",59210 40 | "CEDP","FT2",66186 41 | "THTR","FT2",63924 42 | "CHEM","FT2",123610 43 | "CHEM","FT2",88271 44 | "THTR","FT2",62315 45 | "BFIN","FT2",174000 46 | "THTR","FT2",63261 47 | "CEDP","FT2",66794 48 | "THTR","FT2",71706 49 | "CEDP","FT2",79236 50 | "CHEM","FT2",104568 51 | "BFIN","FT2",180000 52 | "CHEM","FT3",77169 53 | "CHEM","FT3",81773 54 | "CEDP","FT3",59568 55 | "CHEM","FT3",75000 56 | "THTR","FT3",53000 57 | "CHEM","FT3",75000 58 | "THTR","FT3",51991 59 | "CEDP","FT3",57000 60 | "CHEM","FT3",75000 61 | "THTR","FT3",56985 62 | "THTR","FT3",51365 63 | "CHEM","FT3",76714 64 | "CEDP","FT3",58890 65 | "BFIN","FT3",188000 66 | "THTR","FT3",52140 67 | "THTR","FT3",53000 68 | "CHEM","FT3",80017 69 | "BFIN","FT3",165000 70 | "THTR","FT3",53000 71 | "CEDP","FT3",57443 72 | "BFIN","FT3",190000 73 | "CHEM","FT3",75000 74 | "BFIN","FT3",177000 75 | "CEDP","FT3",57443 76 | "BFIN","FT3",180000 
77 | "CHEM","FT3",78000 78 | "BFIN","FT3",180000 79 | "CHEM","FT3",68523 80 | "BFIN","FT3",176000 81 | "CEDP","FT3",57000 82 | "CEDP","FT3",58500 83 | "CEDP","FT3",57443 84 | "BFIN","FT3",171000 85 | "BFIN","FT3",176000 86 | -------------------------------------------------------------------------------- /SolariLS2008data.txt: -------------------------------------------------------------------------------- 1 | # Data from Solari, Liseo & Sun 2008 2 | # 3 | # (Pompilj and Napolitani, 1954). An experiment is conducted 4 | # to analyze the possible influence of some types of manuring 5 | # on the ascorbic acid content in tomatoes. The treatments 6 | # under study are nine manures obtained as different 7 | # combinations of calcium nitrate and calcium superphosphate. 8 | # 9 | # T1 T2 T3 T4 T5 T6 T7 T8 T9 10 | # 7.12 4.42 6.49 8.07 8.05 5.09 5.87 6.57 4.13 11 | # 7.16 5.68 8.09 2.86 5.82 4.57 5.36 5.08 7.31 12 | # 4.57 5.15 8.79 6.84 2.47 6.06 5.85 5.95 4.47 13 | # 3.79 3.83 8.44 6.85 3.28 4.87 6.27 7.51 2.53 14 | # 4.20 3.30 6.11 4.12 5.38 4.52 5.96 3.79 3.96 15 | # 5.84 4.44 5.17 3.32 3.98 5.08 4.95 4.33 5.30 16 | # 5.56 3.51 8.13 1.74 6.08 4.29 5.85 3.70 2.66 17 | # 5.02 4.60 7.58 1.74 6.28 6.19 4.70 5.21 4.12 18 | # 3.69 4.85 6.47 1.57 5.72 3.45 1.53 4.48 3.54 19 | # 2.99 4.84 5.45 3.02 2.88 5.85 3.88 5.17 2.98 20 | # 4.99 5.45 6.18 5.08 6.40 2.51 2.88 4.69 5.08 21 | # 2.16 4.71 4.34 4.96 4.58 4.93 2.07 2.12 5.15 22 | Type Acid 23 | 1 7.12 24 | 1 7.16 25 | 1 4.57 26 | 1 3.79 27 | 1 4.20 28 | 1 5.84 29 | 1 5.56 30 | 1 5.02 31 | 1 3.69 32 | 1 2.99 33 | 1 4.99 34 | 1 2.16 35 | 2 4.42 36 | 2 5.68 37 | 2 5.15 38 | 2 3.83 39 | 2 3.30 40 | 2 4.44 41 | 2 3.51 42 | 2 4.60 43 | 2 4.85 44 | 2 4.84 45 | 2 5.45 46 | 2 4.71 47 | 3 6.49 48 | 3 8.09 49 | 3 8.79 50 | 3 8.44 51 | 3 6.11 52 | 3 5.17 53 | 3 8.13 54 | 3 7.58 55 | 3 6.47 56 | 3 5.45 57 | 3 6.18 58 | 3 4.34 59 | 4 8.07 60 | 4 2.86 61 | 4 6.84 62 | 4 6.85 63 | 4 4.12 64 | 4 3.32 65 | 4 1.74 66 | 4 1.74 67 | 4 1.57 68 | 4 3.02 69 | 4 5.08 70 | 4 4.96 71 | 5 8.05 72 | 5 5.82 73 | 5 2.47 74 | 5 3.28 75 | 5 5.38 76 | 5 3.98 77 | 5 6.08 78 | 5 6.28 79 | 5 5.72 80 | 5 2.88 81 | 5 6.40 82 | 5 4.58 83 | 6 5.09 84 | 6 4.57 85 | 6 6.06 86 | 6 4.87 87 | 6 4.52 88 | 6 5.08 89 | 6 4.29 90 | 6 6.19 91 | 6 3.45 92 | 6 5.85 93 | 6 2.51 94 | 6 4.93 95 | 7 5.87 96 | 7 5.36 97 | 7 5.85 98 | 7 6.27 99 | 7 5.96 100 | 7 4.95 101 | 7 5.85 102 | 7 4.70 103 | 7 1.53 104 | 7 3.88 105 | 7 2.88 106 | 7 2.07 107 | 8 6.57 108 | 8 5.08 109 | 8 5.95 110 | 8 7.51 111 | 8 3.79 112 | 8 4.33 113 | 8 3.70 114 | 8 5.21 115 | 8 4.48 116 | 8 5.17 117 | 8 4.69 118 | 8 2.12 119 | 9 4.13 120 | 9 7.31 121 | 9 4.47 122 | 9 2.53 123 | 9 3.96 124 | 9 5.30 125 | 9 2.66 126 | 9 4.12 127 | 9 3.54 128 | 9 2.98 129 | 9 5.08 130 | 9 5.15 131 | -------------------------------------------------------------------------------- /Systems.txt: -------------------------------------------------------------------------------- 1 | Aircraft Failure Days DaysTransf 2 | 1. 1. 194. 2.867876 3 | 1. 2. 15. 1.718772 4 | 1. 3. 41. 2.101632 5 | 1. 4. 29. 1.961009 6 | 1. 5. 33. 2.012347 7 | 1. 6. 181. 2.828367 8 | 2. 1. 413. 3.335723 9 | 2. 2. 14. 1.695218 10 | 2. 3. 58. 2.252608 11 | 2. 4. 37. 2.058924 12 | 2. 5. 100. 2.511886 13 | 2. 6. 65. 2.304532 14 | 2. 7. 9. 1.551846 15 | 2. 8. 169. 2.789827 16 | 2. 9. 447. 3.388921 17 | 2. 10. 184. 2.837681 18 | 2. 11. 36. 2.047673 19 | 2. 12. 201. 2.888279 20 | 2. 13. 118. 2.596429 21 | 3. 1. 90. 2.459509 22 | 3. 2. 10. 1.584893 23 | 3. 3. 60. 2.267933 24 | 3. 4. 186. 
2.843823 25 | 3. 5. 61. 2.275443 26 | 3. 6. 49. 2.177906 27 | 3. 7. 14. 1.695218 28 | 3. 8. 24. 1.888175 29 | 3. 9. 56. 2.236854 30 | 3. 10. 20. 1.820564 31 | 3. 11. 79. 2.396213 32 | 3. 12. 84. 2.425805 33 | 3. 13. 44. 2.131526 34 | 3. 14. 59. 2.260322 35 | 3. 15. 29. 1.961009 36 | 3. 16. 118. 2.596429 37 | 3. 17. 25. 1.903654 38 | 3. 18. 156. 2.745522 39 | 3. 19. 310. 3.149723 40 | 3. 20. 76. 2.377731 41 | 3. 21. 26. 1.918645 42 | 3. 22. 44. 2.131526 43 | 3. 23. 23. 1.872171 44 | 3. 24. 62. 2.282855 45 | 4. 1. 74. 2.365083 46 | 4. 2. 57. 2.244786 47 | 4. 3. 48. 2.168944 48 | 4. 4. 29. 1.961009 49 | 4. 5. 502. 3.468492 50 | 4. 6. 12. 1.643752 51 | 4. 7. 70. 2.338943 52 | 4. 8. 21. 1.838416 53 | 4. 9. 29. 1.961009 54 | 4. 10. 386. 3.290921 55 | 4. 11. 59. 2.260322 56 | 4. 12. 27. 1.933182 57 | 5. 1. 55. 2.228807 58 | 5. 2. 320. 3.169786 59 | 5. 3. 56. 2.236854 60 | 5. 4. 104. 2.531668 61 | 5. 5. 220. 2.940929 62 | 5. 6. 239. 2.990058 63 | 5. 7. 47. 2.15983 64 | 5. 8. 246. 3.007371 65 | 5. 9. 176. 2.812565 66 | 5. 10. 182. 2.831485 67 | 5. 11. 33. 2.012347 68 | 6. 1. 23. 1.872171 69 | 6. 2. 261. 3.043183 70 | 6. 3. 87. 2.44289 71 | 6. 4. 7. 1.475773 72 | 6. 5. 120. 2.605171 73 | 6. 6. 14. 1.695218 74 | 6. 7. 62. 2.282855 75 | 6. 8. 47. 2.15983 76 | 6. 9. 225. 2.954177 77 | 6. 10. 71. 2.345588 78 | 6. 11. 246. 3.007371 79 | 6. 12. 21. 1.838416 80 | 6. 13. 42. 2.111786 81 | 6. 14. 20. 1.820564 82 | 6. 15. 5. 1.37973 83 | 6. 16. 12. 1.643752 84 | 6. 17. 120. 2.605171 85 | 6. 18. 11. 1.615394 86 | 6. 19. 3. 1.245731 87 | 6. 20. 14. 1.695218 88 | 6. 21. 71. 2.345588 89 | 6. 22. 11. 1.615394 90 | 6. 23. 14. 1.695218 91 | 6. 24. 11. 1.615394 92 | 6. 25. 16. 1.741101 93 | 6. 26. 90. 2.459509 94 | 6. 27. 1. 1. 95 | 6. 28. 16. 1.741101 96 | 6. 29. 52. 2.203945 97 | 6. 30. 95. 2.48625 98 | 7. 1. 97. 2.496631 99 | 7. 2. 51. 2.195402 100 | 7. 3. 11. 1.615394 101 | 7. 4. 4. 1.319508 102 | 7. 5. 141. 2.690567 103 | 7. 6. 18. 1.782602 104 | 7. 7. 142. 2.694373 105 | 7. 8. 68. 2.325422 106 | 7. 9. 77. 2.383956 107 | 7. 10. 80. 2.402249 108 | 7. 11. 1. 1. 109 | 7. 12. 16. 1.741101 110 | 7. 13. 106. 2.541331 111 | 7. 14. 206. 2.902508 112 | 7. 15. 82. 2.414142 113 | 7. 16. 54. 2.220643 114 | 7. 17. 31. 1.987341 115 | 7. 18. 216. 2.930156 116 | 7. 19. 46. 2.15056 117 | 7. 20. 111. 2.564865 118 | 7. 21. 39. 2.080717 119 | 7. 22. 63. 2.290172 120 | 7. 23. 18. 1.782602 121 | 7. 24. 191. 2.858951 122 | 7. 25. 18. 1.782602 123 | 7. 26. 163. 2.769731 124 | 7. 27. 24. 1.888175 125 | 8. 1. 50. 2.186724 126 | 8. 2. 44. 2.131526 127 | 8. 3. 102. 2.521855 128 | 8. 4. 72. 2.352158 129 | 8. 5. 22. 1.855601 130 | 8. 6. 39. 2.080717 131 | 8. 7. 3. 1.245731 132 | 8. 8. 15. 1.718772 133 | 8. 9. 197. 2.876691 134 | 8. 10. 188. 2.849913 135 | 8. 11. 79. 2.396213 136 | 8. 12. 88. 2.44848 137 | 8. 13. 46. 2.15056 138 | 8. 14. 5. 1.37973 139 | 8. 15. 5. 1.37973 140 | 8. 16. 36. 2.047673 141 | 8. 17. 22. 1.855601 142 | 8. 18. 139. 2.682891 143 | 8. 19. 210. 2.913693 144 | 8. 20. 97. 2.496631 145 | 8. 21. 30. 1.97435 146 | 8. 22. 23. 1.872171 147 | 8. 23. 13. 1.670278 148 | 8. 24. 14. 1.695218 149 | 9. 1. 359. 3.243537 150 | 9. 2. 9. 1.551846 151 | 9. 3. 12. 1.643752 152 | 9. 4. 270. 3.063887 153 | 9. 5. 603. 3.598019 154 | 9. 6. 3. 1.245731 155 | 9. 7. 104. 2.531668 156 | 9. 8. 2. 1.148698 157 | 9. 9. 438. 3.375164 158 | 10. 1. 50. 2.186724 159 | 10. 2. 254. 3.026682 160 | 10. 3. 5. 1.37973 161 | 10. 4. 283. 3.092839 162 | 10. 5. 35. 2.036168 163 | 10. 6. 12. 1.643752 164 | 11. 1. 487. 3.447512 165 | 11. 2. 18. 1.782602 166 | 11. 3. 
100. 2.511886 167 | 11. 4. 7. 1.475773 168 | 11. 5. 98. 2.501758 169 | 11. 6. 5. 1.37973 170 | 11. 7. 85. 2.431553 171 | 11. 8. 91. 2.464951 172 | 11. 9. 43. 2.121747 173 | 11. 10. 230. 2.967191 174 | 11. 11. 3. 1.245731 175 | 11. 12. 130. 2.647212 176 | 12. 1. 102. 2.521855 177 | 12. 2. 209. 2.910913 178 | 12. 3. 14. 1.695218 179 | 12. 4. 57. 2.244786 180 | 12. 5. 54. 2.220643 181 | 12. 6. 32. 2. 182 | 12. 7. 67. 2.318542 183 | 12. 8. 59. 2.260322 184 | 12. 9. 134. 2.663305 185 | 12. 10. 152. 2.731296 186 | 12. 11. 27. 1.933182 187 | 12. 12. 14. 1.695218 188 | 12. 13. 230. 2.967191 189 | 12. 14. 66. 2.311579 190 | 12. 15. 61. 2.275443 191 | 12. 16. 34. 2.024397 192 | -------------------------------------------------------------------------------- /hpd.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code was taken form the PyMC library https://github.com/pymc-devs/pymc 3 | """ 4 | 5 | import numpy as np 6 | 7 | def calc_min_interval(x, alpha): 8 | """Internal method to determine the minimum interval of a given width 9 | Assumes that x is sorted numpy array. 10 | """ 11 | 12 | n = len(x) 13 | cred_mass = 1.0-alpha 14 | 15 | interval_idx_inc = int(np.floor(cred_mass*n)) 16 | n_intervals = n - interval_idx_inc 17 | interval_width = x[interval_idx_inc:] - x[:n_intervals] 18 | 19 | if len(interval_width) == 0: 20 | raise ValueError('Too few elements for interval calculation') 21 | 22 | min_idx = np.argmin(interval_width) 23 | hdi_min = x[min_idx] 24 | hdi_max = x[min_idx+interval_idx_inc] 25 | return hdi_min, hdi_max 26 | 27 | 28 | def hpd(x, alpha=0.05): 29 | """Calculate highest posterior density (HPD) of array for given alpha. 30 | The HPD is the minimum width Bayesian credible interval (BCI). 31 | :Arguments: 32 | x : Numpy array 33 | An array containing MCMC samples 34 | alpha : float 35 | Desired probability of type I error (defaults to 0.05) 36 | 37 | """ 38 | 39 | # Make a copy of trace 40 | x = x.copy() 41 | # For multivariate node 42 | if x.ndim > 1: 43 | # Transpose first, then sort 44 | tx = np.transpose(x, list(range(x.ndim))[1:]+[0]) 45 | dims = np.shape(tx) 46 | # Container list for intervals 47 | intervals = np.resize(0.0, dims[:-1]+(2,)) 48 | 49 | for index in make_indices(dims[:-1]): 50 | try: 51 | index = tuple(index) 52 | except TypeError: 53 | pass 54 | 55 | # Sort trace 56 | sx = np.sort(tx[index]) 57 | # Append to list 58 | intervals[index] = calc_min_interval(sx, alpha) 59 | # Transpose back before returning 60 | return np.array(intervals) 61 | else: 62 | # Sort univariate node 63 | sx = np.sort(x) 64 | return np.array(calc_min_interval(sx, alpha)) 65 | -------------------------------------------------------------------------------- /plot_post.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from scipy import stats 4 | import matplotlib.pyplot as plt 5 | from hpd import hpd 6 | 7 | 8 | def plot_post(param_sample_vec, cred_mass=0.95, comp_val=False, 9 | ROPE=False, ylab='', xlab='parameter', fontsize=14, labelsize=14, 10 | title='', framealpha=1, facecolor='skyblue', edgecolor='white', 11 | show_mode=True, bins=50): 12 | 13 | #compute HDI 14 | HDI = hpd(param_sample_vec, 1-cred_mass) 15 | 16 | post_summary = {'mean':0,'median':0,'mode':0, 'hdi_mass':0,'hdi_low':0, 17 | 'hdi_high':0, 'comp_val':0, 'pc_gt_comp_val':0, 'ROPE_low':0, 18 | 'ROPE_high':0, 'pc_in_ROPE':0} 19 | post_summary['mean'] = np.mean(param_sample_vec) 20 
| post_summary['median'] = np.median(param_sample_vec) 21 | post_summary['mode'] = stats.mode(param_sample_vec)[0] 22 | post_summary['hdi_mass'] = cred_mass 23 | post_summary['hdi_low'] = HDI[0] 24 | post_summary['hdi_high'] = HDI[1] 25 | 26 | # Plot histogram. 27 | n, bins, patches = plt.hist(param_sample_vec, normed=True, bins=bins, 28 | edgecolor=edgecolor, facecolor=facecolor) 29 | plt.xlabel(xlab, fontsize=fontsize) 30 | plt.ylabel(ylab, fontsize=fontsize) 31 | plt.title(title, fontsize=fontsize) 32 | 33 | cv_ht = 0.75*np.max(n) 34 | cen_tend_ht = 0.9 * cv_ht 35 | ROPE_text_ht = 0.55 * cv_ht 36 | # # Display mean or mode: 37 | if show_mode: 38 | plt.plot(0, label='mode = %.2f' % post_summary['mode'], alpha=0) 39 | else: 40 | plt.plot(0, label='mean = %.2f' % post_summary['mean'], alpha=0) 41 | # Display the comparison value. 42 | 43 | if comp_val is not False: 44 | pc_gt_comp_val = 100 * np.sum(param_sample_vec > comp_val)/len(param_sample_vec) 45 | pc_lt_comp_val = 100 - pc_gt_comp_val 46 | plt.plot([comp_val, comp_val], [0, cv_ht], color='darkgreen', 47 | linestyle='--', linewidth=2, 48 | label='%.1f%% <%.1f < %.1f%%' 49 | % (pc_lt_comp_val, comp_val, pc_gt_comp_val)) 50 | post_summary['comp_val'] = comp_val 51 | post_summary['pc_gt_comp_val'] = pc_gt_comp_val 52 | # # Display the ROPE. 53 | if ROPE is not False: 54 | rope_col = 'darkred' 55 | pc_in_ROPE = round(np.sum((param_sample_vec > ROPE[0]) & (param_sample_vec < ROPE[1]))/len(param_sample_vec)*100) 56 | plt.plot([ROPE[0], ROPE[0]], [0, 0.96*ROPE_text_ht], color=rope_col, 57 | linestyle=':', linewidth=4, 58 | label='%.1f%% in ROPE' % pc_in_ROPE) 59 | plt.plot([ROPE[1], ROPE[1]], [0, 0.96*ROPE_text_ht], color=rope_col, 60 | linestyle=':', linewidth=4) 61 | post_summary['ROPE_low'] = ROPE[0] 62 | post_summary['ROPE_high'] = ROPE[1] 63 | post_summary['pc_in_ROPE'] = pc_in_ROPE 64 | # # Display the HDI. 65 | plt.plot(HDI, [0, 0], linewidth=6, color='k', label='HDI %.1f%% %.3f-%.3f' % (cred_mass*100, HDI[0], HDI[1])) 66 | plt.legend(loc='upper left', fontsize=labelsize, framealpha=framealpha) 67 | frame = plt.gca() 68 | frame.axes.get_yaxis().set_ticks([]) 69 | return post_summary 70 | 71 | --------------------------------------------------------------------------------