├── 02_SimpleGraph.py ├── 03_IntegralOfDensity.py ├── 03_RunningProportion.py ├── 04_BayesUpdate.py ├── 05_BernBeta.py ├── 05_BetaPosteriorPredictions.py ├── 06_BernGrid.py ├── 07_BernBetaPyMCFull.py ├── 07_BernMetropolisTemplate.py ├── 08_BernTwoGrid.py ├── 08_BernTwoMetropolis.py ├── 08_BernTwoPyMC.py ├── 09_BernBetaMuKappaPyMC.py ├── 09_BernBetaMuKappaPyMC_TT.py ├── 09_FilconPyMC.py ├── 09_FilconPyMC_ex9.2.A.py ├── 09_FilconPyMC_ex9.2.B.py ├── 10_BernBetaModelCompPyMC.py ├── 10_FilconModelCompPyMC.py ├── 10_ToyModelCompPyMC.py ├── 12_OneOddGroupModelComp.py ├── 13_minNforHDIpower.py ├── 15_SystemsPyMC.py ├── 15_YmetricXsinglePyMC.py ├── 16_SimpleLinearRegressionPyMC.py ├── 16_SimpleRobustLinearRegressionPyMC.py ├── 17_MultiLinRegressHyperPyMC.py ├── 17_MultipleLinearRegressionPyMC.py ├── 18_ANOVAonewayNonhomogvarBrugs.py ├── 18_ANOVAonewayPyMC.py ├── 19_ANOVAtwowayPyMC.py ├── Figures ├── Figure_10.2.png ├── Figure_10.3-4.png ├── Figure_12.5.png ├── Figure_15.9.png ├── Figure_16.2.png ├── Figure_16.4.png ├── Figure_16.5.png ├── Figure_16.6.png ├── Figure_16.8a.png ├── Figure_16.8b.png ├── Figure_16.8c.png ├── Figure_16.8d.png ├── Figure_17.5a.png ├── Figure_17.5b.png ├── Figure_17.Xa.png ├── Figure_17.Xb.png ├── Figure_18.2a.png ├── Figure_18.2b.png ├── Figure_18.3.png ├── Figure_19.4.png ├── Figure_19.5.png ├── Figure_2.2.png ├── Figure_3.1.png ├── Figure_3.3.png ├── Figure_4.1.png ├── Figure_4.2.png ├── Figure_4.3.png ├── Figure_5.2.png ├── Figure_6.1.png ├── Figure_6.2.png ├── Figure_6.3.png ├── Figure_7.3.png ├── Figure_7.4.png ├── Figure_7.5.png ├── Figure_7.6_a.png ├── Figure_7.6_b.png ├── Figure_7.6_c.png ├── Figure_8.1.png ├── Figure_8.2.png ├── Figure_8.3.png ├── Figure_8.3_HDI.png ├── Figure_8.6.png ├── Figure_9.11.png ├── Figure_9.12.png ├── Figure_9.14.png ├── Figure_9.16.png ├── Figure_9.16b.png ├── Figure_9.18_lower.png ├── Figure_9.18_upper.png └── figure_15.3.png ├── Guber1999data.txt ├── HDI_of_grid.py ├── HDIofICDF.py ├── HtWtDataGenerator.py ├── IPython └── Kruschkes_Doing_Bayesian_Data_Analysis_in_PyMC3.ipynb ├── McDonaldSK1991data.txt ├── McIntyre1994data.csv ├── QianS2007SeaweedData.txt ├── README.md ├── Salary.csv ├── SolariLS2008data.txt ├── Systems.txt ├── hpd.py └── plot_post.py /02_SimpleGraph.py: -------------------------------------------------------------------------------- 1 | """ 2 | A simple graph drawn by Python :-) 3 | """ 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | plt.style.use('seaborn-darkgrid') 7 | 8 | x = np.linspace(-2, 2, 40) 9 | y = x**2 10 | 11 | plt.plot(x, y) 12 | plt.savefig('Figure_2.2.png') 13 | plt.show() 14 | -------------------------------------------------------------------------------- /03_IntegralOfDensity.py: -------------------------------------------------------------------------------- 1 | """ 2 | Graph of normal probability density function, with comb of intervals. 3 | """ 4 | import matplotlib.pyplot as plt 5 | plt.style.use('seaborn-darkgrid') 6 | import numpy as np 7 | 8 | meanval = 0.0 # Specify mean of distribution. 9 | sdval = 0.2 # Specify standard deviation of distribution. 10 | xlow = meanval - 3 * sdval # Specify low end of x-axis. 11 | xhigh = meanval + 3 * sdval # Specify high end of x-axis. 12 | dx = 0.02 # Specify interval width on x-axis 13 | # Specify comb points along the x axis: 14 | x = np.arange(xlow, xhigh, dx) 15 | # Compute y values, i.e., probability density at each value of x: 16 | y = (1/(sdval*np.sqrt(2*np.pi))) * np.exp(-.5 * ((x - meanval)/sdval)**2) 17 | # Plot the function. 
"plot" draws the bell curve. "stem" draws the intervals. 18 | plt.plot(x, y) 19 | plt.stem(x, y, markerfmt=' ') 20 | 21 | plt.xlabel('$x$') 22 | plt.ylabel('$p(x)$') 23 | plt.title('Normal Probability Density') 24 | # Approximate the integral as the sum of width * height for each interval. 25 | area = np.sum(dx*y) 26 | # Display info in the graph. 27 | plt.text(-.6, 1.7, '$\mu$ = %s' % meanval) 28 | plt.text(-.6, 1.5, '$\sigma$ = %s' % sdval) 29 | plt.text(.2, 1.7, '$\Delta x$ = %s' % dx) 30 | plt.text(.2, 1.5, '$\sum_{x}$ $\Delta x$ $p(x)$ = %5.3f' % area) 31 | 32 | plt.savefig('Figure_3.3.png') 33 | -------------------------------------------------------------------------------- /03_RunningProportion.py: -------------------------------------------------------------------------------- 1 | """ 2 | Goal: Toss a coin N times and compute the running proportion of heads. 3 | """ 4 | import matplotlib.pyplot as plt 5 | plt.style.use('seaborn-darkgrid') 6 | import numpy as np 7 | 8 | # Specify the total number of flips, denoted N. 9 | N = 500 10 | # Generate a random sample of N flips for a fair coin (heads=1, tails=0); 11 | np.random.seed(47405) 12 | flip_sequence = np.random.choice(a=(0, 1), p=(.5, .5), size=N, replace=True) 13 | # Compute the running proportion of heads: 14 | r = np.cumsum(flip_sequence) 15 | n = np.linspace(1, N, N) # n is a vector. 16 | run_prop = r/n # component by component division. 17 | 18 | # Graph the running proportion: 19 | plt.plot(n, run_prop, '-o', ) 20 | plt.xscale('log') # an alternative to plot() and xscale() is semilogx() 21 | plt.xlim(1, N) 22 | plt.ylim(0, 1) 23 | plt.xlabel('Flip Number') 24 | plt.ylabel('Proportion Heads') 25 | plt.title('Running Proportion of Heads') 26 | # Plot a dotted horizontal line at y=.5, just as a reference line: 27 | plt.axhline(y=.5, ls='dashed') 28 | 29 | # Display the beginning of the flip sequence. 30 | flipletters = '' 31 | for i in flip_sequence[:10]: 32 | if i == 1: 33 | flipletters += 'H' 34 | else: 35 | flipletters += 'T' 36 | 37 | plt.text(10, 0.8, 'Flip Sequence = %s...' % flipletters) 38 | # Display the relative frequency at the end of the sequence. 39 | plt.text(25, 0.2, 'End Proportion = %s' % run_prop[-1]) 40 | 41 | plt.savefig('Figure_3.1.png') 42 | -------------------------------------------------------------------------------- /04_BayesUpdate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Bayesian updating of beliefs about the bias of a coin. The prior and posterior 3 | distributions indicate probability masses at discrete candidate values of theta. 4 | """ 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | plt.style.use('seaborn-darkgrid') 8 | 9 | 10 | 11 | # theta is the vector of candidate values for the parameter theta. 12 | # n_theta_vals is the number of candidate theta values. 13 | # To produce the examples in the book, set n_theta_vals to either 3 or 63. 14 | n_theta_vals = 3. 15 | # Now make the vector of theta values: 16 | theta = np.linspace(1/(n_theta_vals +1), n_theta_vals /(n_theta_vals +1), n_theta_vals ) 17 | 18 | # p_theta is the vector of prior probabilities on the theta values. 19 | p_theta = np.minimum(theta, 1-theta) # Makes a triangular belief distribution. 20 | p_theta = p_theta / np.sum(p_theta) # Makes sure that beliefs sum to 1. 21 | 22 | # Specify the data. 
To produce the examples in the book, use either 23 | # data = np.repeat([1,0], [3, 9]) or data = np.repeat([1,0], [1, 11]) 24 | data = np.repeat([1, 0], [3, 9]) 25 | n_heads = np.sum(data) 26 | n_tails = len(data) - n_heads 27 | 28 | # Compute the likelihood of the data for each value of theta: 29 | p_data_given_theta = theta**n_heads * (1-theta)**n_tails 30 | 31 | # Compute the posterior: 32 | p_data = np.sum(p_data_given_theta * p_theta) 33 | p_theta_given_data = p_data_given_theta * p_theta / p_data # This is Bayes' rule! 34 | 35 | # Plot the results. 36 | plt.figure(figsize=(12, 11)) 37 | plt.subplots_adjust(hspace=0.7) 38 | 39 | # Plot the prior: 40 | plt.subplot(3, 1, 1) 41 | plt.stem(theta, p_theta, markerfmt=' ') 42 | plt.xlim(0, 1) 43 | plt.xlabel('$\\theta$') 44 | plt.ylabel('$P(\\theta)$') 45 | plt.title('Prior') 46 | # Plot the likelihood: 47 | plt.subplot(3, 1, 2) 48 | plt.stem(theta, p_data_given_theta, markerfmt=' ') 49 | plt.xlim(0, 1) 50 | plt.xlabel('$\\theta$') 51 | plt.ylabel('$P(D|\\theta)$') 52 | plt.title('Likelihood') 53 | plt.text(0.6, np.max(p_data_given_theta)/2, 'D = %sH,%sT' % (n_heads, n_tails)) 54 | # Plot the posterior: 55 | plt.subplot(3, 1, 3) 56 | plt.stem(theta, p_theta_given_data, markerfmt=' ') 57 | plt.xlim(0, 1) 58 | plt.xlabel('$\\theta$') 59 | plt.ylabel('$P(\\theta|D)$') 60 | plt.title('Posterior') 61 | plt.text(0.6, np.max(p_theta_given_data)/2, 'P(D) = %g' % p_data) 62 | 63 | plt.savefig('Figure_4.1.png') 64 | -------------------------------------------------------------------------------- /05_BernBeta.py: -------------------------------------------------------------------------------- 1 | """ 2 | Inferring a binomial proportion via exact mathematical analysis. 3 | """ 4 | import sys 5 | import numpy as np 6 | from scipy.stats import beta 7 | from scipy.special import beta as beta_func 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | from HDIofICDF import * 11 | 12 | 13 | def bern_beta(prior_shape, data_vec, cred_mass=0.95): 14 | """Bayesian updating for Bernoulli likelihood and beta prior. 15 | Input arguments: 16 | prior_shape 17 | vector of parameter values for the prior beta distribution. 18 | data_vec 19 | vector of 1's and 0's. 20 | cred_mass 21 | the probability mass of the HDI. 22 | Output: 23 | post_shape 24 | vector of parameter values for the posterior beta distribution. 25 | Graphics: 26 | Creates a three-panel graph of prior, likelihood, and posterior 27 | with highest posterior density interval. 
28 | Example of use: 29 | post_shape = bern_beta(prior_shape=[1,1] , data_vec=[1,0,0,1,1])""" 30 | 31 | # Check for errors in input arguments: 32 | if len(prior_shape) != 2: 33 | sys.exit('prior_shape must have two components.') 34 | if any([i < 0 for i in prior_shape]): 35 | sys.exit('prior_shape components must be positive.') 36 | if any([i != 0 and i != 1 for i in data_vec]): 37 | sys.exit('data_vec must be a vector of 1s and 0s.') 38 | if cred_mass <= 0 or cred_mass >= 1.0: 39 | sys.exit('cred_mass must be between 0 and 1.') 40 | 41 | # Rename the prior shape parameters, for convenience: 42 | a = prior_shape[0] 43 | b = prior_shape[1] 44 | # Create summary values of the data: 45 | z = sum(data_vec[data_vec == 1]) # number of 1's in data_vec 46 | N = len(data_vec) # number of flips in data_vec 47 | # Compute the posterior shape parameters: 48 | post_shape = [a+z, b+N-z] 49 | # Compute the evidence, p(D): 50 | p_data = beta_func(z+a, N-z+b)/beta_func(a, b) 51 | # Construct grid of theta values, used for graphing. 52 | bin_width = 0.005 # Arbitrary small value for comb on theta. 53 | theta = np.arange(bin_width/2, 1-(bin_width/2)+bin_width, bin_width) 54 | # Compute the prior at each value of theta. 55 | p_theta = beta.pdf(theta, a, b) 56 | # Compute the likelihood of the data at each value of theta. 57 | p_data_given_theta = theta**z * (1-theta)**(N-z) 58 | # Compute the posterior at each value of theta. 59 | post_a = a + z 60 | post_b = b+N-z 61 | p_theta_given_data = beta.pdf(theta, a+z, b+N-z) 62 | # Determine the limits of the highest density interval 63 | intervals = HDIofICDF(beta, cred_mass, a=post_shape[0], b=post_shape[1]) 64 | 65 | # Plot the results. 66 | plt.figure(figsize=(12, 12)) 67 | plt.subplots_adjust(hspace=0.7) 68 | 69 | # Plot the prior. 
70 | locx = 0.05 71 | plt.subplot(3, 1, 1) 72 | plt.plot(theta, p_theta) 73 | plt.xlim(0, 1) 74 | plt.ylim(0, np.max(p_theta)*1.2) 75 | plt.xlabel(r'$\theta$') 76 | plt.ylabel(r'$P(\theta)$') 77 | plt.title('Prior') 78 | plt.text(locx, np.max(p_theta)/2, r'beta($\theta$;%s,%s)' % (a, b)) 79 | # Plot the likelihood: 80 | plt.subplot(3, 1, 2) 81 | plt.plot(theta, p_data_given_theta) 82 | plt.xlim(0, 1) 83 | plt.ylim(0, np.max(p_data_given_theta)*1.2) 84 | plt.xlabel(r'$\theta$') 85 | plt.ylabel(r'$P(D|\theta)$') 86 | plt.title('Likelihood') 87 | plt.text(locx, np.max(p_data_given_theta)/2, 'Data: z=%s, N=%s' % (z, N)) 88 | # Plot the posterior: 89 | plt.subplot(3, 1, 3) 90 | plt.plot(theta, p_theta_given_data) 91 | plt.xlim(0, 1) 92 | plt.ylim(0, np.max(p_theta_given_data)*1.2) 93 | plt.xlabel(r'$\theta$') 94 | plt.ylabel(r'$P(\theta|D)$') 95 | plt.title('Posterior') 96 | locy = np.linspace(0, np.max(p_theta_given_data), 5) 97 | plt.text(locx, locy[1], r'beta($\theta$;%s,%s)' % (post_a, post_b)) 98 | plt.text(locx, locy[2], 'P(D) = %g' % p_data) 99 | # Plot the HDI 100 | plt.text(locx, locy[3], 101 | 'Intervals = %.3f - %.3f' % (intervals[0], intervals[1])) 102 | plt.fill_between(theta, 0, p_theta_given_data, 103 | where=np.logical_and(theta > intervals[0], 104 | theta < intervals[1]), 105 | color='blue', alpha=0.3) 106 | return intervals 107 | 108 | data_vec = np.repeat([1, 0], [11, 3]) # 11 heads, 3 tail 109 | intervals = bern_beta(prior_shape=[100, 100], data_vec=data_vec) 110 | plt.savefig('Figure_5.2.png') 111 | plt.show() 112 | 113 | -------------------------------------------------------------------------------- /05_BetaPosteriorPredictions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Posterior predictive check. Examine the veracity of the winning model by 3 | simulating data sampled from the winning model and see if the simulated data 4 | 'look like' the actual data. 5 | """ 6 | import numpy as np 7 | from scipy.stats import beta 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | 11 | # Specify known values of prior and actual data. 12 | prior_a = 100 13 | prior_b = 1 14 | actual_data_Z = 8 15 | actual_data_N = 12 16 | # Compute posterior parameter values. 17 | post_a = prior_a + actual_data_Z 18 | post_b = prior_b + actual_data_N - actual_data_Z 19 | # Number of flips in a simulated sample should match the actual sample size: 20 | sim_sample_size = actual_data_N 21 | # Designate an arbitrarily large number of simulated samples. 22 | n_sim_samples = 1000 23 | # Set aside a vector in which to store the simulation results. 24 | sim_sample_Z_record = np.zeros(n_sim_samples) 25 | # Now generate samples from the posterior. 26 | for sample_idx in range(0, n_sim_samples): 27 | # Generate a theta value for the new sample from the posterior. 28 | sample_theta = beta.rvs(post_a, post_b) 29 | # Generate a sample, using sample_theta. 30 | sample_data = np.random.choice([0, 1], p=[1-sample_theta, sample_theta], 31 | size=sim_sample_size, replace=True) 32 | sim_sample_Z_record[sample_idx] = sum(sample_data) 33 | 34 | 35 | ## Make a histogram of the number of heads in the samples. 36 | plt.hist(sim_sample_Z_record) 37 | plt.show() 38 | -------------------------------------------------------------------------------- /06_BernGrid.py: -------------------------------------------------------------------------------- 1 | """ 2 | Inferring a binomial proportion via grid approximation. 
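Theta is restricted to a dense grid of candidate values; the prior, likelihood, and posterior are all handled as probability masses on that grid, and the HDI is estimated by resampling theta values from the gridded posterior.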
3 | """ 4 | import matplotlib.pyplot as plt 5 | plt.style.use('seaborn-darkgrid') 6 | import numpy as np 7 | from hpd import hpd 8 | 9 | 10 | def bern_grid(theta, p_theta, data, credib=.95): 11 | """ 12 | Bayesian updating for Bernoulli likelihood and prior specified on a grid. 13 | Input arguments: 14 | theta is a vector of theta values, all between 0 and 1. 15 | p_theta is a vector of corresponding probability _masses_. 16 | data is a vector of 1's and 0's, where 1 corresponds to a and 0 to b. 17 | credib is the probability mass of the credible interval, default is 0.95. 18 | Output: 19 | p_theta_given_data is a vector of posterior probability masses over theta. 20 | Also creates a three-panel graph of prior, likelihood, and posterior 21 | probability masses with credible interval. 22 | Example of use: 23 | Create vector of theta values. 24 | bin_width = 1/1000 25 | theta_grid = np.arange(0, 1+bin_width, bin_width) 26 | Specify probability mass at each theta value. 27 | > rel_prob = np.minimum(theta_grid, 1-theta_grid) relative prob at each theta 28 | > prior = rel_prob / sum(rel_prob) probability mass at each theta 29 | Specify the data vector. 30 | data_vec = np.repeat([1, 0], [11, 3]) # 3 heads, 1 tail 31 | Call the function. 32 | > posterior = bern_grid( theta=theta_grid , p_theta=prior , data=data_vec ) 33 | """ 34 | 35 | # Create summary values of data 36 | z = sum(data[data == 1]) # number of 1's in data 37 | N = len(data) # number of flips in data 38 | # Compute the likelihood of the data for each value of theta. 39 | p_data_given_theta = theta**z * (1 - theta)**(N - z) 40 | # Compute the evidence and the posterior. 41 | p_data = sum(p_data_given_theta * p_theta) 42 | p_theta_given_data = p_data_given_theta * p_theta / p_data 43 | # Determine the limits of the highest density interval 44 | x = np.random.choice(theta, size=5000, replace=True, p=p_theta_given_data) 45 | intervals = hpd(x, alpha=1-credib) 46 | 47 | # Plot the results. 48 | plt.figure(figsize=(12, 12)) 49 | plt.subplots_adjust(hspace=0.7) 50 | 51 | # # Plot the prior. 
52 | locx = 0.05 53 | mean_theta = sum(theta * p_theta) # mean of prior, for plotting 54 | plt.subplot(3, 1, 1) 55 | plt.plot(theta, p_theta) 56 | plt.xlim(0, 1) 57 | plt.ylim(0, np.max(p_theta)*1.2) 58 | plt.xlabel(r'$\theta$') 59 | plt.ylabel(r'$P(\theta)$') 60 | plt.title('Prior') 61 | plt.text(locx, np.max(p_theta)/2, r'mean($\theta$;%5.2f)' % mean_theta) 62 | # Plot the likelihood: 63 | plt.subplot(3, 1, 2) 64 | plt.plot(theta, p_data_given_theta) 65 | plt.xlim(0, 1) 66 | plt.ylim(0, np.max(p_data_given_theta)*1.2) 67 | plt.xlabel(r'$\theta$') 68 | plt.ylabel(r'$P(D|\theta)$') 69 | plt.title('Likelihood') 70 | plt.text(locx, np.max(p_data_given_theta)/2, 'data: z=%s, N=%s' % (z, N)) 71 | # Plot the posterior: 72 | mean_theta_given_data = sum(theta * p_theta_given_data) 73 | plt.subplot(3, 1, 3) 74 | plt.plot(theta, p_theta_given_data) 75 | plt.xlim(0, 1) 76 | plt.ylim(0, np.max(p_theta_given_data)*1.2) 77 | plt.xlabel(r'$\theta$') 78 | plt.ylabel(r'$P(\theta|D)$') 79 | plt.title('Posterior') 80 | loc = np.linspace(0, np.max(p_theta_given_data), 5) 81 | plt.text(locx, loc[1], r'mean($\theta$;%5.2f)' % mean_theta_given_data) 82 | plt.text(locx, loc[2], 'P(D) = %g' % p_data) 83 | # Plot the HDI 84 | plt.text(locx, loc[3], 85 | 'Intervals =%s' % ', '.join('%.3f' % x for x in intervals)) 86 | for i in range(0, len(intervals), 2): 87 | plt.fill_between(theta, 0, p_theta_given_data, 88 | where=np.logical_and(theta > intervals[i], 89 | theta < intervals[i+1]), 90 | color='blue', alpha=0.3) 91 | plt.savefig('Figure_6.1.png') 92 | plt.show() 93 | return p_theta_given_data 94 | 95 | 96 | ###Create vector of theta values. 97 | bin_width = 1/1000. 98 | theta_grid = np.arange(0, 1+bin_width, bin_width) 99 | ##Specify probability mass at each theta value. 100 | rel_prob = np.array([0.1] * len(theta_grid)) # uniform prior 101 | rel_prob = np.array([0.1] * len(theta_grid)) # uniform prior 102 | prior = rel_prob / sum(rel_prob) # probability mass at each theta 103 | 104 | 105 | #### figure 6.2 ### 106 | #np.random.seed(123) 107 | #a = [0.1] * 50 108 | #b = np.linspace(0.1, 1, 50) 109 | #c = np.linspace(1, 0.1, 50) 110 | #d = [0.1] * 50 111 | #p_theta = np.concatenate((a, b, c, d)) 112 | #prior = np.where(p_theta != 0 , p_theta / sum(p_theta), 0.) 113 | #width = 1. / len(p_theta) 114 | #theta_grid = np.arange(width/2 , (1-width/2)+width, width) 115 | 116 | ### figure 6.3 ### 117 | #np.random.seed(123) 118 | #a = np.repeat([0], [50]) 119 | #b = np.linspace(0, 1, 50) 120 | #c = (np.linspace(1, 0, 20))**2 121 | #d = np.random.uniform(size=3) 122 | #e = np.repeat([1], [20]) 123 | #p_theta = np.concatenate((a, b, c, d, e)) 124 | #prior = np.where(p_theta != 0 , p_theta / sum(p_theta), 0.) 125 | #width = 1. / len(p_theta) 126 | #theta_grid = np.arange(width/2 , (1-width/2)+width, width) 127 | 128 | ###Specify the data vector. 129 | data_vec = np.repeat([1, 0], [11, 3]) # 3 heads, 1 tail 130 | ###Call the function. 131 | posterior = bern_grid(theta=theta_grid, p_theta=prior, data=data_vec) 132 | -------------------------------------------------------------------------------- /07_BernBetaPyMCFull.py: -------------------------------------------------------------------------------- 1 | """ 2 | Inferring a binomial proportion using PyMC. 
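The coin-flip data (11 heads, 3 tails) get a Beta(1,1) prior and a Bernoulli likelihood, and the posterior for theta is sampled with MCMC rather than computed analytically or on a grid.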
3 | """ 4 | import matplotlib.pyplot as plt 5 | plt.style.use('seaborn-darkgrid') 6 | import numpy as np 7 | import pymc3 as pm 8 | 9 | # Generate the data 10 | y = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]) # 11 heads and 3 tails 11 | 12 | 13 | with pm.Model() as model: 14 | # define the prior 15 | theta = pm.Beta('theta', 1., 1.) # prior 16 | # define the likelihood 17 | y = pm.Bernoulli('y', p=theta, observed=y) 18 | 19 | # Generate a MCMC chain 20 | trace = pm.sample(1000) 21 | 22 | 23 | # create an array with the posterior sample 24 | theta_sample = trace['theta'] 25 | 26 | fig, ax = plt.subplots(1, 2) 27 | ax[0].plot(theta_sample[:500], np.arange(500), marker='o', color='skyblue') 28 | ax[0].set_xlim(0, 1) 29 | ax[0].set_xlabel(r'$\theta$') 30 | ax[0].set_ylabel('Position in Chain') 31 | 32 | pm.plot_posterior(theta_sample, ax=ax[1], color='skyblue'); 33 | ax[1].set_xlabel(r'$\theta$'); 34 | 35 | # Posterior prediction: 36 | # For each step in the chain, use posterior theta to flip a coin: 37 | y_pred = np.zeros(len(theta_sample)) 38 | for i, p_head in enumerate(theta_sample): 39 | y_pred[i] = np.random.choice([0, 1], p=[1 - p_head, p_head]) 40 | 41 | # Jitter the 0,1 y values for plotting purposes: 42 | y_pred_jittered = y_pred + np.random.uniform(-.05, .05, size=len(theta_sample)) 43 | 44 | # Now plot the jittered values: 45 | plt.figure() 46 | plt.plot(theta_sample[:500], y_pred_jittered[:500], 'C1o') 47 | plt.xlim(-.1, 1.1) 48 | plt.ylim(-.1, 1.1) 49 | plt.xlabel(r'$\theta$') 50 | plt.ylabel('y (jittered)') 51 | 52 | mean_y = np.mean(y_pred) 53 | mean_theta = np.mean(theta_sample) 54 | 55 | plt.plot(mean_y, mean_theta, 'k+', markersize=15) 56 | plt.annotate('mean(y) = %.2f\nmean($\\theta$) = %.2f' % 57 | (mean_y, mean_theta), xy=(mean_y, mean_theta)) 58 | plt.plot([0, 1], [0, 1], linestyle='--') 59 | 60 | plt.savefig('BernBetaPyMCPost.png') 61 | plt.show() 62 | -------------------------------------------------------------------------------- /07_BernMetropolisTemplate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Use this program as a template for experimenting with the Metropolis algorithm 3 | applied to a single parameter called theta, defined on the interval [0,1]. 4 | """ 5 | from __future__ import division 6 | import numpy as np 7 | import pymc3 as pm 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | from scipy.stats import beta 11 | 12 | 13 | 14 | # Specify the data, to be used in the likelihood function. 15 | # This is a vector with one component per flip, 16 | # in which 1 means a "head" and 0 means a "tail". 17 | my_data = np.repeat([1, 0], [11, 3]) # 11 heads, 2 tail 18 | 19 | # Define the Bernoulli likelihood function, p(D|theta). 20 | # The argument theta could be a vector, not just a scalar. 21 | def likelihood(theta, data): 22 | theta = np.array(theta) # ensure you have an array 23 | z = sum(data[data == 1]) # number of 1's in Data 24 | N = len(data) # number of flips in Data 25 | # Compute the likelihood of the Data for each value of Theta. 26 | if np.size(theta) == 1: # if theta is an scalar 27 | p_data_given_theta = 0 28 | if theta < 1 and theta > 0: 29 | p_data_given_theta = theta**z * (1-theta)**(N-z) 30 | else: # if theta is an array 31 | p_data_given_theta = theta**z * (1-theta)**(N-z) 32 | # The theta values passed into this function are generated at random, 33 | # and therefore might be inadvertently greater than 1 or less than 0. 
34 | # The likelihood for theta > 1 or for theta < 0 is zero: 35 | p_data_given_theta[(theta > 1) | (theta < 0)] = 0 36 | return p_data_given_theta 37 | 38 | 39 | # Define the prior density function. For purposes of computing p(D), 40 | # at the end of this program, we want this prior to be a proper density. 41 | # The argument theta could be a vector, not just a scalar. 42 | def prior(theta): 43 | theta = np.array(theta) # ensure you have an array 44 | # For kicks, here's a bimodal prior. To try it, uncomment the next 2 lines. 45 | #from scipy.stats import beta 46 | #prior = dbeta(np.minium(2*theta, 2*(1-theta)), 2, 2) 47 | if np.size(theta) == 1: # if theta is an scalar 48 | prior = 0 49 | if theta < 1 and theta > 0: 50 | prior = 1 51 | else: # if theta is an array 52 | prior = np.ones(len(theta)) # uniform density over [0,1] 53 | # The theta values passed into this function are generated at random, 54 | # and therefore might be inadvertently greater than 1 or less than 0. 55 | # The likelihood for theta > 1 or for theta < 0 is zero: 56 | prior[(theta > 1) | (theta < 0)] = 0 57 | return prior 58 | 59 | 60 | 61 | # Define the relative probability of the target distribution, 62 | # as a function of vector theta. For our application, this 63 | # target distribution is the unnormalized posterior distribution. 64 | def target_rel_prob(theta, data): 65 | target_rel_prob = likelihood(theta , data) * prior(theta) 66 | return target_rel_prob 67 | 68 | # Specify the length of the trajectory, i.e., the number of jumps to try: 69 | traj_length = 5000 # arbitrary large number 70 | # Initialize the vector that will store the results: 71 | trajectory = np.zeros(traj_length) 72 | # Specify where to start the trajectory: 73 | trajectory[0] = 0.50 # arbitrary value 74 | # Specify the burn-in period: 75 | burn_in = int(np.ceil(0.1 * traj_length)) # arbitrary number, less than traj_length 76 | # Initialize accepted, rejected counters, just to monitor performance: 77 | n_accepted = 0 78 | n_rejected = 0 79 | # Specify seed to reproduce same random walk: 80 | np.random.seed(4745) 81 | 82 | # Now generate the random walk. The 't' index is time or trial in the walk. 83 | for t in range(traj_length-1): 84 | current_position = trajectory[t] 85 | # Use the proposal distribution to generate a proposed jump. 86 | # The shape and variance of the proposal distribution can be changed 87 | # to whatever you think is appropriate for the target distribution. 88 | proposed_jump = np.random.normal(loc=0 , scale=0.1, size=1) 89 | 90 | # # Compute the probability of accepting the proposed jump. 91 | prob_accept = np.minimum(1, 92 | target_rel_prob(current_position + proposed_jump, my_data) 93 | / target_rel_prob(current_position, my_data)) 94 | # # Generate a random uniform value from the interval [0,1] to 95 | # # decide whether or not to accept the proposed jump. 96 | if np.random.rand() < prob_accept: 97 | # accept the proposed jump 98 | trajectory[t+1] = current_position + proposed_jump 99 | # increment the accepted counter, just to monitor performance 100 | if t > burn_in: 101 | n_accepted += 1 102 | else: 103 | # reject the proposed jump, stay at current position 104 | trajectory[t+1] = current_position 105 | # increment the rejected counter, just to monitor performance 106 | if t > burn_in: 107 | n_rejected += 1 108 | 109 | 110 | # Extract the post-burn_in portion of the trajectory. 111 | accepted_traj = trajectory[burn_in:] 112 | # End of Metropolis algorithm. 113 | 114 | # Display the posterior. 
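# (Added sanity check, not part of the original script: with the uniform prior
# defined above and z = 11 heads in N = 14 flips, the exact posterior is the
# conjugate Beta(z+1, N-z+1) = Beta(12, 4), so the chain mean should be close
# to the analytic mean 12/16 = 0.75.)
print('Sanity check: sample mean = %.3f, exact Beta(12,4) mean = %.3f'
      % (np.mean(accepted_traj), 12/16))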
115 | ROPE = np.array([0.76, 0.8]) 116 | pm.plot_posterior(accepted_traj, ref_val=0.9, rope=ROPE) 117 | plt.xlabel = 'theta' 118 | 119 | 120 | # Display rejected/accepted ratio in the plot. 121 | mean_traj = np.mean(accepted_traj) 122 | std_traj = np.std(accepted_traj) 123 | plt.plot(0, label=r'$N_{pro}=%s$ $\frac{N_{acc}}{N_{pro}} = %.3f$' % (len(accepted_traj), (n_accepted/len(accepted_traj))), alpha=0) 124 | 125 | # Evidence for model, p(D). 126 | 127 | # Compute a,b parameters for beta distribution that has the same mean 128 | # and stdev as the sample from the posterior. This is a useful choice 129 | # when the likelihood function is Bernoulli. 130 | a = mean_traj * ((mean_traj*(1 - mean_traj)/std_traj**2) - 1) 131 | b = (1 - mean_traj) * ((mean_traj*(1 - mean_traj)/std_traj**2) - 1) 132 | 133 | # For every theta value in the posterior sample, compute 134 | # dbeta(theta,a,b) / likelihood(theta)*prior(theta) 135 | # This computation assumes that likelihood and prior are proper densities, 136 | # i.e., not just relative probabilities. This computation also assumes that 137 | # the likelihood and prior functions were defined to accept a vector argument, 138 | # not just a single-component scalar argument. 139 | wtd_evid = beta.pdf(accepted_traj, a, b) / (likelihood(accepted_traj, my_data) * prior(accepted_traj)) 140 | p_data = 1 / np.mean(wtd_evid) 141 | 142 | 143 | # Display p(D) in the graph 144 | plt.plot(0, label='p(D) = %.3e' % p_data, alpha=0) 145 | 146 | 147 | 148 | # Uncomment next line if you want to save the graph. 149 | plt.savefig('BernMetropolisTemplate.png') 150 | plt.show() 151 | -------------------------------------------------------------------------------- /08_BernTwoGrid.py: -------------------------------------------------------------------------------- 1 | """ 2 | Inferring two binomial proportions via grid approximation. 3 | """ 4 | from __future__ import division 5 | import matplotlib.pyplot as plt 6 | plt.style.use('seaborn-darkgrid') 7 | from mpl_toolkits.mplot3d.axes3d import Axes3D 8 | from scipy.stats import beta 9 | from HDI_of_grid import HDI_of_grid 10 | import numpy as np 11 | 12 | 13 | # Specify the grid on theta1,theta2 parameter space. 14 | n_int = 500 # arbitrary number of intervals for grid on theta. 15 | theta1 = np.linspace(0, 1, n_int) 16 | theta2 = theta1 17 | 18 | theta1_grid, theta2_grid = np.meshgrid(theta1, theta2) 19 | 20 | # Specify the prior probability _masses_ on the grid. 21 | prior_name = ("Beta","Ripples","Null","Alt")[0] # or define your own. 
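# (Added note: the tuple index above selects which prior this run uses; index 1,
# for example, would pick "Ripples". Each branch below evaluates its prior on
# the theta1 x theta2 grid and divides by the total, so the grid values sum to
# 1 and are treated as probability masses rather than densities.)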
22 | if prior_name == "Beta": 23 | a1, b1, a2, b2 = 3, 3, 3, 3 24 | prior1 = beta.pdf(theta1_grid, a1, b1) 25 | prior2 = beta.pdf(theta2_grid, a1, b1) 26 | prior = prior1 * prior2 27 | prior = prior / np.sum(prior) 28 | 29 | if prior_name == "Ripples": 30 | m1, m2, k = 0, 1, 0.75 * np.pi 31 | prior = np.sin((k*(theta1_grid-m1))**2 + (k*(theta2_grid-m2))**2)**2 32 | prior = prior / np.sum(prior) 33 | 34 | if prior_name == "Null": 35 | # 1's at theta1=theta2, 0's everywhere else: 36 | prior = np.eye(len(theta1_grid), len(theta2_grid)) 37 | prior = prior / np.sum(prior) 38 | 39 | if prior_name == "Alt": 40 | # # Uniform: 41 | prior = np.ones((len(theta1_grid), len(theta2_grid))) 42 | prior = prior / np.sum(prior) 43 | 44 | # Specify likelihood 45 | z1, N1, z2, N2 = 5, 7, 2, 7 # data are specified here 46 | likelihood = theta1_grid**z1 * (1-theta1_grid)**(N1-z1) * theta2_grid**z2 * (1-theta2_grid)**(N2-z2) 47 | 48 | # Compute posterior from point-by-point multiplication and normalizing: 49 | p_data = np.sum(prior * likelihood) 50 | posterior = (prior * likelihood) / p_data 51 | 52 | # Specify the probability mass for the HDI region 53 | credib = .95 54 | thin = 4 55 | color = 'skyblue' 56 | 57 | fig = plt.figure(figsize=(12,12)) 58 | 59 | # prior 60 | ax = fig.add_subplot(3, 2, 1, projection='3d') 61 | ax.plot_surface(theta1_grid[::thin,::thin], theta2_grid[::thin,::thin], prior[::thin,::thin], color=color) 62 | ax.set_xlabel(r'$\theta1$') 63 | ax.set_ylabel(r'$\theta1$') 64 | ax.set_zlabel(r'$p(t1,t2)$') 65 | ax.set_xticklabels([]) 66 | ax.set_yticklabels([]) 67 | ax.set_zticklabels([]) 68 | 69 | plt.subplot(3, 2, 2) 70 | plt.contour(theta1_grid, theta2_grid, prior, colors=color) 71 | plt.xlabel(r'$\theta1$') 72 | plt.ylabel(r'$\theta1$') 73 | 74 | # likelihood 75 | ax = fig.add_subplot(3, 2, 3, projection='3d') 76 | ax.plot_surface(theta1_grid[::thin,::thin], theta2_grid[::thin,::thin], likelihood[::thin,::thin], color=color) 77 | ax.set_xlabel(r'$\theta1$') 78 | ax.set_ylabel(r'$\theta1$') 79 | ax.set_zlabel(r'$p(D|t1,t2)$') 80 | ax.set_xticklabels([]) 81 | ax.set_yticklabels([]) 82 | ax.set_zticklabels([]) 83 | 84 | plt.subplot(3, 2, 4) 85 | plt.contour(theta1_grid, theta2_grid, likelihood, colors=color) 86 | plt.xlabel(r'$\theta1$') 87 | plt.ylabel(r'$\theta1$') 88 | plt.plot(0, label='z1,N1,z2,N2=%s,%s,%s,%s' % (z1, N1, z2, N2), alpha=0) 89 | plt.legend(loc='upper left') 90 | 91 | # posterior 92 | ax = fig.add_subplot(3, 2, 5, projection='3d') 93 | ax.plot_surface(theta1_grid[::thin,::thin], theta2_grid[::thin,::thin],posterior[::thin,::thin], color=color) 94 | ax.set_xlabel(r'$\theta1$') 95 | ax.set_ylabel(r'$\theta1$') 96 | ax.set_zlabel(r'$p(t1,t2|D)$') 97 | ax.set_xticklabels([]) 98 | ax.set_yticklabels([]) 99 | ax.set_zticklabels([]) 100 | 101 | plt.subplot(3, 2, 6) 102 | plt.contour(theta1_grid, theta2_grid, posterior, colors=color) 103 | plt.xlabel(r'$\theta1$') 104 | plt.ylabel(r'$\theta1$') 105 | plt.plot(0, label='p(D) = %.3e' % p_data, alpha=0) 106 | plt.legend(loc='upper left') 107 | 108 | # Mark the highest posterior density region 109 | HDI_height = HDI_of_grid(posterior)['height'] 110 | plt.contour(theta1_grid, theta2_grid, posterior, levels=[HDI_height], colors='k') 111 | 112 | plt.tight_layout() 113 | plt.savefig('BernTwoGrid_%s.png' % prior_name) 114 | plt.show() 115 | -------------------------------------------------------------------------------- /08_BernTwoMetropolis.py: -------------------------------------------------------------------------------- 1 | """ 2 | Use 
this program as a template for experimenting with the Metropolis algorithm 3 | applied to 2 parameters called theta1,theta2 defined on the domain [0,1]x[0,1]. 4 | """ 5 | from __future__ import division 6 | import numpy as np 7 | from scipy.stats import beta 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | 11 | 12 | # Define the likelihood function. 13 | # The input argument is a vector: theta = [theta1 , theta2] 14 | 15 | def likelihood(theta): 16 | # Data are constants, specified here: 17 | z1, N1, z2, N2 = 5, 7, 2, 7 18 | likelihood = (theta[0]**z1 * (1-theta[0])**(N1-z1) 19 | * theta[1]**z2 * (1-theta[1])**(N2-z2)) 20 | return likelihood 21 | 22 | 23 | # Define the prior density function. 24 | # The input argument is a vector: theta = [theta1 , theta2] 25 | def prior(theta): 26 | # Here's a beta-beta prior: 27 | a1, b1, a2, b2 = 3, 3, 3, 3 28 | prior = beta.pdf(theta[0], a1, b1) * beta.pdf(theta[1], a2, b2) 29 | return prior 30 | 31 | 32 | # Define the relative probability of the target distribution, as a function 33 | # of theta. The input argument is a vector: theta = [theta1 , theta2]. 34 | # For our purposes, the value returned is the UNnormalized posterior prob. 35 | def target_rel_prob(theta): 36 | if ((theta >= 0.0).all() & (theta <= 1.0).all()): 37 | target_rel_probVal = likelihood(theta) * prior(theta) 38 | else: 39 | # This part is important so that the Metropolis algorithm 40 | # never accepts a jump to an invalid parameter value. 41 | target_rel_probVal = 0.0 42 | return target_rel_probVal 43 | # if ( all( theta >= 0.0 ) & all( theta <= 1.0 ) ) { 44 | # target_rel_probVal = likelihood( theta ) * prior( theta ) 45 | 46 | 47 | # Specify the length of the trajectory, i.e., the number of jumps to try: 48 | traj_length = 5000 # arbitrary large number 49 | # Initialize the vector that will store the results. 50 | trajectory = np.zeros((traj_length, 2)) 51 | # Specify where to start the trajectory 52 | trajectory[0, ] = [0.50, 0.50] # arbitrary start values of the two param's 53 | # Specify the burn-in period. 54 | burn_in = int(np.ceil(.1 * traj_length)) # arbitrary number 55 | # Initialize accepted, rejected counters, just to monitor performance. 56 | n_accepted = 0 57 | n_rejected = 0 58 | # Specify the seed, so the trajectory can be reproduced. 59 | np.random.seed(47405) 60 | # Specify the covariance matrix for multivariate normal proposal distribution. 61 | n_dim, sd1, sd2 = 2, 0.2, 0.2 62 | covar_mat = [[sd1**2, 0], [0, sd2**2]] 63 | 64 | # Now generate the random walk. step is the step in the walk. 65 | for step in range(traj_length-1): 66 | current_position = trajectory[step, ] 67 | # Use the proposal distribution to generate a proposed jump. 68 | # The shape and variance of the proposal distribution can be changed 69 | # to whatever you think is appropriate for the target distribution. 70 | proposed_jump = np.random.multivariate_normal(mean=np.zeros((n_dim)), 71 | cov=covar_mat) 72 | # Compute the probability of accepting the proposed jump. 73 | prob_accept = np.minimum(1, target_rel_prob(current_position + proposed_jump) 74 | / target_rel_prob(current_position)) 75 | # Generate a random uniform value from the interval [0,1] to 76 | # decide whether or not to accept the proposed jump. 
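# (Added note: drawing u ~ Uniform(0,1) and accepting whenever u < prob_accept
# accepts the jump with probability exactly prob_accept, i.e. the Metropolis
# rule min(1, target(proposed) / target(current)).)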
77 | if np.random.rand() < prob_accept: 78 | # accept the proposed jump 79 | trajectory[step+1, ] = current_position + proposed_jump 80 | # increment the accepted counter, just to monitor performance 81 | if step > burn_in: 82 | n_accepted += 1 83 | else: 84 | # reject the proposed jump, stay at current position 85 | trajectory[step+1, ] = current_position 86 | # increment the rejected counter, just to monitor performance 87 | if step > burn_in: 88 | n_rejected += 1 89 | 90 | # End of Metropolis algorithm. 91 | 92 | #----------------------------------------------------------------------- 93 | # Begin making inferences by using the sample generated by the 94 | # Metropolis algorithm. 95 | 96 | # Extract just the post-burnIn portion of the trajectory. 97 | accepted_traj = trajectory[burn_in:] 98 | 99 | # Compute the means of the accepted points. 100 | mean_traj = np.mean(accepted_traj, axis=0) 101 | # Compute the standard deviations of the accepted points. 102 | stdTraj = np.std(accepted_traj, axis=0) 103 | 104 | # Plot the trajectory of the last 500 sampled values. 105 | plt.plot(accepted_traj[:,0], accepted_traj[:,1], marker='o', alpha=0.3) 106 | plt.xlim(0, 1) 107 | plt.ylim(0, 1) 108 | plt.xlabel(r'$\theta1$') 109 | plt.ylabel(r'$\theta2$') 110 | 111 | # Display means in plot. 112 | plt.plot(0, label='M = %.3f, %.3f' % (mean_traj[0], mean_traj[1]), alpha=0.0) 113 | # Display rejected/accepted ratio in the plot. 114 | plt.plot(0, label=r'$N_{pro}=%s$ $\frac{N_{acc}}{N_{pro}} = %.3f$' % (len(accepted_traj), (n_accepted/len(accepted_traj))), alpha=0) 115 | 116 | # Evidence for model, p(D). 117 | # Compute a,b parameters for beta distribution that has the same mean 118 | # and stdev as the sample from the posterior. This is a useful choice 119 | # when the likelihood function is binomial. 120 | a = mean_traj * ((mean_traj*(1-mean_traj)/stdTraj**2) - np.ones(n_dim)) 121 | b = (1-mean_traj) * ( (mean_traj*(1-mean_traj)/stdTraj**2) - np.ones(n_dim)) 122 | # For every theta value in the posterior sample, compute 123 | # beta.pdf(theta, a, b) / likelihood(theta) * prior(theta) 124 | # This computation assumes that likelihood and prior are properly normalized, 125 | # i.e., not just relative probabilities. 126 | 127 | wtd_evid = np.zeros(np.shape(accepted_traj)[0]) 128 | for idx in range(np.shape(accepted_traj)[0]): 129 | wtd_evid[idx] = (beta.pdf(accepted_traj[idx,0],a[0],b[0] ) 130 | * beta.pdf(accepted_traj[idx,1],a[1],b[1]) / 131 | (likelihood(accepted_traj[idx,]) * prior(accepted_traj[idx,]))) 132 | 133 | p_data = 1 / np.mean(wtd_evid) 134 | # Display p(D) in the graph 135 | plt.plot(0, label='p(D) = %.3e' % p_data, alpha=0) 136 | plt.legend(loc='upper left') 137 | plt.savefig('Figure_8.3.png') 138 | 139 | # Estimate highest density region by evaluating posterior at each point. 140 | accepted_traj = trajectory[burn_in:] 141 | npts = np.shape(accepted_traj)[0] 142 | post_prob = np.zeros((npts)) 143 | for ptIdx in range(npts): 144 | post_prob[ptIdx] = target_rel_prob(accepted_traj[ptIdx,]) 145 | 146 | # Determine the level at which credmass points are above: 147 | credmass = 0.95 148 | waterline = np.percentile(post_prob, (credmass)) 149 | 150 | HDI_points = accepted_traj[post_prob > waterline, ] 151 | 152 | plt.figure() 153 | plt.plot(HDI_points[:,0], HDI_points[:,1], 'C1o') 154 | plt.xlim(0,1) 155 | plt.ylim(0,1) 156 | plt.xlabel(r'$\theta1$') 157 | plt.ylabel(r'$\theta2$') 158 | 159 | # Display means in plot. 
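# (Added note: plotting a single invisible point with alpha=0 is just a trick
# to get the text of `label` into the legend; it draws nothing on the axes.)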
160 | plt.plot(0, label='M = %.3f, %.3f' % (mean_traj[0], mean_traj[1]), alpha=0.0) 161 | # Display rejected/accepted ratio in the plot. 162 | plt.plot(0, label=r'$N_{pro}=%s$ $\frac{N_{acc}}{N_{pro}} = %.3f$' % (len(accepted_traj), (n_accepted/len(accepted_traj))), alpha=0) 163 | # Display p(D) in the graph 164 | plt.plot(0, label='p(D) = %.3e' % p_data, alpha=0) 165 | plt.legend(loc='upper left') 166 | 167 | plt.savefig('Figure_8.3_HDI.png') 168 | 169 | plt.show() 170 | 171 | -------------------------------------------------------------------------------- /08_BernTwoPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Inferring two binomial proportions using PyMC. 3 | """ 4 | from __future__ import division 5 | import matplotlib.pyplot as plt 6 | plt.style.use('seaborn-darkgrid') 7 | import numpy as np 8 | import pymc3 as pm 9 | 10 | 11 | # Generate the data 12 | y1 = np.array([1, 1, 1, 1, 1, 0, 0]) # 5 heads and 2 tails 13 | y2 = np.array([1, 1, 0, 0, 0, 0, 0]) # 2 heads and 5 tails 14 | 15 | 16 | with pm.Model() as model: 17 | # define the prior 18 | theta1 = pm.Beta('theta1', 3, 3) # prior 19 | theta2 = pm.Beta('theta2', 3, 3) # prior 20 | # define the likelihood 21 | y1 = pm.Bernoulli('y1', p=theta1, observed=y1) 22 | y2 = pm.Bernoulli('y2', p=theta2, observed=y2) 23 | 24 | # Generate a MCMC chain 25 | trace = pm.sample(1000) 26 | 27 | # create an array with the posterior sample 28 | theta1_sample = trace['theta1'] 29 | theta2_sample = trace['theta2'] 30 | 31 | # Plot the trajectory of the last 500 sampled values. 32 | plt.plot(theta1_sample[:-500], theta2_sample[:-500], marker='o', color='skyblue') 33 | plt.xlim(0, 1) 34 | plt.ylim(0, 1) 35 | plt.xlabel(r'$\theta1$') 36 | plt.ylabel(r'$\theta2$') 37 | 38 | # Display means in plot. 39 | plt.plot(0, label='M = %.3f, %.3f' % (np.mean(theta1_sample), np.mean(theta2_sample)), alpha=0.0) 40 | 41 | plt.legend(loc='upper left') 42 | plt.savefig('Figure_8.6.png') 43 | 44 | # Plot a histogram of the posterior differences of theta values. 45 | theta_diff = theta1_sample - theta2_sample 46 | pm.plot_posterior(theta_diff, ref_val=0.0, bins=30, color='skyblue') 47 | plt.xlabel(r'$\theta_1 - \theta_2$') 48 | plt.savefig('Figure_8.8.png') 49 | 50 | # For Exercise 8.5: 51 | # Posterior prediction. For each step in the chain, use the posterior thetas 52 | # to flip the coins. 53 | chain_len = len(theta1_sample) 54 | # Create matrix to hold results of simulated flips: 55 | y_pred = np.zeros((2, chain_len)) 56 | for step_idx in range(chain_len): # step through the chain 57 | # flip the first coin: 58 | p_head1 = theta1_sample[step_idx] 59 | y_pred[0, step_idx] = np.random.choice([0,1], p=[1-p_head1, p_head1]) 60 | # flip the second coin: 61 | p_head2 = theta2_sample[step_idx] 62 | y_pred[1, step_idx] = np.random.choice([0,1], p=[1-p_head2, p_head2]) 63 | 64 | 65 | # Now determine the proportion of times that y1==1 and y2==0 66 | pY1eq1andY2eq0 = sum((y_pred[0] ==1) & (y_pred[1] == 0)) / chain_len 67 | 68 | print(pY1eq1andY2eq0) 69 | plt.show() 70 | 71 | -------------------------------------------------------------------------------- /09_BernBetaMuKappaPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Bernoulli Likelihood with Hierarchical Prior! 
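Each coin's bias theta is drawn from a Beta distribution whose mean mu and concentration kappa are themselves given priors, so the data from all coins jointly inform the group-level parameters.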
3 | """ 4 | import numpy as np 5 | import pymc3 as pm 6 | import sys 7 | from scipy.stats import beta, binom 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | 11 | 12 | # Data for figure 9.11 13 | N = [10, 10, 10] # Number of flips per coin 14 | z = [5, 5, 5] # Number of heads per coin 15 | ## Data for figure 9.12 16 | #N = [10, 10, 10] # Number of flips per coin 17 | #z = [1, 5, 9] # Number of heads per coin 18 | 19 | ## Data for exercise 9.1 20 | #ncoins = 50 21 | #nflipspercoin = 5 22 | #mu_act = .7 23 | #kappa_act = 20 24 | #theta_act = beta.rvs(mu_act*kappa_act+1, (1-mu_act)*kappa_act+1, size=ncoins) 25 | #z = binom.rvs(n=nflipspercoin, p=theta_act, size=ncoins) 26 | #N = [nflipspercoin] * ncoins 27 | 28 | 29 | # Arrange the data into a more convenient way to feed the PyMC model. 30 | coin = [] # list/vector index for each coins (from 0 to number of coins) 31 | y = [] # list/vector with head (1) or tails (0) for each flip. 32 | for i, flips in enumerate(N): 33 | heads = z[i] 34 | if heads > flips: 35 | sys.exit("The number of heads can't be greater than the number of flips") 36 | else: 37 | y = y + [1] * heads + [0] * (flips-heads) 38 | coin = coin + [i] * flips 39 | 40 | 41 | # Specify the model in PyMC 42 | with pm.Model() as model: 43 | # define the hyperparameters 44 | mu = pm.Beta('mu', 2, 2) 45 | kappa = pm.Gamma('kappa', 1, 0.1) 46 | # define the prior 47 | theta = pm.Beta('theta', mu * kappa, (1 - mu) * kappa, shape=len(N)) 48 | # define the likelihood 49 | y = pm.Bernoulli('y', p=theta[coin], observed=y) 50 | 51 | # Generate a MCMC chain 52 | 53 | trace = pm.sample(1000, progressbar=False) 54 | 55 | 56 | ## Check the results. 57 | 58 | ## Print summary for each trace 59 | #pm.df_summary(trace) 60 | #pm.df_summary(trace) 61 | 62 | ## Check for mixing and autocorrelation 63 | pm.autocorrplot(trace, varnames=['mu', 'kappa']) 64 | #pm.autocorrplot(trace, varnames =[mu, kappa]) 65 | 66 | ## Plot KDE and sampled values for each parameter. 
67 | pm.traceplot(trace) 68 | #pm.traceplot(trace) 69 | 70 | # Create arrays with the posterior sample 71 | theta1_sample = trace['theta'][:,0] 72 | theta2_sample = trace['theta'][:,1] 73 | theta3_sample = trace['theta'][:,2] 74 | mu_sample = trace['mu'] 75 | kappa_sample = trace['kappa'] 76 | 77 | 78 | # Scatter plot hyper-parameters 79 | fig, ax = plt.subplots(4, 3, figsize=(12,12)) 80 | ax[0, 0].scatter(mu_sample, kappa_sample, marker='o', color='skyblue') 81 | ax[0, 0].set_xlim(0,1) 82 | ax[0, 0].set_xlabel(r'$\mu$') 83 | ax[0, 0].set_ylabel(r'$\kappa$') 84 | 85 | # Plot mu histogram 86 | #plot_post(mu_sample, xlab=r'$\mu$', show_mode=False, labelsize=9, framealpha=0.5) 87 | 88 | pm.plot_posterior(mu_sample, ax=ax[0, 1], color='skyblue') 89 | ax[0, 1].set_xlabel(r'$\mu$') 90 | ax[0, 1].set_xlim(0,1) 91 | 92 | # Plot kappa histogram 93 | #plot_post(kappa_sample, xlab=r'$\kappa$', show_mode=False, labelsize=9, framealpha=0.5) 94 | pm.plot_posterior(kappa_sample, ax=ax[0, 2], color='skyblue') 95 | ax[0, 2].set_xlabel(r'$\kappa$') 96 | 97 | # Plot theta 1 98 | 99 | #plot_post(theta1_sample, xlab=r'$\theta1$', show_mode=False, labelsize=9, framealpha=0.5) 100 | pm.plot_posterior(theta1_sample, ax=ax[1, 0], color='skyblue') 101 | ax[1, 0].set_xlabel(r'$\theta1$') 102 | ax[1, 0].set_xlim(0,1) 103 | 104 | # Scatter theta 1 vs mu 105 | ax[1, 1].scatter(theta1_sample, mu_sample, marker='o', color='skyblue') 106 | ax[1, 1].set_xlim(0,1) 107 | ax[1, 1].set_ylim(0,1) 108 | ax[1, 1].set_xlabel(r'$\theta1$') 109 | ax[1, 1].set_ylabel(r'$\mu$') 110 | 111 | # Scatter theta 1 vs kappa 112 | ax[1, 2].scatter(theta1_sample, kappa_sample, marker='o', color='skyblue') 113 | ax[1, 2].set_xlim(0,1) 114 | ax[1, 2].set_xlabel(r'$\theta1$') 115 | ax[1, 2].set_ylabel(r'$\kappa$') 116 | 117 | # Plot theta 2 118 | #plot_post(theta2_sample, xlab=r'$\theta2$', show_mode=False, labelsize=9, framealpha=0.5) 119 | pm.plot_posterior(theta2_sample, ax=ax[2, 0], color='skyblue') 120 | ax[2, 0].set_xlabel(r'$\theta2$') 121 | ax[2, 0].set_xlim(0,1) 122 | 123 | # Scatter theta 2 vs mu 124 | ax[2, 1].scatter(theta2_sample, mu_sample, marker='o', color='skyblue') 125 | ax[2, 1].set_xlim(0,1) 126 | ax[2, 1].set_ylim(0,1) 127 | ax[2, 1].set_xlabel(r'$\theta2$') 128 | ax[2, 1].set_ylabel(r'$\mu$') 129 | 130 | # Scatter theta 2 vs kappa 131 | ax[2, 2].scatter(theta2_sample, kappa_sample, marker='o', color='skyblue') 132 | ax[2, 2].set_xlim(0,1) 133 | ax[2, 2].set_xlabel(r'$\theta2$') 134 | ax[2, 2].set_ylabel(r'$\kappa$') 135 | 136 | # Plot theta 3 137 | 138 | #plot_post(theta3_sample, xlab=r'$\theta3$', show_mode=False, labelsize=9, framealpha=0.5) 139 | pm.plot_posterior(theta3_sample, ax=ax[3, 0], color='skyblue') 140 | ax[3, 0].set_xlabel(r'$\theta3$') 141 | ax[3, 0].set_xlim(0,1) 142 | 143 | # Scatter theta 3 vs mu 144 | ax[3, 1].scatter(theta3_sample, mu_sample, marker='o', color='skyblue') 145 | ax[3, 1].set_xlim(0,1) 146 | ax[3, 1].set_ylim(0,1) 147 | ax[3, 1].set_xlabel(r'$\theta3$') 148 | ax[3, 1].set_ylabel(r'$\mu$') 149 | 150 | # Scatter theta 3 vs kappa 151 | ax[3, 2].scatter(theta3_sample, kappa_sample, marker='o', color='skyblue') 152 | ax[3, 2].set_xlim(0,1) 153 | ax[3, 2].set_xlabel(r'$\theta3$') 154 | ax[3, 2].set_ylabel(r'$\kappa$') 155 | 156 | plt.tight_layout() 157 | plt.savefig('Figure_9.11.png') 158 | plt.show() 159 | 160 | -------------------------------------------------------------------------------- /09_BernBetaMuKappaPyMC_TT.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Bernoulli Likelihood with Hierarchical Prior. The Therapeutic Touch example. 3 | """ 4 | import numpy as np 5 | import pymc3 as pm 6 | import sys 7 | import matplotlib.pyplot as plt 8 | plt.style.use('seaborn-darkgrid') 9 | 10 | 11 | ## Therapeutic touch data: 12 | z = [1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 13 | 5, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8] # Number of heads per coin 14 | N = [10] * len(z) # Number of flips per coin 15 | 16 | # rearrange the data to load it PyMC model. 17 | coin = [] # list/vector index for each coins (from 0 to number of coins) 18 | y = [] # list/vector with head (1) or tails (0) for each flip. 19 | for i, flips in enumerate(N): 20 | heads = z[i] 21 | if heads > flips: 22 | sys.exit("The number of heads can't be greater than the number of flips") 23 | else: 24 | y = y + [1] * heads + [0] * (flips-heads) 25 | coin = coin + [i] * flips 26 | 27 | 28 | # Specify the model in PyMC 29 | with pm.Model() as model: 30 | # define the hyperparameters 31 | mu = pm.Beta('mu', 2, 2) 32 | kappa = pm.Gamma('kappa', 1, 0.1) 33 | # define the prior 34 | theta = pm.Beta('theta', mu * kappa, (1 - mu) * kappa, shape=len(N)) 35 | # define the likelihood 36 | y = pm.Bernoulli('y', p=theta[coin], observed=y) 37 | # Generate a MCMC chain 38 | trace = pm.sample(5000, random_seed=123) 39 | 40 | ## Check the results. 41 | 42 | ## Print summary for each trace 43 | #pm.df_summary(trace) 44 | 45 | ## Check for mixing and autocorrelation 46 | pm.autocorrplot(trace, varnames=['mu', 'kappa']) 47 | 48 | ## Plot KDE and sampled values for each parameter. 49 | pm.traceplot(trace) 50 | #pm.traceplot(trace) 51 | 52 | # Create arrays with the posterior sample 53 | theta1_sample = trace['theta'][:,0] 54 | theta28_sample = trace['theta'][:,27] 55 | mu_sample = trace['mu'] 56 | kappa_sample = trace['kappa'] 57 | 58 | # Plot mu histogram 59 | fig, ax = plt.subplots(2, 2, figsize=(12,12)) 60 | pm.plot_posterior(mu_sample, ax=ax[0, 0], color='skyblue') 61 | ax[0, 0].set_xlabel(r'$\mu$') 62 | 63 | # Plot kappa histogram 64 | pm.plot_posterior(kappa_sample, ax=ax[0, 1], color='skyblue') 65 | ax[0, 1].set_xlabel(r'$\kappa$') 66 | 67 | # Plot theta 1 68 | pm.plot_posterior(theta1_sample, ax=ax[1, 0], color='skyblue') 69 | ax[1, 0].set_xlabel(r'$\theta1$') 70 | 71 | # Plot theta 28 72 | pm.plot_posterior(theta1_sample, ax=ax[1, 1], color='skyblue') 73 | ax[1, 1].set_xlabel(r'$\theta28$') 74 | 75 | 76 | plt.tight_layout() 77 | plt.savefig('Figure_9.14.png') 78 | plt.show() 79 | -------------------------------------------------------------------------------- /09_FilconPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | More Hierarchical models. The filtration-condensation experiment. 3 | """ 4 | import numpy as np 5 | import pymc3 as pm 6 | import sys 7 | import matplotlib.pyplot as plt 8 | plt.style.use('seaborn-darkgrid') 9 | 10 | # Data 11 | # For each subject, specify the condition s/he was in, 12 | # the number of trials s/he experienced, and the number correct. 
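# (Added note: 4 conditions x 40 subjects per condition gives 160 rows of data,
# each based on 64 trials; z holds the number correct for each subject and
# `condition` holds each subject's condition index, 0-3.)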
13 | ncond = 4 14 | nSubj = 40 15 | trials = 64 16 | 17 | N = np.repeat([trials], (ncond * nSubj)) 18 | z = np.array([45, 63, 58, 64, 58, 63, 51, 60, 59, 47, 63, 61, 60, 51, 59, 45, 19 | 61, 59, 60, 58, 63, 56, 63, 64, 64, 60, 64, 62, 49, 64, 64, 58, 64, 52, 64, 64, 20 | 64, 62, 64, 61, 59, 59, 55, 62, 51, 58, 55, 54, 59, 57, 58, 60, 54, 42, 59, 57, 21 | 59, 53, 53, 42, 59, 57, 29, 36, 51, 64, 60, 54, 54, 38, 61, 60, 61, 60, 62, 55, 22 | 38, 43, 58, 60, 44, 44, 32, 56, 43, 36, 38, 48, 32, 40, 40, 34, 45, 42, 41, 32, 23 | 48, 36, 29, 37, 53, 55, 50, 47, 46, 44, 50, 56, 58, 42, 58, 54, 57, 54, 51, 49, 24 | 52, 51, 49, 51, 46, 46, 42, 49, 46, 56, 42, 53, 55, 51, 55, 49, 53, 55, 40, 46, 25 | 56, 47, 54, 54, 42, 34, 35, 41, 48, 46, 39, 55, 30, 49, 27, 51, 41, 36, 45, 41, 26 | 53, 32, 43, 33]) 27 | condition = np.repeat([0,1,2,3], nSubj) 28 | 29 | # Specify the model in PyMC 30 | with pm.Model() as model: 31 | kappa = pm.Gamma('kappa', 1, 0.1, shape=ncond) 32 | mu = pm.Beta('mu', 1, 1, shape=ncond) 33 | theta = pm.Beta('theta', mu[condition] * kappa[condition], (1 - mu[condition]) * kappa[condition], shape=len(z)) 34 | y = pm.Binomial('y', p=theta, n=N, observed=z) 35 | 36 | trace = pm.sample(1000) 37 | 38 | ## Check the results. 39 | 40 | ## Print summary for each trace 41 | #pm.df_summary(trace) 42 | #pm.df_summary(trace) 43 | 44 | ## Check for mixing and autocorrelation 45 | #pm.autocorrplot(trace, varnames=['mu', 'kappa']) 46 | 47 | ## Plot KDE and sampled values for each parameter. 48 | #pm.traceplot(trace) 49 | pm.traceplot(trace) 50 | 51 | 52 | # Create arrays with the posterior sample 53 | mu1_sample = trace['mu'][:,0] 54 | mu2_sample = trace['mu'][:,1] 55 | mu3_sample = trace['mu'][:,2] 56 | mu4_sample = trace['mu'][:,3] 57 | 58 | 59 | # Plot differences among filtrations experiments 60 | fig, ax = plt.subplots(1, 3, figsize=(15, 6)) 61 | pm.plot_posterior(mu1_sample-mu2_sample, ax=ax[0], color='skyblue') 62 | ax[0].set_xlabel(r'$\mu1-\mu2$') 63 | 64 | # Plot differences among condensation experiments 65 | pm.plot_posterior(mu3_sample-mu4_sample, ax=ax[1], color='skyblue') 66 | ax[1].set_xlabel(r'$\mu3-\mu4$') 67 | 68 | # Plot differences between filtration and condensation experiments 69 | a = (mu1_sample+mu2_sample)/2 - (mu3_sample+mu4_sample)/2 70 | pm.plot_posterior(a, ax=ax[2], color='skyblue') 71 | ax[2].set_xlabel(r'$(\mu1+\mu2)/2 - (\mu3+\mu4)/2$') 72 | 73 | plt.tight_layout() 74 | plt.savefig('Figure_9.16.png') 75 | plt.show() 76 | -------------------------------------------------------------------------------- /09_FilconPyMC_ex9.2.A.py: -------------------------------------------------------------------------------- 1 | """ 2 | More Hierarchical models. The filtration-condensation experiment. A single kappa 3 | for all conditions. 4 | """ 5 | import numpy as np 6 | import pymc3 as pm 7 | import sys 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | 11 | 12 | # Data 13 | # For each subject, specify the condition s/he was in, 14 | # the number of trials s/he experienced, and the number correct. 
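# (Added note: the data below are identical to 09_FilconPyMC.py; the only change
# in this exercise is the model, which uses a single kappa shared by all four
# conditions instead of one kappa per condition.)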
15 | ncond = 4 16 | nSubj = 40 17 | trials = 64 18 | 19 | N = np.repeat([trials], (ncond * nSubj)) 20 | z = np.array([45, 63, 58, 64, 58, 63, 51, 60, 59, 47, 63, 61, 60, 51, 59, 45, 21 | 61, 59, 60, 58, 63, 56, 63, 64, 64, 60, 64, 62, 49, 64, 64, 58, 64, 52, 64, 64, 22 | 64, 62, 64, 61, 59, 59, 55, 62, 51, 58, 55, 54, 59, 57, 58, 60, 54, 42, 59, 57, 23 | 59, 53, 53, 42, 59, 57, 29, 36, 51, 64, 60, 54, 54, 38, 61, 60, 61, 60, 62, 55, 24 | 38, 43, 58, 60, 44, 44, 32, 56, 43, 36, 38, 48, 32, 40, 40, 34, 45, 42, 41, 32, 25 | 48, 36, 29, 37, 53, 55, 50, 47, 46, 44, 50, 56, 58, 42, 58, 54, 57, 54, 51, 49, 26 | 52, 51, 49, 51, 46, 46, 42, 49, 46, 56, 42, 53, 55, 51, 55, 49, 53, 55, 40, 46, 27 | 56, 47, 54, 54, 42, 34, 35, 41, 48, 46, 39, 55, 30, 49, 27, 51, 41, 36, 45, 41, 28 | 53, 32, 43, 33]) 29 | condition = np.repeat([0,1,2,3], nSubj) 30 | 31 | # Specify the model in PyMC 32 | with pm.Model() as model: 33 | # define the hyperparameters 34 | kappa = pm.Gamma('kappa', 1, 0.1) 35 | mu = pm.Beta('mu', 1, 1, shape=ncond) 36 | # define the prior 37 | theta = pm.Beta('theta', mu[condition] * kappa, (1 - mu[condition]) * kappa, shape=len(z)) 38 | # define the likelihood 39 | y = pm.Binomial('y', p=theta, n=N, observed=z) 40 | trace = pm.sample(1000) 41 | 42 | ## Check the results. 43 | 44 | ## Print summary for each trace 45 | #pm.df_summary(trace) 46 | 47 | ## Check for mixing and autocorrelation 48 | #pm.autocorrplot(trace, varnames=['mu', 'kappa']) 49 | 50 | ## Plot KDE and sampled values for each parameter. 51 | pm.traceplot(trace) 52 | 53 | # Create arrays with the posterior sample 54 | mu1_sample = trace['mu'][:,0] 55 | mu2_sample = trace['mu'][:,1] 56 | mu3_sample = trace['mu'][:,2] 57 | mu4_sample = trace['mu'][:,3] 58 | 59 | 60 | # Plot differences among filtrations experiments 61 | fig, ax = plt.subplots(1, 3, figsize=(15, 6)) 62 | pm.plot_posterior((mu1_sample-mu2_sample), ax=ax[0], ref_val=0, color='skyblue') 63 | ax[0].set_xlabel(r'$\mu1-\mu2$') 64 | 65 | # Plot differences among condensation experiments 66 | pm.plot_posterior((mu3_sample-mu4_sample), ax=ax[1], ref_val=0, color='skyblue') 67 | ax[1].set_xlabel(r'$\mu3-\mu4$') 68 | 69 | # Plot differences between filtration and condensation experiments 70 | a = (mu1_sample+mu2_sample)/2 - (mu3_sample+mu4_sample)/2 71 | pm.plot_posterior(a, ax=ax[2], ref_val=0, color='skyblue') 72 | ax[2].set_xlabel(r'$(\mu1+\mu2)/2 - (\mu3+\mu4)/2$') 73 | 74 | plt.tight_layout() 75 | plt.savefig('Figure_9.18_upper.png') 76 | plt.show() 77 | -------------------------------------------------------------------------------- /09_FilconPyMC_ex9.2.B.py: -------------------------------------------------------------------------------- 1 | """ 2 | More Hierarchical models. The filtration-condensation experiment. 3 | Hyperparameters for kappafor all conditions. 4 | """ 5 | import numpy as np 6 | import pymc3 as pm 7 | import sys 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | 11 | # Data 12 | # For each subject, specify the condition s/he was in, 13 | # the number of trials s/he experienced, and the number correct. 
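# (Added note: in the model below, kappa gets its own hyperprior: a mean and sd
# for a gamma distribution are drawn from uniforms and converted to shape and
# rate via shape = mean**2 / sd**2 and rate = mean / sd**2, because a
# Gamma(shape, rate) distribution has mean shape/rate and sd sqrt(shape)/rate.)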
14 | ncond = 4 15 | nSubj = 40 16 | trials = 64 17 | 18 | N = np.repeat([trials], (ncond * nSubj)) 19 | z = np.array([45, 63, 58, 64, 58, 63, 51, 60, 59, 47, 63, 61, 60, 51, 59, 45, 20 | 61, 59, 60, 58, 63, 56, 63, 64, 64, 60, 64, 62, 49, 64, 64, 58, 64, 52, 64, 64, 21 | 64, 62, 64, 61, 59, 59, 55, 62, 51, 58, 55, 54, 59, 57, 58, 60, 54, 42, 59, 57, 22 | 59, 53, 53, 42, 59, 57, 29, 36, 51, 64, 60, 54, 54, 38, 61, 60, 61, 60, 62, 55, 23 | 38, 43, 58, 60, 44, 44, 32, 56, 43, 36, 38, 48, 32, 40, 40, 34, 45, 42, 41, 32, 24 | 48, 36, 29, 37, 53, 55, 50, 47, 46, 44, 50, 56, 58, 42, 58, 54, 57, 54, 51, 49, 25 | 52, 51, 49, 51, 46, 46, 42, 49, 46, 56, 42, 53, 55, 51, 55, 49, 53, 55, 40, 46, 26 | 56, 47, 54, 54, 42, 34, 35, 41, 48, 46, 39, 55, 30, 49, 27, 51, 41, 36, 45, 41, 27 | 53, 32, 43, 33]) 28 | condition = np.repeat([0,1,2,3], nSubj) 29 | 30 | # Specify the model in PyMC 31 | with pm.Model() as model: 32 | # define the hyper-hyperparameters for kappa 33 | mean_gamma = pm.Uniform('mean_gamma', 0, 30) 34 | sd_gamma = pm.Uniform('sd_gamma', 0, 30) 35 | s_kappa = mean_gamma**2/sd_gamma**2 36 | r_kappa = mean_gamma/sd_gamma**2 37 | # define the hyperparameters 38 | kappa = pm.Gamma('kappa', s_kappa, r_kappa) 39 | mu = pm.Beta('mu', 1, 1, shape=ncond) 40 | # define the prior 41 | theta = pm.Beta('theta', mu[condition] * kappa, (1 - mu[condition]) * kappa, shape=len(z)) 42 | # define the likelihood 43 | y = pm.Binomial('y', p=theta, n=N, observed=z) 44 | trace = pm.sample(2000, tune=1000) 45 | 46 | ## Check the results. 47 | burnin = 0 # posterior samples to discard 48 | 49 | ## Print summary for each trace 50 | #pm.df_summary(trace[burnin:]) 51 | #pm.df_summary(trace) 52 | 53 | ## Check for mixing and autocorrelation 54 | #pm.autocorrplot(trace, varnames=['mu', 'kappa']) 55 | 56 | ## Plot KDE and sampled values for each parameter. 57 | #pm.traceplot(trace[burnin:]) 58 | pm.traceplot(trace) 59 | 60 | # Create arrays with the posterior sample 61 | mu1_sample = trace['mu'][:,0][burnin:] 62 | mu2_sample = trace['mu'][:,1][burnin:] 63 | mu3_sample = trace['mu'][:,2][burnin:] 64 | mu4_sample = trace['mu'][:,3][burnin:] 65 | 66 | # Plot differences among filtrations experiments 67 | fig, ax = plt.subplots(1, 3, figsize=(15, 6)) 68 | pm.plot_posterior((mu1_sample-mu2_sample), ax=ax[0], ref_val=0, color='skyblue') 69 | ax[0].set_xlabel(r'$\mu1-\mu2$') 70 | 71 | # Plot differences among condensation experiments 72 | pm.plot_posterior((mu3_sample-mu4_sample), ax=ax[1], ref_val=0, color='skyblue') 73 | ax[1].set_xlabel(r'$\mu3-\mu4$') 74 | 75 | # Plot differences between filtration and condensation experiments 76 | a = (mu1_sample+mu2_sample)/2 - (mu3_sample+mu4_sample)/2 77 | pm.plot_posterior(a, ax=ax[2], ref_val=0, color='skyblue') 78 | ax[2].set_xlabel(r'$(\mu1+\mu2)/2 - (\mu3+\mu4)/2$') 79 | 80 | plt.tight_layout() 81 | plt.savefig('Figure_9.18_lower.png') 82 | plt.show() 83 | -------------------------------------------------------------------------------- /10_BernBetaModelCompPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Comparing models using Hierarchical modelling. 
3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import matplotlib.pyplot as plt 8 | plt.style.use('seaborn-darkgrid') 9 | 10 | ## specify the Data 11 | y = np.repeat([0, 1], [3, 6]) # 3 tails 6 heads 12 | 13 | with pm.Model() as model: 14 | # Hyperhyperprior: 15 | model_index = pm.DiscreteUniform('model_index', lower=0, upper=1) 16 | # Hyperprior: 17 | kappa_theta = 12 18 | mu_theta = pm.math.switch(pm.math.eq(model_index, 1), 0.25, 0.75) 19 | # Prior distribution: 20 | a_theta = mu_theta * kappa_theta 21 | b_theta = (1 - mu_theta) * kappa_theta 22 | theta = pm.Beta('theta', a_theta, b_theta) # theta distributed as beta density 23 | #likelihood 24 | y = pm.Bernoulli('y', theta, observed=y) 25 | trace = pm.sample(5000) 26 | 27 | 28 | ## Check the results. 29 | 30 | ## Print summary for each trace 31 | #pm.summary(trace) 32 | 33 | ## Check for mixing and autocorrelation 34 | #pm.autocorrplot(trace) 35 | 36 | ## Plot KDE and sampled values for each parameter. 37 | pm.traceplot(trace) 38 | 39 | ## Get the posterior sample of model_index: 40 | model_idx_sample = trace['model_index'] 41 | ## Compute the proportion of model_index at each value: 42 | p_M1 = sum(model_idx_sample == 1) / len(model_idx_sample) 43 | p_M2 = 1 - p_M1 44 | 45 | 46 | ## Get the posterior sample of theta: 47 | theta_sample = trace['theta'] 48 | ## Extract theta values when model_index is 1: 49 | theta_sample_M1 = theta_sample[model_idx_sample == 1] 50 | ## Extract theta values when model_index is 2: 51 | theta_sample_M2 = theta_sample[model_idx_sample == 0] 52 | 53 | ## Plot histograms of sampled theta values for each model, 54 | plt.figure() 55 | plt.subplot(1, 2, 1) 56 | plt.hist(theta_sample_M1, label='p(M1|D) = {:.3f}'.format(p_M1)) 57 | plt.xlabel(r'$\theta$') 58 | plt.ylabel(r'$p(\theta|\mu=0.25,D)$') 59 | plt.xlim(0, 1) 60 | plt.legend(loc='upper right', framealpha=0.5) 61 | 62 | plt.subplot(1, 2, 2) 63 | plt.hist(theta_sample_M2, label='p(M2|D) = {:.3f}'.format(p_M2)) 64 | plt.xlabel(r'$\theta$') 65 | plt.ylabel(r'$p(\theta|\mu=0.75,D)$') 66 | plt.xlim(0, 1) 67 | plt.legend(loc='upper right', framealpha=0.5) 68 | 69 | plt.savefig('Figure_10.2.png') 70 | plt.show() 71 | -------------------------------------------------------------------------------- /10_FilconModelCompPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Comparing models using Hierarchical modelling. 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import matplotlib.pyplot as plt 8 | plt.style.use('seaborn-darkgrid') 9 | 10 | # THE DATA. 11 | # For each subject, specify the condition s/he was in, 12 | # the number of trials s/he experienced, and the number correct. 
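# Aside on the model-index trick used by the comparison scripts in this chapter:
# with a uniform prior on model_index (the DiscreteUniform(0, 1) hyperprior), the
# posterior odds estimated from the chain equal the Bayes factor,
#     BF_12 = p(M1|D) / p(M2|D)   when the prior odds are 1.
# Illustrative arithmetic only; the numbers below are hypothetical, not results:
p_M1_example, p_M2_example = 0.8, 0.2
BF_12_example = p_M1_example / p_M2_example  # posterior odds = Bayes factor here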
13 | 14 | cond_of_subj = np.repeat([0,1,2,3], 40) 15 | 16 | n_trl_of_subj = np.array([64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64, 17 | 64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64, 18 | 64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64, 19 | 64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64, 20 | 64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64, 21 | 64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64, 22 | 64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]) 23 | 24 | n_corr_of_subj = np.array([45,63,58,64,58,63,51,60,59,47,63,61,60,51,59,45,61, 25 | 59,60,58,63,56,63,64,64,60,64,62,49,64,64,58,64,52,64,64,64,62,64,61,59,59, 26 | 55,62,51,58,55,54,59,57,58,60,54,42,59,57,59,53,53,42,59,57,29,36,51,64,60, 27 | 54,54,38,61,60,61,60,62,55,38,43,58,60,44,44,32,56,43,36,38,48,32,40,40,34, 28 | 45,42,41,32,48,36,29,37,53,55,50,47,46,44,50,56,58,42,58,54,57,54,51,49,52, 29 | 51,49,51,46,46,42,49,46,56,42,53,55,51,55,49,53,55,40,46,56,47,54,54,42,34, 30 | 35,41,48,46,39,55,30,49,27,51,41,36,45,41,53,32,43,33]) 31 | 32 | n_subj = len(cond_of_subj) 33 | n_cond = len(set(cond_of_subj)) 34 | 35 | 36 | # THE MODEL. 37 | with pm.Model() as model: 38 | # Hyperprior on model index: 39 | model_index = pm.DiscreteUniform('model_index', lower=0, upper=1) 40 | # Constants for hyperprior: 41 | shape_Gamma = 1.0 42 | rate_Gamma = 0.1 43 | # Hyperprior on mu and kappa: 44 | mu = pm.Beta('mu', 1, 1, shape=n_cond) 45 | 46 | kappa0 = pm.Gamma('kappa0', alpha=shape_Gamma, beta=rate_Gamma) 47 | a_Beta0 = mu[cond_of_subj] * kappa0 48 | b_Beta0 = (1 - mu[cond_of_subj]) * kappa0 49 | 50 | kappa1 = pm.Gamma('kappa1', alpha=shape_Gamma, beta=rate_Gamma, shape=n_cond) 51 | a_Beta1 = mu[cond_of_subj] * kappa1[cond_of_subj] 52 | b_Beta1 = (1 - mu[cond_of_subj]) * kappa1[cond_of_subj] 53 | 54 | #Prior on theta 55 | theta0 = pm.Beta('theta0', a_Beta0, b_Beta0, shape=n_subj) 56 | theta1 = pm.Beta('theta1', a_Beta1, b_Beta1, shape=n_subj) 57 | # if model_index == 0 then sample from theta1 else sample from theta0 58 | theta = pm.math.switch(pm.math.eq(model_index, 0), theta1, theta0) 59 | 60 | # Likelihood: 61 | y = pm.Binomial('y', p=theta, n=n_trl_of_subj, observed=n_corr_of_subj) 62 | 63 | # Sampling 64 | step = pm.ElemwiseCategorical(vars=[model_index],values=[0,1]) 65 | trace = pm.sample(5000, step=step, progressbar=False) 66 | 67 | 68 | # EXAMINE THE RESULTS. 69 | burnin = 500 70 | pm.traceplot(trace) 71 | 72 | model_idx_sample = trace['model_index'][burnin:] 73 | 74 | pM1 = sum(model_idx_sample == 0) / len(model_idx_sample) 75 | pM2 = 1 - pM1 76 | 77 | plt.figure(figsize=(15, 15)) 78 | plt.subplot2grid((5,4), (0,0), colspan=4) 79 | plt.plot(model_idx_sample, label='p(M1|D) = {:.3f} ; p(M2|D) = {:.3f}'.format(pM1, pM2)); 80 | plt.xlabel('Steps in Markov Chain') 81 | plt.legend(loc='upper right', framealpha=0.75) 82 | 83 | for m in range(0, 2): 84 | kappa0_sample = trace['kappa0'][burnin:][model_idx_sample == m] 85 | plt.subplot2grid((5,4), (3+m, 1), colspan=2) 86 | plt.hist(kappa0_sample, bins=30) 87 | plt.title(r'Post. $\kappa_0$ for M={}'.format(m+1), fontsize=14) 88 | plt.xlabel(r'$\kappa_0$') 89 | plt.xlim(0, 30) 90 | for i in range(0, 4): 91 | kappa1_sample = trace['kappa1'][:,i][burnin:][model_idx_sample == m] 92 | plt.subplot2grid((5,4), (m+1, i)) 93 | plt.hist(kappa1_sample, bins=30) 94 | plt.title(r'Post. 
$\kappa_{}$ for M={}'.format(i+1, m+1), fontsize=14) 95 | plt.xlabel(r'$\kappa_%s$' % (i+1)) 96 | plt.xlim(0, 30) 97 | 98 | plt.tight_layout() 99 | plt.savefig('Figure_10.3-4.png') 100 | plt.show() 101 | -------------------------------------------------------------------------------- /10_ToyModelCompPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Comparing models using Hierarchical modelling. Toy Model. 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import matplotlib.pyplot as plt 8 | plt.style.use('seaborn-darkgrid') 9 | 10 | # THE DATA. 11 | N = 30 12 | z = 8 13 | y = np.repeat([1, 0], [z, N-z]) 14 | 15 | # THE MODEL. 16 | with pm.Model() as model: 17 | # Hyperprior on model index: 18 | model_index = pm.DiscreteUniform('model_index', lower=0, upper=1) 19 | # Prior 20 | nu = pm.Normal('nu', mu=0, tau=0.1) # it is posible to use tau or sd 21 | eta = pm.Gamma('eta', .1, .1) 22 | theta0 = 1 / (1 + pm.math.exp(-nu)) # theta from model index 0 23 | theta1 = pm.math.exp(-eta) # theta from model index 1 24 | theta = pm.math.switch(pm.math.eq(model_index, 0), theta0, theta1) 25 | # Likelihood 26 | y = pm.Bernoulli('y', p=theta, observed=y) 27 | # Sampling 28 | trace = pm.sample(1000) 29 | 30 | 31 | # EXAMINE THE RESULTS. 32 | ## Print summary for each trace 33 | 34 | #pm.summary(trace) 35 | 36 | ## Check for mixing and autocorrelation 37 | #pm.autocorrplot(trace, vars =[nu, eta]) 38 | 39 | ## Plot KDE and sampled values for each parameter. 40 | #pm.traceplot(trace) 41 | 42 | 43 | model_idx_sample = trace['model_index'] 44 | pM1 = sum(model_idx_sample == 0) / len(model_idx_sample) 45 | pM2 = 1 - pM1 46 | 47 | 48 | nu_sample_M1 = trace['nu'][model_idx_sample == 0] 49 | eta_sample_M2 = trace['eta'][model_idx_sample == 1] 50 | 51 | plt.figure() 52 | plt.subplot(2, 1, 1) 53 | pm.plot_posterior(nu_sample_M1) 54 | plt.xlabel(r'$\nu$') 55 | plt.ylabel('frequency') 56 | plt.title(r'p($\nu$|D,M2), with p(M2|D)={:.3}f'.format(pM1), fontsize=14) 57 | plt.xlim(-8, 8) 58 | 59 | plt.subplot(2, 1, 2) 60 | pm.plot_posterior(eta_sample_M2) 61 | plt.xlabel(r'$\eta$') 62 | plt.ylabel('frequency') 63 | plt.title(r'p($\eta$|D,M2), with p(M2|D)={:.3f}'.format(pM2), fontsize=14) 64 | plt.xlim(0, 8) 65 | plt.savefig('figure_ex_10.2_a.png') 66 | plt.show() 67 | -------------------------------------------------------------------------------- /12_OneOddGroupModelComp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Testing a point ('Null') Hypothesis (not using pseudopriors) 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | from scipy.stats import binom 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | 11 | # THE DATA. 12 | # For each subject, specify the condition s/he was in, 13 | # the number of trials s/he experienced, and the number correct. 14 | # (Randomly generated fictitious data.) 
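# Version note (an assumption about later PyMC3 releases, not from the original
# code): pm.ElemwiseCategorical, used below to sample the discrete model_index,
# was deprecated in newer PyMC3 versions; a commonly suggested replacement is the
# categorical Gibbs step, e.g.
#     step = pm.CategoricalGibbsMetropolis(vars=[model_index])
# or simply letting pm.sample() assign a step method to the discrete variable.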
15 | npg = 20 # number of subjects per group 16 | ntrl = 20 # number of trials per subject 17 | cond_of_subj = np.repeat([0, 1, 2, 3], npg) 18 | n_trl_of_subj = np.repeat([ntrl], 4*npg) 19 | np.random.seed(47401) 20 | 21 | n_corr_of_subj = np.concatenate((binom.rvs(n=ntrl, p=.61, size=npg), 22 | binom.rvs(n=ntrl, p=.50, size=npg), 23 | binom.rvs(n=ntrl, p=.49, size=npg), 24 | binom.rvs(n=ntrl, p=.51, size=npg))) 25 | 26 | n_subj = len(cond_of_subj) 27 | n_cond = len(set(cond_of_subj)) 28 | 29 | 30 | # THE MODEL 31 | with pm.Model() as model: 32 | # Hyperprior on model index: 33 | model_index = pm.DiscreteUniform('model_index', lower=0, upper=1) 34 | # Constants for hyperprior: 35 | shape_Gamma = 1.0 36 | rate_Gamma = 0.1 37 | # Hyperprior on mu and kappa: 38 | kappa = pm.Gamma('kappa', shape_Gamma, rate_Gamma, shape=n_cond) 39 | 40 | mu0 = pm.Beta('mu0', 1, 1) 41 | a_Beta0 = mu0 * kappa[cond_of_subj] 42 | b_Beta0 = (1 - mu0) * kappa[cond_of_subj] 43 | 44 | mu1 = pm.Beta('mu1', 1, 1, shape=n_cond) 45 | a_Beta1 = mu1[cond_of_subj] * kappa[cond_of_subj] 46 | b_Beta1 = (1 - mu1[cond_of_subj]) * kappa[cond_of_subj] 47 | 48 | #Prior on theta 49 | theta0 = pm.Beta('theta0', a_Beta0, b_Beta0, shape=n_subj) 50 | theta1 = pm.Beta('theta1', a_Beta1, b_Beta1, shape=n_subj) 51 | # if model_index == 0 then sample from theta1 else sample from theta0 52 | theta = pm.math.switch(pm.math.eq(model_index, 0), theta1, theta0) 53 | 54 | # Likelihood: 55 | y = pm.Binomial('y', p=theta, n=n_trl_of_subj, observed=n_corr_of_subj) 56 | 57 | # Sampling 58 | step = pm.ElemwiseCategorical(vars=[model_index],values=[0,1]) 59 | trace = pm.sample(10000, step) 60 | 61 | # EXAMINE THE RESULTS. 62 | 63 | ## Print summary for each trace 64 | #pm.summary(trace) 65 | 66 | ## Check for mixing and autocorrelation 67 | #pm.autocorrplot(trace, vars =[mu, kappa]) 68 | 69 | ## Plot KDE and sampled values for each parameter. 70 | #pm.traceplot(trace) 71 | 72 | model_idx_sample = trace['model_index'] 73 | pM1 = sum(model_idx_sample == 0) / len(model_idx_sample) 74 | pM2 = 1 - pM1 75 | 76 | plt.figure(figsize=(15, 15)) 77 | plt.subplot2grid((3,3), (0,0), colspan=3) 78 | plt.plot(model_idx_sample, label='p(DiffMu|D) = %.3f ; p(SameMu|D) = {:.3f}'.format(pM1, pM2)); 79 | plt.xlabel('Step in Markov Chain') 80 | plt.legend(loc='upper right', framealpha=0.75) 81 | 82 | count = 0 83 | position = [(1,0), (1,1), (1,2), (2,0), (2,1), (2,2)] 84 | for i in range(0, 4): 85 | mui_sample = trace['mu1'][:,i][model_idx_sample == 0] 86 | for j in range(i+1, 4): 87 | muj_sample = trace['mu1'][:,j][model_idx_sample == 0] 88 | ax = plt.subplot2grid((3,3), position[count]) 89 | pm.plot_posterior(mui_sample-muj_sample, 90 | ref_val=0, ax=ax) 91 | plt.title(r'$\mu_{} - \mu_{}$'.format(i+1, j+1)) 92 | plt.xlim(-0.3, 0.3) 93 | count += 1 94 | 95 | 96 | plt.tight_layout() 97 | plt.savefig('Figure_12.5.png') 98 | plt.show() 99 | -------------------------------------------------------------------------------- /13_minNforHDIpower.py: -------------------------------------------------------------------------------- 1 | """ 2 | The program described in this section was used to generate Tables 13.1 and 13.2. 3 | The program determines the minimal sample size needed to achieve a specified 4 | goal with a specified power, when flipping a single coin. 
5 | """ 6 | import numpy as np 7 | from HDIofICDF import * 8 | from scipy.special import binom, betaln 9 | 10 | 11 | def minNforHDIpower(genPriorMean, genPriorN, HDImaxwid=None, nullVal=None, 12 | ROPE=None, desiredPower=0.8, audPriorMean=0.5, 13 | audPriorN=2, HDImass=0.95, initSampSize=1, verbose=True): 14 | if HDImaxwid != None and nullVal != None: 15 | sys.exit('One and only one of HDImaxwid and nullVal must be specified') 16 | if ROPE == None: 17 | ROPE = [nullVal, nullVal] 18 | # Convert prior mean and N to a, b parameter values of beta distribution. 19 | genPriorA = genPriorMean * genPriorN 20 | genPriorB = (1.0 - genPriorMean) * genPriorN 21 | audPriorA = audPriorMean * audPriorN 22 | audPriorB = (1.0 - audPriorMean) * audPriorN 23 | # Initialize loop for incrementing sampleSize 24 | sampleSize = initSampSize 25 | # Increment sampleSize until desired power is achieved. 26 | while True: 27 | zvec = np.arange(0, sampleSize+1) # All possible z values for N flips. 28 | # Compute probability of each z value for data-generating prior. 29 | pzvec = np.exp(np.log(binom(sampleSize, zvec)) 30 | + betaln(zvec + genPriorA, sampleSize - zvec + genPriorB) 31 | - betaln(genPriorA, genPriorB)) 32 | # For each z value, compute HDI. hdiMat is min, max of HDI for each z. 33 | hdiMat = np.zeros((len(zvec), 2)) 34 | for zIdx in range(0, len(zvec)): 35 | z = zvec[zIdx] 36 | # Determine the limits of the highest density interval 37 | # hdp is a function from PyMC package and takes a sample vector as 38 | # input, not a function. 39 | hdiMat[zIdx] = HDIofICDF(beta, credMass=HDImass, a=(z + audPriorA), 40 | b=(sampleSize - z + audPriorB)) 41 | if HDImaxwid != None: 42 | hdiWid = hdiMat[:,1] - hdiMat[:,0] 43 | powerHDI = np.sum(pzvec[hdiWid < HDImaxwid]) 44 | if nullVal != None: 45 | powerHDI = np.sum(pzvec[(hdiMat[:,0] > ROPE[1]) | 46 | (hdiMat[:,1] < ROPE[0])]) 47 | if verbose: 48 | print(" For sample size = %s\npower = %s\n" % (sampleSize, powerHDI)) 49 | 50 | if powerHDI > desiredPower: 51 | break 52 | else: 53 | sampleSize += 1 54 | return sampleSize 55 | 56 | print(minNforHDIpower(genPriorMean=.85 , genPriorN=2000 , nullVal=0.5, verbose=False)) 57 | #print(minNforHDIpower(genPriorMean=.85 , genPriorN=10 , HDImaxwid=0.2, verbose=False)) 58 | 59 | -------------------------------------------------------------------------------- /15_SystemsPyMC.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Estimating the mean and standard deviation of a Gaussian likelihood with a 3 | hierarchical model. 4 | ''' 5 | from __future__ import division 6 | import numpy as np 7 | import pymc3 as pm 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | 11 | # THE DATA. 
12 | # Load the aircraft data: 13 | data = np.genfromtxt('Systems.txt', skip_header=True) 14 | 15 | n_subj = len(set(data[:,0])) 16 | # Put it into generic variables so easier to change data in other applications: 17 | y = data[:,3] 18 | subj = data[:,0].astype(int) 19 | 20 | 21 | 22 | ## Specify the model in PyMC 23 | with pm.Model() as model: 24 | # define the HyperPriors 25 | muG = pm.Normal('muG', mu=2.3, tau=0.1) 26 | tauG = pm.Gamma('tauG', 1, .5) 27 | m = pm.Gamma('m', 1, .25) 28 | d = pm.Gamma('d', 1, .5) 29 | sG = m**2 / d**2 30 | rG = m / d**2 31 | # define the priors 32 | tau = pm.Gamma('tau', sG, rG, shape=n_subj) 33 | mu = pm.Normal('mu', mu=muG, tau=tauG, shape=n_subj) 34 | # define the likelihood 35 | y = pm.Normal('y', mu=mu[subj-1], tau=tau[subj-1], observed=y) 36 | # Generate a MCMC chain 37 | trace = pm.sample(2000) 38 | 39 | 40 | # EXAMINE THE RESULTS 41 | 42 | 43 | ## Print summary for each trace 44 | #pm.summary(trace) 45 | 46 | ## Check for mixing and autocorrelation 47 | #pm.autocorrplot(trace, vars =[mu, tau]) 48 | 49 | ## Plot KDE and sampled values for each parameter. 50 | #pm.traceplot(trace) 51 | 52 | 53 | ## Extract chains 54 | muG_sample = trace['muG'] 55 | tauG_sample = trace['tauG'] 56 | m_sample = trace['m'] 57 | d_sample = trace['d'] 58 | 59 | # Plot the hyperdistributions: 60 | _, ax = plt.subplots(1, 4, figsize=(20, 5)) 61 | pm.plot_posterior(muG_sample, bins=30, ax=ax[0]) 62 | ax[0].set_xlabel(r'$\mu_g$', fontsize=16) 63 | pm.plot_posterior(tauG_sample, bins=30 ,ax=ax[1]) 64 | ax[1].set_xlabel(r'$\tau_g$', fontsize=16) 65 | pm.plot_posterior(m_sample, bins=30, ax=ax[2]) 66 | ax[2].set_xlabel('m', fontsize=16) 67 | pm.plot_posterior(d_sample, bins=30, ax=ax[3]) 68 | ax[3].set_xlabel('d', fontsize=16) 69 | 70 | plt.tight_layout() 71 | plt.savefig('Figure_15.9.png') 72 | plt.show() 73 | -------------------------------------------------------------------------------- /15_YmetricXsinglePyMC.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Estimating the mean and standard deviation of a Gaussian likelihood. 3 | ''' 4 | import numpy as np 5 | import pymc3 as pm 6 | from scipy.stats import norm 7 | import matplotlib.pyplot as plt 8 | 9 | 10 | # THE DATA. 11 | 12 | # Generate random data from known parameter values: 13 | np.random.seed(4745) 14 | true_mu = 100 15 | true_std = 15 16 | y = norm.rvs(true_mu, true_std, 500) 17 | 18 | 19 | # Specify the model in PyMC 20 | with pm.Model() as model: 21 | # define the priors 22 | sd = pm.HalfNormal('sd', 25) 23 | mu = pm.Normal('mu', mu=0, sd=100) # PyMC support precision and std 24 | #define the likelihood 25 | yl = pm.Normal('yl', mu, sd, observed=y) 26 | # Generate a MCMC chain 27 | trace = pm.sample(5000) 28 | 29 | 30 | # EXAMINE THE RESULTS 31 | 32 | ## Print summary for each trace 33 | #pm.summary(trace) 34 | 35 | ## Check for mixing and autocorrelation 36 | #pm.autocorrplot(trace, vars =[mu, tau]) 37 | 38 | ## Plot KDE and sampled values for each parameter. 
39 | #pm.traceplot(trace) 40 | 41 | mu_sample = trace['mu'] 42 | sigma_sample = trace['sd'] 43 | 44 | 45 | 46 | plt.figure(figsize=(10, 6)) 47 | ax = plt.subplot(1, 2, 1) 48 | pm.plot_posterior(mu_sample, bins=30, ax=ax) 49 | ax.set_xlabel('mu') 50 | ax.set_title = 'Posterior' 51 | ax.set_xlim(98, 102) 52 | 53 | plt.subplot(1, 2, 2) 54 | 55 | mu_mean = np.mean(mu_sample) 56 | sigma_mean = np.mean(sigma_sample) 57 | 58 | plt.scatter(mu_sample, sigma_sample , c='gray') 59 | plt.plot(mu_mean, sigma_mean, 'C1*', 60 | label=r'$\mu$ = %.1f, $\sigma$ = %.1f' % (mu_mean, sigma_mean)) 61 | plt.xlabel('mu') 62 | plt.ylabel('sigma') 63 | plt.title('Posterior') 64 | plt.legend(loc=0) 65 | plt.savefig('figure_15.3.png') 66 | plt.show() 67 | 68 | -------------------------------------------------------------------------------- /16_SimpleLinearRegressionPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Estimating the mean and standard deviation of a Gaussian likelihood with a 3 | hierarchical model. 4 | """ 5 | from __future__ import division 6 | import numpy as np 7 | import pymc3 as pm 8 | from scipy.stats import norm 9 | from scipy.interpolate import spline 10 | import matplotlib.pyplot as plt 11 | from hpd import * 12 | from HtWtDataGenerator import * 13 | plt.style.use('seaborn-darkgrid') 14 | 15 | # THE DATA. 16 | # Simulated height and weight data: 17 | n_subj = 30 18 | HtWtData = HtWtDataGenerator(n_subj, rndsd=5678) 19 | x = HtWtData[:,1] 20 | y = HtWtData[:,2] 21 | 22 | # Re-center data at mean, to reduce autocorrelation in MCMC sampling. 23 | # Standardize (divide by SD) to make initialization easier. 24 | x_m = np.mean(x) 25 | x_sd = np.std(x) 26 | y_m = np.mean(y) 27 | y_sd = np.std(y) 28 | zx = (x - x_m) / x_sd 29 | zy = (y - y_m) / y_sd 30 | 31 | 32 | # THE MODEL 33 | with pm.Model() as model: 34 | # define the priors 35 | sd = pm.HalfNormal('sd', 25) 36 | beta0 = pm.Normal('beta0', mu=0, sd=100) 37 | beta1 = pm.Normal('beta1', mu=0, sd=100) 38 | mu = beta0 + beta1 * zx 39 | # define the likelihood 40 | yl = pm.Normal('yl', mu=mu, sd=sd, observed=zy) 41 | # Generate a MCMC chain 42 | trace = pm.sample(1000) 43 | 44 | 45 | # EXAMINE THE RESULTS 46 | 47 | ## Print summary for each trace 48 | #pm.summary(trace) 49 | 50 | ## Check for mixing and autocorrelation 51 | #pm.autocorrplot(trace, vars =[tau]) 52 | 53 | 54 | ## Plot KDE and sampled values for each parameter. 55 | pm.traceplot(trace) 56 | 57 | 58 | ## Extract chain values: 59 | z0 = trace['beta0'] 60 | z1 = trace['beta1'] 61 | z_sigma = trace['sd'] 62 | 63 | 64 | # Convert to original scale: 65 | b1 = z1 * y_sd / x_sd 66 | b0 = (z0 * y_sd + y_m - z1 * y_sd * x_m / x_sd) 67 | sigma = z_sigma * y_sd 68 | 69 | 70 | # Posterior prediction: 71 | # Specify x values for which predicted y's are needed: 72 | x_post_pred = np.arange(55, 81) 73 | # Define matrix for recording posterior predicted y values at each x value. 74 | # One row per x value, with each row holding random predicted y values. 75 | post_samp_size = len(b1) 76 | y_post_pred = np.zeros((len(x_post_pred), post_samp_size)) 77 | # Define matrix for recording HDI limits of posterior predicted y values: 78 | y_HDI_lim = np.zeros((len(x_post_pred), 2)) 79 | # Generate posterior predicted y values. 80 | # This gets only one y value, at each x, for each step in the chain. 
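# A vectorised alternative sketch (illustrative, equivalent up to random draws) to
# the per-step loop below: norm.rvs broadcasts, so the whole posterior-predictive
# matrix can be drawn in one call.
#     mu_grid = b0 + np.outer(x_post_pred, b1)          # shape (n_x, n_samples)
#     y_post_pred = norm.rvs(loc=mu_grid, scale=sigma)  # sigma broadcasts along the sample axis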
81 | for chain_idx in range(post_samp_size): 82 | y_post_pred[:,chain_idx] = norm.rvs(loc=b0[chain_idx] + b1[chain_idx] * x_post_pred , 83 | scale = np.repeat([sigma[chain_idx]], [len(x_post_pred)]), size=len(x_post_pred)) 84 | 85 | for x_idx in range(len(x_post_pred)): 86 | y_HDI_lim[x_idx] = hpd(y_post_pred[x_idx]) 87 | 88 | ## Display believable beta0 and b1 values 89 | plt.figure() 90 | plt.subplot(1, 2, 1) 91 | thin_idx = 50 92 | plt.plot(z1[::thin_idx], z0[::thin_idx], 'b.', alpha=0.7) 93 | plt.ylabel('Standardized Intercept') 94 | plt.xlabel('Standardized Slope') 95 | plt.subplot(1, 2, 2) 96 | plt.plot(b1[::thin_idx], b0[::thin_idx], 'b.', alpha=0.7) 97 | plt.ylabel('Intercept (ht when wt=0)') 98 | plt.xlabel('Slope (pounds per inch)') 99 | plt.tight_layout() 100 | plt.savefig('Figure_16.4.png') 101 | 102 | # Display the posterior of the b1: 103 | plt.figure(figsize=(8, 5)) 104 | ax = plt.subplot(1, 2, 1) 105 | pm.plot_posterior(z1, ref_val=0.0, bins=30, ax=ax) 106 | ax.set_xlabel('Standardized slope') 107 | ax = plt.subplot(1, 2, 2) 108 | pm.plot_posterior(b1, ref_val=0.0, bins=30, ax=ax) 109 | ax.set_xlabel('Slope (pounds per inch)') 110 | plt.tight_layout() 111 | plt.savefig('Figure_16.5.png') 112 | 113 | # Display data with believable regression lines and posterior predictions. 114 | plt.figure() 115 | # Plot data values: 116 | x_rang = np.max(x) - np.min(x) 117 | y_rang = np.max(y) - np.min(y) 118 | lim_mult = 0.25 119 | x_lim = [np.min(x)-lim_mult*x_rang, np.max(x)+lim_mult*x_rang] 120 | y_lim = [np.min(y)-lim_mult*y_rang, np.max(y)+lim_mult*y_rang] 121 | plt.plot(x, y, 'k.') 122 | plt.title('Data with credible regression lines') 123 | plt.xlabel('X (height in inches)') 124 | plt.ylabel('Y (weight in pounds)') 125 | plt.xlim(x_lim) 126 | plt.ylim(y_lim) 127 | # Superimpose a smattering of believable regression lines: 128 | for i in range(0, len(b0), 100): 129 | plt.plot(x, b0[i] + b1[i]*x , c='k', alpha=0.05 ) 130 | plt.savefig('Figure_16.2.png') 131 | 132 | # Display data with HDIs of posterior predictions. 
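# Compatibility aside (an assumption about newer SciPy, not part of the original
# script): scipy.interpolate.spline, used for the smoothed HDI band below and in
# the robust-regression script that follows, was removed in later SciPy releases;
# a rough equivalent is the BSpline interface, e.g.
#     from scipy.interpolate import make_interp_spline
#     y_HDI_lim_smooth = make_interp_spline(x_post_pred, y_HDI_lim)(x_new)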
133 | 134 | plt.figure() 135 | # Plot data values: 136 | y_lim = [np.min(y_HDI_lim), np.max(y_HDI_lim)] 137 | plt.plot(x, y, 'k.') 138 | plt.xlim(x_lim) 139 | plt.ylim(y_lim) 140 | plt.xlabel('X (height in inches)') 141 | plt.ylabel('Y (weight in pounds)') 142 | plt.title('Data with 95% HDI & Mean of Posterior Predictions') 143 | # Superimpose posterior predicted 95% HDIs: 144 | y_post_pred_ave = np.average(y_post_pred, axis=1) 145 | #Book version of the HDI representation 146 | #plt.errorbar(x_post_pred,y_post_pred_ave, 147 | # yerr=[abs(y_HDI_lim[:,0]-y_post_pred_ave), 148 | # abs(y_HDI_lim[:,1]-y_post_pred_ave)], fmt='.') 149 | 150 | #Smoothed version of the HDI representation 151 | x_new = np.linspace(x_post_pred.min(), x_post_pred.max(), 200) 152 | y_HDI_lim_smooth = spline(x_post_pred, y_HDI_lim, x_new) 153 | plt.plot(x_post_pred, y_post_pred_ave) 154 | plt.fill_between(x_new, y_HDI_lim_smooth[:,0], y_HDI_lim_smooth[:,1], alpha=0.3) 155 | 156 | plt.savefig('Figure_16.6.png') 157 | 158 | plt.show() 159 | -------------------------------------------------------------------------------- /16_SimpleRobustLinearRegressionPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple Robust Linear Regression 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | from scipy.stats import t, norm 8 | from scipy.interpolate import spline 9 | import matplotlib.pyplot as plt 10 | plt.style.use('seaborn-darkgrid') 11 | from hpd import * 12 | from HtWtDataGenerator import * 13 | 14 | 15 | # THE DATA. 16 | 17 | cig_data = np.genfromtxt("McIntyre1994data.csv", skip_header=True, delimiter=",") 18 | n_subj = len(cig_data) 19 | x = cig_data[:,3] 20 | y = cig_data[:,1] 21 | 22 | 23 | # Re-center data at mean, to reduce autocorrelation in MCMC sampling. 24 | # Standardize (divide by SD) to make initialization easier. 25 | x_m = np.mean(x) 26 | x_sd = np.std(x) 27 | y_m = np.mean(y) 28 | y_sd = np.std(y) 29 | zx = (x - x_m) / x_sd 30 | zy = (y - y_m) / y_sd 31 | 32 | tdf_gain = 1 # 1 for low-biased tdf, 100 for high-biased tdf 33 | 34 | # THE MODEL 35 | with pm.Model() as model: 36 | # define the priors 37 | tdf = pm.Exponential('tdf', 1/30.) 38 | sd = pm.HalfNormal('sd', 25) 39 | beta0 = pm.Normal('beta0', mu=0, sd=100) 40 | beta1 = pm.Normal('beta1', mu=0, sd=100) 41 | mu = beta0 + beta1 * zx 42 | # define the likelihood 43 | yl = pm.StudentT('yl', mu=mu, sd=sd, nu=tdf, observed=zy) 44 | # Generate a MCMC chain 45 | trace = pm.sample(2000) 46 | 47 | 48 | # EXAMINE THE RESULTS 49 | 50 | ## Print summary for each trace 51 | #pm.summary(trace) 52 | 53 | ## Check for mixing and autocorrelation 54 | #pm.autocorrplot(trace, vars =[tau]) 55 | 56 | ## Plot KDE and sampled values for each parameter. 
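# Aside on the normality prior above: pm.Exponential('tdf', 1/30.) sets the rate
# lam = 1/30 for the Student-t degrees-of-freedom parameter, so its prior mean is
# 1/lam = 30; tdf values near 1 give heavy tails (a robust fit), while large
# values make the likelihood approach an ordinary normal. Illustrative arithmetic:
lam_example = 1 / 30.
prior_mean_tdf_example = 1 / lam_example  # approximately 30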
57 | #pm.traceplot(trace) 58 | 59 | 60 | # Extract chain values: 61 | 62 | tdf_samp = trace['tdf'] 63 | tdf_m = np.mean(tdf_samp) 64 | z0 = trace["beta0"] 65 | z1 = trace["beta1"] 66 | z_sigma = trace["sd"] 67 | 68 | # Convert to original scale: 69 | b1 = z1 * y_sd / x_sd 70 | b0 = (z0 * y_sd + y_m - z1 * y_sd * x_m / x_sd) 71 | sigma = z_sigma * y_sd 72 | 73 | # Posterior prediction: 74 | # Specify x values for which predicted y's are needed: 75 | x_rang = np.max(x) - np.min(x) 76 | y_rang = np.max(y) - np.min(y) 77 | lim_mult = 0.25 78 | x_lim = [np.min(x) - lim_mult * x_rang, np.max(x) + lim_mult * x_rang] 79 | #y_lim = [np.min(y) - lim_mult*y_rang, np.max(y) + lim_mult*y_rang] 80 | y_lim = [-10, 40] 81 | x_post_pred = np.linspace(x_lim[0], x_lim[1], 20) 82 | # Define matrix for recording posterior predicted y values at each x value. 83 | # One row per x value, with each row holding random predicted y values. 84 | post_samp_size = len(b1) 85 | y_post_pred = np.zeros((len(x_post_pred), post_samp_size)) 86 | # Define matrix for recording HDI limits of posterior predicted y values: 87 | y_HDI_lim = np.zeros((len(x_post_pred), 2)) 88 | # Generate posterior predicted y values. 89 | # This gets only one y value, at each x, for each step in the chain. 90 | for chain_idx in range(post_samp_size): 91 | y_post_pred[:,chain_idx] = t.rvs(df=np.repeat([tdf_samp[chain_idx]], [len(x_post_pred)]), 92 | loc = b0[chain_idx] + b1[chain_idx] * x_post_pred, 93 | scale = np.repeat([sigma[chain_idx]], [len(x_post_pred)])) 94 | 95 | for x_idx in range(len(x_post_pred)): 96 | y_HDI_lim[x_idx] = hpd(y_post_pred[x_idx]) 97 | 98 | 99 | # Display believable beta0 and b1 values 100 | plt.figure() 101 | thin_idx = 5 102 | plt.plot(b1[::thin_idx], b0[::thin_idx], '.') 103 | plt.ylabel("Intercept") 104 | plt.xlabel("Slope") 105 | plt.savefig('Figure_16.x0.png') 106 | 107 | # Display the posterior of the b1: 108 | ax = pm.plot_posterior(b1, ref_val=0.0, bins=30) 109 | ax.set_xlabel(r'Slope ($\Delta$ tar / $\Delta$ weight)') 110 | plt.title('Mean tdf = %.2f' % tdf_m) 111 | plt.savefig('Figure_16.8b.png') 112 | 113 | # Display data with believable regression lines and posterior predictions. 
114 | plt.figure() 115 | plt.plot(x, y, 'k.') 116 | plt.title('Data with credible regression lines') 117 | plt.xlabel('weight') 118 | plt.ylabel('tar') 119 | plt.xlim(x_lim) 120 | plt.ylim(y_lim) 121 | # Superimpose a smattering of believable regression lines: 122 | for i in range(0, len(b0), 5): 123 | plt.plot(x, b0[i] + b1[i]*x , c='k', alpha=0.05 ) 124 | plt.savefig('Figure_16.8x1.png') 125 | 126 | 127 | plt.figure() 128 | # Plot data values: 129 | plt.plot(x, y, 'k.') 130 | plt.xlim(x_lim) 131 | plt.ylim(y_lim) 132 | plt.xlabel('weight') 133 | plt.ylabel('tar') 134 | plt.title('Data with 95% HDI & Mean of Posterior Predictions') 135 | # Superimpose posterior predicted 95% HDIs: 136 | y_post_pred_med = np.median(y_post_pred, axis=1) 137 | 138 | #Book version of the HDI representation 139 | #plt.errorbar(x_post_pred, y_post_pred_med, 140 | # yerr=[abs(y_HDI_lim[:,0]-y_post_pred_med), 141 | # abs(y_HDI_lim[:,1]-y_post_pred_med)], fmt='.') 142 | 143 | #Smoothed version of the HDI representation 144 | x_new = np.linspace(x_post_pred.min(), x_post_pred.max(), 200) 145 | y_HDI_lim_smooth = spline(x_post_pred, y_HDI_lim, x_new) 146 | plt.plot(x_post_pred, y_post_pred_med) 147 | plt.fill_between(x_new, y_HDI_lim_smooth[:,0], y_HDI_lim_smooth[:,1], alpha=0.3) 148 | 149 | plt.savefig('Figure_16.8d.png') 150 | 151 | plt.show() 152 | 153 | 154 | -------------------------------------------------------------------------------- /17_MultiLinRegressHyperPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multiple linear regression with hyperpriors. 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import pandas as pd 8 | from scipy.stats import norm 9 | import matplotlib.pyplot as plt 10 | plt.style.use('seaborn-darkgrid') 11 | import seaborn as sns 12 | from hpd import * 13 | 14 | 15 | 16 | # THE DATA. 17 | 18 | tdfBgain = 1 19 | 20 | dataSource = ["Guber1999", "McIntyre1994", "random"][0] 21 | 22 | if dataSource == "Guber1999": 23 | fname = "Guber1999" # file name for saved graphs 24 | data = pd.read_csv('Guber1999data.txt', sep='\s+', 25 | names = ["State","Spend","StuTchRat","Salary", "PrcntTake","SATV","SATM","SATT"]) 26 | # Specify variables to be used in BUGS analysis: 27 | predicted_name = "SATT" 28 | predictor_names = ["Spend" , "PrcntTake"] 29 | n_data = len(data) 30 | y = data[predicted_name] 31 | x = data[predictor_names] 32 | n_predictors = len(x.columns) 33 | 34 | 35 | if dataSource == "McIntyre1994": 36 | fname = "McIntyre1994" # file name for saved graphs 37 | data = pd.read_csv('McIntyre1994data.csv') 38 | predicted_name = "CO" 39 | predictor_names = ["Tar","Nic","Wt"] 40 | n_data = len(data) 41 | y = data[predicted_name] 42 | x = data[predictor_names] 43 | n_data = len(data) 44 | 45 | 46 | if dataSource == "random": 47 | fname = "Random" # file name for saved graphs 48 | # Generate random data. 
49 | # True parameter values: 50 | beta_true = np.repeat(0, 21) 51 | beta_true = np.insert(beta_true, [0,0,0], [100, 1, 2]) # beta0 is first component 52 | n_predictors = len(beta_true) - 1 53 | sd_true = 2 54 | tau_true = 1/sd_true**2 55 | # Random X values: 56 | np.random.seed(47405) 57 | xM = 5 58 | xSD = 2 59 | n_data = 100 60 | x = norm.rvs(xM, xSD, n_predictors*n_data).reshape(100, -1) 61 | x = pd.DataFrame(x, columns=['X%s' % i for i in range(0, n_predictors)]) 62 | # Random Y values generated from linear model with true parameter values: 63 | y = np.sum(x * beta_true[1:].T, axis=1) + beta_true[0] + norm.rvs(0, sd_true, n_data) 64 | # Select which predictors to include 65 | include_only = range(0, n_predictors) # default is to include all 66 | #x = x.iloc[include_only] 67 | predictor_names = x.columns 68 | n_predictors = len(predictor_names) 69 | 70 | 71 | # THE MODEL 72 | with pm.Model() as model: 73 | # define hyperpriors 74 | muB = pm.Normal('muB', 0, 100) 75 | tauB = pm.Gamma('tauB', .01, .01) 76 | udfB = pm.Uniform('udfB', 0, 1) 77 | tdfB = 1 + tdfBgain * (-pm.math.log(1 - udfB)) 78 | # define the priors 79 | tau = pm.Gamma('tau', 0.01, 0.01) 80 | beta0 = pm.Normal('beta0', mu=0, tau=1.0E-12) 81 | beta1 = pm.StudentT('beta1', mu=muB, lam=tauB, nu=tdfB, shape=n_predictors) 82 | mu = beta0 + pm.math.dot(beta1, x.values.T) 83 | # define the likelihood 84 | #mu = beta0 + beta1[0] * x.values[:,0] + beta1[1] * x.values[:,1] 85 | yl = pm.Normal('yl', mu=mu, tau=tau, observed=y) 86 | # Generate a MCMC chain 87 | trace = pm.sample(1000) 88 | 89 | 90 | # EXAMINE THE RESULTS 91 | 92 | # Print summary for each trace 93 | #pm.summary(trace) 94 | 95 | # Check for mixing and autocorrelation 96 | #pm.autocorrplot(trace, vars =[beta0]) 97 | 98 | ## Plot KDE and sampled values for each parameter. 99 | #pm.traceplot(trace) 100 | 101 | 102 | # Extract chain values: 103 | b0_samp = trace['beta0'] 104 | b_samp = trace['beta1'] 105 | tau_samp = trace['tau'] 106 | sigma_samp = 1 / np.sqrt(tau_samp) # Convert precision to SD 107 | chain_length = len(tau_samp) 108 | 109 | if n_predictors >= 6: # don't display if too many predictors 110 | n_predictors == 6 111 | 112 | columns = ['Sigma y', 'Intercept'] 113 | [columns.append('Slope_%s' % i) for i in predictor_names[:n_predictors]] 114 | traces = np.array([sigma_samp, b0_samp, b_samp[:,0], b_samp[:,1]]).T 115 | df = pd.DataFrame(traces, columns=columns) 116 | g = sns.PairGrid(df) 117 | g.map(plt.scatter) 118 | plt.savefig('Figure_17.Xa.png') 119 | 120 | ## Display the posterior: 121 | 122 | plt.figure(figsize=(16,4)) 123 | ax = plt.subplot(1, n_predictors+2, 1) 124 | pm.plot_posterior(sigma_samp, ax=ax) 125 | ax.set_xlabel(r'$\sigma y$') 126 | ax = plt.subplot(1, n_predictors+2, 2) 127 | pm.plot_posterior(b0_samp, ax=ax) 128 | ax.set_xlabel('Intercept') 129 | 130 | for i in range(0, n_predictors): 131 | ax = plt.subplot(1, n_predictors+2, 3+i) 132 | pm.plot_posterior(b_samp[:,i], ref_val=0, ax=ax) 133 | ax.set_xlabel('Slope_%s' % predictor_names[i]) 134 | plt.tight_layout() 135 | plt.savefig('Figure_17.Xb.png') 136 | 137 | # Posterior prediction: 138 | # Define matrix for recording posterior predicted y values for each xPostPred. 139 | # One row per xPostPred value, with each row holding random predicted y values. 140 | y_post_pred = np.zeros((len(x), chain_length)) 141 | # Define matrix for recording HDI limits of posterior predicted y values: 142 | y_HDI_lim = np.zeros((len(x), 2)) 143 | # Generate posterior predicted y values. 
144 | # This gets only one y value, at each x, for each step in the chain. 145 | #or chain_idx in range(chain_length): 146 | for chain_idx in range(chain_length): 147 | y_post_pred[:,chain_idx] = norm.rvs(loc = b0_samp[chain_idx] + np.dot(b_samp[chain_idx], x.values.T), 148 | scale = np.repeat([sigma_samp[chain_idx]], [len(x)])) 149 | 150 | for x_idx in range(len(x)): 151 | y_HDI_lim[x_idx] = hpd(y_post_pred[x_idx]) 152 | 153 | for i in range(len(x)): 154 | print(np.mean(y_post_pred, axis=1)[i], y_HDI_lim[i]) 155 | 156 | plt.show() 157 | -------------------------------------------------------------------------------- /17_MultipleLinearRegressionPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multiple linear regression 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import pandas as pd 8 | from scipy.stats import norm 9 | import matplotlib.pyplot as plt 10 | plt.style.use('seaborn-darkgrid') 11 | from hpd import * 12 | import seaborn as sns 13 | 14 | 15 | # THE DATA. 16 | dataSource = ["Guber1999", "McIntyre1994", "random"][0] 17 | 18 | if dataSource == "Guber1999": 19 | fname = "Guber1999" # file name for saved graphs 20 | data = pd.read_csv('Guber1999data.txt', sep='\s+', 21 | names = ["State","Spend","StuTchRat","Salary", "PrcntTake","SATV","SATM","SATT"]) 22 | # Specify variables to be used in BUGS analysis: 23 | predictedName = "SATT" 24 | predictorNames = ["Spend" , "PrcntTake"] 25 | nData = len(data) 26 | y = data[predictedName] 27 | x = data[predictorNames] 28 | n_predictors = len(x.columns) 29 | 30 | 31 | if dataSource == "McIntyre1994": 32 | fname = "McIntyre1994" # file name for saved graphs 33 | data = pd.read_csv('McIntyre1994data.csv') 34 | predictedName = "CO" 35 | predictorNames = ["Tar","Nic","Wt"] 36 | nData = len(data) 37 | y = data[predictedName] 38 | x = data[predictorNames] 39 | nData = len(data) 40 | 41 | 42 | if dataSource == "random": 43 | fname = "Random" # file name for saved graphs 44 | # Generate random data. 
45 | # True parameter values: 46 | beta_true = np.repeat(0, 21) 47 | beta_true = np.insert(beta_true, [0,0,0], [100, 1, 2]) # beta0 is first component 48 | n_predictors = len(beta_true) - 1 49 | sd_true = 2 50 | tau_true = 1/sd_true**2 51 | # Random X values: 52 | np.random.seed(47405) 53 | xM = 5 54 | xSD = 2 55 | nData = 100 56 | x = norm.rvs(xM, xSD, n_predictors*nData).reshape(100, -1) 57 | x = pd.DataFrame(x, columns=['X%s' % i for i in range(0, n_predictors)]) 58 | # Random Y values generated from linear model with true parameter values: 59 | y = np.sum(x * beta_true[1:].T, axis=1) + beta_true[0] + norm.rvs(0, sd_true, nData) 60 | # Select which predictors to include 61 | includeOnly = range(0, n_predictors) # default is to include all 62 | #x = x.iloc[includeOnly] 63 | predictorNames = x.columns 64 | n_predictors = len(predictorNames) 65 | 66 | 67 | 68 | # THE MODEL 69 | with pm.Model() as model: 70 | # define the priors 71 | beta0 = pm.Normal('beta0', mu=0, sd=100) 72 | beta1 = pm.Normal('beta1', mu= 0, sd=100, shape=n_predictors) 73 | sd = pm.HalfNormal('sd', 25) 74 | mu = beta0 + pm.math.dot(beta1, x.values.T) 75 | # define the likelihood 76 | yl = pm.Normal('yl', mu, sd, observed=y) 77 | # Generate a MCMC chain 78 | trace = pm.sample(1000) 79 | 80 | # EXAMINE THE RESULTS 81 | 82 | # Print summary for each trace 83 | #pm.summary(trace) 84 | 85 | # Check for mixing and autocorrelation 86 | #pm.autocorrplot(trace, vars =[beta0]) 87 | 88 | ## Plot KDE and sampled values for each parameter. 89 | #pm.traceplot(trace) 90 | 91 | 92 | # Extract chain values: 93 | b0_samp = trace['beta0'] 94 | b_samp = trace['beta1'] 95 | 96 | Sigma_samp = trace['sd'] 97 | chain_length = len(Sigma_samp) 98 | 99 | if n_predictors >= 6: # don't display if too many predictors 100 | n_predictors == 6 101 | 102 | columns = ['Sigma y', 'Intercept'] 103 | [columns.append('Slope_%s' % i) for i in predictorNames[:n_predictors]] 104 | traces = np.array([Sigma_samp, b0_samp, b_samp[:,0], b_samp[:,1]]).T 105 | df = pd.DataFrame(traces, columns=columns) 106 | sns.set_style('dark') 107 | g = sns.PairGrid(df) 108 | g.map(plt.scatter) 109 | plt.savefig('Figure_17.5b.png') 110 | 111 | ## Display the posterior: 112 | sns.set_style('darkgrid') 113 | 114 | plt.figure(figsize=(16,4)) 115 | ax = plt.subplot(1, n_predictors+2, 1) 116 | pm.plot_posterior(Sigma_samp, ax=ax) 117 | ax.set_xlabel(r'$\sigma y$') 118 | ax = plt.subplot(1, n_predictors+2, 2) 119 | ax = pm.plot_posterior(b0_samp, ax=ax) 120 | ax.set_xlabel('Intercept') 121 | 122 | for i in range(0, n_predictors): 123 | ax = plt.subplot(1, n_predictors+2, 3+i) 124 | pm.plot_posterior(b_samp[:,i], ref_val=0, ax=ax) 125 | ax.set_xlabel('Slope_{}'.format(predictorNames[i])) 126 | plt.tight_layout() 127 | plt.savefig('Figure_17.5a.png') 128 | 129 | 130 | # Posterior prediction: 131 | # Define matrix for recording posterior predicted y values for each xPostPred. 132 | # One row per xPostPred value, with each row holding random predicted y values. 133 | y_post_pred = np.zeros((len(x), chain_length)) 134 | # Define matrix for recording HDI limits of posterior predicted y values: 135 | y_HDI_lim = np.zeros((len(x), 2)) 136 | # Generate posterior predicted y values. 137 | # This gets only one y value, at each x, for each step in the chain. 
138 | #or chain_idx in range(chain_length): 139 | for chain_idx in range(chain_length): 140 | y_post_pred[:,chain_idx] = norm.rvs(loc = b0_samp[chain_idx] + np.dot(b_samp[chain_idx], x.values.T), 141 | scale = np.repeat([Sigma_samp[chain_idx]], [len(x)])) 142 | 143 | for x_idx in range(len(x)): 144 | y_HDI_lim[x_idx] = hpd(y_post_pred[x_idx]) 145 | 146 | for i in range(len(x)): 147 | print(np.mean(y_post_pred, axis=1)[i], y_HDI_lim[i]) 148 | 149 | plt.show() 150 | -------------------------------------------------------------------------------- /18_ANOVAonewayNonhomogvarBrugs.py: -------------------------------------------------------------------------------- 1 | """ 2 | One way BANOVA Non Homogeneous Variance 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import pandas as pd 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | from scipy.stats import norm 11 | from hpd import * 12 | from theano import tensor as tt 13 | 14 | # THE DATA. 15 | # Specify data source: 16 | dataSource = ["McDonaldSK1991" , "SolariLS2008" , "Random"][0] 17 | 18 | # Load the data: 19 | if dataSource == "McDonaldSK1991": 20 | datarecord = pd.read_csv("McDonaldSK1991data.txt", sep='\s+', skiprows=18, skipfooter=25) 21 | y = datarecord['Size'] 22 | Ntotal = len(y) 23 | x = (datarecord['Group'] - 1).values 24 | xnames = pd.unique(datarecord['Site']) 25 | NxLvl = len(xnames) 26 | contrast_dict = {'BIGvSMALL':[-1/3,-1/3,1/2,-1/3,1/2], 27 | 'ORE1vORE2': [1,-1,0,0,0], 28 | 'ALAvORE':[-1/2,-1/2,1,0,0], 29 | 'NPACvORE':[-1/2,-1/2,1/2,1/2,0], 30 | 'USAvRUS':[1/3,1/3,1/3,-1,0], 31 | 'FINvPAC':[-1/4,-1/4,-1/4,-1/4,1], 32 | 'ENGvOTH':[1/3,1/3,1/3,-1/2,-1/2], 33 | 'FINvRUS':[0,0,0,-1,1]} 34 | 35 | 36 | if dataSource == "SolariLS2008": 37 | datarecord = pd.read_csv("SolariLS2008data.txt", sep='\s+', skiprows=21) 38 | y = datarecord['Acid'] 39 | Ntotal = len(y) 40 | x = (datarecord['Type'] - 1).values 41 | xnames = pd.unique(x) 42 | NxLvl = len(xnames) 43 | contrast_dict = {'G3vOTHER':[-1/8,-1/8,1,-1/8,-1/8,-1/8,-1/8,-1/8,-1/8]} 44 | 45 | 46 | if dataSource == "Random": 47 | np.random.seed(47405) 48 | ysdtrue = 4.0 49 | a0true = 100 50 | atrue = [2, -2] # sum to zero 51 | npercell = 8 52 | x = [] 53 | y = [] 54 | for xidx in range(len(atrue)): 55 | for subjidx in range(npercell): 56 | x.append(xidx) 57 | y.append(a0true + atrue[xidx] + norm.rvs(1, ysdtrue)) 58 | Ntotal = len(y) 59 | NxLvl = len(set(x)) 60 | # # Construct list of all pairwise comparisons, to compare with NHST TukeyHSD: 61 | contrast_dict = None 62 | for g1idx in range(NxLvl): 63 | for g2idx in range(g1idx+1, NxLvl): 64 | cmpVec = np.repeat(0, NxLvl) 65 | cmpVec[g1idx] = -1 66 | cmpVec[g2idx] = 1 67 | contrast_dict = (contrast_dict, cmpVec) 68 | 69 | 70 | z = (y - np.mean(y))/np.std(y) 71 | 72 | 73 | ## THE MODEL. 
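# Aside on the BANOVA parameterisation in the model below: the deflections a get a
# folded-t prior on their standard deviation (abs(StudentT) + 0.1) and are then
# re-expressed as sum-to-zero deflections via b = a - mean(a). A tiny
# self-contained numpy illustration (variable names are hypothetical):
import numpy as np
a_example = np.array([2.0, 0.5, -1.0])    # unconstrained deflections
b_example = a_example - a_example.mean()  # re-expressed to sum to zero
assert abs(b_example.sum()) < 1e-12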
74 | with pm.Model() as model: 75 | # define the hyperpriors 76 | a_SD_unabs = pm.StudentT('a_SD_unabs', mu=0, lam=0.001, nu=1) 77 | a_SD = abs(a_SD_unabs) + 0.1 78 | atau = 1 / a_SD**2 79 | m = pm.Gamma('m', 1, 1) 80 | d = pm.Gamma('d', 1, 1) 81 | sG = m**2 / d**2 82 | rG = m / d**2 83 | # define the priors 84 | tau = pm.Gamma('tau', sG, rG) 85 | a0 = pm.Normal('a0', mu=0, tau=0.001) # y values are assumed to be standardized 86 | a = pm.Normal('a', mu=0 , tau=atau, shape=NxLvl) 87 | 88 | b = pm.Deterministic('b', a - tt.mean(a)) 89 | mu = a0 + b[x] 90 | # define the likelihood 91 | yl = pm.Normal('yl', mu=mu, tau=tau, observed=z) 92 | # Generate a MCMC chain 93 | trace = pm.sample(2000) 94 | 95 | 96 | # EXAMINE THE RESULTS 97 | 98 | # Print summary for each trace 99 | #pm.summary(trace) 100 | 101 | # Check for mixing and autocorrelation 102 | #pm.autocorrplot(trace, vars=model.unobserved_RVs[:-1]) 103 | 104 | ## Plot KDE and sampled values for each parameter. 105 | pm.traceplot(trace) 106 | 107 | 108 | a0_sample = trace['a0'] 109 | b_sample = trace['b'] 110 | b0_sample = a0_sample * np.std(y) + np.mean(y) 111 | b_sample = b_sample * np.std(y) 112 | 113 | 114 | plt.figure(figsize=(20, 4)) 115 | for i in range(5): 116 | ax = plt.subplot(1, 5, i+1) 117 | pm.plot_posterior(b_sample[:,i], bins=50, ax=ax) 118 | ax.set_xlabel=r'$\beta1_{}$'.format(i) 119 | ax.set_title='x:{}'.format(i) 120 | plt.tight_layout() 121 | plt.savefig('Figure_18.xa.png') 122 | 123 | 124 | nContrasts = len(contrast_dict) 125 | if nContrasts > 0: 126 | plt.figure(figsize=(20, 8)) 127 | count = 1 128 | for key, value in contrast_dict.items(): 129 | contrast = np.dot(b_sample, value) 130 | ax = plt.subplot(2, 4, count) 131 | pm.plot_posterior(contrast, ref_val=0.0, bins=50, ax=ax) 132 | ax.set_title('Contrast {}'.format(key)) 133 | count += 1 134 | plt.tight_layout() 135 | plt.savefig('Figure_18.xa.png') 136 | 137 | plt.show() 138 | -------------------------------------------------------------------------------- /18_ANOVAonewayPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | One way BANOVA 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import pandas as pd 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | from scipy.stats import norm 11 | from hpd import * 12 | from theano import tensor as T 13 | 14 | 15 | # THE DATA. 
16 | # Specify data source: 17 | dataSource = ["McDonaldSK1991" , "SolariLS2008" , "Random"][0] 18 | 19 | # Load the data: 20 | if dataSource == "McDonaldSK1991": 21 | datarecord = pd.read_csv("McDonaldSK1991data.txt", sep='\s+', skiprows=18, skipfooter=25) 22 | y = datarecord['Size'] 23 | Ntotal = len(y) 24 | x = (datarecord['Group'] - 1).values 25 | xnames = pd.unique(datarecord['Site']) 26 | NxLvl = len(xnames) 27 | contrast_dict = {'BIGvSMALL':[-1/3,-1/3,1/2,-1/3,1/2], 28 | 'ORE1vORE2': [1,-1,0,0,0], 29 | 'ALAvORE':[-1/2,-1/2,1,0,0], 30 | 'NPACvORE':[-1/2,-1/2,1/2,1/2,0], 31 | 'USAvRUS':[1/3,1/3,1/3,-1,0], 32 | 'FINvPAC':[-1/4,-1/4,-1/4,-1/4,1], 33 | 'ENGvOTH':[1/3,1/3,1/3,-1/2,-1/2], 34 | 'FINvRUS':[0,0,0,-1,1]} 35 | 36 | 37 | if dataSource == "SolariLS2008": 38 | datarecord = pd.read_csv("SolariLS2008data.txt", sep='\s+', skiprows=21) 39 | y = datarecord['Acid'] 40 | Ntotal = len(y) 41 | x = (datarecord['Type'] - 1).values 42 | xnames = pd.unique(x) 43 | NxLvl = len(xnames) 44 | contrast_dict = {'G3vOTHER':[-1/8,-1/8,1,-1/8,-1/8,-1/8,-1/8,-1/8,-1/8]} 45 | 46 | 47 | if dataSource == "Random": 48 | np.random.seed(47405) 49 | ysdtrue = 4.0 50 | a0true = 100 51 | atrue = [2, -2] # sum to zero 52 | npercell = 8 53 | x = [] 54 | y = [] 55 | for xidx in range(len(atrue)): 56 | for subjidx in range(npercell): 57 | x.append(xidx) 58 | y.append(a0true + atrue[xidx] + norm.rvs(1, ysdtrue)) 59 | Ntotal = len(y) 60 | NxLvl = len(set(x)) 61 | # # Construct list of all pairwise comparisons, to compare with NHST TukeyHSD: 62 | contrast_dict = None 63 | for g1idx in range(NxLvl): 64 | for g2idx in range(g1idx+1, NxLvl): 65 | cmpVec = np.repeat(0, NxLvl) 66 | cmpVec[g1idx] = -1 67 | cmpVec[g2idx] = 1 68 | contrast_dict = (contrast_dict, cmpVec) 69 | 70 | 71 | z = (y - np.mean(y))/np.std(y) 72 | 73 | 74 | ## THE MODEL. 75 | with pm.Model() as model: 76 | # define the hyperpriors 77 | a_SD_unabs = pm.StudentT('a_SD_unabs', mu=0, lam=0.001, nu=1) 78 | a_SD = abs(a_SD_unabs) + 0.1 79 | atau = 1 / a_SD**2 80 | # define the priors 81 | sigma = pm.Uniform('sigma', 0, 10) # y values are assumed to be standardized 82 | tau = 1 / sigma**2 83 | a0 = pm.Normal('a0', mu=0, tau=0.001) # y values are assumed to be standardized 84 | a = pm.Normal('a', mu=0 , tau=atau, shape=NxLvl) 85 | 86 | b = pm.Deterministic('b', a - T.mean(a)) 87 | mu = a0 + b[x] 88 | # define the likelihood 89 | yl = pm.Normal('yl', mu, tau=tau, observed=z) 90 | # Generate a MCMC chain 91 | trace = pm.sample(2000, progressbar=False) 92 | 93 | 94 | # EXAMINE THE RESULTS 95 | burnin = 1000 96 | thin = 10 97 | 98 | # Print summary for each trace 99 | #pm.summary(trace[burnin::thin]) 100 | #pm.summary(trace) 101 | 102 | # Check for mixing and autocorrelation 103 | #pm.autocorrplot(trace[burnin::thin], vars=model.unobserved_RVs[:-1]) 104 | 105 | ## Plot KDE and sampled values for each parameter. 
106 | #pm.traceplot(trace[burnin::thin]) 107 | pm.traceplot(trace) 108 | 109 | a0_sample = trace['a0'][burnin::thin] 110 | b_sample = trace['b'][burnin::thin] 111 | b0_sample = a0_sample * np.std(y) + np.mean(y) 112 | b_sample = b_sample * np.std(y) 113 | 114 | 115 | plt.figure(figsize=(20, 4)) 116 | for i in range(5): 117 | ax = plt.subplot(1, 5, i+1) 118 | pm.plot_posterior(b_sample[:,i], bins=50, ax=ax) 119 | ax.set_xlabel(r'$\beta1_{}$'.format(i)) 120 | ax.set_title('x:{}'.format(i)) 121 | plt.tight_layout() 122 | plt.savefig('Figure_18.2a.png') 123 | 124 | 125 | nContrasts = len(contrast_dict) 126 | if nContrasts > 0: 127 | plt.figure(figsize=(20, 8)) 128 | count = 1 129 | for key, value in contrast_dict.items(): 130 | contrast = np.dot(b_sample, value) 131 | ax = plt.subplot(2, 4, count) 132 | pm.plot_posterior(contrast, ref_val=0.0, bins=50, ax=ax) 133 | ax.set_title('Contrast {}'.format(key)) 134 | count += 1 135 | plt.tight_layout() 136 | plt.savefig('Figure_18.2b.png') 137 | 138 | plt.show() 139 | -------------------------------------------------------------------------------- /19_ANOVAtwowayPyMC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Two way BANOVA 3 | """ 4 | from __future__ import division 5 | import numpy as np 6 | import pymc3 as pm 7 | import pandas as pd 8 | import matplotlib.pyplot as plt 9 | plt.style.use('seaborn-darkgrid') 10 | from scipy.stats import norm 11 | from theano import tensor as tt 12 | 13 | 14 | # THE DATA. 15 | # Specify data source: 16 | data_source = ["QianS2007" , "Salary" , "Random" , "Ex19.3"][1] 17 | 18 | # Load the data: 19 | if data_source == "QianS2007": 20 | data_record = pd.read_csv("QianS2007SeaweedData.txt") 21 | # Logistic transform the COVER value: 22 | # Used by Appendix 3 of QianS2007 to replicate Ramsey and Schafer (2002). 
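# Aside: the transform applied just below is the logit of the proportional cover
# p = COVER/100, since -log(100/c - 1) = log((c/100) / (1 - c/100)). A quick
# self-contained check with hypothetical cover percentages:
import numpy as np
c_example = np.array([10.0, 50.0, 90.0])
p_example = c_example / 100
assert np.allclose(-np.log(100 / c_example - 1), np.log(p_example / (1 - p_example)))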
23 | data_record['COVER'] = -np.log((100/data_record['COVER']) -1) 24 | 25 | y = data_record['COVER'].values 26 | x1 = pd.Categorical(data_record['TREAT']).codes 27 | x1names = data_record['TREAT'].values 28 | x2 = pd.Categorical(data_record['BLOCK']).codes 29 | x2names = data_record['BLOCK'].values 30 | Ntotal = len(y) 31 | Nx1Lvl = len(set(x1)) 32 | Nx2Lvl = len(set(x2)) 33 | x1contrastDict = {'f_Effect':[1/2, -1/2, 0, 1/2, -1/2, 0], 34 | 'F_Effect':[0, 1/2, -1/2, 0, 1/2, -1/2], 35 | 'L_Effect':[1/3, 1/3, 1/3, -1/3, -1/3, -1/3 ]} 36 | x2contrastDict = None # np.zeros(Nx2Lvl) 37 | x1x2contrastDict = None # np.zeros(Nx1Lvl*Nx2Lvl, Nx1Lvl) 38 | 39 | if data_source == "Salary": 40 | data_record = pd.read_csv("Salary.csv") 41 | y = data_record['Salary'] 42 | x1 = pd.Categorical(data_record['Org']).codes 43 | x1names = data_record['Org'].unique() 44 | x1names.sort() 45 | x2 = pd.Categorical(data_record['Post']).codes 46 | x2names = data_record['Post'].unique() 47 | x2names.sort() 48 | Ntotal = len(y) 49 | Nx1Lvl = len(set(x1)) 50 | Nx2Lvl = len(set(x2)) 51 | 52 | x1contrastDict = {'BFINvCEDP':[1, -1, 0, 0], 53 | 'CEDPvTHTR':[0, 1, 0, -1]} 54 | x2contrastDict = {'FT1vFT2':[1, -1, 0], 55 | 'FT2vFT3':[0,1,-1]} 56 | x1x2contrastDict = {'CHEMvTHTRxFT1vFT3':np.outer([0, 0, 1, -1], [1,0,-1]), 57 | 'BFINvOTHxFT1vOTH':np.outer([1, -1/3, -1/3, -1/3], [1, -1/2, -1/2])} 58 | 59 | if data_source == "Random": 60 | np.random.seed(47405) 61 | ysdtrue = 3 62 | a0true = 100 63 | a1true = np.array([2, 0, -2]) # sum to zero 64 | a2true = np.array([3, 1, -1, -3]) # sum to zero 65 | a1a2true = np.array([[1,-1,0, 0], [-1,1,0,0], [0,0,0,0]]) 66 | 67 | npercell = 8 68 | index = np.arange(len(a1true)*len(a2true)*npercell) 69 | data_record = pd.DataFrame(index=index, columns=["y","x1","x2"]) 70 | 71 | rowidx = 0 72 | for x1idx in range(0, len(a1true)): 73 | for x2idx in range(0, len(a2true)): 74 | for subjidx in range(0, npercell): 75 | data_record['x1'][rowidx] = x1idx 76 | data_record['x2'][rowidx] = x2idx 77 | data_record['y'][rowidx] = float(a0true + a1true[x1idx] + a2true[x2idx] 78 | + a1a2true[x1idx, x2idx] + norm.rvs(loc=0, scale=ysdtrue, size=1)[0]) 79 | rowidx += 1 80 | 81 | y = data_record['y'] 82 | x1 = pd.Categorical(data_record['x1']).codes 83 | x1names = data_record['x1'].unique() 84 | x2 = pd.Categorical(data_record['x2']).codes 85 | x2names = data_record['x2'].unique() 86 | Ntotal = len(y) 87 | Nx1Lvl = len(set(x1)) 88 | Nx2Lvl = len(set(x2)) 89 | x1contrast_dict = {'X1_1v3': [1, 0, -1]} # 90 | x2contrast_dict = {'X2_12v34':[1/2, 1/2, -1/2, -1/2]} # 91 | x1x2contrast_dict = {'IC_11v22': np.outer([1, -1, 0], [1, -1, 0, 0]), 92 | 'IC_23v34': np.outer([0, 1, -1], [0, 0, 1, -1])} 93 | 94 | if data_source == 'Ex19.3': 95 | y = [101,102,103,105,104, 104,105,107,106,108, 105,107,106,108,109, 109,108,110,111,112] 96 | x1 = [0,0,0,0,0, 0,0,0,0,0, 1,1,1,1,1, 1,1,1,1,1] 97 | x2 = [0,0,0,0,0, 1,1,1,1,1, 0,0,0,0,0, 1,1,1,1,1] 98 | S = [0,1,2,3,4, 0,1,2,3,4, 0,1,2,3,4, 0,1,2,3,4] 99 | x1names = ['x1.1' ,'x1.2'] 100 | x2names = ['x2.1', "x2.2"] 101 | Snames = ['S1', 'S2', 'S3', 'S4', 'S5'] 102 | Ntotal = len(y) 103 | Nx1Lvl = len(set(x1)) 104 | Nx2Lvl = len(set(x2)) 105 | NSLvl = len(set(S)) 106 | x1contrast_dict = {'X1.2vX1.1':[-1 , 1]} 107 | x2contrast_dict = {'X2.2vX2.1':[-1 , 1]} 108 | x1x2contrast_dict = None #np.arange(0, Nx1Lvl*Nx2Lvl).reshape(Nx1Lvl, -1).T 109 | 110 | z = (y - np.mean(y))/np.std(y) 111 | 112 | z = (y - np.mean(y))/np.std(y) 113 | 114 | # THE MODEL. 
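# Aside on the interaction contrasts defined above (e.g. via np.outer in the
# Salary branch): the contrast matrix is the outer product of a row (x1) contrast
# and a column (x2) contrast, and it is applied to the posterior interaction
# sample at the end of this script with np.tensordot, which sums over the last
# two axes. Self-contained numpy sketch with hypothetical shapes:
import numpy as np
row_c = np.array([1.0, -1.0, 0.0])                      # x1 contrast (3 levels)
col_c = np.array([1.0, 0.0, -1.0, 0.0])                 # x2 contrast (4 levels)
inter_c = np.outer(row_c, col_c)                        # (3, 4) interaction contrast
b1b2_example = np.random.randn(100, 3, 4)               # fake posterior sample
contrast_example = np.tensordot(b1b2_example, inter_c)  # -> shape (100,)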
115 | 
116 | with pm.Model() as model:
117 |     # define the hyperpriors
118 |     a1_SD_unabs = pm.StudentT('a1_SD_unabs', mu=0, lam=0.001, nu=1)
119 |     a1_SD = abs(a1_SD_unabs) + 0.1
120 |     a1tau = 1 / a1_SD**2
121 | 
122 |     a2_SD_unabs = pm.StudentT('a2_SD_unabs', mu=0, lam=0.001, nu=1)
123 |     a2_SD = abs(a2_SD_unabs) + 0.1
124 |     a2tau = 1 / a2_SD**2
125 | 
126 |     a1a2_SD_unabs = pm.StudentT('a1a2_SD_unabs', mu=0, lam=0.001, nu=1)
127 |     a1a2_SD = abs(a1a2_SD_unabs) + 0.1
128 |     a1a2tau = 1 / a1a2_SD**2
129 | 
130 | 
131 |     # define the priors
132 |     sigma = pm.Uniform('sigma', 0, 10)  # y values are assumed to be standardized
133 |     tau = 1 / sigma**2
134 | 
135 |     a0 = pm.Normal('a0', mu=0, tau=0.001)  # y values are assumed to be standardized
136 | 
137 |     a1 = pm.Normal('a1', mu=0, tau=a1tau, shape=Nx1Lvl)
138 |     a2 = pm.Normal('a2', mu=0, tau=a2tau, shape=Nx2Lvl)
139 |     a1a2 = pm.Normal('a1a2', mu=0, tau=a1a2tau, shape=[Nx1Lvl, Nx2Lvl])
140 | 
141 |     b1 = pm.Deterministic('b1', a1 - tt.mean(a1))
142 |     b2 = pm.Deterministic('b2', a2 - tt.mean(a2))
143 |     b1b2 = pm.Deterministic('b1b2', a1a2 - tt.mean(a1a2))
144 | 
145 |     mu = a0 + b1[x1] + b2[x2] + b1b2[x1, x2]
146 | 
147 |     # define the likelihood
148 |     yl = pm.Normal('yl', mu=mu, tau=tau, observed=z)
149 | 
150 |     # Generate a MCMC chain
151 |     trace = pm.sample(2000)
152 | 
153 | # EXAMINE THE RESULTS
154 | 
155 | # Print summary for each trace
156 | #pm.summary(trace)
157 | 
158 | # Check for mixing and autocorrelation
159 | #pm.autocorrplot(trace, vars=model.unobserved_RVs[:-1])
160 | 
161 | ## Plot KDE and sampled values for each parameter.
162 | pm.traceplot(trace)
163 | 
164 | 
165 | # Extract values of 'a'
166 | a0_sample = trace['a0']
167 | b1_sample = trace['b1']
168 | b2_sample = trace['b2']
169 | b1b2_sample = trace['b1b2']
170 | 
171 | b0_sample = a0_sample * np.std(y) + np.mean(y)
172 | b1_sample = b1_sample * np.std(y)
173 | b2_sample = b2_sample * np.std(y)
174 | b1b2_sample = b1b2_sample * np.std(y)
175 | 
176 | 
177 | plt.figure(figsize=(25, 20))
178 | ax = plt.subplot(451)
179 | pm.plot_posterior(b0_sample, bins=50, ax=ax)
180 | ax.set_xlabel(r'$\beta0$')
181 | ax.set_title('Baseline')
182 | plt.xlim(b0_sample.min(), b0_sample.max())
183 | 
184 | count = 2
185 | for i in range(len(b1_sample[0])):
186 |     ax = plt.subplot(4, 5, count)
187 |     pm.plot_posterior(b1_sample[:,i], ax=ax)
188 |     ax.set_xlabel(r'$\beta1_{}$'.format(i))
189 |     ax.set_title('x1: {}'.format(x1names[i]))
190 |     count += 1
191 | 
192 | for i in range(len(b2_sample[0])):
193 |     ax = plt.subplot(4, 5, count)
194 |     pm.plot_posterior(b2_sample[:,i], bins=50, ax=ax)
195 |     ax.set_xlabel(r'$\beta2_{}$'.format(i))
196 |     ax.set_title('x2: {}'.format(x2names[i]))
197 |     count += 1
198 | 
199 | for j in range(len(b1_sample[0])):
200 |     for i in range(len(b2_sample[0])):
201 |         ax = plt.subplot(4, 5, count)
202 |         pm.plot_posterior(b1b2_sample[:,j,i], bins=50, ax=ax)
203 |         ax.set_title('x1: {}, x2: {}'.format(x1names[j], x2names[i]))
204 |         ax.set_xlabel(r'$\beta12_{}{}$'.format(j, i))
205 |         count += 1
206 | 
207 | plt.tight_layout()
208 | plt.savefig('Figure_19.4.png')
209 | 
210 | ## Display contrast analyses
211 | plt.figure(figsize=(10, 12))
212 | count = 1
213 | for key, value in x1contrastDict.items():
214 |     contrast = np.dot(b1_sample, value)
215 |     ax = plt.subplot(3, 2, count)
216 |     pm.plot_posterior(contrast, ref_val=0.0, bins=50, ax=ax)
217 |     ax.set_title('Contrast {}'.format(key))
218 |     count += 1
219 | 
220 | for key, value in x2contrastDict.items():
221 |     contrast = np.dot(b2_sample, value)
222 |     ax = plt.subplot(3, 2, count)
223 |
pm.plot_posterior(contrast, ref_val=0.0, bins=50, ax=ax) 224 | ax.set_title('Contrast {}'.format(key)) 225 | count += 1 226 | 227 | for key, value in x1x2contrastDict.items(): 228 | contrast = np.tensordot(b1b2_sample, value) 229 | ax = plt.subplot(3, 2, count) 230 | pm.plot_posterior(contrast, ref_val=0.0, bins=50, ax=ax) 231 | ax.set_title('Contrast {}'.format(key)) 232 | count += 1 233 | plt.tight_layout() 234 | plt.savefig('Figure_19.5.png') 235 | 236 | plt.show() 237 | -------------------------------------------------------------------------------- /Figures/Figure_10.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_10.2.png -------------------------------------------------------------------------------- /Figures/Figure_10.3-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_10.3-4.png -------------------------------------------------------------------------------- /Figures/Figure_12.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_12.5.png -------------------------------------------------------------------------------- /Figures/Figure_15.9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_15.9.png -------------------------------------------------------------------------------- /Figures/Figure_16.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.2.png -------------------------------------------------------------------------------- /Figures/Figure_16.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.4.png -------------------------------------------------------------------------------- /Figures/Figure_16.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.5.png -------------------------------------------------------------------------------- /Figures/Figure_16.6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.6.png -------------------------------------------------------------------------------- /Figures/Figure_16.8a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.8a.png -------------------------------------------------------------------------------- /Figures/Figure_16.8b.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.8b.png -------------------------------------------------------------------------------- /Figures/Figure_16.8c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.8c.png -------------------------------------------------------------------------------- /Figures/Figure_16.8d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_16.8d.png -------------------------------------------------------------------------------- /Figures/Figure_17.5a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_17.5a.png -------------------------------------------------------------------------------- /Figures/Figure_17.5b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_17.5b.png -------------------------------------------------------------------------------- /Figures/Figure_17.Xa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_17.Xa.png -------------------------------------------------------------------------------- /Figures/Figure_17.Xb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_17.Xb.png -------------------------------------------------------------------------------- /Figures/Figure_18.2a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_18.2a.png -------------------------------------------------------------------------------- /Figures/Figure_18.2b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_18.2b.png -------------------------------------------------------------------------------- /Figures/Figure_18.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_18.3.png -------------------------------------------------------------------------------- /Figures/Figure_19.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_19.4.png 
-------------------------------------------------------------------------------- /Figures/Figure_19.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_19.5.png -------------------------------------------------------------------------------- /Figures/Figure_2.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_2.2.png -------------------------------------------------------------------------------- /Figures/Figure_3.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_3.1.png -------------------------------------------------------------------------------- /Figures/Figure_3.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_3.3.png -------------------------------------------------------------------------------- /Figures/Figure_4.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_4.1.png -------------------------------------------------------------------------------- /Figures/Figure_4.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_4.2.png -------------------------------------------------------------------------------- /Figures/Figure_4.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_4.3.png -------------------------------------------------------------------------------- /Figures/Figure_5.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_5.2.png -------------------------------------------------------------------------------- /Figures/Figure_6.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_6.1.png -------------------------------------------------------------------------------- /Figures/Figure_6.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_6.2.png -------------------------------------------------------------------------------- /Figures/Figure_6.3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_6.3.png -------------------------------------------------------------------------------- /Figures/Figure_7.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_7.3.png -------------------------------------------------------------------------------- /Figures/Figure_7.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_7.4.png -------------------------------------------------------------------------------- /Figures/Figure_7.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_7.5.png -------------------------------------------------------------------------------- /Figures/Figure_7.6_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_7.6_a.png -------------------------------------------------------------------------------- /Figures/Figure_7.6_b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_7.6_b.png -------------------------------------------------------------------------------- /Figures/Figure_7.6_c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_7.6_c.png -------------------------------------------------------------------------------- /Figures/Figure_8.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_8.1.png -------------------------------------------------------------------------------- /Figures/Figure_8.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_8.2.png -------------------------------------------------------------------------------- /Figures/Figure_8.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_8.3.png -------------------------------------------------------------------------------- /Figures/Figure_8.3_HDI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_8.3_HDI.png -------------------------------------------------------------------------------- /Figures/Figure_8.6.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_8.6.png -------------------------------------------------------------------------------- /Figures/Figure_9.11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_9.11.png -------------------------------------------------------------------------------- /Figures/Figure_9.12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_9.12.png -------------------------------------------------------------------------------- /Figures/Figure_9.14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_9.14.png -------------------------------------------------------------------------------- /Figures/Figure_9.16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_9.16.png -------------------------------------------------------------------------------- /Figures/Figure_9.16b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_9.16b.png -------------------------------------------------------------------------------- /Figures/Figure_9.18_lower.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_9.18_lower.png -------------------------------------------------------------------------------- /Figures/Figure_9.18_upper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/Figure_9.18_upper.png -------------------------------------------------------------------------------- /Figures/figure_15.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aloctavodia/Doing_bayesian_data_analysis/a34212340de7e2eb1723046dead980a3a13447ff/Figures/figure_15.3.png -------------------------------------------------------------------------------- /Guber1999data.txt: -------------------------------------------------------------------------------- 1 | "Alabama" 4.405 17.2 31.144 8 491 538 1029 2 | "Alaska" 8.963 17.6 47.951 47 445 489 934 3 | "Arizona" 4.778 19.3 32.175 27 448 496 944 4 | "Arkansas" 4.459 17.1 28.934 6 482 523 1005 5 | "California" 4.992 24.0 41.078 45 417 485 902 6 | "Colorado" 5.443 18.4 34.571 29 462 518 980 7 | "Connecticut" 8.817 14.4 50.045 81 431 477 908 8 | "Delaware" 7.030 16.6 39.076 68 429 468 897 9 | "Florida" 5.718 19.1 32.588 48 420 469 889 10 | "Georgia" 5.193 16.3 32.291 65 406 448 854 11 | "Hawaii" 6.078 
17.9 38.518 57 407 482 889 12 | "Idaho" 4.210 19.1 29.783 15 468 511 979 13 | "Illinois" 6.136 17.3 39.431 13 488 560 1048 14 | "Indiana" 5.826 17.5 36.785 58 415 467 882 15 | "Iowa" 5.483 15.8 31.511 5 516 583 1099 16 | "Kansas" 5.817 15.1 34.652 9 503 557 1060 17 | "Kentucky" 5.217 17.0 32.257 11 477 522 999 18 | "Louisiana" 4.761 16.8 26.461 9 486 535 1021 19 | "Maine" 6.428 13.8 31.972 68 427 469 896 20 | "Maryland" 7.245 17.0 40.661 64 430 479 909 21 | "Massachusetts" 7.287 14.8 40.795 80 430 477 907 22 | "Michigan" 6.994 20.1 41.895 11 484 549 1033 23 | "Minnesota" 6.000 17.5 35.948 9 506 579 1085 24 | "Mississippi" 4.080 17.5 26.818 4 496 540 1036 25 | "Missouri" 5.383 15.5 31.189 9 495 550 1045 26 | "Montana" 5.692 16.3 28.785 21 473 536 1009 27 | "Nebraska" 5.935 14.5 30.922 9 494 556 1050 28 | "Nevada" 5.160 18.7 34.836 30 434 483 917 29 | "New Hampshire" 5.859 15.6 34.720 70 444 491 935 30 | "New Jersey" 9.774 13.8 46.087 70 420 478 898 31 | "New Mexico" 4.586 17.2 28.493 11 485 530 1015 32 | "New York" 9.623 15.2 47.612 74 419 473 892 33 | "North Carolina" 5.077 16.2 30.793 60 411 454 865 34 | "North Dakota" 4.775 15.3 26.327 5 515 592 1107 35 | "Ohio" 6.162 16.6 36.802 23 460 515 975 36 | "Oklahoma" 4.845 15.5 28.172 9 491 536 1027 37 | "Oregon" 6.436 19.9 38.555 51 448 499 947 38 | "Pennsylvania" 7.109 17.1 44.510 70 419 461 880 39 | "Rhode Island" 7.469 14.7 40.729 70 425 463 888 40 | "South Carolina" 4.797 16.4 30.279 58 401 443 844 41 | "South Dakota" 4.775 14.4 25.994 5 505 563 1068 42 | "Tennessee" 4.388 18.6 32.477 12 497 543 1040 43 | "Texas" 5.222 15.7 31.223 47 419 474 893 44 | "Utah" 3.656 24.3 29.082 4 513 563 1076 45 | "Vermont" 6.750 13.8 35.406 68 429 472 901 46 | "Virginia" 5.327 14.6 33.987 65 428 468 896 47 | "Washington" 5.906 20.2 36.151 48 443 494 937 48 | "West Virginia" 6.107 14.8 31.944 17 448 484 932 49 | "Wisconsin" 6.930 15.9 37.746 9 501 572 1073 50 | "Wyoming" 6.160 14.9 31.285 10 476 525 1001 51 | -------------------------------------------------------------------------------- /HDI_of_grid.py: -------------------------------------------------------------------------------- 1 | """ 2 | Arguments: 3 | probMassVec is a vector of probability masses at each grid point. 4 | credMass is the desired mass of the HDI region. 
5 | 
6 | Return a dictionary with:
7 | indices is a vector of indices that are in the HDI
8 | mass is the total mass of the included indices
9 | height is the smallest component probability mass in the HDI
10 | """
11 | import numpy as np
12 | 
13 | def HDI_of_grid(probMassVec, credMass=0.95):
14 |     sortedProbMass = np.sort(probMassVec, axis=None)[::-1]
15 |     HDIheightIdx = np.min(np.where(np.cumsum(sortedProbMass) >= credMass))
16 |     HDIheight = sortedProbMass[HDIheightIdx]
17 |     HDImass = np.sum(probMassVec[probMassVec >= HDIheight])
18 |     idx = np.where(probMassVec >= HDIheight)
19 |     return {'indices':idx, 'mass':HDImass, 'height':HDIheight}
20 | 
21 | if __name__ == '__main__':
22 |     from scipy.stats import beta
23 |     theta1 = np.linspace(0, 1, 10)
24 |     theta2 = theta1
25 |     theta1_grid, theta2_grid = np.meshgrid(theta1, theta2)
26 |     probDensityVec = beta.pdf(theta1_grid, 3, 3)
27 |     probMassVec = probDensityVec / np.sum(probDensityVec)
28 |     HDIinfo = HDI_of_grid(probMassVec)
29 |     print(HDIinfo)
30 | 
-------------------------------------------------------------------------------- /HDIofICDF.py: --------------------------------------------------------------------------------
1 | """
2 | This program finds the HDI of a probability density function that is specified
3 | mathematically in Python.
4 | """
5 | from scipy.optimize import fmin
6 | from scipy.stats import *
7 | 
8 | def HDIofICDF(dist_name, credMass=0.95, **args):
9 |     # freeze distribution with given arguments
10 |     distri = dist_name(**args)
11 |     # initial guess for HDIlowTailPr
12 |     incredMass = 1.0 - credMass
13 | 
14 |     def intervalWidth(lowTailPr):
15 |         return distri.ppf(credMass + lowTailPr) - distri.ppf(lowTailPr)
16 | 
17 |     # find lowTailPr that minimizes intervalWidth
18 |     HDIlowTailPr = fmin(intervalWidth, incredMass, ftol=1e-8, disp=False)[0]
19 |     # return interval as array([low, high])
20 |     return distri.ppf([HDIlowTailPr, credMass + HDIlowTailPr])
21 | 
22 | 
-------------------------------------------------------------------------------- /HtWtDataGenerator.py: --------------------------------------------------------------------------------
1 | """
2 | Random height, weight generator for males and females. Uses parameters from
3 | Brainard, J. & Burmaster, D. E. (1992). Bivariate distributions for height and
4 | weight of men and women in the United States. Risk Analysis, 12(2), 267-275.
5 | John K. Kruschke, January 2008.
6 | """
7 | from __future__ import division
8 | from scipy.stats import multivariate_normal
9 | import numpy as np
10 | 
11 | 
12 | def HtWtDataGenerator(nSubj, rndsd=None):
13 |     # Specify parameters of multivariate normal (MVN) distributions.
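    # Heights are in inches and weights in pounds (per Brainard & Burmaster, 1992);
    # the lnWt* parameters describe log-weight, so weights are drawn on the log scale
    # and exponentiated before being returned. Women are modeled as a mixture of two
    # clusters with mixing proportions prop1 and prop2.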
14 | # Men: 15 | HtMmu = 69.18 16 | HtMsd = 2.87 17 | lnWtMmu = 5.14 18 | lnWtMsd = 0.17 19 | Mrho = 0.42 20 | Mmean = np.array([HtMmu , lnWtMmu]) 21 | Msigma = np.array([[HtMsd**2, Mrho * HtMsd * lnWtMsd], 22 | [Mrho * HtMsd * lnWtMsd, lnWtMsd**2]]) 23 | 24 | # Women cluster 1: 25 | HtFmu1 = 63.11 26 | HtFsd1 = 2.76 27 | lnWtFmu1 = 5.06 28 | lnWtFsd1 = 0.24 29 | Frho1 = 0.41 30 | prop1 = 0.46 31 | Fmean1 = np.array([HtFmu1, lnWtFmu1]) 32 | Fsigma1 = np.array([[HtFsd1**2, Frho1 * HtFsd1 * lnWtFsd1], 33 | [Frho1 * HtFsd1 * lnWtFsd1, lnWtFsd1**2]]) 34 | # Women cluster 2: 35 | HtFmu2 = 64.36 36 | HtFsd2 = 2.49 37 | lnWtFmu2 = 4.86 38 | lnWtFsd2 = 0.14 39 | Frho2 = 0.44 40 | prop2 = 1 - prop1 41 | Fmean2 = np.array([HtFmu2, lnWtFmu2]) 42 | Fsigma2 = np.array([[HtFsd2**2 , Frho2 * HtFsd2 * lnWtFsd2], 43 | [Frho2 * HtFsd2 * lnWtFsd2 , lnWtFsd2**2]]) 44 | 45 | # Randomly generate data values from those MVN distributions. 46 | if rndsd is not None: 47 | np.random.seed(rndsd) 48 | datamatrix = np.zeros((nSubj, 3)) 49 | # arbitrary coding values 50 | maleval = 1 51 | femaleval = 0 52 | for i in range(0, nSubj): 53 | # Flip coin to decide sex 54 | sex = np.random.choice([maleval, femaleval], replace=True, p=(.5,.5), size=1) 55 | if sex == maleval: 56 | datum = multivariate_normal.rvs(mean=Mmean, cov=Msigma) 57 | if sex == femaleval: 58 | Fclust = np.random.choice([1, 2], replace=True, p=(prop1, prop2), size=1) 59 | if Fclust == 1: 60 | datum = multivariate_normal.rvs(mean=Fmean1, cov=Fsigma1) 61 | if Fclust == 2: 62 | datum = multivariate_normal.rvs(mean=Fmean2, cov=Fsigma2) 63 | datamatrix[i] = np.concatenate([sex, np.round([datum[0], np.exp(datum[1])], 1)]) 64 | 65 | return datamatrix 66 | -------------------------------------------------------------------------------- /McDonaldSK1991data.txt: -------------------------------------------------------------------------------- 1 | # From http://udel.edu/~mcdonald/statanovasig.html 2 | # "Here are some data on a shell measurement (the length of the anterior 3 | # adductor muscle scar, standardized by dividing by length) in the mussel 4 | # Mytilus trossulus from five locations: Tillamook, Oregon; Newport, Oregon; 5 | # Petersburg, Alaska; Magadan, Russia; and Tvarminne, Finland, 6 | # taken from a much larger data set used in McDonald et al. (1991)." 7 | # 8 | # McDonald, J. H., R. Seed and R. K. Koehn. 1991. 9 | # Allozymes and morphometric characters of three species of Mytilus 10 | # in the Northern and Southern Hemispheres. 11 | # Mar. Biol. 111:323-333. 
12 | # 13 | # Group code: 14 | # 1=Tillamook,Oregon 15 | # 2=Newport,Oregon 16 | # 3=Petersburg,Alaska 17 | # 4=Magadan,Russia 18 | # 5=Tvarminne,Finland 19 | Group Size Site 20 | 1 0.0571 OregonT 21 | 1 0.0813 OregonT 22 | 1 0.0831 OregonT 23 | 1 0.0976 OregonT 24 | 1 0.0817 OregonT 25 | 1 0.0859 OregonT 26 | 1 0.0735 OregonT 27 | 1 0.0659 OregonT 28 | 1 0.0923 OregonT 29 | 1 0.0836 OregonT 30 | 2 0.0873 OregonN 31 | 2 0.0662 OregonN 32 | 2 0.0672 OregonN 33 | 2 0.0819 OregonN 34 | 2 0.0749 OregonN 35 | 2 0.0649 OregonN 36 | 2 0.0835 OregonN 37 | 2 0.0725 OregonN 38 | 3 0.0974 Alaska 39 | 3 0.1352 Alaska 40 | 3 0.0817 Alaska 41 | 3 0.1016 Alaska 42 | 3 0.0968 Alaska 43 | 3 0.1064 Alaska 44 | 3 0.1050 Alaska 45 | 4 0.1033 Russia 46 | 4 0.0915 Russia 47 | 4 0.0781 Russia 48 | 4 0.0685 Russia 49 | 4 0.0677 Russia 50 | 4 0.0697 Russia 51 | 4 0.0764 Russia 52 | 4 0.0689 Russia 53 | 5 0.0703 Finland 54 | 5 0.1026 Finland 55 | 5 0.0956 Finland 56 | 5 0.0973 Finland 57 | 5 0.1039 Finland 58 | 5 0.1045 Finland 59 | # 60 | # http://udel.edu/~mcdonald/statanovaunplanned.html 61 | # shows that Tukey-Kramer method of unplanned comparisons 62 | # groups 63 | # Newport/Magadan/Tillamook (2/4/1), 64 | # Magadan/Tillamook/Tvarminne (4/1/5), 65 | # and Tvarminne/Petersburg (5/3). 66 | # 67 | # From http://udel.edu/~mcdonald/statanovaplanned.html: 68 | # Really important note about planned comparisons 69 | # Planned comparisons must be planned before you look at the data. If you 70 | # look at some data, pick out an interesting comparison, then analyze it as 71 | # if it were a planned comparison, you will be committing scientific fraud. 72 | # For example, if you look at the mean arch heights for the nine sports, see 73 | # that cross-country has the lowest mean and swimming has the highest mean, 74 | # then compare just those two means, your P-value will be much too low. This 75 | # is because there are 36 possible pairwise comparisons in a set of 9 means. 76 | # You expect 5 percent, or 1 out of 20, tests to be "significant" at the 77 | # P<0.05 level, even if all the data really fit the null hypothesis, so 78 | # there's a good chance that the most extreme comparison in a set of 36 79 | # will have a P-value less than 0.05. 80 | # It would be acceptable to run a pilot experiment and plan your planned 81 | # comparisons based on the results of the pilot experiment. However, if you 82 | # do this you could not include the data from the pilot experiment in the 83 | # analysis; you would have to limit your anova to the new data. 
84 | -------------------------------------------------------------------------------- /McIntyre1994data.csv: -------------------------------------------------------------------------------- 1 | Brand,Tar,Nic,Wt,CO 2 | Alpine,14.1,0.86,0.9853,13.6 3 | BensonAndHedges,16.0,1.06,1.0938,16.6 4 | BullDurham,29.8,2.03,1.1650,23.5 5 | CamelLights,8.0,0.67,0.9280,10.2 6 | Carlton,4.1,0.40,0.9462,5.4 7 | Chesterfield,15.0,1.04,0.8885,15.0 8 | GoldenLights,8.8,0.76,1.0267,9.0 9 | Kent,12.4,0.95,0.9225,12.3 10 | Kool,16.6,1.12,0.9372,16.3 11 | LandM,14.9,1.02,0.8858,15.4 12 | LarkLights,13.7,1.01,0.9643,13.0 13 | Marlboro,15.1,0.90,0.9316,14.4 14 | Merit,7.8,0.57,0.9705,10.0 15 | MultiFilter,11.4,0.78,1.1240,10.2 16 | NewportLights,9.0,0.74,0.8517,9.5 17 | Now,1.0,0.13,0.7851,1.5 18 | OldGold,17.0,1.26,0.9186,18.5 19 | PallMallLight,12.8,1.08,1.0395,12.6 20 | Raleigh,15.8,0.96,0.9573,17.5 21 | SalemUltra,4.5,0.42,0.9106,4.9 22 | Tareyton,14.5,1.01,1.0070,15.9 23 | True,7.3,0.61,0.9806,8.5 24 | ViceroyRichLight,8.6,0.69,0.9693,10.6 25 | VirginiaSlims,15.2,1.02,0.9496,13.9 26 | WinstonLights,12.0,0.82,1.1184,14.9 27 | -------------------------------------------------------------------------------- /QianS2007SeaweedData.txt: -------------------------------------------------------------------------------- 1 | COVER,BLOCK,TREAT 2 | 14.00,BLOCK 1,CONTROL 3 | 23.00,BLOCK 1,CONTROL 4 | 22.00,BLOCK 2,CONTROL 5 | 35.00,BLOCK 2,CONTROL 6 | 67.00,BLOCK 3,CONTROL 7 | 82.00,BLOCK 3,CONTROL 8 | 94.00,BLOCK 4,CONTROL 9 | 95.00,BLOCK 4,CONTROL 10 | 34.00,BLOCK 5,CONTROL 11 | 53.00,BLOCK 5,CONTROL 12 | 58.00,BLOCK 6,CONTROL 13 | 75.00,BLOCK 6,CONTROL 14 | 19.00,BLOCK 7,CONTROL 15 | 47.00,BLOCK 7,CONTROL 16 | 53.00,BLOCK 8,CONTROL 17 | 61.00,BLOCK 8,CONTROL 18 | 4.00,BLOCK 1,L 19 | 4.00,BLOCK 1,L 20 | 7.00,BLOCK 2,L 21 | 8.00,BLOCK 2,L 22 | 28.00,BLOCK 3,L 23 | 58.00,BLOCK 3,L 24 | 27.00,BLOCK 4,L 25 | 35.00,BLOCK 4,L 26 | 11.00,BLOCK 5,L 27 | 33.00,BLOCK 5,L 28 | 16.00,BLOCK 6,L 29 | 31.00,BLOCK 6,L 30 | 6.00,BLOCK 7,L 31 | 8.00,BLOCK 7,L 32 | 15.00,BLOCK 8,L 33 | 17.00,BLOCK 8,L 34 | 11.00,BLOCK 1,f 35 | 24.00,BLOCK 1,f 36 | 14.00,BLOCK 2,f 37 | 31.00,BLOCK 2,f 38 | 52.00,BLOCK 3,f 39 | 59.00,BLOCK 3,f 40 | 83.00,BLOCK 4,f 41 | 89.00,BLOCK 4,f 42 | 33.00,BLOCK 5,f 43 | 34.00,BLOCK 5,f 44 | 39.00,BLOCK 6,f 45 | 52.00,BLOCK 6,f 46 | 43.00,BLOCK 7,f 47 | 53.00,BLOCK 7,f 48 | 30.00,BLOCK 8,f 49 | 37.00,BLOCK 8,f 50 | 3.00,BLOCK 1,Lf 51 | 5.00,BLOCK 1,Lf 52 | 3.00,BLOCK 2,Lf 53 | 6.00,BLOCK 2,Lf 54 | 9.00,BLOCK 3,Lf 55 | 31.00,BLOCK 3,Lf 56 | 21.00,BLOCK 4,Lf 57 | 57.00,BLOCK 4,Lf 58 | 5.00,BLOCK 5,Lf 59 | 9.00,BLOCK 5,Lf 60 | 26.00,BLOCK 6,Lf 61 | 43.00,BLOCK 6,Lf 62 | 4.00,BLOCK 7,Lf 63 | 12.00,BLOCK 7,Lf 64 | 12.00,BLOCK 8,Lf 65 | 18.00,BLOCK 8,Lf 66 | 10.00,BLOCK 1,fF 67 | 13.00,BLOCK 1,fF 68 | 10.00,BLOCK 2,fF 69 | 15.00,BLOCK 2,fF 70 | 44.00,BLOCK 3,fF 71 | 50.00,BLOCK 3,fF 72 | 57.00,BLOCK 4,fF 73 | 73.00,BLOCK 4,fF 74 | 26.00,BLOCK 5,fF 75 | 42.00,BLOCK 5,fF 76 | 38.00,BLOCK 6,fF 77 | 42.00,BLOCK 6,fF 78 | 29.00,BLOCK 7,fF 79 | 36.00,BLOCK 7,fF 80 | 11.00,BLOCK 8,fF 81 | 40.00,BLOCK 8,fF 82 | 1.00,BLOCK 1,LfF 83 | 2.00,BLOCK 1,LfF 84 | 3.00,BLOCK 2,LfF 85 | 5.00,BLOCK 2,LfF 86 | 6.00,BLOCK 3,LfF 87 | 9.00,BLOCK 3,LfF 88 | 7.00,BLOCK 4,LfF 89 | 22.00,BLOCK 4,LfF 90 | 5.00,BLOCK 5,LfF 91 | 6.00,BLOCK 5,LfF 92 | 10.00,BLOCK 6,LfF 93 | 17.00,BLOCK 6,LfF 94 | 5.00,BLOCK 7,LfF 95 | 14.00,BLOCK 7,LfF 96 | 5.00,BLOCK 8,LfF 97 | 7.00,BLOCK 8,LfF 98 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Doing_bayesian_data_analysis 2 | ============================ 3 | 4 | [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/aloctavodia/Doing_bayesian_data_analysis?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 5 | 6 | This repository contains the Python version of the R programs described in the great book [Doing bayesian data analysis (first edition)](http://doingbayesiandataanalysis.blogspot.com.ar) by John K. Kruschke (AKA *the puppy book*). 7 | 8 | All the code is adapted from the Kruschke's book, except hpd.py that is taken (without modifications) from the PyMC project. 9 | 10 | The name of the programs are the same used in the book, except they begin with a number indicating the chapter. All programs are written in Python and instead of BUGS/JAGS the [PyMC3](http://pymc-devs.github.io/pymc3) module is used. 11 | 12 | Thanks to [Brian Naughton](https://github.com/hgbrian) the code is also available as an [IPython notebook](http://nbviewer.ipython.org/github/aloctavodia/Doing_bayesian_data_analysis/blob/master/IPython/Kruschkes_Doing_Bayesian_Data_Analysis_in_PyMC3.ipynb) 13 | 14 | ## Second edition 15 | 16 | If you are interested on the PyMC3 code for the second edition of Doing bayesian data analysis, please check this [Repository](https://github.com/JWarmenhoven/DBDA-python). 17 | -------------------------------------------------------------------------------- /Salary.csv: -------------------------------------------------------------------------------- 1 | "Org","Post","Salary" 2 | "CEDP","FT1",89504 3 | "CEDP","FT1",106554 4 | "CHEM","FT1",108158 5 | "CEDP","FT1",92961 6 | "CHEM","FT1",154703 7 | "CEDP","FT1",81840 8 | "BFIN","FT1",238000 9 | "THTR","FT1",86794 10 | "CEDP","FT1",84121 11 | "THTR","FT1",80450 12 | "CEDP","FT1",98434 13 | "CHEM","FT1",194192 14 | "CEDP","FT1",92896 15 | "THTR","FT1",72240 16 | "CEDP","FT1",88374 17 | "BFIN","FT1",234000 18 | "THTR","FT1",81566 19 | "CEDP","FT1",102300 20 | "CHEM","FT1",159753 21 | "CHEM","FT1",121313 22 | "CHEM","FT2",123200 23 | "BFIN","FT2",183000 24 | "THTR","FT2",62611 25 | "BFIN","FT2",222000 26 | "CEDP","FT2",63000 27 | "CHEM","FT2",120000 28 | "BFIN","FT2",200000 29 | "CEDP","FT2",65115 30 | "BFIN","FT2",198000 31 | "CEDP","FT2",83762 32 | "CEDP","FT2",75559 33 | "BFIN","FT2",146000 34 | "CHEM","FT2",83164 35 | "CHEM","FT2",135794 36 | "CHEM","FT2",82762 37 | "BFIN","FT2",134000 38 | "CHEM","FT2",88147 39 | "THTR","FT2",59210 40 | "CEDP","FT2",66186 41 | "THTR","FT2",63924 42 | "CHEM","FT2",123610 43 | "CHEM","FT2",88271 44 | "THTR","FT2",62315 45 | "BFIN","FT2",174000 46 | "THTR","FT2",63261 47 | "CEDP","FT2",66794 48 | "THTR","FT2",71706 49 | "CEDP","FT2",79236 50 | "CHEM","FT2",104568 51 | "BFIN","FT2",180000 52 | "CHEM","FT3",77169 53 | "CHEM","FT3",81773 54 | "CEDP","FT3",59568 55 | "CHEM","FT3",75000 56 | "THTR","FT3",53000 57 | "CHEM","FT3",75000 58 | "THTR","FT3",51991 59 | "CEDP","FT3",57000 60 | "CHEM","FT3",75000 61 | "THTR","FT3",56985 62 | "THTR","FT3",51365 63 | "CHEM","FT3",76714 64 | "CEDP","FT3",58890 65 | "BFIN","FT3",188000 66 | "THTR","FT3",52140 67 | "THTR","FT3",53000 68 | "CHEM","FT3",80017 69 | "BFIN","FT3",165000 70 | "THTR","FT3",53000 71 | "CEDP","FT3",57443 72 | "BFIN","FT3",190000 73 | "CHEM","FT3",75000 74 | "BFIN","FT3",177000 75 | "CEDP","FT3",57443 76 | "BFIN","FT3",180000 
77 | "CHEM","FT3",78000 78 | "BFIN","FT3",180000 79 | "CHEM","FT3",68523 80 | "BFIN","FT3",176000 81 | "CEDP","FT3",57000 82 | "CEDP","FT3",58500 83 | "CEDP","FT3",57443 84 | "BFIN","FT3",171000 85 | "BFIN","FT3",176000 86 | -------------------------------------------------------------------------------- /SolariLS2008data.txt: -------------------------------------------------------------------------------- 1 | # Data from Solari, Liseo & Sun 2008 2 | # 3 | # (Pompilj and Napolitani, 1954). An experiment is conducted 4 | # to analyze the possible influence of some types of manuring 5 | # on the ascorbic acid content in tomatoes. The treatments 6 | # under study are nine manures obtained as different 7 | # combinations of calcium nitrate and calcium superphosphate. 8 | # 9 | # T1 T2 T3 T4 T5 T6 T7 T8 T9 10 | # 7.12 4.42 6.49 8.07 8.05 5.09 5.87 6.57 4.13 11 | # 7.16 5.68 8.09 2.86 5.82 4.57 5.36 5.08 7.31 12 | # 4.57 5.15 8.79 6.84 2.47 6.06 5.85 5.95 4.47 13 | # 3.79 3.83 8.44 6.85 3.28 4.87 6.27 7.51 2.53 14 | # 4.20 3.30 6.11 4.12 5.38 4.52 5.96 3.79 3.96 15 | # 5.84 4.44 5.17 3.32 3.98 5.08 4.95 4.33 5.30 16 | # 5.56 3.51 8.13 1.74 6.08 4.29 5.85 3.70 2.66 17 | # 5.02 4.60 7.58 1.74 6.28 6.19 4.70 5.21 4.12 18 | # 3.69 4.85 6.47 1.57 5.72 3.45 1.53 4.48 3.54 19 | # 2.99 4.84 5.45 3.02 2.88 5.85 3.88 5.17 2.98 20 | # 4.99 5.45 6.18 5.08 6.40 2.51 2.88 4.69 5.08 21 | # 2.16 4.71 4.34 4.96 4.58 4.93 2.07 2.12 5.15 22 | Type Acid 23 | 1 7.12 24 | 1 7.16 25 | 1 4.57 26 | 1 3.79 27 | 1 4.20 28 | 1 5.84 29 | 1 5.56 30 | 1 5.02 31 | 1 3.69 32 | 1 2.99 33 | 1 4.99 34 | 1 2.16 35 | 2 4.42 36 | 2 5.68 37 | 2 5.15 38 | 2 3.83 39 | 2 3.30 40 | 2 4.44 41 | 2 3.51 42 | 2 4.60 43 | 2 4.85 44 | 2 4.84 45 | 2 5.45 46 | 2 4.71 47 | 3 6.49 48 | 3 8.09 49 | 3 8.79 50 | 3 8.44 51 | 3 6.11 52 | 3 5.17 53 | 3 8.13 54 | 3 7.58 55 | 3 6.47 56 | 3 5.45 57 | 3 6.18 58 | 3 4.34 59 | 4 8.07 60 | 4 2.86 61 | 4 6.84 62 | 4 6.85 63 | 4 4.12 64 | 4 3.32 65 | 4 1.74 66 | 4 1.74 67 | 4 1.57 68 | 4 3.02 69 | 4 5.08 70 | 4 4.96 71 | 5 8.05 72 | 5 5.82 73 | 5 2.47 74 | 5 3.28 75 | 5 5.38 76 | 5 3.98 77 | 5 6.08 78 | 5 6.28 79 | 5 5.72 80 | 5 2.88 81 | 5 6.40 82 | 5 4.58 83 | 6 5.09 84 | 6 4.57 85 | 6 6.06 86 | 6 4.87 87 | 6 4.52 88 | 6 5.08 89 | 6 4.29 90 | 6 6.19 91 | 6 3.45 92 | 6 5.85 93 | 6 2.51 94 | 6 4.93 95 | 7 5.87 96 | 7 5.36 97 | 7 5.85 98 | 7 6.27 99 | 7 5.96 100 | 7 4.95 101 | 7 5.85 102 | 7 4.70 103 | 7 1.53 104 | 7 3.88 105 | 7 2.88 106 | 7 2.07 107 | 8 6.57 108 | 8 5.08 109 | 8 5.95 110 | 8 7.51 111 | 8 3.79 112 | 8 4.33 113 | 8 3.70 114 | 8 5.21 115 | 8 4.48 116 | 8 5.17 117 | 8 4.69 118 | 8 2.12 119 | 9 4.13 120 | 9 7.31 121 | 9 4.47 122 | 9 2.53 123 | 9 3.96 124 | 9 5.30 125 | 9 2.66 126 | 9 4.12 127 | 9 3.54 128 | 9 2.98 129 | 9 5.08 130 | 9 5.15 131 | -------------------------------------------------------------------------------- /Systems.txt: -------------------------------------------------------------------------------- 1 | Aircraft Failure Days DaysTransf 2 | 1. 1. 194. 2.867876 3 | 1. 2. 15. 1.718772 4 | 1. 3. 41. 2.101632 5 | 1. 4. 29. 1.961009 6 | 1. 5. 33. 2.012347 7 | 1. 6. 181. 2.828367 8 | 2. 1. 413. 3.335723 9 | 2. 2. 14. 1.695218 10 | 2. 3. 58. 2.252608 11 | 2. 4. 37. 2.058924 12 | 2. 5. 100. 2.511886 13 | 2. 6. 65. 2.304532 14 | 2. 7. 9. 1.551846 15 | 2. 8. 169. 2.789827 16 | 2. 9. 447. 3.388921 17 | 2. 10. 184. 2.837681 18 | 2. 11. 36. 2.047673 19 | 2. 12. 201. 2.888279 20 | 2. 13. 118. 2.596429 21 | 3. 1. 90. 2.459509 22 | 3. 2. 10. 1.584893 23 | 3. 3. 60. 2.267933 24 | 3. 4. 186. 
2.843823 25 | 3. 5. 61. 2.275443 26 | 3. 6. 49. 2.177906 27 | 3. 7. 14. 1.695218 28 | 3. 8. 24. 1.888175 29 | 3. 9. 56. 2.236854 30 | 3. 10. 20. 1.820564 31 | 3. 11. 79. 2.396213 32 | 3. 12. 84. 2.425805 33 | 3. 13. 44. 2.131526 34 | 3. 14. 59. 2.260322 35 | 3. 15. 29. 1.961009 36 | 3. 16. 118. 2.596429 37 | 3. 17. 25. 1.903654 38 | 3. 18. 156. 2.745522 39 | 3. 19. 310. 3.149723 40 | 3. 20. 76. 2.377731 41 | 3. 21. 26. 1.918645 42 | 3. 22. 44. 2.131526 43 | 3. 23. 23. 1.872171 44 | 3. 24. 62. 2.282855 45 | 4. 1. 74. 2.365083 46 | 4. 2. 57. 2.244786 47 | 4. 3. 48. 2.168944 48 | 4. 4. 29. 1.961009 49 | 4. 5. 502. 3.468492 50 | 4. 6. 12. 1.643752 51 | 4. 7. 70. 2.338943 52 | 4. 8. 21. 1.838416 53 | 4. 9. 29. 1.961009 54 | 4. 10. 386. 3.290921 55 | 4. 11. 59. 2.260322 56 | 4. 12. 27. 1.933182 57 | 5. 1. 55. 2.228807 58 | 5. 2. 320. 3.169786 59 | 5. 3. 56. 2.236854 60 | 5. 4. 104. 2.531668 61 | 5. 5. 220. 2.940929 62 | 5. 6. 239. 2.990058 63 | 5. 7. 47. 2.15983 64 | 5. 8. 246. 3.007371 65 | 5. 9. 176. 2.812565 66 | 5. 10. 182. 2.831485 67 | 5. 11. 33. 2.012347 68 | 6. 1. 23. 1.872171 69 | 6. 2. 261. 3.043183 70 | 6. 3. 87. 2.44289 71 | 6. 4. 7. 1.475773 72 | 6. 5. 120. 2.605171 73 | 6. 6. 14. 1.695218 74 | 6. 7. 62. 2.282855 75 | 6. 8. 47. 2.15983 76 | 6. 9. 225. 2.954177 77 | 6. 10. 71. 2.345588 78 | 6. 11. 246. 3.007371 79 | 6. 12. 21. 1.838416 80 | 6. 13. 42. 2.111786 81 | 6. 14. 20. 1.820564 82 | 6. 15. 5. 1.37973 83 | 6. 16. 12. 1.643752 84 | 6. 17. 120. 2.605171 85 | 6. 18. 11. 1.615394 86 | 6. 19. 3. 1.245731 87 | 6. 20. 14. 1.695218 88 | 6. 21. 71. 2.345588 89 | 6. 22. 11. 1.615394 90 | 6. 23. 14. 1.695218 91 | 6. 24. 11. 1.615394 92 | 6. 25. 16. 1.741101 93 | 6. 26. 90. 2.459509 94 | 6. 27. 1. 1. 95 | 6. 28. 16. 1.741101 96 | 6. 29. 52. 2.203945 97 | 6. 30. 95. 2.48625 98 | 7. 1. 97. 2.496631 99 | 7. 2. 51. 2.195402 100 | 7. 3. 11. 1.615394 101 | 7. 4. 4. 1.319508 102 | 7. 5. 141. 2.690567 103 | 7. 6. 18. 1.782602 104 | 7. 7. 142. 2.694373 105 | 7. 8. 68. 2.325422 106 | 7. 9. 77. 2.383956 107 | 7. 10. 80. 2.402249 108 | 7. 11. 1. 1. 109 | 7. 12. 16. 1.741101 110 | 7. 13. 106. 2.541331 111 | 7. 14. 206. 2.902508 112 | 7. 15. 82. 2.414142 113 | 7. 16. 54. 2.220643 114 | 7. 17. 31. 1.987341 115 | 7. 18. 216. 2.930156 116 | 7. 19. 46. 2.15056 117 | 7. 20. 111. 2.564865 118 | 7. 21. 39. 2.080717 119 | 7. 22. 63. 2.290172 120 | 7. 23. 18. 1.782602 121 | 7. 24. 191. 2.858951 122 | 7. 25. 18. 1.782602 123 | 7. 26. 163. 2.769731 124 | 7. 27. 24. 1.888175 125 | 8. 1. 50. 2.186724 126 | 8. 2. 44. 2.131526 127 | 8. 3. 102. 2.521855 128 | 8. 4. 72. 2.352158 129 | 8. 5. 22. 1.855601 130 | 8. 6. 39. 2.080717 131 | 8. 7. 3. 1.245731 132 | 8. 8. 15. 1.718772 133 | 8. 9. 197. 2.876691 134 | 8. 10. 188. 2.849913 135 | 8. 11. 79. 2.396213 136 | 8. 12. 88. 2.44848 137 | 8. 13. 46. 2.15056 138 | 8. 14. 5. 1.37973 139 | 8. 15. 5. 1.37973 140 | 8. 16. 36. 2.047673 141 | 8. 17. 22. 1.855601 142 | 8. 18. 139. 2.682891 143 | 8. 19. 210. 2.913693 144 | 8. 20. 97. 2.496631 145 | 8. 21. 30. 1.97435 146 | 8. 22. 23. 1.872171 147 | 8. 23. 13. 1.670278 148 | 8. 24. 14. 1.695218 149 | 9. 1. 359. 3.243537 150 | 9. 2. 9. 1.551846 151 | 9. 3. 12. 1.643752 152 | 9. 4. 270. 3.063887 153 | 9. 5. 603. 3.598019 154 | 9. 6. 3. 1.245731 155 | 9. 7. 104. 2.531668 156 | 9. 8. 2. 1.148698 157 | 9. 9. 438. 3.375164 158 | 10. 1. 50. 2.186724 159 | 10. 2. 254. 3.026682 160 | 10. 3. 5. 1.37973 161 | 10. 4. 283. 3.092839 162 | 10. 5. 35. 2.036168 163 | 10. 6. 12. 1.643752 164 | 11. 1. 487. 3.447512 165 | 11. 2. 18. 1.782602 166 | 11. 3. 
100. 2.511886 167 | 11. 4. 7. 1.475773 168 | 11. 5. 98. 2.501758 169 | 11. 6. 5. 1.37973 170 | 11. 7. 85. 2.431553 171 | 11. 8. 91. 2.464951 172 | 11. 9. 43. 2.121747 173 | 11. 10. 230. 2.967191 174 | 11. 11. 3. 1.245731 175 | 11. 12. 130. 2.647212 176 | 12. 1. 102. 2.521855 177 | 12. 2. 209. 2.910913 178 | 12. 3. 14. 1.695218 179 | 12. 4. 57. 2.244786 180 | 12. 5. 54. 2.220643 181 | 12. 6. 32. 2. 182 | 12. 7. 67. 2.318542 183 | 12. 8. 59. 2.260322 184 | 12. 9. 134. 2.663305 185 | 12. 10. 152. 2.731296 186 | 12. 11. 27. 1.933182 187 | 12. 12. 14. 1.695218 188 | 12. 13. 230. 2.967191 189 | 12. 14. 66. 2.311579 190 | 12. 15. 61. 2.275443 191 | 12. 16. 34. 2.024397 192 | -------------------------------------------------------------------------------- /hpd.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code was taken form the PyMC library https://github.com/pymc-devs/pymc 3 | """ 4 | 5 | import numpy as np 6 | 7 | def calc_min_interval(x, alpha): 8 | """Internal method to determine the minimum interval of a given width 9 | Assumes that x is sorted numpy array. 10 | """ 11 | 12 | n = len(x) 13 | cred_mass = 1.0-alpha 14 | 15 | interval_idx_inc = int(np.floor(cred_mass*n)) 16 | n_intervals = n - interval_idx_inc 17 | interval_width = x[interval_idx_inc:] - x[:n_intervals] 18 | 19 | if len(interval_width) == 0: 20 | raise ValueError('Too few elements for interval calculation') 21 | 22 | min_idx = np.argmin(interval_width) 23 | hdi_min = x[min_idx] 24 | hdi_max = x[min_idx+interval_idx_inc] 25 | return hdi_min, hdi_max 26 | 27 | 28 | def hpd(x, alpha=0.05): 29 | """Calculate highest posterior density (HPD) of array for given alpha. 30 | The HPD is the minimum width Bayesian credible interval (BCI). 31 | :Arguments: 32 | x : Numpy array 33 | An array containing MCMC samples 34 | alpha : float 35 | Desired probability of type I error (defaults to 0.05) 36 | 37 | """ 38 | 39 | # Make a copy of trace 40 | x = x.copy() 41 | # For multivariate node 42 | if x.ndim > 1: 43 | # Transpose first, then sort 44 | tx = np.transpose(x, list(range(x.ndim))[1:]+[0]) 45 | dims = np.shape(tx) 46 | # Container list for intervals 47 | intervals = np.resize(0.0, dims[:-1]+(2,)) 48 | 49 | for index in make_indices(dims[:-1]): 50 | try: 51 | index = tuple(index) 52 | except TypeError: 53 | pass 54 | 55 | # Sort trace 56 | sx = np.sort(tx[index]) 57 | # Append to list 58 | intervals[index] = calc_min_interval(sx, alpha) 59 | # Transpose back before returning 60 | return np.array(intervals) 61 | else: 62 | # Sort univariate node 63 | sx = np.sort(x) 64 | return np.array(calc_min_interval(sx, alpha)) 65 | -------------------------------------------------------------------------------- /plot_post.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from scipy import stats 4 | import matplotlib.pyplot as plt 5 | from hpd import hpd 6 | 7 | 8 | def plot_post(param_sample_vec, cred_mass=0.95, comp_val=False, 9 | ROPE=False, ylab='', xlab='parameter', fontsize=14, labelsize=14, 10 | title='', framealpha=1, facecolor='skyblue', edgecolor='white', 11 | show_mode=True, bins=50): 12 | 13 | #compute HDI 14 | HDI = hpd(param_sample_vec, 1-cred_mass) 15 | 16 | post_summary = {'mean':0,'median':0,'mode':0, 'hdi_mass':0,'hdi_low':0, 17 | 'hdi_high':0, 'comp_val':0, 'pc_gt_comp_val':0, 'ROPE_low':0, 18 | 'ROPE_high':0, 'pc_in_ROPE':0} 19 | post_summary['mean'] = np.mean(param_sample_vec) 20 
| post_summary['median'] = np.median(param_sample_vec) 21 | post_summary['mode'] = stats.mode(param_sample_vec)[0] 22 | post_summary['hdi_mass'] = cred_mass 23 | post_summary['hdi_low'] = HDI[0] 24 | post_summary['hdi_high'] = HDI[1] 25 | 26 | # Plot histogram. 27 | n, bins, patches = plt.hist(param_sample_vec, normed=True, bins=bins, 28 | edgecolor=edgecolor, facecolor=facecolor) 29 | plt.xlabel(xlab, fontsize=fontsize) 30 | plt.ylabel(ylab, fontsize=fontsize) 31 | plt.title(title, fontsize=fontsize) 32 | 33 | cv_ht = 0.75*np.max(n) 34 | cen_tend_ht = 0.9 * cv_ht 35 | ROPE_text_ht = 0.55 * cv_ht 36 | # # Display mean or mode: 37 | if show_mode: 38 | plt.plot(0, label='mode = %.2f' % post_summary['mode'], alpha=0) 39 | else: 40 | plt.plot(0, label='mean = %.2f' % post_summary['mean'], alpha=0) 41 | # Display the comparison value. 42 | 43 | if comp_val is not False: 44 | pc_gt_comp_val = 100 * np.sum(param_sample_vec > comp_val)/len(param_sample_vec) 45 | pc_lt_comp_val = 100 - pc_gt_comp_val 46 | plt.plot([comp_val, comp_val], [0, cv_ht], color='darkgreen', 47 | linestyle='--', linewidth=2, 48 | label='%.1f%% <%.1f < %.1f%%' 49 | % (pc_lt_comp_val, comp_val, pc_gt_comp_val)) 50 | post_summary['comp_val'] = comp_val 51 | post_summary['pc_gt_comp_val'] = pc_gt_comp_val 52 | # # Display the ROPE. 53 | if ROPE is not False: 54 | rope_col = 'darkred' 55 | pc_in_ROPE = round(np.sum((param_sample_vec > ROPE[0]) & (param_sample_vec < ROPE[1]))/len(param_sample_vec)*100) 56 | plt.plot([ROPE[0], ROPE[0]], [0, 0.96*ROPE_text_ht], color=rope_col, 57 | linestyle=':', linewidth=4, 58 | label='%.1f%% in ROPE' % pc_in_ROPE) 59 | plt.plot([ROPE[1], ROPE[1]], [0, 0.96*ROPE_text_ht], color=rope_col, 60 | linestyle=':', linewidth=4) 61 | post_summary['ROPE_low'] = ROPE[0] 62 | post_summary['ROPE_high'] = ROPE[1] 63 | post_summary['pc_in_ROPE'] = pc_in_ROPE 64 | # # Display the HDI. 65 | plt.plot(HDI, [0, 0], linewidth=6, color='k', label='HDI %.1f%% %.3f-%.3f' % (cred_mass*100, HDI[0], HDI[1])) 66 | plt.legend(loc='upper left', fontsize=labelsize, framealpha=framealpha) 67 | frame = plt.gca() 68 | frame.axes.get_yaxis().set_ticks([]) 69 | return post_summary 70 | 71 | --------------------------------------------------------------------------------