├── Cauchy.py
├── Correlations.py
├── README.md
└── screenshots
    ├── Cauchy.png
    ├── Cauchy_data.png
    ├── V1_e_corr.png
    ├── V2_correlations.png
    ├── V2_data.png
    ├── V2_e_corr.png
    ├── monte_carlo.png
    ├── prediction_cone.png
    └── traceplot.png


/Cauchy.py:
--------------------------------------------------------------------------------
 1 | import pymc3 as pm
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | def generateCauchyData(N, mean):
 6 | 
 7 |     return (np.random.standard_cauchy(N) * 100. + mean)
 8 | 
# Draw 10000 samples centred on 42 and plot the raw series.
data = generateCauchyData(10000, 42.)
plt.plot(data)
plt.show()
12 | 
def frequentistCenter(data):
    """Return the arithmetic mean of `data` as the estimate of its centre."""
    sample_mean = np.mean(data)
    return sample_mean
16 | 
# Estimate the centre of the generated data with the plain sample mean.
X = frequentistCenter(data)

print("Sample mean: ", X)
20 | 
def bayesianCenter(data):
    """Estimate the centre of `data` with a Bayesian Cauchy model.

    Both the location and the scale of the Cauchy likelihood get Uniform
    priors. The posterior is sampled, its trace plot is shown, and the mean
    of the sampled 'location' values is returned.
    """
    with pm.Model():
        # Priors on the two Cauchy parameters.
        location = pm.Uniform('location', lower=-1000., upper=1000.)
        spread = pm.Uniform('scale', lower=0.01, upper=1000.)

        # Likelihood of the observed data.
        pm.Cauchy('y', alpha=location, beta=spread, observed=data)

        trace = pm.sample(3000, tune=3000, target_accept=0.92)
        pm.traceplot(trace)
        plt.show()

    location_samples = trace['location']
    return np.mean(location_samples)
34 | 
# Estimate the centre with the Bayesian model; bayesianCenter returns the
# mean of the sampled 'location' values.
X2 = bayesianCenter(data)

print("Bayesian mode (median, location): ", X2)
38 | 


--------------------------------------------------------------------------------
/Correlations.py:
--------------------------------------------------------------------------------
  1 | import pymc3 as pm
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | 
  5 | # First we generate the data
  6 | def generateDataV1(N):
  7 | 
  8 |     x = [1.0]
  9 |     y = [1.0]
 10 | 
 11 |     corr = 0.25
 12 | 
 13 |     for _ in range(N):
 14 | 
 15 |         x_innovation = np.random.normal(0., 0.1)
 16 |         x_tmp = x[-1] + x_innovation
 17 |         y_tmp = y[-1] + corr * x_innovation + np.random.normal(0., 0.1)
 18 | 
 19 |         x.append(x_tmp)
 20 |         y.append(y_tmp)
 21 | 
 22 |     return np.array(x), np.array(y)
 23 | 
 24 | def generateDataV2(N):
 25 | 
 26 |     x = [1.0]
 27 |     y = [1.0]
 28 | 
 29 |     corr = 0.25
 30 | 
 31 |     corrs = [corr]
 32 |     for _ in range(N):
 33 | 
 34 |         x_innovation = np.random.normal(0., 0.1)
 35 |         x_tmp = x[-1] + x_innovation
 36 |         y_tmp = y[-1] + corr * x_innovation + np.random.normal(0., 0.1)
 37 | 
 38 |         x.append(x_tmp)
 39 |         y.append(y_tmp)
 40 | 
 41 |         corr += np.random.normal(0., 0.001)
 42 |         corrs.append(corr)
 43 | 
 44 |     return np.array(x), np.array(y), np.array(corrs)
 45 | 
 46 | x, y = generateDataV1(100000)
 47 | 
 48 | plt.plot(x)
 49 | plt.plot(y)
 50 | plt.title("V1 data (Constant correlation)")
 51 | plt.show()
 52 | 
 53 | x2, y2, corrs = generateDataV2(100000)
 54 | 
 55 | plt.plot(x2)
 56 | plt.plot(y2)
 57 | plt.title("V2 data (Time-drifting correlation)")
 58 | plt.show()
 59 | 
 60 | plt.plot(corrs)
 61 | plt.title("Correlation time series for V2")
 62 | plt.show()
 63 | 
 64 | # Now we model the V1 data, and examine the stability of the correlation
 65 | import theano.tensor as tt
 66 | with pm.Model() as model1:
 67 | 
 68 |     def custom_likelihood(x_diffs, y_obs_last, y_obs):
 69 | 
 70 |         # Model is: y(t) = y(t-1) + correlation * [x(t) - x(t-1)]
 71 |         expected = y_obs_last + corr * x_diffs
 72 |         return pm.Normal.dist(mu=expected, sd=0.01).logp(y_obs)
 73 | 
 74 |     step_size = pm.Uniform('step_size', lower=0.0, upper=1.)
 75 |     corr = pm.GaussianRandomWalk('corr', mu=0, sd=step_size, shape=100)
 76 |     corr = tt.repeat(corr, 1000)
 77 | 
 78 |     pm.DensityDist('obs', custom_likelihood, observed={
 79 |         'x_diffs': (x[:-1] - x[1:]),
 80 |         'y_obs_last': y[1:],
 81 |         'y_obs': y[:-1]
 82 |     })
 83 | 
 84 |     mean_field = pm.fit(n=10000, method='advi', obj_optimizer=pm.adam(learning_rate=0.02))
 85 |     trace = mean_field.sample(1000)
 86 | 
 87 | estimated_corrs = np.median(trace['corr'], axis=0)
 88 | 
 89 | plt.plot(estimated_corrs)
 90 | plt.title("Estimated correlation for V1")
 91 | plt.show()
 92 | 
 93 | # Now we model the V2 data, and examine the stability of the correlation
 94 | with pm.Model() as model2:
 95 | 
 96 |     def custom_likelihood(x_diffs, y_obs_last, y_obs):
 97 |         expected = y_obs_last + corr * x_diffs
 98 |         return pm.Normal.dist(mu=expected, sd=0.01).logp(y_obs)
 99 | 
100 |     step_size = pm.Uniform('step_size', lower=0.0001, upper=1.)
101 |     corr = pm.GaussianRandomWalk('corr', mu=0, sd=step_size, shape=100)
102 |     corr = tt.repeat(corr, 1000)
103 | 
104 |     pm.DensityDist('obs', custom_likelihood, observed={
105 |         'x_diffs': (x2[:-1] - x2[1:]),
106 |         'y_obs_last': y2[1:],
107 |         'y_obs': y2[:-1]
108 |     })
109 | 
110 |     mean_field = pm.fit(n=10000, method='advi', obj_optimizer=pm.adam(learning_rate=0.02))
111 |     trace = mean_field.sample(1000)
112 | 
113 |     pm.traceplot(trace)
114 |     plt.show()
115 | 
116 | estimated_corrs = np.repeat(np.median(trace['corr'], axis=0), 1000)
117 | plt.plot(estimated_corrs, color='r', label='Estimated')
118 | plt.plot(corrs, color='b', label='Actual')
119 | plt.legend(loc='upper left')
120 | plt.title("Estimated correlations for V2")
121 | plt.show()
122 | 
# Estimated parameters for the dynamic correlation (V2)
estimated_step_size = np.median(trace['step_size'])
estimated_last_value = estimated_corrs[-1]

print("Estimated step size = ", estimated_step_size)

# Generate monte carlo simulation for the next 100 time steps.
# Each simulated path is a Gaussian random walk that starts at the last
# estimated value and uses the estimated step size as its per-step sd.
simulations = []
for _ in range(1000):
    sim = [estimated_last_value]
    for _ in range(100):
        sim.append(sim[-1] + np.random.normal(0, estimated_step_size))

    # Stretch every step over 1000 points, the same block length the model
    # used when it repeated `corr`.
    simulations.append(np.repeat(sim, 1000))
    plt.plot(sim)

# The title now states the horizon that is actually simulated and plotted
# (100 steps); it previously said "next 1000 points".
plt.title("Monte Carlo simulation of correlation for next 100 time steps")
plt.show()
141 | 
# Stack the simulated paths: one row per simulation, one column per point.
simulations = np.array(simulations)

# Quartiles across the simulated paths at every forecast point.
p75 = np.percentile(simulations, 75, axis=0)
median = np.median(simulations, axis=0)
p25 = np.percentile(simulations, 25, axis=0)

# Append each forecast band to the in-sample estimate so that the history
# and the forecast are drawn as one continuous line.
estimatedMedian = np.concatenate((estimated_corrs, median))
estimated75 = np.concatenate((estimated_corrs, p75))
estimated25 = np.concatenate((estimated_corrs, p25))

plt.plot(corrs, color='b', label='Actual')
plt.plot(estimatedMedian, color='g', label='Median estimate')
plt.plot(estimated75, color='r', alpha=0.3, label='75th percentile')
plt.plot(estimated25, color='r', alpha=0.3, label='25th percentile')

plt.title("Prediction cone for correlation")
# Dashed line marks where the known coupling path ends.
plt.axvline(x=len(corrs), linestyle='--', color='r', alpha=0.5)
# Without this call the labels passed to plt.plot above are never shown.
plt.legend(loc='upper left')
plt.show()


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Companion code for my talk at the PyData meetup: "Introduction to Probabilistic Programming with PyMC3"
2 | 
3 | My blog: http://www.simonouellette.com/
4 | 


--------------------------------------------------------------------------------
/screenshots/Cauchy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/Cauchy.png


--------------------------------------------------------------------------------
/screenshots/Cauchy_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/Cauchy_data.png


--------------------------------------------------------------------------------
/screenshots/V1_e_corr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/V1_e_corr.png


--------------------------------------------------------------------------------
/screenshots/V2_correlations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/V2_correlations.png


--------------------------------------------------------------------------------
/screenshots/V2_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/V2_data.png


--------------------------------------------------------------------------------
/screenshots/V2_e_corr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/V2_e_corr.png


--------------------------------------------------------------------------------
/screenshots/monte_carlo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/monte_carlo.png


--------------------------------------------------------------------------------
/screenshots/prediction_cone.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/prediction_cone.png


--------------------------------------------------------------------------------
/screenshots/traceplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/traceplot.png


--------------------------------------------------------------------------------