├── Cauchy.py ├── Correlations.py ├── README.md └── screenshots ├── Cauchy.png ├── Cauchy_data.png ├── V1_e_corr.png ├── V2_correlations.png ├── V2_data.png ├── V2_e_corr.png ├── monte_carlo.png ├── prediction_cone.png └── traceplot.png /Cauchy.py: -------------------------------------------------------------------------------- 1 | import pymc3 as pm 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | def generateCauchyData(N, mean): 6 | 7 | return (np.random.standard_cauchy(N) * 100. + mean) 8 | 9 | data = generateCauchyData(10000, 42.) 10 | plt.plot(data) 11 | plt.show() 12 | 13 | def frequentistCenter(data): 14 | 15 | return np.mean(data) 16 | 17 | X = frequentistCenter(data) 18 | 19 | print("Sample mean: ", X) 20 | 21 | def bayesianCenter(data): 22 | 23 | with pm.Model(): 24 | loc = pm.Uniform('location', lower=-1000., upper=1000.) 25 | scale = pm.Uniform('scale', lower=0.01, upper=1000.) 26 | 27 | pm.Cauchy('y', alpha=loc, beta=scale, observed=data) 28 | 29 | trace = pm.sample(3000, tune=3000, target_accept=0.92) 30 | pm.traceplot(trace) 31 | plt.show() 32 | 33 | return np.mean(trace['location']) 34 | 35 | X2 = bayesianCenter(data) 36 | 37 | print("Bayesian mode (median, location): ", X2) 38 | -------------------------------------------------------------------------------- /Correlations.py: -------------------------------------------------------------------------------- 1 | import pymc3 as pm 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # First we generate the data 6 | def generateDataV1(N): 7 | 8 | x = [1.0] 9 | y = [1.0] 10 | 11 | corr = 0.25 12 | 13 | for _ in range(N): 14 | 15 | x_innovation = np.random.normal(0., 0.1) 16 | x_tmp = x[-1] + x_innovation 17 | y_tmp = y[-1] + corr * x_innovation + np.random.normal(0., 0.1) 18 | 19 | x.append(x_tmp) 20 | y.append(y_tmp) 21 | 22 | return np.array(x), np.array(y) 23 | 24 | def generateDataV2(N): 25 | 26 | x = [1.0] 27 | y = [1.0] 28 | 29 | corr = 0.25 30 | 31 | corrs = [corr] 32 | for _ in range(N): 33 | 34 | x_innovation = np.random.normal(0., 0.1) 35 | x_tmp = x[-1] + x_innovation 36 | y_tmp = y[-1] + corr * x_innovation + np.random.normal(0., 0.1) 37 | 38 | x.append(x_tmp) 39 | y.append(y_tmp) 40 | 41 | corr += np.random.normal(0., 0.001) 42 | corrs.append(corr) 43 | 44 | return np.array(x), np.array(y), np.array(corrs) 45 | 46 | x, y = generateDataV1(100000) 47 | 48 | plt.plot(x) 49 | plt.plot(y) 50 | plt.title("V1 data (Constant correlation)") 51 | plt.show() 52 | 53 | x2, y2, corrs = generateDataV2(100000) 54 | 55 | plt.plot(x2) 56 | plt.plot(y2) 57 | plt.title("V2 data (Time-drifting correlation)") 58 | plt.show() 59 | 60 | plt.plot(corrs) 61 | plt.title("Correlation time series for V2") 62 | plt.show() 63 | 64 | # Now we model the V1 data, and examine the stability of the correlation 65 | import theano.tensor as tt 66 | with pm.Model() as model1: 67 | 68 | def custom_likelihood(x_diffs, y_obs_last, y_obs): 69 | 70 | # Model is: y(t) = y(t-1) + correlation * [x(t) - x(t-1)] 71 | expected = y_obs_last + corr * x_diffs 72 | return pm.Normal.dist(mu=expected, sd=0.01).logp(y_obs) 73 | 74 | step_size = pm.Uniform('step_size', lower=0.0, upper=1.) 75 | corr = pm.GaussianRandomWalk('corr', mu=0, sd=step_size, shape=100) 76 | corr = tt.repeat(corr, 1000) 77 | 78 | pm.DensityDist('obs', custom_likelihood, observed={ 79 | 'x_diffs': (x[:-1] - x[1:]), 80 | 'y_obs_last': y[1:], 81 | 'y_obs': y[:-1] 82 | }) 83 | 84 | mean_field = pm.fit(n=10000, method='advi', obj_optimizer=pm.adam(learning_rate=0.02)) 85 | trace = mean_field.sample(1000) 86 | 87 | estimated_corrs = np.median(trace['corr'], axis=0) 88 | 89 | plt.plot(estimated_corrs) 90 | plt.title("Estimated correlation for V1") 91 | plt.show() 92 | 93 | # Now we model the V2 data, and examine the stability of the correlation 94 | with pm.Model() as model2: 95 | 96 | def custom_likelihood(x_diffs, y_obs_last, y_obs): 97 | expected = y_obs_last + corr * x_diffs 98 | return pm.Normal.dist(mu=expected, sd=0.01).logp(y_obs) 99 | 100 | step_size = pm.Uniform('step_size', lower=0.0001, upper=1.) 101 | corr = pm.GaussianRandomWalk('corr', mu=0, sd=step_size, shape=100) 102 | corr = tt.repeat(corr, 1000) 103 | 104 | pm.DensityDist('obs', custom_likelihood, observed={ 105 | 'x_diffs': (x2[:-1] - x2[1:]), 106 | 'y_obs_last': y2[1:], 107 | 'y_obs': y2[:-1] 108 | }) 109 | 110 | mean_field = pm.fit(n=10000, method='advi', obj_optimizer=pm.adam(learning_rate=0.02)) 111 | trace = mean_field.sample(1000) 112 | 113 | pm.traceplot(trace) 114 | plt.show() 115 | 116 | estimated_corrs = np.repeat(np.median(trace['corr'], axis=0), 1000) 117 | plt.plot(estimated_corrs, color='r', label='Estimated') 118 | plt.plot(corrs, color='b', label='Actual') 119 | plt.legend(loc='upper left') 120 | plt.title("Estimated correlations for V2") 121 | plt.show() 122 | 123 | # Estimated parameters for the dynamic correlation (V2) 124 | estimated_step_size = np.median(trace['step_size']) 125 | estimated_last_value = estimated_corrs[-1] 126 | 127 | print("Estimated step size = ", estimated_step_size) 128 | 129 | # Generate monte carlo simulation for the next 100 time steps 130 | simulations = [] 131 | for _ in range(1000): 132 | sim = [estimated_last_value] 133 | for _ in range(100): 134 | sim.append(sim[-1] + np.random.normal(0, estimated_step_size)) 135 | 136 | simulations.append(np.repeat(sim, 1000)) 137 | plt.plot(sim) 138 | 139 | plt.title("Monte Carlo simulation of correlation for next 1000 points") 140 | plt.show() 141 | 142 | simulations = np.array(simulations) 143 | 144 | p75 = np.percentile(simulations, 75, axis=0) 145 | median = np.median(simulations, axis=0) 146 | p25 = np.percentile(simulations, 25, axis=0) 147 | 148 | estimatedMedian = np.concatenate((estimated_corrs, median)) 149 | estimated75 = np.concatenate((estimated_corrs, p75)) 150 | estimated25 = np.concatenate((estimated_corrs, p25)) 151 | 152 | plt.plot(corrs, color='b', label='Actual') 153 | plt.plot(estimatedMedian, color='g', label='Median estimate') 154 | plt.plot(estimated75, color='r', alpha=0.3, label='75th percentile') 155 | plt.plot(estimated25, color='r', alpha=0.3, label='25th percentile') 156 | 157 | plt.title("Prediction cone for correlation") 158 | plt.axvline(x=len(corrs), linestyle='--', color='r', alpha=0.5) 159 | plt.show() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Companion code for my talk at the PyData meetup: "Introduction to Probabilistic Programming with PyMC3" 2 | 3 | My blog: http://www.simonouellette.com/ 4 | -------------------------------------------------------------------------------- /screenshots/Cauchy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/Cauchy.png -------------------------------------------------------------------------------- /screenshots/Cauchy_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/Cauchy_data.png -------------------------------------------------------------------------------- /screenshots/V1_e_corr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/V1_e_corr.png -------------------------------------------------------------------------------- /screenshots/V2_correlations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/V2_correlations.png -------------------------------------------------------------------------------- /screenshots/V2_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/V2_data.png -------------------------------------------------------------------------------- /screenshots/V2_e_corr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/V2_e_corr.png -------------------------------------------------------------------------------- /screenshots/monte_carlo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/monte_carlo.png -------------------------------------------------------------------------------- /screenshots/prediction_cone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/prediction_cone.png -------------------------------------------------------------------------------- /screenshots/traceplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimonOuellette35/PyData-talk---Intro-to-PyMC3/827bb273dc58a9b4be46d487379400678964e21f/screenshots/traceplot.png --------------------------------------------------------------------------------