├── .gitignore ├── LICENSE ├── README.md ├── bayesnewton ├── __init__.py ├── basemodels.py ├── cubature.py ├── inference.py ├── kernels.py ├── likelihoods.py ├── models.py ├── ops.py └── utils.py ├── data ├── TRI2TU-data.csv ├── alpha_time_points.csv ├── audio_training_textures_Heavy_rain_on_hard_surface.wav ├── banana_X_train ├── banana_Y_train ├── banana_large.csv ├── coal.txt ├── fission_col_data.csv ├── laqn_sites.csv ├── ligo.txt ├── mcycle.csv ├── olympicMarathonTimes.csv ├── stim23_bees_buzzing.wav └── stim35_boiling_water.wav ├── demos ├── 2d_binary.py ├── 2d_classification.py ├── 2d_log_gaussian_cox_process.py ├── air_quality.py ├── bulk_rna_seq.py ├── classification.py ├── gprn_sample.py ├── heteroscedastic.py ├── heteroscedastic_plotting.py ├── ligo.py ├── log_gaussian_cox_process.py ├── marathon.py ├── multiclass.py ├── multistage.py ├── positive.py ├── positive2d.py ├── product.py ├── regression.py ├── regression_multi.py ├── scrna_seq.py ├── speech.py ├── step.py ├── studentt.py ├── studentt_multi.py └── studentt_multi_positive.py ├── experiments ├── README.md ├── air_quality │ ├── air_quality_bayesnewton.py │ ├── air_quality_gpflow.py │ └── air_quality_ski.py ├── aircraft │ ├── aircraft.py │ └── aircraft_accidents.txt ├── audio │ ├── audio.py │ ├── audioNMF.py │ ├── audio_timings.py │ ├── audio_varyM.py │ └── speech_female.mat ├── banana │ └── banana.py ├── binary │ └── binary.py ├── coal │ ├── binned.csv │ ├── coal.py │ └── cvind.csv ├── electricity │ └── electricity.py ├── gprn │ ├── bn-ep.sh │ ├── bn-newton.sh │ ├── bn-pl.sh │ ├── bn-vi.sh │ ├── bn_gprn.py │ └── results_bn.py ├── motorcycle │ ├── bn-ep.sh │ ├── bn-newton.sh │ ├── bn-pl.sh │ ├── bn-vi.sh │ ├── cvind.csv │ ├── heteroscedastic_bn.py │ ├── motorcycle.py │ └── results_bn.py ├── nyc_crime │ ├── nyc_crime_bayesnewton.py │ └── nyc_crime_gpflow.py ├── product │ ├── bn-ep.sh │ ├── bn-newton.sh │ ├── bn-pl.sh │ ├── bn-vi.sh │ ├── bn_product.py │ ├── bn_product_plot.py │ └── results_bn.py ├── rainforest │ └── rainforest.py └── shutters │ └── shutters.py ├── requirements.txt ├── setup.py └── tests ├── normaliser_test.py ├── spatiotemporal_test.py ├── test_gp_vs_markovgp_class.py ├── test_gp_vs_markovgp_reg.py ├── test_gp_vs_markovgp_spacetime.py ├── test_sparsemarkov.py ├── test_vs_exact_marg_lik.py ├── test_vs_gpflow_class.py ├── test_vs_gpflow_reg.py ├── test_vs_gpflow_shutters.py ├── test_vs_gpflow_spacetime.py └── variational_expectations.py /.gitignore: -------------------------------------------------------------------------------- 1 | # This is a .gitignore file which aims to keep the git 2 | # repository tidy by preventing the inclusion of different 3 | # temporary or system files. 4 | 5 | # Exclude TeX temporary working files 6 | *.acn 7 | *.acr 8 | *.alg 9 | *.aux 10 | *.bbl 11 | *.blg 12 | *.dvi 13 | *.fdb_latexmk 14 | *.glg 15 | *.glo 16 | *.gls 17 | *.idx 18 | *.ilg 19 | *.ind 20 | *.ist 21 | *.lof 22 | *.log 23 | *.lot 24 | *.maf 25 | *.mtc 26 | *.mtc0 27 | *.nav 28 | *.nlo 29 | *.out 30 | *.pdfsync 31 | *.ps 32 | *.snm 33 | *.synctex.gz 34 | *.toc 35 | *.vrb 36 | *.xdy 37 | *.tdo 38 | *.dpth 39 | *.auxlock 40 | *.dep 41 | *.brf 42 | *.pdf 43 | 44 | # Exclude backup files freated by e.g. Matlab and Emacs 45 | *~ 46 | 47 | # Exclude system specific thumbnail and other folder metadata 48 | .DS_Store 49 | .DS_Store? 50 | ._* 51 | .Spotlight-V100 52 | .Trashes 53 | Icon? 
54 | ehthumbs.db 55 | Thumbs.db 56 | 57 | setup.cfg 58 | dist/ 59 | 60 | # Exclude externalisation results from tikz 61 | tikz*.pdf 62 | *.md5 63 | *.spl 64 | 65 | *.lock 66 | *.bin 67 | *.iml 68 | 69 | .DS_Store 70 | newt/.idea* 71 | newt/__pycache__* 72 | 73 | **/.ipynb_checkpoints/* 74 | *.png 75 | *.pickle 76 | *.npy 77 | *.pyc 78 | 79 | data/aq_data.csv 80 | data/electricity.csv 81 | data/fission_normalized_counts.csv 82 | data/normalized_alpha_counts.csv 83 | data/air_quality_train_data_0.pickle 84 | 85 | 86 | notebooks/output/* 87 | experiments/rainforest/output/* 88 | experiments/motorcycle/output/* 89 | experiments/motorcycle/output_vardamp/* 90 | experiments/gprn/output/* 91 | experiments/coal/output/* 92 | experiments/banana/output/* 93 | experiments/binary/output/* 94 | experiments/audio/output/* 95 | experiments/airline/output/* 96 | experiments/aircraft/output/* 97 | experiments/electricity/output/* 98 | experiments/rainforest/output/* 99 | experiments/air_quality/output/* 100 | experiments/nyc_crime/output/* 101 | experiments/product/output/* 102 | experiments/air_quality/data/* 103 | experiments/nyc_crime/data/* 104 | 105 | experiments/rainforest/plots_for_will/* 106 | 107 | experiments/banana/banana0.slrm 108 | experiments/banana/banana1.slrm 109 | experiments/banana/banana2.slrm 110 | experiments/banana/banana3.slrm 111 | experiments/banana/banana4.slrm 112 | experiments/banana/banana_baseline0.slrm 113 | experiments/banana/banana_baseline1.slrm 114 | experiments/banana/banana_baseline2.slrm 115 | experiments/banana/banana_baseline3.slrm 116 | experiments/banana/banana_baseline4.slrm 117 | 118 | experiments/audio/audio0.slrm 119 | experiments/audio/audio1.slrm 120 | experiments/audio/audio2.slrm 121 | experiments/audio/audio3.slrm 122 | experiments/audio/audio4.slrm 123 | experiments/audio/audio5.slrm 124 | 125 | experiments/aircraft/aircraft0.slrm 126 | experiments/aircraft/aircraft1.slrm 127 | experiments/aircraft/aircraft2.slrm 128 | experiments/aircraft/aircraft3.slrm 129 | experiments/aircraft/aircraft4.slrm 130 | experiments/aircraft/aircraft5.slrm 131 | 132 | experiments/electricity/electricity0.slrm 133 | experiments/electricity/electricity1.slrm 134 | experiments/electricity/electricity2.slrm 135 | experiments/electricity/electricity3.slrm 136 | experiments/electricity/electricity4.slrm 137 | experiments/electricity/electricity5.slrm 138 | 139 | experiments/electricity/electricity.csv 140 | 141 | experiments/audio/audiovm0.slrm 142 | experiments/audio/audiovm1.slrm 143 | experiments/audio/audiovm2.slrm 144 | experiments/audio/audiovm3.slrm 145 | experiments/audio/audiovm4.slrm 146 | experiments/audio/audiovm5.slrm 147 | 148 | experiments/banana/create_txts.py 149 | experiments/banana/banana_varyM.py 150 | experiments/banana/results_varyM.py 151 | 152 | experiments/audio/fig1.png 153 | experiments/audio/fig2.png 154 | 155 | experiments/rainforest/plots_for_will 156 | 157 | newt.egg-info/* 158 | src.egg-info/* 159 | bayesnewton.egg-info/* 160 | 161 | .idea/ 162 | 163 | src/data/audio_training_textures_Heavy_rain_on_hard_surface.wav 164 | 165 | build/ 166 | -------------------------------------------------------------------------------- /bayesnewton/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | kernels, 3 | utils, 4 | ops, 5 | likelihoods, 6 | models, 7 | basemodels, 8 | inference, 9 | cubature 10 | ) 11 | 12 | 13 | def build_model(model, inf, name='GPModel'): 14 | return type(name, (inf, model), {}) 15 | -------------------------------------------------------------------------------- /data/audio_training_textures_Heavy_rain_on_hard_surface.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/BayesNewton/f72ae9a6ca69f9cce8e62135c9f78dda4825b4df/data/audio_training_textures_Heavy_rain_on_hard_surface.wav -------------------------------------------------------------------------------- /data/coal.txt: -------------------------------------------------------------------------------- 1 | 1.8512026e+03 2 | 1.8516324e+03 3 | 1.8519692e+03 4 | 1.8519747e+03 5 | 1.8523142e+03 6 | 1.8523470e+03 7 | 1.8523580e+03 8 | 1.8523854e+03 9 | 1.8529767e+03 10 | 1.8531958e+03 11 | 1.8532286e+03 12 | 1.8533190e+03 13 | 1.8534997e+03 14 | 1.8541348e+03 15 | 1.8563963e+03 16 | 1.8565058e+03 17 | 1.8565387e+03 18 | 1.8566181e+03 19 | 1.8571383e+03 20 | 1.8574038e+03 21 | 1.8575818e+03 22 | 1.8580910e+03 23 | 1.8581540e+03 24 | 1.8584059e+03 25 | 1.8589452e+03 26 | 1.8601253e+03 27 | 1.8601691e+03 28 | 1.8605907e+03 29 | 1.8608508e+03 30 | 1.8609192e+03 31 | 1.8609713e+03 32 | 1.8611848e+03 33 | 1.8617379e+03 34 | 1.8618364e+03 35 | 1.8621376e+03 36 | 1.8628932e+03 37 | 1.8629370e+03 38 | 1.8631780e+03 39 | 1.8637940e+03 40 | 1.8639391e+03 41 | 1.8639856e+03 42 | 1.8654586e+03 43 | 1.8659706e+03 44 | 1.8660637e+03 45 | 1.8663402e+03 46 | 1.8664524e+03 47 | 1.8668330e+03 48 | 1.8669480e+03 49 | 1.8669507e+03 50 | 1.8676352e+03 51 | 1.8678542e+03 52 | 1.8678624e+03 53 | 1.8687495e+03 54 | 1.8689028e+03 55 | 1.8689877e+03 56 | 1.8692505e+03 57 | 1.8694422e+03 58 | 1.8695544e+03 59 | 1.8698090e+03 60 | 1.8698747e+03 61 | 1.8701239e+03 62 | 1.8705154e+03 63 | 1.8705592e+03 64 | 1.8706331e+03 65 | 1.8710274e+03 66 | 1.8711506e+03 67 | 1.8711670e+03 68 | 1.8717365e+03 69 | 1.8718159e+03 70 | 1.8721225e+03 71 | 1.8722402e+03 72 | 1.8727687e+03 73 | 1.8731355e+03 74 | 1.8742854e+03 75 | 1.8745455e+03 76 | 1.8748877e+03 77 | 1.8749808e+03 78 | 1.8753285e+03 79 | 1.8759254e+03 80 | 1.8759309e+03 81 | 1.8759309e+03 82 | 1.8769658e+03 83 | 1.8770643e+03 84 | 1.8771054e+03 85 | 1.8771903e+03 86 | 1.8777789e+03 87 | 1.8778090e+03 88 | 1.8781841e+03 89 | 1.8781951e+03 90 | 1.8782361e+03 91 | 1.8784333e+03 92 | 1.8786961e+03 93 | 1.8790356e+03 94 | 1.8791725e+03 95 | 1.8795010e+03 96 | 1.8800568e+03 97 | 1.8805387e+03 98 | 1.8806893e+03 99 | 1.8809439e+03 100 | 1.8811054e+03 101 | 1.8819678e+03 102 | 1.8821294e+03 103 | 1.8822964e+03 104 | 1.8822991e+03 105 | 1.8823347e+03 106 | 1.8828522e+03 107 | 1.8837967e+03 108 | 1.8838515e+03 109 | 1.8840732e+03 110 | 1.8848563e+03 111 | 1.8851684e+03 112 | 1.8854641e+03 113 | 1.8859788e+03 114 | 1.8866167e+03 115 | 1.8866934e+03 116 | 1.8867536e+03 117 | 1.8869206e+03 118 | 1.8871342e+03 119 | 1.8874052e+03 120 | 1.8882977e+03 121 | 1.8890507e+03 122 | 1.8891985e+03 123 | 1.8897926e+03 124 | 1.8901020e+03 125 | 1.8901896e+03 126 | 1.8912519e+03 127 | 1.8916653e+03 128 | 1.8926537e+03 129 | 1.8935079e+03 130 | 1.8944771e+03 131 | 1.8953176e+03 132 | 1.8960705e+03 133 | 1.8962841e+03 134 | 1.8963306e+03 135 | 1.8996297e+03 136 | 1.9013929e+03 137 | 1.9026715e+03 138 | 1.9050561e+03 139 | 1.9051875e+03 140 | 1.9055243e+03 141 | 1.9067728e+03 142 | 1.9081362e+03 143 | 1.9082704e+03 144 | 
1.9086290e+03 145 | 1.9091273e+03 146 | 1.9098255e+03 147 | 1.9103566e+03 148 | 1.9109699e+03 149 | 1.9125195e+03 150 | 1.9137844e+03 151 | 1.9144086e+03 152 | 1.9166153e+03 153 | 1.9180308e+03 154 | 1.9225291e+03 155 | 1.9226769e+03 156 | 1.9235695e+03 157 | 1.9271615e+03 158 | 1.9281143e+03 159 | 1.9301540e+03 160 | 1.9307481e+03 161 | 1.9310767e+03 162 | 1.9318296e+03 163 | 1.9318843e+03 164 | 1.9320650e+03 165 | 1.9328645e+03 166 | 1.9328754e+03 167 | 1.9338830e+03 168 | 1.9347235e+03 169 | 1.9356434e+03 170 | 1.9356954e+03 171 | 1.9365962e+03 172 | 1.9374997e+03 173 | 1.9383539e+03 174 | 1.9398214e+03 175 | 1.9402183e+03 176 | 1.9404237e+03 177 | 1.9414203e+03 178 | 1.9415216e+03 179 | 1.9415736e+03 180 | 1.9420007e+03 181 | 1.9421294e+03 182 | 1.9424825e+03 183 | 1.9469452e+03 184 | 1.9470246e+03 185 | 1.9476188e+03 186 | 1.9476379e+03 187 | 1.9476872e+03 188 | 1.9514052e+03 189 | 1.9578830e+03 190 | 1.9604894e+03 191 | 1.9622197e+03 192 | -------------------------------------------------------------------------------- /data/fission_col_data.csv: -------------------------------------------------------------------------------- 1 | "","strain","minute","replicate","id" 2 | "GSM1368273","wt","0","r1","wt_0_r1" 3 | "GSM1368274","wt","0","r2","wt_0_r2" 4 | "GSM1368275","wt","0","r3","wt_0_r3" 5 | "GSM1368276","wt","15","r1","wt_15_r1" 6 | "GSM1368277","wt","15","r2","wt_15_r2" 7 | "GSM1368278","wt","15","r3","wt_15_r3" 8 | "GSM1368279","wt","30","r1","wt_30_r1" 9 | "GSM1368280","wt","30","r2","wt_30_r2" 10 | "GSM1368281","wt","30","r3","wt_30_r3" 11 | "GSM1368282","wt","60","r1","wt_60_r1" 12 | "GSM1368283","wt","60","r2","wt_60_r2" 13 | "GSM1368284","wt","60","r3","wt_60_r3" 14 | "GSM1368285","wt","120","r1","wt_120_r1" 15 | "GSM1368286","wt","120","r2","wt_120_r2" 16 | "GSM1368287","wt","120","r3","wt_120_r3" 17 | "GSM1368288","wt","180","r1","wt_180_r1" 18 | "GSM1368289","wt","180","r2","wt_180_r2" 19 | "GSM1368290","wt","180","r3","wt_180_r3" 20 | "GSM1368291","mut","0","r1","mut_0_r1" 21 | "GSM1368292","mut","0","r2","mut_0_r2" 22 | "GSM1368293","mut","0","r3","mut_0_r3" 23 | "GSM1368294","mut","15","r1","mut_15_r1" 24 | "GSM1368295","mut","15","r2","mut_15_r2" 25 | "GSM1368296","mut","15","r3","mut_15_r3" 26 | "GSM1368297","mut","30","r1","mut_30_r1" 27 | "GSM1368298","mut","30","r2","mut_30_r2" 28 | "GSM1368299","mut","30","r3","mut_30_r3" 29 | "GSM1368300","mut","60","r1","mut_60_r1" 30 | "GSM1368301","mut","60","r2","mut_60_r2" 31 | "GSM1368302","mut","60","r3","mut_60_r3" 32 | "GSM1368303","mut","120","r1","mut_120_r1" 33 | "GSM1368304","mut","120","r2","mut_120_r2" 34 | "GSM1368305","mut","120","r3","mut_120_r3" 35 | "GSM1368306","mut","180","r1","mut_180_r1" 36 | "GSM1368307","mut","180","r2","mut_180_r2" 37 | "GSM1368308","mut","180","r3","mut_180_r3" 38 | -------------------------------------------------------------------------------- /data/mcycle.csv: -------------------------------------------------------------------------------- 1 | 1,2.4,0 2 | 2,2.6,-1.3 3 | 3,3.2,-2.7 4 | 4,3.6,0 5 | 5,4,-2.7 6 | 6,6.2,-2.7 7 | 7,6.6,-2.7 8 | 8,6.8,-1.3 9 | 9,7.8,-2.7 10 | 10,8.2,-2.7 11 | 11,8.8,-1.3 12 | 12,8.8,-2.7 13 | 13,9.6,-2.7 14 | 14,10,-2.7 15 | 15,10.2,-5.4 16 | 16,10.6,-2.7 17 | 17,11,-5.4 18 | 18,11.4,0 19 | 19,13.2,-2.7 20 | 20,13.6,-2.7 21 | 21,13.8,0 22 | 22,14.6,-13.3 23 | 23,14.6,-5.4 24 | 24,14.6,-5.4 25 | 25,14.6,-9.3 26 | 26,14.6,-16 27 | 27,14.6,-22.8 28 | 28,14.8,-2.7 29 | 29,15.4,-22.8 30 | 30,15.4,-32.1 31 | 31,15.4,-53.5 32 | 32,15.4,-54.9 33 | 33,15.6,-40.2 34 
| 34,15.6,-21.5 35 | 35,15.8,-21.5 36 | 36,15.8,-50.8 37 | 37,16,-42.9 38 | 38,16,-26.8 39 | 39,16.2,-21.5 40 | 40,16.2,-50.8 41 | 41,16.2,-61.7 42 | 42,16.4,-5.4 43 | 43,16.4,-80.4 44 | 44,16.6,-59 45 | 45,16.8,-71 46 | 46,16.8,-91.1 47 | 47,16.8,-77.7 48 | 48,17.6,-37.5 49 | 49,17.6,-85.6 50 | 50,17.6,-123.1 51 | 51,17.6,-101.9 52 | 52,17.8,-99.1 53 | 53,17.8,-104.4 54 | 54,18.6,-112.5 55 | 55,18.6,-50.8 56 | 56,19.2,-123.1 57 | 57,19.4,-85.6 58 | 58,19.4,-72.3 59 | 59,19.6,-127.2 60 | 60,20.2,-123.1 61 | 61,20.4,-117.9 62 | 62,21.2,-134 63 | 63,21.4,-101.9 64 | 64,21.8,-108.4 65 | 65,22,-123.1 66 | 66,23.2,-123.1 67 | 67,23.4,-128.5 68 | 68,24,-112.5 69 | 69,24.2,-95.1 70 | 70,24.2,-81.8 71 | 71,24.6,-53.5 72 | 72,25,-64.4 73 | 73,25,-57.6 74 | 74,25.4,-72.3 75 | 75,25.4,-44.3 76 | 76,25.6,-26.8 77 | 77,26,-5.4 78 | 78,26.2,-107.1 79 | 79,26.2,-21.5 80 | 80,26.4,-65.6 81 | 81,27,-16 82 | 82,27.2,-45.6 83 | 83,27.2,-24.2 84 | 84,27.2,9.5 85 | 85,27.6,4 86 | 86,28.2,12 87 | 87,28.4,-21.5 88 | 88,28.4,37.5 89 | 89,28.6,46.9 90 | 90,29.4,-17.4 91 | 91,30.2,36.2 92 | 92,31,75 93 | 93,31.2,8.1 94 | 94,32,54.9 95 | 95,32,48.2 96 | 96,32.8,46.9 97 | 97,33.4,16 98 | 98,33.8,45.6 99 | 99,34.4,1.3 100 | 100,34.8,75 101 | 101,35.2,-16 102 | 102,35.2,-54.9 103 | 103,35.4,69.6 104 | 104,35.6,34.8 105 | 105,35.6,32.1 106 | 106,36.2,-37.5 107 | 107,36.2,22.8 108 | 108,38,46.9 109 | 109,38,10.7 110 | 110,39.2,5.4 111 | 111,39.4,-1.3 112 | 112,40,-21.5 113 | 113,40.4,-13.3 114 | 114,41.6,30.8 115 | 115,41.6,-10.7 116 | 116,42.4,29.4 117 | 117,42.8,0 118 | 118,42.8,-10.7 119 | 119,43,14.7 120 | 120,44,-1.3 121 | 121,44.4,0 122 | 122,45,10.7 123 | 123,46.6,10.7 124 | 124,47.8,-26.8 125 | 125,47.8,-14.7 126 | 126,48.8,-13.3 127 | 127,50.6,0 128 | 128,52,10.7 129 | 129,53.2,-14.7 130 | 130,55,-2.7 131 | 131,55,10.7 132 | 132,55.4,-2.7 133 | 133,57.6,10.7 134 | -------------------------------------------------------------------------------- /data/olympicMarathonTimes.csv: -------------------------------------------------------------------------------- 1 | 1896,4.47083333333333 2 | 1900,4.46472925981123 3 | 1904,5.22208333333333 4 | 1908,4.1546786744085 5 | 1912,3.90331674958541 6 | 1920,3.5695126705653 7 | 1924,3.8245447722874 8 | 1928,3.62483706600308 9 | 1932,3.59284275388079 10 | 1936,3.53880791562981 11 | 1948,3.6701030927835 12 | 1952,3.39029110874116 13 | 1956,3.43642611683849 14 | 1960,3.2058300746534 15 | 1964,3.13275664573212 16 | 1968,3.32819844373346 17 | 1972,3.13583757949204 18 | 1976,3.07895880238575 19 | 1980,3.10581822490816 20 | 1984,3.06552909112454 21 | 1988,3.09357348817 22 | 1992,3.16111703598373 23 | 1996,3.14255243512264 24 | 2000,3.08527866650867 25 | 2004,3.1026582928467 26 | 2008,2.99877552632618 27 | 2012,3.03392977050993 -------------------------------------------------------------------------------- /data/stim23_bees_buzzing.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/BayesNewton/f72ae9a6ca69f9cce8e62135c9f78dda4825b4df/data/stim23_bees_buzzing.wav -------------------------------------------------------------------------------- /data/stim35_boiling_water.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/BayesNewton/f72ae9a6ca69f9cce8e62135c9f78dda4825b4df/data/stim35_boiling_water.wav -------------------------------------------------------------------------------- /demos/2d_binary.py: 
-------------------------------------------------------------------------------- 1 | import bayesnewton 2 | from bayesnewton.cubature import Unscented 3 | import objax 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import matplotlib.cm as cm 7 | import time 8 | 9 | print('loading rainforest data ...') 10 | data = np.loadtxt('../data/TRI2TU-data.csv', delimiter=',') 11 | 12 | nr = 10 # spatial grid point (y-axis) 13 | nt = 20 # temporal grid points (x-axis) 14 | scale = 1000 / nt 15 | 16 | t, r, Y_ = bayesnewton.utils.discretegrid(data, [0, 1000, 0, 500], [nt, nr]) 17 | 18 | np.random.seed(99) 19 | N = nr * nt # number of data points 20 | 21 | # make binary for classification demo 22 | Y_ = np.sign(Y_ - np.mean(Y_)) 23 | Y_[Y_ == -1] = 0 24 | 25 | test_ind = np.random.permutation(N)[:N//4] 26 | Y = Y_.flatten() 27 | Y[test_ind] = np.nan 28 | Y = Y.reshape(nt, nr) 29 | 30 | # flatten for use in standard GP 31 | X = np.vstack((t.flatten(), r.flatten())).T 32 | Y_GP = Y.flatten() 33 | 34 | var_f = 1. # GP variance 35 | len_f = 10. # lengthscale 36 | 37 | markov = True 38 | 39 | lik = bayesnewton.likelihoods.Bernoulli() 40 | if markov: 41 | kern = bayesnewton.kernels.SpatialMatern32(variance=var_f, lengthscale=len_f, z=r[0, ...], sparse=False) 42 | # flattened data version 43 | # kern = bayesnewton.kernels.SpatialMatern32(variance=var_f, lengthscale=len_f, z=r[0, ...], sparse=True, opt_z=False) 44 | 45 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=t, R=r, Y=Y) 46 | # model = bayesnewton.models.MarkovVariationalGPMeanField(kernel=kern, likelihood=lik, X=t, R=r, Y=Y) 47 | else: 48 | kern = bayesnewton.kernels.SpatialMatern32(variance=var_f, lengthscale=len_f, z=r[0, ...], sparse=True, opt_z=False) 49 | model = bayesnewton.models.VariationalGP(kernel=kern, likelihood=lik, X=X, Y=Y_GP) 50 | 51 | lr_adam = 0.2 52 | lr_newton = 1. 53 | iters = 20 54 | opt_hypers = objax.optimizer.Adam(model.vars()) 55 | energy = objax.GradValues(model.energy, model.vars()) 56 | 57 | 58 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 59 | def train_op(): 60 | model.inference(lr=lr_newton) # perform inference and update variational params 61 | dE, E = energy() # compute energy and its gradients w.r.t. 
hypers 62 | opt_hypers(lr_adam, dE) 63 | return E 64 | 65 | 66 | train_op = objax.Jit(train_op) 67 | 68 | t0 = time.time() 69 | for i in range(1, iters + 1): 70 | loss = train_op() 71 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 72 | t1 = time.time() 73 | print('optimisation time: %2.2f secs' % (t1-t0)) 74 | 75 | # calculate posterior predictive distribution via filtering and smoothing at train & test locations: 76 | print('calculating the posterior predictive distribution ...') 77 | t0 = time.time() 78 | if markov: 79 | posterior_mean, posterior_var = model.predict(X=t, R=r) 80 | else: 81 | Xtest_GP = np.vstack((t.flatten(), r.flatten())).T 82 | posterior_mean, posterior_var = model.predict(X=Xtest_GP) 83 | posterior_mean = posterior_mean.reshape(nt, -1) 84 | t1 = time.time() 85 | print('prediction time: %2.2f secs' % (t1-t0)) 86 | 87 | link_fn = lik.link_fn 88 | 89 | print('plotting ...') 90 | cmap = cm.coolwarm 91 | plt.figure(1, figsize=(10, 5)) 92 | plt.plot(data[:, 0], data[:, 1], 'k.', markersize=2) 93 | plt.title('Tree locations') 94 | plt.xlim(0, 1000) 95 | plt.ylim(0, 500) 96 | plt.figure(2, figsize=(10, 5)) 97 | im = plt.imshow(Y_.T, cmap=cmap, extent=[0, 1000, 0, 500], origin='lower') 98 | cb = plt.colorbar(im, fraction=0.0235, pad=0.04) 99 | cb.set_ticks([cb.vmin, 0, cb.vmax]) 100 | cb.set_ticklabels([0., 0.5, 1.]) 101 | plt.title('Tree count data (full).') 102 | plt.figure(3, figsize=(10, 5)) 103 | im = plt.imshow(Y.T, cmap=cmap, extent=[0, 1000, 0, 500], origin='lower') 104 | cb = plt.colorbar(im, fraction=0.0235, pad=0.04) 105 | cb.set_ticks([cb.vmin, 0, cb.vmax]) 106 | cb.set_ticklabels([0., 0.5, 1.]) 107 | plt.title('Tree count data (with missing values).') 108 | plt.figure(4, figsize=(10, 5)) 109 | im = plt.imshow(link_fn(posterior_mean).T, cmap=cmap, extent=[0, 1000, 0, 500], origin='lower') 110 | cb = plt.colorbar(im, fraction=0.0235, pad=0.04) 111 | cb.set_ticks([cb.vmin, 0.5, cb.vmax]) 112 | cb.set_ticklabels([0., 0.5, 1.]) 113 | plt.xlim(0, 1000) 114 | plt.ylim(0, 500) 115 | plt.title('2D classification (rainforest tree data). 
Tree intensity per $m^2$.') 116 | plt.xlabel('first spatial dimension, $t$ (metres)') 117 | plt.ylabel('second spatial dimension, $r$ (metres)') 118 | plt.show() 119 | -------------------------------------------------------------------------------- /demos/2d_classification.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import matplotlib.cm as cm 6 | import time 7 | 8 | # --- small data set --- 9 | # inputs = np.loadtxt('../data/banana_X_train', delimiter=',') 10 | # X = inputs[:, :1] # temporal inputs (x-axis) 11 | # R = inputs[:, 1:] # spatial inputs (y-axis) 12 | # Y = np.loadtxt('../data/banana_Y_train')[:, None] # observations / labels 13 | 14 | # --- large data set --- 15 | inputs = np.loadtxt('../data/banana_large.csv', delimiter=',', skiprows=1) 16 | X = inputs[:, :1] # temporal inputs (x-axis) 17 | R = inputs[:, 1:2] # spatial inputs (y-axis) 18 | Y = np.maximum(inputs[:, 2:], 0) # observations / labels 19 | 20 | # Test points 21 | Xtest, Rtest = np.mgrid[-2.8:2.8:100j, -2.8:2.8:100j] 22 | Xtest_GP = np.vstack((Xtest.flatten(), Rtest.flatten())).T 23 | # X0test, X1test = np.linspace(-3., 3., num=100), np.linspace(-3., 3., num=100) 24 | 25 | Mt = 15 # num inducing points in time 26 | Ms = 15 # num inducing points in space 27 | batch_size = X.shape[0] 28 | Z = np.linspace(-3., 3., Mt)[:, None] # inducing points 29 | 30 | np.random.seed(99) 31 | N = X.shape[0] # number of training points 32 | 33 | var_f = 0.3 # GP variance 34 | len_time = 0.3 # temporal lengthscale 35 | len_space = 0.3 # spacial lengthscale 36 | 37 | markov = True 38 | 39 | kern = bayesnewton.kernels.SpatioTemporalMatern52(variance=var_f, 40 | lengthscale_time=len_time, 41 | lengthscale_space=len_space, 42 | z=np.linspace(-3, 3, Ms), 43 | sparse=True, 44 | opt_z=True, 45 | conditional='Full') 46 | lik = bayesnewton.likelihoods.Bernoulli(link='logit') 47 | if markov: 48 | # model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y) 49 | # model = bayesnewton.models.MarkovVariationalGPMeanField(kernel=kern, likelihood=lik, X=X, R=R, Y=Y) 50 | model = bayesnewton.models.SparseMarkovVariationalGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, Z=Z) 51 | # model = bayesnewton.models.SparseMarkovVariationalGPMeanField(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, Z=Z) 52 | # model = bayesnewton.models.SparseInfiniteHorizonVariationalGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, Z=Z) 53 | else: 54 | model = bayesnewton.models.VariationalGP(kernel=kern, likelihood=lik, X=inputs, Y=Y) # TODO: this model is not sparse 55 | 56 | lr_adam = 0.1 57 | lr_newton = 0.5 58 | iters = 25 59 | opt_hypers = objax.optimizer.Adam(model.vars()) 60 | energy = objax.GradValues(model.energy, model.vars()) 61 | 62 | 63 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 64 | def train_op(): 65 | batch = np.random.permutation(N)[:batch_size] 66 | model.inference(lr=lr_newton, batch_ind=batch) # perform inference and update variational params 67 | dE, E = energy(batch_ind=batch) # compute energy and its gradients w.r.t. 
hypers 68 | opt_hypers(lr_adam, dE) 69 | return E 70 | 71 | 72 | train_op = objax.Jit(train_op) 73 | 74 | t0 = time.time() 75 | for i in range(1, iters + 1): 76 | loss = train_op() 77 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 78 | t1 = time.time() 79 | print('optimisation time: %2.2f secs' % (t1-t0)) 80 | 81 | # calculate posterior predictive distribution via filtering and smoothing at train & test locations: 82 | print('calculating the posterior predictive distribution ...') 83 | t0 = time.time() 84 | if markov: 85 | posterior_mean, posterior_var = model.predict(X=Xtest, R=Rtest) 86 | else: 87 | posterior_mean, posterior_var = model.predict(X=Xtest_GP) 88 | Ntest = Xtest.shape[0] 89 | posterior_mean = posterior_mean.reshape(Ntest, -1) 90 | t1 = time.time() 91 | print('prediction time: %2.2f secs' % (t1-t0)) 92 | 93 | link_fn = lik.link_fn 94 | 95 | print('plotting ...') 96 | z_final = model.kernel.z.value.reshape(-1, 1) 97 | cmap = cm.coolwarm 98 | ax, fig = plt.subplots(1, figsize=(6, 6)) 99 | for label, mark in [[1, 'o'], [0, 'o']]: 100 | ind = Y[:, 0] == label 101 | plt.scatter(X[ind], R[ind], color=cmap(label - 0.01), s=50, alpha=.5, edgecolor='k') 102 | plt.contour(Xtest, Rtest, posterior_mean, levels=[.0], colors='k', linewidths=4.) 103 | # plt.axis('equal') 104 | plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False) 105 | plt.tick_params(axis='y', which='both', right=False, left=False, labelleft=False) 106 | # ax.axis('off') 107 | ax = plt.gca() 108 | ax.axis('equal') 109 | ax.axis('square') 110 | lim = 2.8 111 | plt.xlim(-lim, lim) 112 | plt.ylim(-lim, lim) 113 | if markov: 114 | # plt.plot(Xtest[:, 0], np.tile(z_final, [1, Xtest.shape[0]]).T, '--', color='k', linewidth=1.) 115 | plt.plot((-lim+0.05) * np.ones_like(z_final), z_final, ">", color='k', markersize=6) 116 | if hasattr(model, 'Z'): 117 | plt.plot(model.Z.value[:, 0], 0.06-lim * np.ones_like(model.Z.value[:, 0]), 'k^', markersize=6) 118 | 119 | ax2, fig2 = plt.subplots(1, figsize=(6, 6)) 120 | im = plt.imshow(link_fn(posterior_mean).T, cmap=cmap, extent=[-lim, lim, -lim, lim], origin='lower') 121 | cb = plt.colorbar(im, fraction=0.046, pad=0.04) 122 | cb.set_ticks([cb.vmin, 0, cb.vmax]) 123 | cb.set_ticklabels([-1, 0, 1]) 124 | # plt.contour(Xtest, Rtest, mu, levels=[.0], colors='k', linewidths=1.5) 125 | # plt.axis('equal') 126 | for label in [1, 0]: 127 | ind = Y[:, 0] == label 128 | plt.scatter(X[ind], R[ind], color=cmap(label - 0.01), s=50, alpha=.25, edgecolor='k') 129 | plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False) 130 | plt.tick_params(axis='y', which='both', right=False, left=False, labelleft=False) 131 | ax2 = plt.gca() 132 | ax2.axis('equal') 133 | ax2.axis('square') 134 | plt.xlim(-lim, lim) 135 | plt.ylim(-lim, lim) 136 | if markov: 137 | # plt.plot(Xtest[:, 0], np.tile(z_final, [1, Xtest.shape[0]]).T, '--', color='w', linewidth=1.) 
138 | plt.plot((-lim+0.05) * np.ones_like(z_final), model.kernel.z.value, ">", color='w', markersize=6) 139 | if hasattr(model, 'Z'): 140 | plt.plot(model.Z.value[:, 0], 0.06-lim * np.ones_like(model.Z.value[:, 0]), 'w^', markersize=6) 141 | plt.show() 142 | -------------------------------------------------------------------------------- /demos/2d_log_gaussian_cox_process.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import matplotlib.cm as cm 6 | import time 7 | import tikzplotlib 8 | 9 | print('loading rainforest data ...') 10 | data = np.loadtxt('../data/TRI2TU-data.csv', delimiter=',') 11 | 12 | nr = 50 # spatial grid point (y-axis) 13 | nt = 100 # temporal grid points (x-axis) 14 | binsize = 1000 / nt 15 | 16 | t, r, Y_ = bayesnewton.utils.discretegrid(data, [0, 1000, 0, 500], [nt, nr]) 17 | t_flat, r_flat, Y_flat = t.flatten(), r.flatten(), Y_.flatten() 18 | 19 | N = nr * nt # number of data points 20 | 21 | np.random.seed(99) 22 | test_ind = np.random.permutation(N)[:N//10] 23 | t_test = t_flat[test_ind] 24 | r_test = r_flat[test_ind] 25 | Y_test = Y_flat[test_ind] 26 | Y_flat[test_ind] = np.nan 27 | Y = Y_flat.reshape(nt, nr) 28 | 29 | # put test points on a grid to speed up prediction 30 | X_test = np.concatenate([t_test[:, None], r_test[:, None]], axis=1) 31 | t_test, r_test, Y_test = bayesnewton.utils.create_spatiotemporal_grid(X_test, Y_test) 32 | 33 | var_f = 1. # GP variance 34 | len_f = 20. # lengthscale 35 | 36 | kern = bayesnewton.kernels.SpatialMatern32(variance=var_f, lengthscale=len_f, z=r[0, ...], sparse=False) 37 | # kern = bayesnewton.kernels.SpatialMatern32(variance=var_f, lengthscale=len_f, z=r[0, ...], sparse=True) 38 | lik = bayesnewton.likelihoods.Poisson(binsize=binsize) 39 | # lik = bayesnewton.likelihoods.Gaussian(variance=1) 40 | # model = bayesnewton.models.VariationalGP(kernel=kern, likelihood=lik, X=x, Y=Y) 41 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=t, R=r, Y=Y) 42 | # model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=t_flat, R=r_flat, Y=Y_flat) 43 | # model = bayesnewton.models.InfiniteHorizonVariationalGP(kernel=kern, likelihood=lik, X=t, R=r, Y=Y) 44 | # model = bayesnewton.models.MarkovVariationalGPMeanField(kernel=kern, likelihood=lik, X=t, R=r, Y=Y) 45 | 46 | lr_adam = 0.2 47 | lr_newton = 0.2 48 | iters = 10 49 | opt_hypers = objax.optimizer.Adam(model.vars()) 50 | energy = objax.GradValues(model.energy, model.vars()) 51 | 52 | 53 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 54 | def train_op(): 55 | model.inference(lr=lr_newton) # perform inference and update variational params 56 | dE, E = energy() # compute energy and its gradients w.r.t. 
hypers 57 | opt_hypers(lr_adam, dE) 58 | test_nlpd_ = model.negative_log_predictive_density(X=t_test, R=r_test, Y=Y_test) 59 | return E, test_nlpd_ 60 | 61 | 62 | train_op = objax.Jit(train_op) 63 | 64 | t0 = time.time() 65 | for i in range(1, iters + 1): 66 | loss, test_nlpd = train_op() 67 | print('iter %2d, energy: %1.4f, nlpd: %1.4f' % (i, loss[0], test_nlpd)) 68 | t1 = time.time() 69 | print('optimisation time: %2.2f secs' % (t1-t0)) 70 | 71 | # calculate posterior predictive distribution via filtering and smoothing at train & test locations: 72 | print('calculating the posterior predictive distribution ...') 73 | t0 = time.time() 74 | posterior_mean, posterior_var = model.predict(X=t, R=r) 75 | # posterior_mean_y, posterior_var_y = model.predict_y(X=t, R=r) 76 | nlpd = model.negative_log_predictive_density(X=t_test, R=r_test, Y=Y_test) 77 | t1 = time.time() 78 | print('prediction time: %2.2f secs' % (t1-t0)) 79 | print('nlpd: %2.3f' % nlpd) 80 | 81 | link_fn = lik.link_fn 82 | 83 | print('plotting ...') 84 | cmap = cm.viridis 85 | plt.figure(1, figsize=(10, 5)) 86 | plt.plot(data[:, 0], data[:, 1], 'k.', markersize=2) 87 | plt.title('Tree locations') 88 | plt.xlim(0, 1000) 89 | plt.ylim(0, 500) 90 | plt.figure(2, figsize=(10, 5)) 91 | im = plt.imshow(Y_.T / binsize, cmap=cmap, extent=[0, 1000, 0, 500], origin='lower') 92 | plt.colorbar(im, fraction=0.0235, pad=0.04) 93 | plt.title('Tree count data (full).') 94 | plt.figure(3, figsize=(10, 5)) 95 | im = plt.imshow(Y.T / binsize, cmap=cmap, extent=[0, 1000, 0, 500], origin='lower') 96 | plt.colorbar(im, fraction=0.0235, pad=0.04) 97 | plt.title('Tree count data (with missing values).') 98 | plt.figure(4, figsize=(10, 5)) 99 | im = plt.imshow(link_fn(posterior_mean).T, cmap=cmap, extent=[0, 1000, 0, 500], origin='lower') 100 | # im = plt.imshow(posterior_mean_y.T, cmap=cmap, extent=[0, 1000, 0, 500], origin='lower') 101 | plt.colorbar(im, fraction=0.0235, pad=0.04) 102 | plt.xlim(0, 1000) 103 | plt.ylim(0, 500) 104 | # plt.title('2D log-Gaussian Cox process (rainforest tree data). Log-intensity shown.') 105 | plt.title('2D log-Gaussian Cox process (rainforest tree data). 
Tree intensity per $m^2$.') 106 | plt.xlabel('first spatial dimension, $t$ (metres)') 107 | plt.ylabel('second spatial dimension, $r$ (metres)') 108 | 109 | 110 | # plt.figure(5, figsize=(10, 5)) 111 | # plt.plot(data[:, 0], data[:, 1], 'k.', markersize=2) 112 | # bayesnewton.utils.bitmappify(plt.gca(), 200) 113 | # plt.xlabel('first spatial dimension, $t$ (metres)') 114 | # plt.ylabel('second spatial dimension, $\\Space$ (metres)') 115 | # plt.xlim(0, 1000) 116 | # plt.ylim(0, 500) 117 | # tikzplotlib.save('/Users/wilkinw1/postdoc/inprogress/ati-fcai/paper/icml2021/fig/tree_locations.tex', 118 | # axis_width='\\figurewidth', 119 | # axis_height='\\figureheight', 120 | # tex_relative_path_to_data='./fig/') 121 | # 122 | # plt.figure(6, figsize=(10, 5)) 123 | # im = plt.imshow(link_fn(posterior_mean).T, cmap=cmap, extent=[0, 1000, 0, 500], origin='lower') 124 | # plt.xlim(0, 1000) 125 | # plt.ylim(0, 500) 126 | # plt.xlabel('first spatial dimension, $t$ (metres)') 127 | # plt.ylabel('\\phantom{second spatial dimension, $\\Space$ (metres)}') 128 | # tikzplotlib.save('/Users/wilkinw1/postdoc/inprogress/ati-fcai/paper/icml2021/fig/tree_posterior.tex', 129 | # axis_width='\\figurewidth', 130 | # axis_height='\\figureheight', 131 | # tex_relative_path_to_data='./fig/') 132 | 133 | plt.show() 134 | -------------------------------------------------------------------------------- /demos/bulk_rna_seq.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import pandas as pd 5 | import matplotlib.pyplot as plt 6 | import time 7 | 8 | 9 | filename = '../data/fission_normalized_counts.csv' 10 | Y = pd.read_csv(filename, index_col=[0]) 11 | X = pd.read_csv('../data/fission_col_data.csv', index_col=[0]) 12 | X = X[['minute']] 13 | 14 | # extract time series for one gene 15 | genes_name = ['SPAC11D3.01c'] 16 | num = 18 17 | x, y = X.iloc[0:num, :].values, Y.iloc[:, 0:num].loc[genes_name].values.T 18 | 19 | # Test points 20 | # x_test = x 21 | x_plot = np.linspace(np.min(x)-5, np.max(x)+5, 200) 22 | # M = 15 23 | # z = np.linspace(np.min(x), np.max(x), M) 24 | 25 | var_f = 15.0 # GP variance 26 | len_f = 150.0 # GP lengthscale 27 | 28 | kern = bayesnewton.kernels.Matern72(variance=var_f, lengthscale=len_f) 29 | lik = bayesnewton.likelihoods.NegativeBinomial(alpha=1.0, scale=1.0) 30 | 31 | # model = bayesnewton.models.VariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 32 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 33 | # model = bayesnewton.models.MarkovVariationalGaussNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 34 | # model = bayesnewton.models.MarkovVariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 35 | # model = bayesnewton.models.MarkovVariationalRiemannGP(kernel=kern, likelihood=lik, X=x, Y=y) 36 | # model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y, power=1.) 37 | # model = bayesnewton.models.MarkovExpectationPropagationQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y, power=1.) 38 | # model = bayesnewton.models.MarkovExpectationPropagationRiemannGP(kernel=kern, likelihood=lik, X=x, Y=y, power=1.) 
39 | 40 | lr_adam = 0.1 41 | lr_newton = 0.25 42 | iters = 500 43 | opt_hypers = objax.optimizer.Adam(model.vars()) 44 | energy = objax.GradValues(model.energy, model.vars()) 45 | 46 | 47 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 48 | def train_op(): 49 | model.inference(lr=lr_newton) # perform inference and update variational params 50 | dE, E = energy() # compute energy and its gradients w.r.t. hypers 51 | opt_hypers(lr_adam, dE) 52 | return E 53 | 54 | 55 | train_op = objax.Jit(train_op) 56 | 57 | t0 = time.time() 58 | for i in range(1, iters + 1): 59 | loss = train_op() 60 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 61 | t1 = time.time() 62 | print('optimisation time: %2.2f secs' % (t1-t0)) 63 | 64 | print(model.likelihood.alpha) 65 | print(model.kernel.variance) 66 | print(model.kernel.lengthscale) 67 | 68 | # calculate posterior predictive distribution via filtering and smoothing at train & test locations: 69 | print('calculating the posterior predictive distribution ...') 70 | t0 = time.time() 71 | # posterior_mean, posterior_var = model.predict(X=x_plot) 72 | posterior_mean_y, posterior_var_y = model.predict_y(X=x_plot) 73 | t1 = time.time() 74 | print('prediction time: %2.2f secs' % (t1-t0)) 75 | 76 | lb_y = posterior_mean_y - np.sqrt(posterior_var_y) 77 | ub_y = posterior_mean_y + np.sqrt(posterior_var_y) 78 | 79 | print('plotting ...') 80 | plt.figure(1, figsize=(10, 6)) 81 | plt.clf() 82 | plt.plot(x, y, 'b.', label='observations', clip_on=False) 83 | plt.plot(x_plot, posterior_mean_y, 'b', label='posterior mean') 84 | plt.fill_between(x_plot, lb_y, ub_y, color='b', alpha=0.05, label='posterior std') 85 | plt.xlim(x_plot[0], x_plot[-1]) 86 | plt.ylim(0.0) 87 | plt.legend() 88 | plt.title('') 89 | plt.xlabel('time') 90 | plt.ylabel('gene expression') 91 | plt.show() 92 | -------------------------------------------------------------------------------- /demos/classification.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import time 6 | 7 | np.random.seed(99) 8 | N = 500 # number of training points 9 | M = 20 10 | Nbatch = 100 11 | # x = 100 * np.random.rand(N) 12 | x0 = 40 * np.random.rand(N//2) 13 | x1 = 40 * np.random.rand(N//2) + 60 14 | x = np.concatenate([x0, np.array([50]), x1], axis=0) 15 | # x = np.linspace(np.min(x), np.max(x), N) 16 | f = lambda x_: 6 * np.sin(np.pi * x_ / 10.0) / (np.pi * x_ / 10.0 + 1) 17 | y_ = f(x) + np.sqrt(0.05)*np.random.randn(x.shape[0]) 18 | y = np.sign(y_) 19 | y[y == -1] = 0 20 | x_test = np.linspace(np.min(x)-5.0, np.max(x)+5.0, num=500) 21 | y_test = np.sign(f(x_test) + np.sqrt(0.05)*np.random.randn(x_test.shape[0])) 22 | y_test[y_test == -1] = 0 23 | x_plot = np.linspace(np.min(x)-10.0, np.max(x)+10.0, num=500) 24 | z = np.linspace(min(x), max(x), num=M) 25 | 26 | x = x[:, None] 27 | x_plot = x_plot[:, None] 28 | 29 | var_f = 1. 
# GP variance 30 | len_f = 5.0 # GP lengthscale 31 | 32 | kern = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 33 | lik = bayesnewton.likelihoods.Bernoulli(link='logit') 34 | 35 | # model = bayesnewton.models.VariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 36 | # model = bayesnewton.models.ExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y, power=0.5) 37 | # model = bayesnewton.models.SparseVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y, Z=z) 38 | model = bayesnewton.models.SparseExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y, Z=z, power=0.5) 39 | # model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 40 | # model = bayesnewton.models.MarkovVariationalGaussNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 41 | # model = bayesnewton.models.MarkovNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 42 | # model = bayesnewton.models.MarkovQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y, damped=True) 43 | # model = bayesnewton.models.MarkovVariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 44 | # model = bayesnewton.models.MarkovExpectationPropagationQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y, power=0.5) 45 | # model = bayesnewton.models.MarkovExpectationPropagationRiemannGP(kernel=kern, likelihood=lik, X=x, Y=y) 46 | # model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y) 47 | # model = bayesnewton.models.MarkovPosteriorLinearisationGP(kernel=kern, likelihood=lik, X=x, Y=y) 48 | # model = bayesnewton.models.MarkovPosteriorLinearisation2ndOrderGP(kernel=kern, likelihood=lik, X=x, Y=y) 49 | # model = bayesnewton.models.MarkovPosteriorLinearisation2ndOrderQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 50 | # model = bayesnewton.models.MarkovTaylorNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 51 | # model = bayesnewton.models.MarkovLaplaceGP(kernel=kern, likelihood=lik, X=x, Y=y) 52 | # model = bayesnewton.models.InfiniteHorizonVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 53 | # model = bayesnewton.models.SparseMarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y, Z=z) 54 | 55 | lr_adam = 0.05 56 | lr_newton = 1 57 | iters = 200 58 | opt_hypers = objax.optimizer.Adam(model.vars()) 59 | energy = objax.GradValues(model.energy, model.vars()) 60 | 61 | 62 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 63 | def train_op(ind): 64 | model.inference(batch_ind=ind, lr=lr_newton) # perform inference and update variational params 65 | dE, E = energy(batch_ind=ind) # compute energy and its gradients w.r.t. 
hypers 66 | opt_hypers(lr_adam, dE) 67 | return E 68 | 69 | 70 | train_op = objax.Jit(train_op) 71 | 72 | t0 = time.time() 73 | for i in range(1, iters + 1): 74 | batch_ind = np.random.permutation(N)[:Nbatch] 75 | loss = train_op(batch_ind) 76 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 77 | t1 = time.time() 78 | print('optimisation time: %2.2f secs' % (t1-t0)) 79 | 80 | # calculate posterior predictive distribution via filtering and smoothing at train & test locations: 81 | print('calculating the posterior predictive distribution ...') 82 | t0 = time.time() 83 | posterior_mean, posterior_var = model.predict(X=x_plot) 84 | nlpd = model.negative_log_predictive_density(X=x_test, Y=y_test) 85 | t1 = time.time() 86 | print('prediction time: %2.2f secs' % (t1-t0)) 87 | print('nlpd: %2.3f' % nlpd) 88 | lb = np.squeeze(posterior_mean) - 1.96 * np.squeeze(posterior_var) ** 0.5 89 | ub = np.squeeze(posterior_mean) + 1.96 * np.squeeze(posterior_var) ** 0.5 90 | link_fn = lik.link_fn 91 | 92 | print('plotting ...') 93 | plt.figure(1, figsize=(12, 5)) 94 | plt.clf() 95 | plt.plot(x, y, 'b+', label='training observations') 96 | plt.plot(x_test, y_test, 'r+', alpha=0.4, label='test observations') 97 | plt.plot(x_plot, link_fn(posterior_mean), 'm', label='posterior mean') 98 | plt.fill_between(x_plot[:, 0], link_fn(lb), link_fn(ub), color='m', alpha=0.05, label='95% confidence') 99 | if hasattr(model, 'Z'): 100 | plt.plot(model.Z.value[:, 0], +0.03 * np.ones_like(model.Z.value[:, 0]), 'm^', markersize=5) 101 | plt.xlim(x_plot[0], x_plot[-1]) 102 | plt.legend(loc=3) 103 | plt.title('GP classification.') 104 | plt.xlabel('$X$') 105 | plt.show() 106 | -------------------------------------------------------------------------------- /demos/heteroscedastic.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import time 6 | from sklearn.preprocessing import StandardScaler 7 | 8 | print('loading data ...') 9 | D = np.loadtxt('../data/mcycle.csv', delimiter=',') 10 | X = D[:, 1:2] 11 | Y = D[:, 2:] 12 | 13 | # Standardize 14 | X_scaler = StandardScaler().fit(X) 15 | y_scaler = StandardScaler().fit(Y) 16 | Xall = X_scaler.transform(X) 17 | Yall = y_scaler.transform(Y) 18 | x_plot = np.linspace(np.min(Xall)-0.2, np.max(Xall)+0.2, 200)[:, None] 19 | 20 | # Load cross-validation indices 21 | cvind = np.loadtxt('../experiments/motorcycle/cvind.csv').astype(int) 22 | 23 | # 10-fold cross-validation setup 24 | nt = np.floor(cvind.shape[0]/10).astype(int) 25 | cvind = np.reshape(cvind[:10*nt], (10, nt)) 26 | 27 | np.random.seed(123) 28 | fold = 3 29 | 30 | # Get training and test indices 31 | test = cvind[fold, :] 32 | train = np.setdiff1d(cvind, test) 33 | 34 | # Set training and test data 35 | X = Xall[train, :] 36 | Y = Yall[train, :] 37 | XT = Xall[test, :] 38 | YT = Yall[test, :] 39 | N = X.shape[0] 40 | M = 20 41 | batch_size = N # 100 42 | Z = np.linspace(np.min(Xall), np.max(Xall), M) 43 | 44 | var_f1 = 1. # GP variance 45 | len_f1 = 1. # GP lengthscale 46 | var_f2 = 1. # GP variance 47 | len_f2 = 1. 
# GP lengthscale 48 | 49 | kern1 = bayesnewton.kernels.Matern32(variance=var_f1, lengthscale=len_f1, fix_variance=True, fix_lengthscale=True) 50 | kern2 = bayesnewton.kernels.Matern32(variance=var_f2, lengthscale=len_f2, fix_variance=True, fix_lengthscale=True) 51 | kern = bayesnewton.kernels.Independent([kern1, kern2]) 52 | lik = bayesnewton.likelihoods.HeteroscedasticNoise() 53 | 54 | # model = bayesnewton.models.MarkovNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 55 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=X, Y=Y) 56 | # model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=X, Y=Y, power=0.5) 57 | # model = bayesnewton.models.MarkovPosteriorLinearisation2ndOrderGP(kernel=kern, likelihood=lik, X=X, Y=Y) 58 | # model = bayesnewton.models.InfiniteHorizonVariationalGP(kernel=kern, likelihood=lik, X=X, Y=Y) 59 | # model = bayesnewton.models.SparseInfiniteHorizonExpectationPropagationGP(kernel=kern, likelihood=lik, X=X, Y=Y) 60 | # --- Gauss-Newton --- 61 | # model = bayesnewton.models.MarkovGaussNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 62 | # model = bayesnewton.models.MarkovVariationalGaussNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 63 | # model = bayesnewton.models.MarkovPosteriorLinearisation2ndOrderGaussNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 64 | # --- quasi-Newton --- 65 | # model = bayesnewton.models.MarkovQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 66 | # model = bayesnewton.models.MarkovVariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 67 | # model = bayesnewton.models.MarkovExpectationPropagationQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y, power=0.5) 68 | # model = bayesnewton.models.MarkovPosteriorLinearisation2ndOrderQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 69 | # --- Riemannian grads --- 70 | # model = bayesnewton.models.MarkovVariationalRiemannGP(kernel=kern, likelihood=lik, X=X, Y=Y) 71 | # model = bayesnewton.models.MarkovExpectationPropagationRiemannGP(kernel=kern, likelihood=lik, X=X, Y=Y, power=0.5) 72 | 73 | # --- GP --- 74 | # model = bayesnewton.models.NewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 75 | # model = bayesnewton.models.VariationalGP(kernel=kern, likelihood=lik, X=X, Y=Y) 76 | # model = bayesnewton.models.ExpectationPropagationGP(kernel=kern, likelihood=lik, X=X, Y=Y) 77 | # model = bayesnewton.models.VariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 78 | # --- sparse --- 79 | # model = bayesnewton.models.SparseVariationalGP(kernel=kern, likelihood=lik, X=X, Y=Y, Z=Z) 80 | # model = bayesnewton.models.SparseExpectationPropagationGP(kernel=kern, likelihood=lik, X=X, Y=Y, Z=Z) 81 | # --- sparse quasi-Newton --- 82 | # model = bayesnewton.models.SparseQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y, Z=Z) 83 | # model = bayesnewton.models.SparseVariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y, Z=Z) 84 | # model = bayesnewton.models.SparseExpectationPropagationQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y, Z=Z, power=0.5) 85 | 86 | lr_adam = 0.01 87 | lr_newton = 0.3 88 | iters = 300 89 | opt_hypers = objax.optimizer.Adam(model.vars()) 90 | energy = objax.GradValues(model.energy, model.vars()) 91 | 92 | damping = 0.5 93 | 94 | 95 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 96 | def train_op(): 97 | model.inference(lr=lr_newton, damping=damping) # perform inference and update variational params 98 | dE, E = energy() # compute energy and its gradients w.r.t. 
hypers 99 | # opt_hypers(lr_adam, dE) 100 | test_nlpd_ = model.negative_log_predictive_density(X=XT, Y=YT) 101 | return E, test_nlpd_ 102 | 103 | 104 | train_op = objax.Jit(train_op) 105 | 106 | t0 = time.time() 107 | for i in range(1, iters + 1): 108 | loss, test_nlpd = train_op() 109 | print('iter %2d, energy: %1.4f, nlpd: %1.4f' % (i, loss[0], test_nlpd)) 110 | t1 = time.time() 111 | print('optimisation time: %2.2f secs' % (t1-t0)) 112 | 113 | t0 = time.time() 114 | posterior_mean, posterior_var = model.predict(X=x_plot) 115 | nlpd = model.negative_log_predictive_density(X=XT, Y=YT) 116 | t1 = time.time() 117 | print('prediction time: %2.2f secs' % (t1-t0)) 118 | print('NLPD: %1.2f' % nlpd) 119 | 120 | print('avg cross cov:', np.mean(np.abs(posterior_var[:, 0, 1]))) 121 | 122 | x_pred = X_scaler.inverse_transform(x_plot) 123 | link = model.likelihood.link_fn 124 | lb = posterior_mean[:, 0] - np.sqrt(posterior_var[:, 0, 0] + link(posterior_mean[:, 1]) ** 2) * 1.96 125 | ub = posterior_mean[:, 0] + np.sqrt(posterior_var[:, 0, 0] + link(posterior_mean[:, 1]) ** 2) * 1.96 126 | post_mean = y_scaler.inverse_transform(posterior_mean[:, 0:1]) 127 | lb = y_scaler.inverse_transform(lb[:, None])[:, 0] 128 | ub = y_scaler.inverse_transform(ub[:, None])[:, 0] 129 | 130 | print('plotting ...') 131 | plt.figure(1, figsize=(12, 5)) 132 | plt.clf() 133 | plt.plot(X_scaler.inverse_transform(X), y_scaler.inverse_transform(Y), 'k.', label='train') 134 | plt.plot(X_scaler.inverse_transform(XT), y_scaler.inverse_transform(YT), 'r.', label='test') 135 | plt.plot(x_pred, post_mean, 'c', label='posterior mean') 136 | plt.fill_between(x_pred[:, 0], lb, ub, color='c', alpha=0.05, label='95% confidence') 137 | plt.xlim(x_pred[0], x_pred[-1]) 138 | if hasattr(model, 'Z'): 139 | plt.plot(X_scaler.inverse_transform(model.Z.value[:, 0]), 140 | (np.min(lb)-5)*np.ones_like(model.Z.value[:, 0]), 141 | 'c^', 142 | markersize=4) 143 | plt.legend() 144 | plt.title('Heteroscedastic Noise Model (motorcycle crash data)') 145 | plt.xlabel('time (milliseconds)') 146 | plt.ylabel('accelerometer reading') 147 | plt.show() 148 | -------------------------------------------------------------------------------- /demos/ligo.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import time 6 | 7 | 8 | # load graviational wave data 9 | data = np.loadtxt('../data/ligo.txt') # https://www.gw-openscience.org/events/GW150914/ 10 | 11 | 12 | np.random.seed(12345) 13 | x = data[:, 0] 14 | y = data[:, 1] 15 | x_test = x 16 | y_test = y 17 | x_plot = x 18 | 19 | var_f = 1.0 # GP variance 20 | len_f = 0.1 # GP lengthscale 21 | var_y = 0.01 # observation noise 22 | 23 | kern = bayesnewton.kernels.Matern12(variance=var_f, lengthscale=len_f) 24 | lik = bayesnewton.likelihoods.Gaussian(variance=var_y) # , fix_variance=True) 25 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 26 | 27 | lr_adam = 0.1 28 | lr_newton = 1 29 | iters = 100 30 | opt_hypers = objax.optimizer.Adam(model.vars()) 31 | energy = objax.GradValues(model.energy, model.vars()) 32 | inf_args = { 33 | "power": 0.5, # the EP power 34 | } 35 | 36 | 37 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 38 | def train_op(): 39 | model.inference(lr=lr_newton, **inf_args) # perform inference and update variational params 40 | dE, E = energy(**inf_args) # compute energy and its gradients w.r.t. 
hypers 41 | opt_hypers(lr_adam, dE) 42 | return E 43 | 44 | 45 | train_op = objax.Jit(train_op) 46 | 47 | t0 = time.time() 48 | for i in range(1, iters + 1): 49 | loss = train_op() 50 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 51 | t1 = time.time() 52 | print('optimisation time: %2.2f secs' % (t1-t0)) 53 | 54 | t0 = time.time() 55 | posterior_mean, posterior_var = model.predict_y(X=x_plot) 56 | nlpd = model.negative_log_predictive_density(X=x_test, Y=y_test) 57 | t1 = time.time() 58 | print('prediction time: %2.2f secs' % (t1-t0)) 59 | print('nlpd: %2.3f' % nlpd) 60 | lb = posterior_mean - 1.96 * posterior_var ** 0.5 61 | ub = posterior_mean + 1.96 * posterior_var ** 0.5 62 | 63 | print('plotting ...') 64 | plt.figure(1, figsize=(12, 5)) 65 | plt.clf() 66 | plt.plot(x, y, 'k.', label='training observations') 67 | plt.plot(x_test, y_test, 'r.', alpha=0.4, label='test observations') 68 | plt.plot(x_plot, posterior_mean, 'b', label='posterior mean') 69 | # plt.plot(x_plot, posterior_samples.T, 'b', alpha=0.2) 70 | plt.fill_between(x_plot, lb, ub, color='b', alpha=0.05, label='95% confidence') 71 | plt.xlim([x_plot[0], x_plot[-1]]) 72 | if hasattr(model, 'Z'): 73 | plt.plot(model.Z.value[:, 0], -2 * np.ones_like(model.Z.value[:, 0]), 'b^', markersize=5) 74 | # plt.xlim([x_test[0], x_test[-1]]) 75 | # plt.ylim([-2, 5]) 76 | plt.legend() 77 | plt.title('GP regression') 78 | plt.xlabel('$X$') 79 | plt.show() 80 | -------------------------------------------------------------------------------- /demos/log_gaussian_cox_process.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import pandas as pd 5 | import matplotlib.pyplot as plt 6 | import time 7 | 8 | print('loading coal data ...') 9 | disaster_timings = pd.read_csv('../data/coal.txt', header=None).values[:, 0] 10 | 11 | # Discretization 12 | num_time_bins = 200 13 | # Discretize the data 14 | x = np.linspace(min(disaster_timings), max(disaster_timings), num_time_bins).T 15 | y = np.histogram(disaster_timings, np.concatenate([[-1e10], x[:-1] + np.diff(x)/2, [1e10]]))[0][:, None] 16 | # Test points 17 | x_test = x 18 | x_plot = np.linspace(np.min(x_test)-5, np.max(x_test)+5, 200) 19 | M = 15 20 | z = np.linspace(np.min(x), np.max(x), M) 21 | 22 | x = x[:, None] 23 | 24 | meanval = np.log(len(disaster_timings)/num_time_bins) # TODO: incorporate mean 25 | binsize = (max(x) - min(x)) / num_time_bins 26 | 27 | var_f = 1.0 # GP variance 28 | len_f = 4. 
# GP lengthscale 29 | 30 | kern = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 31 | lik = bayesnewton.likelihoods.Poisson(binsize=binsize, link='logistic') 32 | 33 | # model = bayesnewton.models.VariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 34 | # model = bayesnewton.models.SparseMarkovVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y, Z=z) 35 | # model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 36 | # model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y) 37 | # model = bayesnewton.models.MarkovVariationalGaussNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 38 | # model = bayesnewton.models.MarkovVariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 39 | model = bayesnewton.models.MarkovExpectationPropagationQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 40 | # model = bayesnewton.models.MarkovGaussNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 41 | # model = bayesnewton.models.MarkovTaylorGP(kernel=kern, likelihood=lik, X=x, Y=y) 42 | # model = bayesnewton.models.MarkovTaylorNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 43 | # model = bayesnewton.models.MarkovLaplaceGP(kernel=kern, likelihood=lik, X=x, Y=y) 44 | 45 | lr_adam = 0.1 46 | lr_newton = 1 47 | iters = 100 48 | opt_hypers = objax.optimizer.Adam(model.vars()) 49 | energy = objax.GradValues(model.energy, model.vars()) 50 | 51 | 52 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 53 | def train_op(): 54 | model.inference(lr=lr_newton) # perform inference and update variational params 55 | dE, E = energy() # compute energy and its gradients w.r.t. hypers 56 | opt_hypers(lr_adam, dE) 57 | return E 58 | 59 | 60 | train_op = objax.Jit(train_op) 61 | 62 | t0 = time.time() 63 | for i in range(1, iters + 1): 64 | loss = train_op() 65 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 66 | t1 = time.time() 67 | print('optimisation time: %2.2f secs' % (t1-t0)) 68 | 69 | # calculate posterior predictive distribution via filtering and smoothing at train & test locations: 70 | print('calculating the posterior predictive distribution ...') 71 | t0 = time.time() 72 | posterior_mean, posterior_var = model.predict(X=x_plot) 73 | # posterior_mean_y, posterior_var_y = model.predict_y(X=x_plot) 74 | t1 = time.time() 75 | print('prediction time: %2.2f secs' % (t1-t0)) 76 | 77 | link_fn = lik.link_fn 78 | 79 | post_mean_lgcp = link_fn(posterior_mean + posterior_var / 2) 80 | lb_lgcp = link_fn(posterior_mean - np.sqrt(posterior_var) * 1.645) 81 | ub_lgcp = link_fn(posterior_mean + np.sqrt(posterior_var) * 1.645) 82 | 83 | # lb_y = posterior_mean_y - np.sqrt(posterior_var_y) 84 | # ub_y = posterior_mean_y + np.sqrt(posterior_var_y) 85 | 86 | print('plotting ...') 87 | plt.figure(1, figsize=(12, 5)) 88 | plt.clf() 89 | plt.plot(disaster_timings, 0*disaster_timings, 'k+', label='observations', clip_on=False) 90 | plt.plot(x_plot, post_mean_lgcp, 'g', label='posterior mean') 91 | # plt.plot(x_plot, posterior_mean_y, 'r', label='posterior mean (y)') 92 | plt.fill_between(x_plot, lb_lgcp, ub_lgcp, color='g', alpha=0.05, label='95% confidence') 93 | # plt.fill_between(x_plot, lb_y, ub_y, color='r', alpha=0.05, label='1 std (y)') 94 | plt.xlim(x_plot[0], x_plot[-1]) 95 | plt.ylim(0.0) 96 | plt.legend() 97 | plt.title('log-Gaussian Cox process via Kalman smoothing (coal mining disasters)') 98 | plt.xlabel('year') 99 | plt.ylabel('accident intensity') 100 | plt.show() 101 | 
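The demo scripts collected above all share one training pattern: construct a kernel and likelihood, instantiate a model class from bayesnewton.models, then alternate model.inference() (which updates the approximate posterior) with Adam steps on the gradients of model.energy() (which updates the hyperparameters). The build_model helper defined in bayesnewton/__init__.py builds such a model class by composing a base model with an inference class. Below is a minimal sketch of that pattern, not taken from the repository; the argument classes bayesnewton.basemodels.MarkovGP and bayesnewton.inference.VariationalInference are assumed names inferred from the module listing and may differ from the actual source.

import numpy as np
import objax
import bayesnewton

# toy 1D regression data (illustrative only)
x = np.linspace(0.0, 10.0, 100)[:, None]
y = np.sin(x[:, 0]) + 0.1 * np.random.randn(100)

kern = bayesnewton.kernels.Matern52(variance=1.0, lengthscale=1.0)
lik = bayesnewton.likelihoods.Gaussian(variance=0.1)

# build_model(model, inf) returns type(name, (inf, model), {}); with the assumed
# class names this composes something akin to bayesnewton.models.MarkovVariationalGP
Model = bayesnewton.build_model(bayesnewton.basemodels.MarkovGP,
                                bayesnewton.inference.VariationalInference)
model = Model(kernel=kern, likelihood=lik, X=x, Y=y)

opt_hypers = objax.optimizer.Adam(model.vars())
energy = objax.GradValues(model.energy, model.vars())


@objax.Function.with_vars(model.vars() + opt_hypers.vars())
def train_op():
    model.inference(lr=1.0)   # update the (approximate) posterior
    dE, E = energy()          # energy and its gradients w.r.t. the hyperparameters
    opt_hypers(0.1, dE)       # Adam step on the hyperparameters
    return E


train_op = objax.Jit(train_op)
for i in range(1, 21):
    print('iter %2d, energy: %1.4f' % (i, train_op()[0]))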
-------------------------------------------------------------------------------- /demos/marathon.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | 5 | # import pods 6 | import matplotlib.pyplot as plt 7 | import time 8 | 9 | 10 | # data = pods.datasets.olympic_marathon_men() 11 | data = np.loadtxt("../data/olympicMarathonTimes.csv", delimiter=",") 12 | x = data[:, :1] 13 | y = data[:, 1:] 14 | 15 | x_train = x[:-2, :] 16 | y_train = y[:-2, :] 17 | 18 | x_test = x[-2:, :] 19 | y_test = y[-2:, :] 20 | 21 | offset = y_train.mean() 22 | scale = np.sqrt(y_train.var()) 23 | 24 | # remove outlier 25 | # y[2] = np.nan 26 | 27 | xlim = (1875, 2030) 28 | ylim = (2.5, 6.5) 29 | yhat = (y_train - offset) / scale 30 | 31 | np.random.seed(12345) 32 | x_plot = np.linspace(xlim[0], xlim[1], 200)[:, None] 33 | 34 | var_f = 1.0 # GP variance 35 | len_f = 40 # GP lengthscale 36 | var_y = 0.5 # observation noise 37 | 38 | kern = bayesnewton.kernels.SquaredExponential(variance=var_f, lengthscale=len_f) 39 | lik = bayesnewton.likelihoods.Gaussian(variance=var_y) 40 | model = bayesnewton.models.VariationalGP(kernel=kern, likelihood=lik, X=x_train, Y=yhat) 41 | 42 | 43 | lr_adam = 0.1 44 | lr_newton = 1.0 45 | iters = 100 46 | opt_hypers = objax.optimizer.Adam(model.vars()) 47 | energy = objax.GradValues(model.energy, model.vars()) 48 | 49 | 50 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 51 | def train_op(): 52 | model.inference(lr=lr_newton) # perform inference and update variational params 53 | dE, E = energy() # compute energy and its gradients w.r.t. hypers 54 | opt_hypers(lr_adam, dE) 55 | return E 56 | 57 | 58 | train_op = objax.Jit(train_op) 59 | 60 | t0 = time.time() 61 | for i in range(1, iters + 1): 62 | loss = train_op() 63 | print("iter %2d, energy: %1.4f" % (i, loss[0])) 64 | t1 = time.time() 65 | print("optimisation time: %2.2f secs" % (t1 - t0)) 66 | 67 | t0 = time.time() 68 | posterior_mean, posterior_var = model.predict_y(X=x_plot) 69 | t1 = time.time() 70 | print("prediction time: %2.2f secs" % (t1 - t0)) 71 | lb = posterior_mean - 2 * posterior_var**0.5 72 | ub = posterior_mean + 2 * posterior_var**0.5 73 | 74 | print("plotting ...") 75 | plt.figure(1, figsize=(8, 4)) 76 | plt.clf() 77 | plt.plot(x_train, y_train, "k.", label="training observations") 78 | plt.plot(x_test, y_test, "gx", label="held out observations") 79 | plt.plot(x_plot, posterior_mean * scale + offset, "r", label="posterior mean") 80 | plt.fill_between( 81 | x_plot[:, 0], 82 | lb * scale + offset, 83 | ub * scale + offset, 84 | color="r", 85 | alpha=0.05, 86 | label="2 std", 87 | ) 88 | plt.xlim([x_plot[0], x_plot[-1]]) 89 | plt.ylim([2.8, 5.5]) 90 | plt.legend(loc=1) 91 | # plt.xticks([-2, -1, 0., 1., 2]) 92 | # plt.yticks([-0.5, 0., 0.5, 1., 1.5]) 93 | plt.title("GP regression - Olympic Marathon Data") 94 | plt.xlabel("Year") 95 | plt.ylabel("Pace, min / km") 96 | # plt.savefig('/Users/wilkinw1/postdoc/gp_course/lec8_deepgps/marathon.png') 97 | # plt.savefig('/Users/wilkinw1/postdoc/gp_course/lec8_deepgps/marathon_outlier_removed.png') 98 | plt.show() 99 | -------------------------------------------------------------------------------- /demos/multiclass.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import time 6 | 7 | N = 250 8 | XAll = np.linspace(-10., 10., num=N) 9 
| 10 | num_classes = 3 11 | basekernel = bayesnewton.kernels.Matern52 12 | kern = bayesnewton.kernels.Independent( 13 | kernels=[basekernel() for i in range(num_classes)] 14 | ) 15 | 16 | lik = bayesnewton.likelihoods.Softmax(num_classes) 17 | 18 | dummy_model = bayesnewton.basemodels.MarkovGaussianProcess(kernel=kern, likelihood=lik, X=XAll, Y=np.zeros_like(XAll)) 19 | f_samp = np.squeeze(dummy_model.prior_sample(seed=12345)) 20 | 21 | # Hard max observation 22 | Y_max = np.argmax(f_samp, 1).reshape(-1,).astype(int) 23 | 24 | # One-hot encoding 25 | Y_hot = np.zeros((N, num_classes), dtype=bool) 26 | Y_hot[np.arange(N), Y_max] = 1 27 | order = np.argsort(XAll.reshape(-1,)) 28 | 29 | colors = ['r', 'b', 'g', 'y'] 30 | # plt.figure(1) 31 | # for c in range(num_classes): 32 | # plt.plot(XAll[order], f_samp[order, c], ".", color=colors[c], label=str(c)) 33 | # plt.plot(XAll[order], Y_hot[order, c], "-", color=colors[c]) 34 | # # plt.plot(XAll, f_samp) 35 | # plt.show() 36 | 37 | x_plot = np.linspace(np.min(XAll) - 2, np.max(XAll) + 2, 500) 38 | 39 | np.random.seed(123) 40 | # 10-fold cross-validation setup 41 | ind_shuffled = np.random.permutation(N) 42 | ind_split = np.stack(np.split(ind_shuffled, 10)) # 10 random batches of data indices 43 | fold = 7 44 | 45 | # Get training and test indices 46 | ind_test = ind_split[fold] # np.sort(ind_shuffled[:N//10]) 47 | ind_train = np.concatenate(ind_split[np.arange(10) != fold]) 48 | X = XAll[ind_train] # 90/10 train/test split 49 | XT = XAll[ind_test] 50 | Y = Y_max[ind_train] 51 | YT = Y_max[ind_test] 52 | 53 | # model = bayesnewton.models.MarkovNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 54 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=X, Y=Y) 55 | # model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=X, Y=Y, power=0.5) 56 | # model = bayesnewton.models.MarkovPosteriorLinearisation2ndOrderGP(kernel=kern, likelihood=lik, X=X, Y=Y) 57 | # model = bayesnewton.models.InfiniteHorizonVariationalGP(kernel=kern, likelihood=lik, X=X, Y=Y) 58 | # model = bayesnewton.models.SparseInfiniteHorizonExpectationPropagationGP(kernel=kern, likelihood=lik, X=X, Y=Y) 59 | # --- Gauss-Newton --- 60 | # model = bayesnewton.models.MarkovGaussNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 61 | # model = bayesnewton.models.MarkovVariationalGaussNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 62 | # model = bayesnewton.models.MarkovPosteriorLinearisation2ndOrderGaussNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 63 | # --- quasi-Newton --- 64 | # model = bayesnewton.models.MarkovQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 65 | # model = bayesnewton.models.MarkovVariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 66 | # model = bayesnewton.models.MarkovExpectationPropagationQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y, power=0.5) 67 | # model = bayesnewton.models.MarkovPosteriorLinearisation2ndOrderQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 68 | # --- Riemannian grads --- 69 | # model = bayesnewton.models.MarkovVariationalRiemannGP(kernel=kern, likelihood=lik, X=X, Y=Y) 70 | # model = bayesnewton.models.MarkovExpectationPropagationRiemannGP(kernel=kern, likelihood=lik, X=X, Y=Y, power=0.5) 71 | 72 | 73 | lr_adam = 0.01 74 | lr_newton = 0.1 75 | iters = 500 76 | opt_hypers = objax.optimizer.Adam(model.vars()) 77 | energy = objax.GradValues(model.energy, model.vars()) 78 | 79 | damping = 0.5 80 | 81 | 82 | @objax.Function.with_vars(model.vars() + 
opt_hypers.vars()) 83 | def train_op(): 84 | model.inference(lr=lr_newton, damping=damping, ensure_psd=False) # perform inference and update variational params 85 | dE, E = energy() # compute energy and its gradients w.r.t. hypers 86 | # opt_hypers(lr_adam, dE) 87 | test_nlpd_ = model.negative_log_predictive_density(X=XT, Y=YT) 88 | return E, test_nlpd_ 89 | 90 | 91 | train_op = objax.Jit(train_op) 92 | 93 | t0 = time.time() 94 | for i in range(1, iters + 1): 95 | loss, test_nlpd = train_op() 96 | print('iter %2d, energy: %1.4f, nlpd: %1.4f' % (i, loss[0], test_nlpd)) 97 | t1 = time.time() 98 | print('optimisation time: %2.2f secs' % (t1-t0)) 99 | 100 | t0 = time.time() 101 | posterior_mean, posterior_var = model.predict(X=x_plot) 102 | nlpd = model.negative_log_predictive_density(X=XT, Y=YT) 103 | t1 = time.time() 104 | print('prediction time: %2.2f secs' % (t1-t0)) 105 | print('NLPD: %1.2f' % nlpd) 106 | 107 | plt.figure(1) 108 | for c in range(num_classes): 109 | plt.plot(XAll, f_samp[:, c], ".", color=colors[c], label=str(c)) 110 | plt.plot(XAll, Y_hot[:, c], "-", color=colors[c]) 111 | # plt.plot(XAll, f_samp) 112 | 113 | plt.figure(2) 114 | for c in range(num_classes): 115 | plt.plot(x_plot, posterior_mean[:, c], "-", color=colors[c], label=str(c)) 116 | plt.show() 117 | 118 | # print('plotting ...') 119 | # plt.figure(1, figsize=(12, 8)) 120 | # plt.clf() 121 | # plt.subplot(2, 1, 1) 122 | # plt.title('Multi Class Classification') 123 | # plt.plot(x_plot, posterior_mean_sig, 'c', linewidth=1.) 124 | # # plt.fill_between(x_plot, lb_sig, ub_sig, color='c', alpha=0.05, label='95% confidence') 125 | # plt.plot(X, Y, 'k.', markersize=2, label='train') 126 | # plt.plot(XT, YT, 'r.', markersize=2, label='test') 127 | # plt.xlim(x_plot[0], x_plot[-1]) 128 | # plt.gca().xaxis.set_ticklabels([]) 129 | # plt.subplot(2, 1, 2) 130 | # plt.plot(XAll, f_samp[:, 0], 'b--', linewidth=0.5) 131 | # plt.plot(XAll, lik.link_fn(f_samp[:, 1]), 'r--', linewidth=0.5) 132 | # plt.plot(x_plot, posterior_mean_subbands, 'b-', linewidth=0.5) 133 | # # plt.fill_between(x_plot, lb_subbands, ub_subbands, color='b', alpha=0.05) 134 | # plt.plot(x_plot, posterior_mean_modulators, 'r-', linewidth=0.5) 135 | # # plt.fill_between(x_plot, lb_modulators, ub_modulators, color='r', alpha=0.05) 136 | # plt.xlim(x_plot[0], x_plot[-1]) 137 | # plt.legend() 138 | # plt.xlabel('time') 139 | # plt.show() 140 | -------------------------------------------------------------------------------- /demos/multistage.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import time 6 | import tensorflow as tf 7 | import gpflow 8 | from markovflow.likelihoods.mutlistage_likelihood import MultiStageLikelihood 9 | 10 | N = 100 # number of training points 11 | X_train = np.arange(N).astype(float) 12 | L = 3 # number of latent functions 13 | 14 | # Define the kernel 15 | k1a = gpflow.kernels.Periodic( 16 | gpflow.kernels.Matern52(variance=1.0, lengthscales=3.0), period=12.0 17 | ) 18 | k1b = gpflow.kernels.Matern52(variance=1.0, lengthscales=30.0) 19 | k2 = gpflow.kernels.Matern32(variance=0.1, lengthscales=5.0) 20 | k = k1a * k1b + k2 21 | 22 | # Draw three independent functions from the same Gaussian process 23 | X = X_train 24 | num_latent = L 25 | K = k(X[:, None]) 26 | np.random.seed(123) 27 | v = np.random.randn(len(K), num_latent) 28 | # We draw samples from a GP with kernel k(.) 
evaluated at X by reparameterizing: 29 | # f ~ N(0, K) → f = chol(K) v, v ~ N(0, I), where chol(K) chol(K)ᵀ = K 30 | f = np.linalg.cholesky(K + 1e-6 * np.eye(len(K))) @ v 31 | 32 | # We shift the third function to increase the mean of the Poisson component to 20 to make it easier to identify 33 | f += np.array([0.0, 0.0, np.log(20)]).reshape(1, L) 34 | 35 | # Define the likelihood 36 | lik = MultiStageLikelihood() 37 | # Draw observations from the likelihood given the functions `f` from the previous step 38 | Y = lik.sample_y(tf.convert_to_tensor(f, dtype=gpflow.default_float())).numpy() 39 | 40 | # Plot all three functions 41 | # plt.figure(1) 42 | # for i in range(num_latent): 43 | # plt.plot(X, f[:, i]) 44 | # _ = plt.xticks(np.arange(0, 100, 12)) 45 | # # Plot the observations 46 | # plt.figure(2) 47 | # _ = plt.plot(X, Y, ".") 48 | # plt.show() 49 | 50 | var_f = 1.0 # GP variance 51 | len_f = 15.0 # GP lengthscale 52 | 53 | kern1 = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 54 | kern2 = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 55 | kern3 = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 56 | kern = bayesnewton.kernels.Independent(kernels=[kern1, kern2, kern3]) 57 | lik = bayesnewton.likelihoods.MultiStage() 58 | 59 | # model = bayesnewton.models.MarkovNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 60 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=X, Y=Y) 61 | # model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=X, Y=Y, power=0.5) 62 | # -- Gauss-Newton --- 63 | # model = bayesnewton.models.MarkovGaussNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 64 | # model = bayesnewton.models.MarkovVariationalGaussNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 65 | # -- quasi-Newton --- 66 | # model = bayesnewton.models.MarkovQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 67 | # model = bayesnewton.models.MarkovVariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 68 | # model = bayesnewton.models.MarkovExpectationPropagationQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y, power=0.5) 69 | # --- Riemannian grads --- 70 | # model = bayesnewton.models.MarkovVariationalRiemannGP(kernel=kern, likelihood=lik, X=X, Y=Y) 71 | 72 | # --- GP --- 73 | # model = bayesnewton.models.VariationalGP(kernel=kern, likelihood=lik, X=X, Y=Y) 74 | # model = bayesnewton.models.VariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 75 | # --- sparse --- 76 | # model = bayesnewton.models.SparseVariationalGP(kernel=kern, likelihood=lik, X=X, Y=Y, Z=Z) 77 | # --- sparse quasi-Newton --- 78 | # model = bayesnewton.models.SparseQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y, Z=Z) 79 | # model = bayesnewton.models.SparseVariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y, Z=Z) 80 | 81 | lr_adam = 0.1 82 | lr_newton = 0.3 83 | iters = 300 84 | opt_hypers = objax.optimizer.Adam(model.vars()) 85 | energy = objax.GradValues(model.energy, model.vars()) 86 | 87 | unscented_transform = bayesnewton.cubature.Unscented(dim=3) # 5th-order unscented transform 88 | 89 | damping = 0.5 90 | 91 | 92 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 93 | def train_op(): 94 | model.inference(lr=lr_newton, damping=damping, cubature=unscented_transform, ensure_psd=False) # perform inference and update variational params 95 | dE, E = energy(cubature=unscented_transform) # compute energy and its gradients w.r.t. 
hypers 96 | # opt_hypers(lr_adam, dE) 97 | test_nlpd_ = 0. # model.negative_log_predictive_density(X=XT, Y=YT, cubature=unscented_transform) 98 | return E, test_nlpd_ 99 | 100 | 101 | train_op = objax.Jit(train_op) 102 | 103 | t0 = time.time() 104 | for i in range(1, iters + 1): 105 | loss, test_nlpd = train_op() 106 | print('iter %2d, energy: %1.4f, nlpd: %1.4f' % (i, loss[0], test_nlpd)) 107 | t1 = time.time() 108 | print('optimisation time: %2.2f secs' % (t1-t0)) 109 | -------------------------------------------------------------------------------- /demos/positive.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import time 6 | 7 | 8 | def nonlinearity(f_): 9 | return bayesnewton.utils.softplus(f_) 10 | 11 | 12 | def wiggly_time_series(x_): 13 | return 2 * np.cos(0.06*x_+0.33*np.pi) * np.sin(0.4*x_) - 1. 14 | 15 | 16 | np.random.seed(99) 17 | N = 500 # number of training points 18 | x = 100 * np.random.rand(N) 19 | # x = np.linspace(np.min(x), np.max(x), N) 20 | # f = lambda x_: 3 * np.sin(np.pi * x_ / 10.0) 21 | f = wiggly_time_series 22 | y = nonlinearity(f(x)) + np.sqrt(0.1)*np.random.randn(x.shape[0]) 23 | x_test = np.linspace(np.min(x), np.max(x), num=500) 24 | y_test = nonlinearity(f(x_test)) + np.sqrt(0.05)*np.random.randn(x_test.shape[0]) 25 | x_plot = np.linspace(np.min(x)-10.0, np.max(x)+10.0, num=500) 26 | 27 | M = 20 28 | Z = np.linspace(np.min(x), np.max(x), M) 29 | 30 | x = x[:, None] 31 | x_plot = x_plot[:, None] 32 | 33 | var_f = 1. # GP variance 34 | len_f = 5.0 # GP lengthscale 35 | var_y = 5.0 # likelihood lengthscale 36 | 37 | kern = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 38 | lik = bayesnewton.likelihoods.Positive(variance=var_y) 39 | 40 | # model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 41 | # model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y, power=0.5) 42 | model = bayesnewton.models.SparseVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y, Z=Z, opt_z=True) 43 | # model = bayesnewton.models.MarkovVariationalGaussNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 44 | # model = bayesnewton.models.MarkovVariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 45 | # model = bayesnewton.models.MarkovExpectationPropagationQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y, power=0.5) 46 | # model = bayesnewton.models.SparseVariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y, Z=Z, opt_z=True) 47 | 48 | lr_adam = 0.1 49 | lr_newton = 0.3 50 | iters = 1000 51 | opt_hypers = objax.optimizer.Adam(model.vars()) 52 | energy = objax.GradValues(model.energy, model.vars()) 53 | 54 | damping = 0.5 55 | 56 | 57 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 58 | def train_op(): 59 | model.inference(lr=lr_newton, damping=damping) # perform inference and update variational params 60 | dE, E = energy() # compute energy and its gradients w.r.t. 
hypers 61 | opt_hypers(lr_adam, dE) 62 | test_nlpd_ = model.negative_log_predictive_density(X=x_test, Y=y_test) 63 | return E, test_nlpd_ 64 | 65 | 66 | train_op = objax.Jit(train_op) 67 | 68 | t0 = time.time() 69 | for i in range(1, iters + 1): 70 | loss, test_nlpd = train_op() 71 | print('iter %2d, energy: %1.4f, nlpd: %1.4f' % (i, loss[0], test_nlpd)) 72 | t1 = time.time() 73 | print('optimisation time: %2.2f secs' % (t1-t0)) 74 | 75 | # calculate posterior predictive distribution via filtering and smoothing at train & test locations: 76 | print('calculating the posterior predictive distribution ...') 77 | t0 = time.time() 78 | posterior_mean, posterior_var = model.predict(X=x_plot) 79 | nlpd = model.negative_log_predictive_density(X=x_test, Y=y_test) 80 | t1 = time.time() 81 | print('prediction time: %2.2f secs' % (t1-t0)) 82 | print('nlpd: %2.3f' % nlpd) 83 | lb = np.squeeze(posterior_mean) - 1.96 * np.squeeze(posterior_var) ** 0.5 84 | ub = np.squeeze(posterior_mean) + 1.96 * np.squeeze(posterior_var) ** 0.5 85 | link_fn = lik.link_fn 86 | 87 | print('plotting ...') 88 | plt.figure(1, figsize=(12, 5)) 89 | plt.clf() 90 | plt.plot(x, y, 'b.', label='training observations') 91 | plt.plot(x_test, y_test, 'r.', alpha=0.4, label='test observations') 92 | plt.plot(x_plot, link_fn(np.squeeze(posterior_mean)), 'm', label='posterior mean') 93 | plt.fill_between(x_plot[:, 0], link_fn(lb), link_fn(ub), color='m', alpha=0.05, label='95% confidence') 94 | plt.xlim(x_plot[0], x_plot[-1]) 95 | if hasattr(model, 'Z'): 96 | plt.plot(model.Z.value[:, 0], 97 | (np.min(link_fn(lb))-1.)*np.ones_like(model.Z.value[:, 0]), 98 | 'm^', 99 | markersize=4) 100 | plt.legend(loc=3) 101 | plt.xlabel('$X$') 102 | plt.show() 103 | -------------------------------------------------------------------------------- /demos/positive2d.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | from scipy.cluster.vq import kmeans2 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | from sklearn.preprocessing import StandardScaler 8 | from convertbng.util import convert_bng 9 | import time 10 | 11 | 12 | def datetime_to_epoch(datetime): 13 | """ 14 | Converts a datetime to a number 15 | args: 16 | datatime: is a pandas column 17 | """ 18 | return datetime.astype('int64') // 1e9 19 | 20 | 21 | species = 'pm10' 22 | 23 | raw_data = pd.read_csv('../data/aq_data.csv') 24 | sites_df = pd.read_csv('../data/laqn_sites.csv', sep=';') 25 | 26 | # filter sites not in london 27 | london_box = [ 28 | [51.279, 51.684], # lat 29 | [-0.533, 0.208] # lon 30 | ] 31 | 32 | sites_df = sites_df[(sites_df['Latitude'] > london_box[0][0]) & (sites_df['Latitude'] < london_box[0][1])] 33 | sites_df = sites_df[(sites_df['Longitude'] > london_box[1][0]) & (sites_df['Longitude'] < london_box[1][1])] 34 | 35 | # merge spatial infomation to data 36 | raw_data = raw_data.merge(sites_df, left_on='site', right_on='SiteCode') 37 | 38 | # convert to datetimes 39 | raw_data['date'] = pd.to_datetime(raw_data['date']) 40 | raw_data['epoch'] = datetime_to_epoch(raw_data['date']) 41 | 42 | # get data in date range 43 | data_range_start = '2019/02/01 00:00:00' 44 | data_range_end = '2019/02/01 04:00:00' # '2019/02/01 23:59:59' # '2019/02/25 23:59:59', '2019/03/11 23:59:59', '2019/04/17 23:59:59' 45 | 46 | raw_data = raw_data[(raw_data['date'] >= data_range_start) & (raw_data['date'] < data_range_end)] 47 | 48 | Xraw = np.array(raw_data[['epoch', 'Longitude', 
'Latitude']]) 49 | Yraw = np.array(raw_data[[species]]) 50 | 51 | Xraw = Xraw[~np.isnan(np.squeeze(Yraw))] 52 | Yraw = Yraw[~np.isnan(np.squeeze(Yraw))] 53 | 54 | X_scaler = StandardScaler().fit(Xraw) 55 | Xraw = X_scaler.transform(Xraw) 56 | 57 | scale_y = 30. 58 | Yraw = Yraw / scale_y 59 | # Y_scaler = StandardScaler().fit(Yraw) 60 | # Yraw = Y_scaler.transform(Yraw) 61 | 62 | # plt.plot(Yraw) 63 | # plt.show() 64 | 65 | print('N =', Yraw.shape[0]) 66 | 67 | fold = 0 68 | 69 | np.random.seed(123) 70 | # 4-fold cross-validation setup 71 | ind_shuffled = np.random.permutation((Yraw.shape[0] // 4) * 4) 72 | ind_split = np.stack(np.split(ind_shuffled, 4)) # 4 random batches of data indices 73 | 74 | # Get training and test indices 75 | ind_test = ind_split[fold] # np.sort(ind_shuffled[:N//4]) 76 | ind_train = np.concatenate(ind_split[np.arange(4) != fold]) 77 | X = Xraw[ind_train] # 75/25 train/test split 78 | XT = Xraw[ind_test] 79 | Y = Yraw[ind_train] 80 | YT = Yraw[ind_test] 81 | 82 | # X = X[:500] 83 | # XT = XT[:150] 84 | # Y = Y[:500] 85 | # YT = YT[:150] 86 | 87 | M = 100 88 | 89 | Z = kmeans2(X, M, minit="points")[0] 90 | 91 | kern_process_ = bayesnewton.kernels.Matern52(variance=1.0, lengthscale=1.0) 92 | kern_process = bayesnewton.kernels.Separable([kern_process_, kern_process_, kern_process_]) 93 | kern_noise_ = bayesnewton.kernels.Matern52(variance=1.0, lengthscale=1.0) 94 | kern_noise = bayesnewton.kernels.Separable([kern_noise_, kern_noise_, kern_noise_]) 95 | kern = bayesnewton.kernels.Independent([kern_process, kern_noise]) 96 | # lik = bayesnewton.likelihoods.Positive(variance=0.25) 97 | # lik = bayesnewton.likelihoods.PositiveStudentsT(scale=0.25) 98 | # lik = bayesnewton.likelihoods.HeteroscedasticNoise() 99 | lik = bayesnewton.likelihoods.HeteroscedasticStudentsT(df=5.) 100 | # model = bayesnewton.models.VariationalGP(kernel=kern, likelihood=lik, X=X, Y=Y) 101 | # model = bayesnewton.models.ExpectationPropagationGP(kernel=kern, likelihood=lik, X=X, Y=Y) 102 | # model = bayesnewton.models.VariationalGaussNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 103 | model = bayesnewton.models.VariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 104 | # model = bayesnewton.models.ExpectationPropagationQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y) 105 | # model = bayesnewton.models.SparseVariationalGP(kernel=kern, likelihood=lik, X=X, Y=Y, Z=Z, opt_z=True) 106 | # model = bayesnewton.models.SparseVariationalGaussNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y, Z=Z, opt_z=True) 107 | # model = bayesnewton.models.SparseVariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=X, Y=Y, Z=Z, opt_z=True) 108 | 109 | lr_adam = 0.01 110 | lr_newton = 0.1 # 0.05 111 | iters = 1000 112 | iters_warmup = 500 113 | opt_hypers = objax.optimizer.Adam(model.vars()) 114 | energy = objax.GradValues(model.energy, model.vars()) 115 | 116 | # damping = np.logspace(np.log10(0.8), np.log10(1e-1), num=iters) 117 | # damping = np.linspace(0.8, 1e-3, num=iters) 118 | # damping = np.linspace(0.5, 0.1, num=iters) 119 | # damping = np.linspace(0.5, 0.1, num=iters) 120 | # damping = np.logspace(np.log10(0.8), np.log10(0.01), num=iters) 121 | damping = 0.1 122 | damping_warmup = 0.5 123 | 124 | 125 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 126 | def train_op(): 127 | model.inference(lr=lr_newton, damping=damping) # perform inference and update variational params 128 | dE, E = energy() # compute energy and its gradients w.r.t. 
hypers 129 | opt_hypers(lr_adam, dE) 130 | test_nlpd_ = model.negative_log_predictive_density(X=XT, Y=YT) 131 | return E, test_nlpd_ 132 | 133 | 134 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 135 | def train_op_warmup(): 136 | model.inference(lr=lr_newton, damping=damping_warmup) # perform inference and update variational params 137 | dE, E = energy() # compute energy and its gradients w.r.t. hypers 138 | # opt_hypers(lr_adam, dE) 139 | test_nlpd_ = model.negative_log_predictive_density(X=XT, Y=YT) 140 | return E, test_nlpd_ 141 | 142 | 143 | train_op = objax.Jit(train_op) 144 | train_op_warmup = objax.Jit(train_op_warmup) 145 | 146 | t0 = time.time() 147 | for i in range(1, iters_warmup + 1): 148 | loss, test_nlpd = train_op_warmup() 149 | print('iter %2d, energy: %1.4f, nlpd: %1.4f' % (i, loss[0], test_nlpd)) 150 | for i in range(1, iters + 1): 151 | loss, test_nlpd = train_op() 152 | print('iter %2d, energy: %1.4f, nlpd: %1.4f' % (i, loss[0], test_nlpd)) 153 | t1 = time.time() 154 | print('optimisation time: %2.2f secs' % (t1-t0)) 155 | -------------------------------------------------------------------------------- /demos/regression.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import time 6 | 7 | 8 | def wiggly_time_series(x_): 9 | noise_var = 0.2 # true observation noise 10 | # return 0.25 * (np.cos(0.04*x_+0.33*np.pi) * np.sin(0.2*x_) + 11 | return (np.cos(0.04*x_+0.33*np.pi) * np.sin(0.2*x_) + 12 | np.sqrt(noise_var) * np.random.normal(0, 1, x_.shape) + 13 | # np.sqrt(noise_var) * np.random.uniform(-4, 4, x_.shape) + 14 | 0.0 * x_) # 0.02 * x_) 15 | # 0.0 * x_) + 2.5 # 0.02 * x_) 16 | 17 | 18 | np.random.seed(12345) 19 | N = 500 20 | Nbatch = 100 21 | x = np.linspace(-17, 147, num=N) 22 | y = wiggly_time_series(x) 23 | # x_test = np.linspace(np.min(x)-15.0, np.max(x)+15.0, num=500) 24 | x_test = np.linspace(np.min(x), np.max(x), num=500) 25 | # x_test = np.linspace(-32.5, 157.5, num=250) 26 | y_test = wiggly_time_series(x_test) 27 | x_plot = np.linspace(np.min(x)-20.0, np.max(x)+20.0, 200) 28 | M = 20 29 | z = np.linspace(-30, 155, num=M) 30 | # z = x 31 | 32 | var_f = 1.0 # GP variance 33 | len_f = 5.0 # GP lengthscale 34 | var_y = 0.2 # observation noise 35 | 36 | kern = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 37 | lik = bayesnewton.likelihoods.Gaussian(variance=var_y) 38 | # lik = bayesnewton.likelihoods.StudentT() 39 | # lik = bayesnewton.likelihoods.Beta() 40 | # lik = bayesnewton.likelihoods.Gamma() 41 | # model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 42 | # model = bayesnewton.models.MarkovVariationalGaussNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 43 | # model = bayesnewton.models.MarkovVGNGP(kernel=kern, likelihood=lik, X=x, Y=y) 44 | # model = bayesnewton.models.MarkovLaplaceGP(kernel=kern, likelihood=lik, X=x, Y=y) 45 | # model = bayesnewton.models.MarkovQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 46 | # model = bayesnewton.models.MarkovVariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 47 | # model = bayesnewton.models.MarkovExpectationPropagationQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y, power=0.5) 48 | # model = bayesnewton.models.MarkovLaplaceGaussNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 49 | # model = bayesnewton.models.MarkovExpectationPropagationGaussNewtonsGP(kernel=kern, likelihood=lik, X=x, Y=y, 
power=0.5) 50 | # model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y, power=0.5) 51 | # model = bayesnewton.models.MarkovLaplaceGP(kernel=kern, likelihood=lik, X=x, Y=y) 52 | # model = bayesnewton.models.InfiniteHorizonVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 53 | # model = bayesnewton.models.SparseMarkovVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y, Z=z) 54 | # model = bayesnewton.models.SparseMarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y, Z=z, power=0.5) 55 | 56 | model = bayesnewton.models.SparseVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y, Z=z) 57 | # model = bayesnewton.models.SparseExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y, Z=z, power=0.5) 58 | # model = bayesnewton.models.ExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y, power=0.5) 59 | 60 | # SMVGP = bayesnewton.build_model(bayesnewton.models.SparseMarkovGP, bayesnewton.inference.VariationalInference) 61 | # model = SMVGP(kernel=kern, likelihood=lik, X=x, Y=y, Z=z) 62 | 63 | lr_adam = 0.1 64 | lr_newton = 1 65 | iters = 200 66 | opt_hypers = objax.optimizer.Adam(model.vars()) 67 | energy = objax.GradValues(model.energy, model.vars()) 68 | 69 | 70 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 71 | def train_op(ind): 72 | model.inference(batch_ind=ind, lr=lr_newton) # perform inference and update variational params 73 | dE, E = energy(batch_ind=ind) # compute energy and its gradients w.r.t. hypers 74 | opt_hypers(lr_adam, dE) 75 | return E 76 | 77 | 78 | train_op = objax.Jit(train_op) 79 | 80 | t0 = time.time() 81 | for i in range(1, iters + 1): 82 | batch_ind = np.random.permutation(N)[:Nbatch] 83 | loss = train_op(batch_ind) 84 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 85 | t1 = time.time() 86 | print('optimisation time: %2.2f secs' % (t1-t0)) 87 | 88 | # posterior_samples = model.posterior_sample(X=x_plot, num_samps=20) 89 | 90 | t0 = time.time() 91 | posterior_mean, posterior_var = model.predict_y(X=x_plot) 92 | nlpd = model.negative_log_predictive_density(X=x_test, Y=y_test) 93 | t1 = time.time() 94 | print('prediction time: %2.2f secs' % (t1-t0)) 95 | print('nlpd: %2.3f' % nlpd) 96 | lb = posterior_mean - 1.96 * posterior_var ** 0.5 97 | ub = posterior_mean + 1.96 * posterior_var ** 0.5 98 | 99 | print('plotting ...') 100 | plt.figure(1, figsize=(12, 5)) 101 | plt.clf() 102 | plt.plot(x, y, 'k.', label='training observations') 103 | plt.plot(x_test, y_test, 'r.', alpha=0.4, label='test observations') 104 | plt.plot(x_plot, posterior_mean, 'b', label='posterior mean') 105 | # plt.plot(x_plot, posterior_samples.T, 'b', alpha=0.2) 106 | plt.fill_between(x_plot, lb, ub, color='b', alpha=0.05, label='95% confidence') 107 | plt.xlim([x_plot[0], x_plot[-1]]) 108 | if hasattr(model, 'Z'): 109 | plt.plot(model.Z.value[:, 0], -2 * np.ones_like(model.Z.value[:, 0]), 'b^', markersize=5) 110 | # plt.xlim([x_test[0], x_test[-1]]) 111 | # plt.ylim([-2, 5]) 112 | plt.legend() 113 | plt.title('GP regression') 114 | plt.xlabel('$X$') 115 | plt.show() 116 | -------------------------------------------------------------------------------- /demos/regression_multi.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | from jax import vmap 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import time 7 | 8 | 9 | def wiggly_time_series_1(x_): 10 | return np.cos(0.04*x_+0.33*np.pi) * np.sin(0.2*x_) 11 | 12 | 13 | def 
wiggly_time_series_2(x_): 14 | # return np.cos(0.05*x_+0.4*np.pi) * np.sin(0.1*x_) 15 | # return 2 * wiggly_time_series_1(x_) 16 | return wiggly_time_series_1(x_) 17 | 18 | 19 | np.random.seed(123) 20 | N = 1000 21 | # x = np.linspace(-17, 147, num=N) 22 | x = np.concatenate([ 23 | np.linspace(-17, 55, num=N), 24 | np.linspace(75, 147, num=N) 25 | ], axis=0) 26 | f1 = wiggly_time_series_1(x)[:, None] 27 | f2 = wiggly_time_series_2(x)[:, None] 28 | f = np.concatenate([f1, f2], axis=1)[..., None] 29 | 30 | noise_cov = np.array([[0.2, 0.1], [0.1, 0.3]]) 31 | # noise_cov = np.array([[0.1, 0.05], [0.05, 0.2]]) 32 | 33 | noise = np.linalg.cholesky(noise_cov)[None] @ np.random.multivariate_normal(np.zeros(2), np.eye(2), f.shape[0])[..., None] 34 | y = f + noise 35 | 36 | # plt.figure(1) 37 | # plt.plot(x, f1, 'b-') 38 | # plt.plot(x, f2, 'r-') 39 | # plt.plot(x, y[:, 0], 'b.') 40 | # plt.plot(x, y[:, 1], 'r.') 41 | # 42 | # plt.figure(2) 43 | # plt.plot(x, noise[:, 0], 'b') 44 | # plt.plot(x, noise[:, 1], 'r') 45 | # plt.show() 46 | 47 | x_test = np.linspace(np.min(x)-10.0, np.max(x)+10.0, num=500) 48 | f1_test = wiggly_time_series_1(x_test)[:, None] 49 | f2_test = wiggly_time_series_2(x_test)[:, None] 50 | f_test = np.concatenate([f1_test, f2_test], axis=1)[..., None] 51 | noise_test = np.linalg.cholesky(noise_cov)[None] @ np.random.multivariate_normal(np.zeros(2), np.eye(2), f_test.shape[0])[..., None] 52 | y_test = f_test + noise_test 53 | x_plot = np.linspace(np.min(x)-20.0, np.max(x)+20.0, 500) 54 | f1_plot = wiggly_time_series_1(x_plot) 55 | f2_plot = wiggly_time_series_2(x_plot) 56 | 57 | var_f = 1.0 # GP variance 58 | len_f = 15.0 # GP lengthscale 59 | 60 | kern1 = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 61 | kern2 = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 62 | kern = bayesnewton.kernels.Independent(kernels=[kern1, kern2]) 63 | lik = bayesnewton.likelihoods.GaussianMultivariate(covariance=noise_cov) 64 | 65 | # model = bayesnewton.models.MarkovNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 66 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 67 | # model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y, power=0.5) 68 | # -- Gauss-Newton --- 69 | # model = bayesnewton.models.MarkovGaussNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 70 | # model = bayesnewton.models.MarkovVariationalGaussNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 71 | # -- quasi-Newton --- 72 | # model = bayesnewton.models.MarkovQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 73 | # model = bayesnewton.models.MarkovVariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 74 | # model = bayesnewton.models.MarkovExpectationPropagationQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y, power=0.5) 75 | # --- Riemannian grads --- 76 | # model = bayesnewton.models.MarkovVariationalRiemannGP(kernel=kern, likelihood=lik, X=x, Y=y) 77 | 78 | lr_adam = 0.1 79 | lr_newton = 1. 80 | iters = 10 81 | opt_hypers = objax.optimizer.Adam(model.vars()) 82 | energy = objax.GradValues(model.energy, model.vars()) 83 | 84 | 85 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 86 | def train_op(): 87 | model.inference(lr=lr_newton) # perform inference and update variational params 88 | dE, E = energy() # compute energy and its gradients w.r.t. 
hypers 89 | # opt_hypers(lr_adam, dE) 90 | return E 91 | 92 | 93 | train_op = objax.Jit(train_op) 94 | 95 | t0 = time.time() 96 | for i in range(1, iters + 1): 97 | loss = train_op() 98 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 99 | t1 = time.time() 100 | print('optimisation time: %2.2f secs' % (t1-t0)) 101 | 102 | t0 = time.time() 103 | posterior_mean, posterior_cov = model.predict_y(X=x_plot) 104 | nlpd = model.negative_log_predictive_density(X=x_test, Y=y_test) 105 | t1 = time.time() 106 | print('prediction time: %2.2f secs' % (t1-t0)) 107 | print('nlpd: %2.3f' % nlpd) 108 | posterior_var = bayesnewton.utils.diag(posterior_cov) 109 | lb = posterior_mean - 1.96 * posterior_var ** 0.5 110 | ub = posterior_mean + 1.96 * posterior_var ** 0.5 111 | 112 | print('plotting ...') 113 | plt.figure(1, figsize=(12, 5)) 114 | plt.clf() 115 | plt.plot(x, y[:, 0], 'b.') 116 | plt.plot(x, y[:, 1], 'r.') 117 | # plt.plot(x_test, y_test, 'r.', alpha=0.4, label='test observations') 118 | plt.plot(x_plot, posterior_mean[:, 0], 'b') 119 | plt.plot(x_plot, posterior_mean[:, 1], 'r') 120 | plt.plot(x_plot, f1_plot, 'b--') 121 | plt.plot(x_plot, f2_plot, 'r--') 122 | plt.fill_between(x_plot, lb[:, 0], ub[:, 0], color='b', alpha=0.05) 123 | plt.fill_between(x_plot, lb[:, 1], ub[:, 1], color='r', alpha=0.05) 124 | plt.xlim([x_plot[0], x_plot[-1]]) 125 | if hasattr(model, 'Z'): 126 | plt.plot(model.Z.value[:, 0], -2 * np.ones_like(model.Z.value[:, 0]), 'b^', markersize=5) 127 | plt.legend() 128 | plt.title('Multivariate GP regression') 129 | plt.xlabel('$X$') 130 | plt.show() 131 | -------------------------------------------------------------------------------- /demos/scrna_seq.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import pandas as pd 5 | import matplotlib.pyplot as plt 6 | import time 7 | 8 | 9 | y = pd.read_csv('../data/normalized_alpha_counts.csv', index_col=[0]) 10 | x = pd.read_csv('../data/alpha_time_points.csv', index_col=[0]) 11 | 12 | y = y.rename(index={'ENSMUSG00000015879': 'Fam184b', 'ENSMUSG00000059173': 'Pde1a'}) 13 | genes_name = ['Fam184b', 'Pde1a'] 14 | 15 | x, y = x.values, y.loc[genes_name].values.T 16 | 17 | y = y[:, :1] # Fam184b 18 | # y = y[:, 1:] # Pde1a 19 | 20 | # Test points 21 | # x_test = x 22 | x_plot = np.linspace(np.min(x)-0.1, np.max(x)+0.1, 200) 23 | 24 | var_f = 10.0 # GP variance 25 | len_f = 1.0 # GP lengthscale 26 | 27 | kern = bayesnewton.kernels.Matern72(variance=var_f, lengthscale=len_f) 28 | lik = bayesnewton.likelihoods.NegativeBinomial(alpha=1.0, scale=1.0) 29 | # lik = bayesnewton.likelihoods.ZeroInflatedNegativeBinomial(alpha=1.0, km=1.0) 30 | 31 | 32 | # model = bayesnewton.models.VariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 33 | # model = bayesnewton.models.ExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y) 34 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 35 | # model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y) 36 | 37 | lr_adam = 0.1 38 | lr_newton = 1 39 | iters = 500 40 | opt_hypers = objax.optimizer.Adam(model.vars()) 41 | energy = objax.GradValues(model.energy, model.vars()) 42 | 43 | 44 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 45 | def train_op(): 46 | model.inference(lr=lr_newton) # perform inference and update variational params 47 | dE, E = energy() # compute energy and its gradients w.r.t. 
hypers 48 | opt_hypers(lr_adam, dE) 49 | return E 50 | 51 | 52 | train_op = objax.Jit(train_op) 53 | 54 | t0 = time.time() 55 | for i in range(1, iters + 1): 56 | loss = train_op() 57 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 58 | t1 = time.time() 59 | print('optimisation time: %2.2f secs' % (t1-t0)) 60 | 61 | print(model.likelihood.alpha) 62 | print(model.kernel.variance) 63 | print(model.kernel.lengthscale) 64 | 65 | # calculate posterior predictive distribution via filtering and smoothing at train & test locations: 66 | print('calculating the posterior predictive distribution ...') 67 | t0 = time.time() 68 | # posterior_mean, posterior_var = model.predict(X=x_plot) 69 | posterior_mean_y, posterior_var_y = model.predict_y(X=x_plot) 70 | t1 = time.time() 71 | print('prediction time: %2.2f secs' % (t1-t0)) 72 | 73 | lb_y = posterior_mean_y - np.sqrt(posterior_var_y) 74 | ub_y = posterior_mean_y + np.sqrt(posterior_var_y) 75 | 76 | print('plotting ...') 77 | plt.figure(1, figsize=(10, 6)) 78 | plt.clf() 79 | plt.plot(x, y, 'b.', label='observations', clip_on=False) 80 | plt.plot(x_plot, posterior_mean_y, 'b', label='posterior mean') 81 | # plt.plot(x_plot, lik.link_fn(posterior_mean), 'b--', label='posterior mean') 82 | plt.fill_between(x_plot, lb_y, ub_y, color='b', alpha=0.05, label='posterior std') 83 | plt.xlim(x_plot[0], x_plot[-1]) 84 | plt.ylim(0.0) 85 | plt.legend() 86 | plt.title('') 87 | plt.xlabel('time') 88 | plt.ylabel('gene expression') 89 | plt.show() 90 | -------------------------------------------------------------------------------- /demos/speech.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import time 6 | from scipy.io import loadmat 7 | 8 | plot_final = True 9 | plot_intermediate = False 10 | 11 | 12 | print('loading data ...') 13 | N_sig = 22050 14 | # y = loadmat('../experiments/audio/speech_female')['y'][1000:N_sig+1000] 15 | y = loadmat('../experiments/audio/speech_female')['y'][:N_sig] 16 | fs = 44100 # sampling rate (Hz) 17 | scale = 1000 # convert to milliseconds 18 | 19 | normaliser = 0.5 * np.sqrt(np.var(y)) 20 | yTrain = y / normaliser # rescale the data 21 | 22 | # N = y.shape[0] 23 | # yTrain = yTrain[:N] 24 | x = np.linspace(0., N_sig, num=N_sig) / fs * scale # arbitrary evenly spaced inputs inputs 25 | 26 | np.random.seed(123) 27 | 28 | gap_size = 2000 29 | gap = np.arange(gap_size) 30 | gap0 = gap + 3000 31 | gap1 = gap + 8000 32 | gap2 = gap + 13000 33 | gap3 = gap + 18000 34 | gaps = np.concatenate([gap0, gap1, gap2, gap3]) 35 | mask = np.ones_like(x, dtype=bool) 36 | mask[gaps] = False 37 | 38 | # x_train = x[:N] 39 | x_train = x[mask] 40 | x_test = x 41 | # y_train = y[:N] 42 | y_train = y[mask] 43 | y_test = y 44 | 45 | # N = 5000 46 | N = x_train.shape[0] 47 | batch_size = N 48 | M = 30 49 | z = np.linspace(x[0], x[-1], M) 50 | 51 | var_f = 1.0 # GP variance 52 | len_per = 1.0 # GP lengthscale 53 | len_mat = 50.0 54 | var_y = 0.1 # observation noise 55 | fundamental_freq = 220 # Hz 56 | radial_freq = 2 * np.pi * fundamental_freq / scale # radial freq = 2pi * f / scale 57 | per = 6. 
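# Note (added explanatory comment): the quasi-periodic kernel constructed below combines a
# periodic component with period `per` (in milliseconds), intended to capture the repeating
# structure of the voiced speech waveform, with a Matern-3/2 component of lengthscale `len_mat`
# that lets the periodic pattern drift slowly over time, which is what allows the model to
# interpolate across the artificial gaps introduced above.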
58 | 59 | # kern = bayesnewton.kernels.Matern72(variance=var_f, lengthscale=len_f) 60 | kern = bayesnewton.kernels.QuasiPeriodicMatern32(variance=var_f, lengthscale_periodic=len_per, period=per, lengthscale_matern=len_mat) 61 | # kern = bayesnewton.kernels.SubbandMatern12(variance=var_f, lengthscale=len_per, radial_frequency=radial_freq) 62 | lik = bayesnewton.likelihoods.Gaussian(variance=var_y) 63 | 64 | # kern = bayesnewton.kernels.Independent([sub1, sub2, sub3, mod1, mod2, mod3]) 65 | # lik = bayesnewton.likelihoods.AudioAmplitudeDemodulation(variance=0.3) 66 | 67 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train) 68 | # model = bayesnewton.models.SparseMarkovVariationalGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z) 69 | 70 | 71 | lr_adam = 0.1 72 | lr_newton = 1. 73 | iters = 100 74 | opt_hypers = objax.optimizer.Adam(model.vars()) 75 | energy = objax.GradValues(model.energy, model.vars()) 76 | 77 | 78 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 79 | def train_op(): 80 | batch = np.random.permutation(N)[:batch_size] 81 | model.inference(lr=lr_newton, batch_ind=batch) # perform inference and update variational params 82 | dE, E = energy(batch_ind=batch) # compute energy and its gradients w.r.t. hypers 83 | opt_hypers(lr_adam, dE) 84 | return E 85 | 86 | 87 | train_op = objax.Jit(train_op) 88 | 89 | t0 = time.time() 90 | for i in range(1, iters + 1): 91 | loss = train_op() 92 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 93 | t1 = time.time() 94 | print('optimisation time: %2.2f secs' % (t1-t0)) 95 | 96 | t0 = time.time() 97 | posterior_mean, posterior_var = model.predict(X=x_test) 98 | t1 = time.time() 99 | print('prediction time: %2.2f secs' % (t1-t0)) 100 | 101 | lb = posterior_mean - 1.96 * posterior_var ** 0.5 102 | ub = posterior_mean + 1.96 * posterior_var ** 0.5 103 | 104 | print('plotting ...') 105 | plt.figure(1, figsize=(13, 7)) 106 | plt.clf() 107 | plt.plot(x_test, y_test, 'r.', alpha=0.4, label='test observations', markersize=5) 108 | plt.plot(x_train, y_train, 'k.', label='training observations', markersize=5) 109 | # plt.plot(x_test[N:], y_test[N:], 'r.', alpha=0.4, label='test observations', markersize=5) 110 | plt.plot(x_test, posterior_mean, 'b-', label='posterior mean') 111 | plt.fill_between(x_test[..., 0], lb, ub, color='b', alpha=0.05, label='95% confidence') 112 | if hasattr(model, 'Z'): 113 | plt.plot(model.Z[:, 0], (np.min(lb)-0.1)*np.ones_like(model.Z[:, 0]), 'r^', markersize=4) 114 | plt.xlim([x_test[0], x_test[-1]]) 115 | # plt.ylim([-2, 2]) 116 | plt.legend(loc=0) 117 | plt.title('Sparse GP regression via Kalman smoothing.') 118 | plt.xlabel('time (milliseconds)') 119 | plt.show() 120 | -------------------------------------------------------------------------------- /demos/step.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import time 6 | 7 | 8 | def step_function(x_): 9 | # return np.maximum(np.sign(x_), 0.) 
- 0.5 10 | return np.sign(x_) 11 | 12 | 13 | print('generating some data ...') 14 | np.random.seed(0) 15 | # N = 50 16 | # x = np.linspace(-1, 1, num=N) 17 | # x = np.random.uniform(-1, 1, N) 18 | # y = step_function(x + np.random.randn(*x.shape) * 1e-1) # + np.random.randn(*x.shape) * 1e-2 19 | 20 | N = 40 21 | x = np.linspace(-1, 1, N)[:, None] 22 | # x = np.random.uniform(-1, 1, N)[:, None] 23 | f_step = lambda x_: -1 if x_ < 0 else 1. 24 | y = np.reshape([f_step(x_) for x_ in x], x.shape) + np.random.randn(*x.shape) * 1e-2 25 | x_plot = np.linspace(-2., 2, 300) # test inputs 26 | 27 | # num_low = 25 28 | # num_high = 25 29 | # gap = -.02 30 | # noise = 0.0001 31 | # x = np.vstack((np.linspace(-1, -gap/2.0, num_low)[:, np.newaxis], 32 | # np.linspace(gap/2.0, 1, num_high)[:, np.newaxis])) 33 | # y = np.vstack((-np.ones((num_low, 1)), np.ones((num_high, 1)))) 34 | 35 | # x = x[:, None] 36 | x_plot = x_plot[:, None] 37 | 38 | var_f = 1.0 # GP variance 39 | len_f = 0.1 # GP lengthscale 40 | var_y = 0.1 # observation noise 41 | 42 | kern = bayesnewton.kernels.Matern72(variance=var_f, lengthscale=len_f) 43 | lik = bayesnewton.likelihoods.Gaussian(variance=var_y) 44 | model = bayesnewton.models.VariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 45 | 46 | 47 | lr_adam = 0.1 48 | lr_newton = 1. 49 | iters = 100 50 | opt_hypers = objax.optimizer.Adam(model.vars()) 51 | energy = objax.GradValues(model.energy, model.vars()) 52 | 53 | 54 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 55 | def train_op(): 56 | model.inference(lr=lr_newton) # perform inference and update variational params 57 | dE, E = energy() # compute energy and its gradients w.r.t. hypers 58 | opt_hypers(lr_adam, dE) 59 | return E 60 | 61 | 62 | train_op = objax.Jit(train_op) 63 | 64 | t0 = time.time() 65 | for i in range(1, iters + 1): 66 | loss = train_op() 67 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 68 | t1 = time.time() 69 | print('optimisation time: %2.2f secs' % (t1-t0)) 70 | 71 | t0 = time.time() 72 | posterior_mean, posterior_var = model.predict_y(X=x_plot) 73 | t1 = time.time() 74 | print('prediction time: %2.2f secs' % (t1-t0)) 75 | lb = posterior_mean - 1.96 * posterior_var ** 0.5 76 | ub = posterior_mean + 1.96 * posterior_var ** 0.5 77 | 78 | print('plotting ...') 79 | plt.figure(1, figsize=(8, 4)) 80 | plt.clf() 81 | plt.plot(x, y, 'k.') # , label='training observations') 82 | plt.plot(x_plot, posterior_mean, 'b') # , label='posterior mean') 83 | plt.fill_between(x_plot[:, 0], lb, ub, color='b', alpha=0.05) # , label='2 std') 84 | plt.xlim([x_plot[0], x_plot[-1]]) 85 | plt.ylim([-0.5, 1.5]) 86 | # plt.legend(loc=2) 87 | plt.xticks([-2, -1, 0., 1., 2]) 88 | plt.yticks([-2, -1., 0., 1., 2]) 89 | plt.title('GP regression - Step Function') 90 | plt.xlabel('$X$') 91 | # plt.savefig('/Users/wilkinw1/postdoc/gp_course/lec8_deepgps/step_function.png') 92 | plt.show() 93 | -------------------------------------------------------------------------------- /demos/studentt.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | from jax import vmap 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import time 7 | 8 | 9 | def wiggly_time_series(x_): 10 | noise_var = 0.2 # true observation noise scale 11 | return (np.cos(0.04*x_+0.33*np.pi) * np.sin(0.2*x_) + 12 | np.sqrt(noise_var) * np.random.standard_t(3., x_.shape) + 13 | 0.0 * x_) 14 | 15 | 16 | np.random.seed(123) 17 | N = 100 18 | x = np.linspace(-17, 147, num=N) 
19 | y = wiggly_time_series(x) 20 | x_test = np.linspace(np.min(x)-10.0, np.max(x)+10.0, num=500) 21 | y_test = wiggly_time_series(x_test) 22 | x_plot = np.linspace(np.min(x)-20.0, np.max(x)+20.0, 200) 23 | 24 | var_f = 1.0 # GP variance 25 | len_f = 5.0 # GP lengthscale 26 | var_y = 0.2 # observation noise 27 | 28 | kern = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 29 | lik = bayesnewton.likelihoods.StudentsT(scale=0.5, df=3.) 30 | # lik = bayesnewton.likelihoods.Gaussian(variance=0.5) 31 | 32 | # model = bayesnewton.models.MarkovNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 33 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=x, Y=y) 34 | # model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x, Y=y, power=0.5) 35 | # -- Gauss-Newton --- 36 | # model = bayesnewton.models.MarkovGaussNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 37 | # model = bayesnewton.models.MarkovVariationalGaussNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 38 | # -- quasi-Newton --- 39 | # model = bayesnewton.models.MarkovQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 40 | # model = bayesnewton.models.MarkovVariationalQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y) 41 | # model = bayesnewton.models.MarkovExpectationPropagationQuasiNewtonGP(kernel=kern, likelihood=lik, X=x, Y=y, power=0.5) 42 | # --- Riemannian grads --- 43 | # model = bayesnewton.models.MarkovVariationalRiemannGP(kernel=kern, likelihood=lik, X=x, Y=y) 44 | 45 | lr_adam = 0.1 46 | lr_newton = 1. 47 | iters = 100 48 | opt_hypers = objax.optimizer.Adam(model.vars()) 49 | energy = objax.GradValues(model.energy, model.vars()) 50 | 51 | 52 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 53 | def train_op(): 54 | model.inference(lr=lr_newton) # perform inference and update variational params 55 | dE, E = energy() # compute energy and its gradients w.r.t. 
hypers 56 | opt_hypers(lr_adam, dE) 57 | return E 58 | 59 | 60 | train_op = objax.Jit(train_op) 61 | 62 | t0 = time.time() 63 | for i in range(1, iters + 1): 64 | loss = train_op() 65 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 66 | t1 = time.time() 67 | print('optimisation time: %2.2f secs' % (t1-t0)) 68 | 69 | t0 = time.time() 70 | posterior_mean, posterior_var = model.predict_y(X=x_plot) 71 | nlpd = model.negative_log_predictive_density(X=x_test, Y=y_test) 72 | t1 = time.time() 73 | print('prediction time: %2.2f secs' % (t1-t0)) 74 | print('nlpd: %2.3f' % nlpd) 75 | lb = posterior_mean - 1.96 * posterior_var ** 0.5 76 | ub = posterior_mean + 1.96 * posterior_var ** 0.5 77 | 78 | _, _, hessian = vmap(model.likelihood.log_likelihood_gradients)( # parallel 79 | model.Y, 80 | model.posterior_mean.value 81 | ) 82 | 83 | outliers = np.argwhere(np.squeeze(hessian > 0)) 84 | 85 | print('plotting ...') 86 | plt.figure(1, figsize=(12, 5)) 87 | plt.clf() 88 | plt.plot(x, y, 'k.', label='training observations') 89 | plt.plot(x_test, y_test, 'r.', alpha=0.4, label='test observations') 90 | plt.plot(x_plot, posterior_mean, 'b', label='posterior mean') 91 | plt.plot(x[outliers], y[outliers], 'g*', label='outliers') 92 | plt.fill_between(x_plot, lb, ub, color='b', alpha=0.05, label='95% confidence') 93 | plt.xlim([x_plot[0], x_plot[-1]]) 94 | if hasattr(model, 'Z'): 95 | plt.plot(model.Z.value[:, 0], -2 * np.ones_like(model.Z.value[:, 0]), 'b^', markersize=5) 96 | plt.legend() 97 | plt.title('Robust GP regression (Students\' t likelihood)') 98 | plt.xlabel('$X$') 99 | plt.show() 100 | -------------------------------------------------------------------------------- /experiments/README.md: -------------------------------------------------------------------------------- 1 | # Bayes-Newton Experiments 2 | 3 | The code here can be used to reproduce the experiments for the following article: 4 | * W.J. Wilkinson, S. Särkkä, and A. Solin (2021): **Bayes-Newton Methods for Approximate Bayesian Inference with PSD Guarantees**. [*arXiv preprint arXiv:2111.01721*](https://arxiv.org/abs/2111.01721). 5 | 6 | The paper experiments can be found in the `motorcycle`, `product` and `gprn` folders respectively. Each folder contains a main Python script, plus bash scripts to produce the results for each inference method class (`bn-newton.sh`, `bn-vi.sh`, `bn-ep.sh`, `bn-pl.sh`). After these have finished running, the `results_bn.py` script can then be run to produce the plots. 7 | -------------------------------------------------------------------------------- /experiments/air_quality/air_quality_bayesnewton.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import pickle 5 | import time 6 | import sys 7 | from scipy.cluster.vq import kmeans2 8 | from jax.lib import xla_bridge 9 | # import os 10 | 11 | # Limit ourselves to single-threaded jax/xla operations to avoid thrashing. See 12 | # https://github.com/google/jax/issues/743. 
13 | # os.environ["XLA_FLAGS"] = ("--xla_cpu_multi_thread_eigen=false " 14 | # "intra_op_parallelism_threads=1") 15 | 16 | 17 | if len(sys.argv) > 1: 18 | ind = int(sys.argv[1]) 19 | else: 20 | ind = 0 21 | 22 | 23 | if len(sys.argv) > 2: 24 | mean_field = bool(int(sys.argv[2])) 25 | else: 26 | mean_field = False 27 | 28 | 29 | if len(sys.argv) > 3: 30 | parallel = bool(int(sys.argv[3])) 31 | else: 32 | parallel = None 33 | 34 | # ===========================Load Data=========================== 35 | train_data = pickle.load(open("data/train_data_" + str(ind) + ".pickle", "rb")) 36 | pred_data = pickle.load(open("data/pred_data_" + str(ind) + ".pickle", "rb")) 37 | 38 | X = train_data['X'] 39 | Y = train_data['Y'] 40 | 41 | X_t = pred_data['test']['X'] 42 | Y_t = pred_data['test']['Y'] 43 | 44 | print('X: ', X.shape) 45 | 46 | num_z_space = 30 47 | 48 | grid = True 49 | print(Y.shape) 50 | print("num data points =", Y.shape[0]) 51 | 52 | if grid: 53 | # the gridded approach: 54 | t, R, Y = bayesnewton.utils.create_spatiotemporal_grid(X, Y) 55 | t_t, R_t, Y_t = bayesnewton.utils.create_spatiotemporal_grid(X_t, Y_t) 56 | else: 57 | # the sequential approach: 58 | t = X[:, :1] 59 | R = X[:, 1:] 60 | t_t = X_t[:, :1] 61 | R_t = X_t[:, 1:] 62 | Nt = t.shape[0] 63 | print("num time steps =", Nt) 64 | Nr = R.shape[1] 65 | print("num spatial points =", Nr) 66 | N = Y.shape[0] * Y.shape[1] * Y.shape[2] 67 | print("num data points =", N) 68 | 69 | var_y = 5. 70 | var_f = 1. 71 | len_time = 0.001 72 | len_space = 0.2 73 | 74 | sparse = True 75 | opt_z = True # will be set to False if sparse=False 76 | 77 | if sparse: 78 | z = kmeans2(R[0, ...], num_z_space, minit="points")[0] 79 | else: 80 | z = R[0, ...] 81 | 82 | # kern = bayesnewton.kernels.SpatioTemporalMatern52(variance=var_f, 83 | # lengthscale_time=len_time, 84 | # lengthscale_space=[len_space, len_space], 85 | # z=z, 86 | # sparse=sparse, 87 | # opt_z=opt_z, 88 | # conditional='Full') 89 | 90 | kern_time = bayesnewton.kernels.Matern32(variance=var_f, lengthscale=len_time) 91 | kern_space0 = bayesnewton.kernels.Matern32(variance=var_f, lengthscale=len_space) 92 | kern_space1 = bayesnewton.kernels.Matern32(variance=var_f, lengthscale=len_space) 93 | kern_space = bayesnewton.kernels.Separable([kern_space0, kern_space1]) 94 | 95 | kern = bayesnewton.kernels.SpatioTemporalKernel(temporal_kernel=kern_time, 96 | spatial_kernel=kern_space, 97 | z=z, 98 | sparse=sparse, 99 | opt_z=opt_z, 100 | conditional='Full') 101 | 102 | lik = bayesnewton.likelihoods.Gaussian(variance=var_y) 103 | 104 | if mean_field: 105 | model = bayesnewton.models.MarkovVariationalMeanFieldGP(kernel=kern, likelihood=lik, X=t, R=R, Y=Y, parallel=parallel) 106 | else: 107 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=t, R=R, Y=Y, parallel=parallel) 108 | 109 | lr_adam = 0.01 110 | lr_newton = 1. 111 | iters = 300 112 | opt_hypers = objax.optimizer.Adam(model.vars()) 113 | energy = objax.GradValues(model.energy, model.vars()) 114 | 115 | 116 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 117 | def train_op(): 118 | model.inference(lr=lr_newton) # perform inference and update variational params 119 | dE, E = energy() # compute energy and its gradients w.r.t. 
hypers 120 | opt_hypers(lr_adam, dE) 121 | return E 122 | 123 | 124 | train_op = objax.Jit(train_op) 125 | 126 | t0 = time.time() 127 | for i in range(1, iters + 1): 128 | loss = train_op() 129 | print('iter %2d: energy: %1.4f' % (i, loss[0])) 130 | t1 = time.time() 131 | # print('optimisation time: %2.2f secs' % (t1-t0)) 132 | avg_time_taken = (t1-t0)/iters 133 | print('average iter time: %2.2f secs' % avg_time_taken) 134 | 135 | posterior_mean, posterior_var = model.predict_y(X=t_t, R=R_t) 136 | nlpd = model.negative_log_predictive_density(X=t_t, R=R_t, Y=Y_t) 137 | rmse = np.sqrt(np.nanmean((np.squeeze(Y_t) - np.squeeze(posterior_mean))**2)) 138 | print('nlpd: %2.3f' % nlpd) 139 | print('rmse: %2.3f' % rmse) 140 | 141 | cpugpu = xla_bridge.get_backend().platform 142 | 143 | with open("output/" + str(int(mean_field)) + "_" + str(ind) + "_" + str(int(parallel)) + "_" + cpugpu + "_time.txt", "wb") as fp: 144 | pickle.dump(avg_time_taken, fp) 145 | with open("output/" + str(int(mean_field)) + "_" + str(ind) + "_" + str(int(parallel)) + "_" + cpugpu + "_nlpd.txt", "wb") as fp: 146 | pickle.dump(nlpd, fp) 147 | with open("output/" + str(int(mean_field)) + "_" + str(ind) + "_" + str(int(parallel)) + "_" + cpugpu + "_rmse.txt", "wb") as fp: 148 | pickle.dump(rmse, fp) 149 | -------------------------------------------------------------------------------- /experiments/air_quality/air_quality_gpflow.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import gpflow 3 | from gpflow.optimizers import NaturalGradient 4 | from gpflow.utilities import set_trainable, leaf_components 5 | import numpy as np 6 | import scipy as sp 7 | import time 8 | from scipy.cluster.vq import kmeans2 9 | from tqdm import tqdm 10 | import pickle 11 | import sys 12 | 13 | print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU'))) 14 | 15 | 16 | if len(sys.argv) > 1: 17 | ind = int(sys.argv[1]) 18 | else: 19 | ind = 0 20 | 21 | 22 | if len(sys.argv) > 2: 23 | num_z_ind = int(sys.argv[2]) 24 | else: 25 | num_z_ind = 0 26 | 27 | 28 | # ===========================Load Data=========================== 29 | train_data = pickle.load(open("data/train_data_" + str(ind) + ".pickle", "rb")) 30 | pred_data = pickle.load(open("data/pred_data_" + str(ind) + ".pickle", "rb")) 31 | 32 | X = train_data['X'] 33 | Y = train_data['Y'] 34 | 35 | X_t = pred_data['test']['X'] 36 | Y_t = pred_data['test']['Y'] 37 | 38 | non_nan_idx = np.logical_not(np.isnan(np.squeeze(Y))) 39 | X = X[non_nan_idx, :] 40 | Y = Y[non_nan_idx, :] 41 | 42 | non_nan_idx_t = np.logical_not(np.isnan(np.squeeze(Y_t))) 43 | X_t = X_t[non_nan_idx_t, :] 44 | Y_t = Y_t[non_nan_idx_t, :] 45 | 46 | print('X: ', X.shape) 47 | 48 | kernel_lengthscales = [0.01, 0.2, 0.2] 49 | kernel_variances = 1.0 50 | likelihood_noise = 5.0 51 | train_z = True 52 | epochs = 300 53 | step_size = 0.01 54 | # jitter = 1e-4 55 | natgrad_step_size = 1.0 56 | # enforce_psd = False 57 | # minibatch_size = [100, 500, 100] 58 | minibatch_size = [400, 600, 800, 2000, 3000] 59 | # num_z = [1000, 1500, 2000] 60 | num_z = [1500, 2000, 2500, 5000, 8000] 61 | 62 | 63 | def get_gpflow_params(m): 64 | params = {} 65 | leafs = leaf_components(m) 66 | for key in leafs.keys(): 67 | tf_vars = leafs[key].trainable_variables 68 | 69 | # check if variable exists 70 | if len(tf_vars) == 1: 71 | tf_var = tf_vars[0] 72 | 73 | params[key] = tf_var.numpy() 74 | 75 | return params 76 | 77 | 78 | N, D = X.shape 79 | 80 | print('num_z: ', 
num_z[num_z_ind]) 81 | Z_all = kmeans2(X, num_z[num_z_ind], minit="points")[0] 82 | 83 | kernel = gpflow.kernels.Matern32 84 | 85 | k = None 86 | for d in range(D): 87 | # print(d, kernel_lengthscales) 88 | if type(kernel_lengthscales) is list: 89 | k_ls = kernel_lengthscales[d] 90 | else: 91 | k_ls = kernel_lengthscales 92 | 93 | if type(kernel_variances) is list: 94 | k_var = kernel_variances[d] 95 | else: 96 | k_var = kernel_variances 97 | 98 | k_d = kernel( 99 | lengthscales=[k_ls], 100 | variance=k_var, 101 | active_dims=[d] 102 | ) 103 | 104 | # print(k_d) 105 | if k is None: 106 | k = k_d 107 | else: 108 | k = k * k_d 109 | 110 | init_as_cvi = True 111 | 112 | if init_as_cvi: 113 | M = Z_all.shape[0] 114 | jit = 1e-6 115 | 116 | Kzz = k(Z_all, Z_all) 117 | 118 | def inv(K): 119 | K_chol = sp.linalg.cholesky(K + jit * np.eye(M), lower=True) 120 | return sp.linalg.cho_solve((K_chol, True), np.eye(K.shape[0])) 121 | 122 | # manual q(u) decompositin 123 | nat1 = np.zeros([M, 1]) 124 | nat2 = -0.5 * inv(Kzz) 125 | 126 | lam1 = 1e-5 * np.ones([M, 1]) 127 | lam2 = -0.5 * np.eye(M) 128 | 129 | S = inv(-2 * (nat2 + lam2)) 130 | m = S @ (lam1 + nat1) 131 | 132 | S_chol = sp.linalg.cholesky(S + jit * np.eye(M), lower=True) 133 | S_flattened = S_chol[np.tril_indices(M, 0)] 134 | 135 | q_mu = m 136 | q_sqrt = np.array([S_chol]) 137 | else: 138 | q_mu = 1e-5 * np.ones([Z_all.shape[0], 1]) # match gpjax init 139 | q_sqrt = None 140 | 141 | lik = gpflow.likelihoods.Gaussian(variance=likelihood_noise) 142 | 143 | data = (X, Y) 144 | 145 | m = gpflow.models.SVGP( 146 | inducing_variable=Z_all, 147 | whiten=True, 148 | kernel=k, 149 | mean_function=None, 150 | likelihood=lik, 151 | q_mu=q_mu, 152 | q_sqrt=q_sqrt 153 | ) 154 | 155 | set_trainable(m.inducing_variable, True) 156 | 157 | # ===========================Train=========================== 158 | 159 | if minibatch_size[num_z_ind] is None or minibatch_size[num_z_ind] is 'none': 160 | training_loss = m.training_loss_closure( 161 | data 162 | ) 163 | else: 164 | print(N, minibatch_size[num_z_ind]) 165 | train_dataset = (tf.data.Dataset.from_tensor_slices(data).repeat().shuffle(N).batch(minibatch_size[num_z_ind])) 166 | train_iter = iter(train_dataset) 167 | training_loss = m.training_loss_closure(train_iter) 168 | 169 | 170 | # make it so adam does not train these 171 | set_trainable(m.q_mu, False) 172 | set_trainable(m.q_sqrt, False) 173 | 174 | natgrad_opt = NaturalGradient(gamma=natgrad_step_size) 175 | variational_params = [(m.q_mu, m.q_sqrt)] 176 | 177 | optimizer = tf.optimizers.Adam 178 | 179 | adam_opt_for_vgp = optimizer(step_size) 180 | 181 | loss_arr = [] 182 | 183 | bar = tqdm(total=epochs) 184 | 185 | # MINIBATCHING TRAINING 186 | t0 = time.time() 187 | for i in range(epochs): 188 | # NAT GRAD STEP 189 | natgrad_opt.minimize(training_loss, var_list=variational_params) 190 | 191 | # elbo = -m.elbo(data).numpy() 192 | 193 | # loss_arr.append(elbo) 194 | 195 | # ADAM STEP 196 | adam_opt_for_vgp.minimize(training_loss, var_list=m.trainable_variables) 197 | 198 | bar.update(1) 199 | t1 = time.time() 200 | avg_time_taken = (t1-t0)/epochs 201 | print('average iter time: %2.2f secs' % avg_time_taken) 202 | 203 | 204 | def _prediction_fn(X_, Y_): 205 | mu, var = m.predict_y(X_) 206 | log_pred_density = m.predict_log_density((X_, Y_)) 207 | return mu.numpy(), var.numpy(), log_pred_density.numpy() 208 | 209 | 210 | print('predicting...') 211 | posterior_mean, posterior_var, lpd = _prediction_fn(X_t, Y_t) 212 | # print(lpd.shape) 213 | # print(lpd) 
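# note: predict_log_density returns per-point log predictive densities, so the NLPD reported below is their negated mean;
# the RMSE uses np.nanmean as a safeguard, although NaN test targets were already filtered out above.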
214 | nlpd = np.mean(-lpd) 215 | rmse = np.sqrt(np.nanmean((np.squeeze(Y_t) - np.squeeze(posterior_mean))**2)) 216 | print('nlpd: %2.3f' % nlpd) 217 | print('rmse: %2.3f' % rmse) 218 | 219 | # prediction_fn = lambda X: utils.batch_predict(X, _prediction_fn, verbose=True) 220 | 221 | if len(tf.config.list_physical_devices('GPU')) > 0: 222 | cpugpu = 'gpu' 223 | else: 224 | cpugpu = 'cpu' 225 | 226 | with open("output/gpflow_" + str(ind) + "_" + str(num_z_ind) + "_" + cpugpu + "_time.txt", "wb") as fp: 227 | pickle.dump(avg_time_taken, fp) 228 | with open("output/gpflow_" + str(ind) + "_" + str(num_z_ind) + "_" + cpugpu + "_nlpd.txt", "wb") as fp: 229 | pickle.dump(nlpd, fp) 230 | with open("output/gpflow_" + str(ind) + "_" + str(num_z_ind) + "_" + cpugpu + "_rmse.txt", "wb") as fp: 231 | pickle.dump(rmse, fp) 232 | -------------------------------------------------------------------------------- /experiments/air_quality/air_quality_ski.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gpytorch 3 | from gpytorch.means import ConstantMean 4 | from gpytorch.kernels import MaternKernel, ScaleKernel, GridInterpolationKernel 5 | from gpytorch.distributions import MultivariateNormal 6 | import numpy as np 7 | from loguru import logger 8 | import pickle 9 | from timeit import default_timer as timer 10 | import matplotlib.pyplot as plt 11 | import sys 12 | 13 | 14 | if len(sys.argv) > 1: 15 | ind = int(sys.argv[1]) 16 | plot_final = False 17 | else: 18 | ind = 0 19 | plot_final = True 20 | 21 | 22 | inducing_type = 'all_time' # 'default' 23 | num_z = 30 24 | likelihood_noise = 5. 25 | kernel_lengthscales = [0.001, 0.2, 0.2] 26 | step_size = 0.01 27 | iters = 300 28 | init_params = {} 29 | optimizer = torch.optim.Adam 30 | 31 | cpugpu = str(0) 32 | 33 | 34 | # ===========================Load Data=========================== 35 | train_data = pickle.load(open("data/train_data_" + str(ind) + ".pickle", "rb")) 36 | pred_data = pickle.load(open("data/pred_data_" + str(ind) + ".pickle", "rb")) 37 | 38 | X = train_data['X'] 39 | Y = np.squeeze(train_data['Y']) 40 | 41 | X_t = pred_data['test']['X'] 42 | Y_t = pred_data['test']['Y'] 43 | 44 | print('X: ', X.shape) 45 | 46 | non_nan_idx = np.squeeze(~np.isnan(Y)) 47 | 48 | X = torch.tensor(X[non_nan_idx]).float() 49 | Y = torch.tensor(Y[non_nan_idx]).float() 50 | 51 | D = X.shape[1] 52 | Nt = 2159 # number of time steps 53 | 54 | non_nan_idx_t = np.squeeze(~np.isnan(Y_t)) 55 | 56 | X_t = torch.tensor(X_t[non_nan_idx_t]).float() 57 | Y_t = np.squeeze(Y_t[non_nan_idx_t]) 58 | 59 | 60 | class GPRegressionModelSKI(gpytorch.models.ExactGP): 61 | 62 | def __init__(self, train_x, train_y, kernel, likelihood): 63 | super(GPRegressionModelSKI, self).__init__(train_x, train_y, likelihood) 64 | self.mean_module = ConstantMean() 65 | 66 | self.base_covar_module = kernel 67 | self.base_covar_module.lengthscale = torch.tensor(kernel_lengthscales) 68 | logger.info(f'kernel_lengthscales : {kernel_lengthscales}') 69 | 70 | if inducing_type == 'default': 71 | grid_size = gpytorch.utils.grid.choose_grid_size(train_x) 72 | init_params['grid_size'] = grid_size 73 | 74 | elif inducing_type == 'all_time': 75 | grid_size = np.array([Nt, np.ceil(np.sqrt(num_z)), np.ceil(np.sqrt(num_z))]).astype(int) 76 | init_params['grid_size'] = grid_size 77 | 78 | logger.info(f'grid_size : {grid_size}') 79 | 80 | self.covar_module = ScaleKernel( 81 | GridInterpolationKernel(self.base_covar_module, grid_size, num_dims=D) 82 | ) 83 | 84 | 
def forward(self, x): 85 | mean_x = self.mean_module(x) 86 | covar_x = self.covar_module(x) 87 | return MultivariateNormal(mean_x, covar_x) 88 | 89 | 90 | kern = MaternKernel(ard_num_dims=3, nu=1.5) 91 | # kern = MaternKernel(ard_num_dims=1, nu=1.5) 92 | lik = gpytorch.likelihoods.GaussianLikelihood() 93 | lik.noise = torch.tensor(likelihood_noise) 94 | 95 | model = GPRegressionModelSKI(X, Y, kern, lik) # SKI model 96 | 97 | # train 98 | model.train() 99 | lik.train() 100 | 101 | # Use the adam optimizer 102 | optimizer = optimizer(model.parameters(), lr=step_size) 103 | 104 | # "Loss" for GPs - the marginal log likelihood 105 | mll = gpytorch.mlls.ExactMarginalLogLikelihood(lik, model) 106 | 107 | loss_arr = [] 108 | 109 | 110 | def train(): 111 | 112 | for i in range(iters): 113 | # Zero backprop gradients 114 | optimizer.zero_grad() 115 | 116 | with gpytorch.settings.use_toeplitz(False), gpytorch.settings.max_root_decomposition_size(30): 117 | 118 | # Get output from model 119 | output = model(X) 120 | 121 | # Calc loss and backprop derivatives 122 | loss = -mll(output, torch.squeeze(Y)) 123 | loss.backward() 124 | print('Iter %d/%d - Loss: %.3f' % (i + 1, iters, loss.item())) 125 | 126 | loss_arr.append(loss.detach().numpy()) 127 | 128 | optimizer.step() 129 | torch.cuda.empty_cache() 130 | 131 | 132 | start = timer() 133 | 134 | with gpytorch.settings.use_toeplitz(True): 135 | train() 136 | 137 | end = timer() 138 | 139 | training_time = end - start 140 | 141 | 142 | # ===========================Predict=========================== 143 | 144 | model.eval() 145 | lik.eval() 146 | 147 | print('noise var:', model.likelihood.noise.detach().numpy()) 148 | 149 | logger.info('Predicting') 150 | 151 | 152 | with gpytorch.settings.max_preconditioner_size(10), torch.no_grad(): 153 | with gpytorch.settings.use_toeplitz(False), gpytorch.settings.max_root_decomposition_size(30), gpytorch.settings.fast_pred_var(): 154 | preds = model(X_t) 155 | 156 | 157 | def negative_log_predictive_density(y, post_mean, post_cov, lik_cov): 158 | # logZₙ = log ∫ 𝓝(yₙ|fₙ,σ²) 𝓝(fₙ|mₙ,vₙ) dfₙ = log 𝓝(yₙ|mₙ,σ²+vₙ) 159 | cov = lik_cov + post_cov 160 | lZ = np.squeeze(-0.5 * np.log(2 * np.pi * cov) - 0.5 * (y - post_mean) ** 2 / cov) 161 | return -lZ 162 | 163 | 164 | posterior_mean, posterior_var = preds.mean.detach().numpy(), preds.variance.detach().numpy() 165 | 166 | noise_var = model.likelihood.noise.detach().numpy() 167 | print('noise var:', noise_var) 168 | 169 | nlpd = np.mean(negative_log_predictive_density(y=Y_t, 170 | post_mean=posterior_mean, 171 | post_cov=posterior_var, 172 | lik_cov=noise_var)) 173 | rmse = np.sqrt(np.nanmean((np.squeeze(Y_t) - np.squeeze(posterior_mean))**2)) 174 | print('nlpd: %2.3f' % nlpd) 175 | print('rmse: %2.3f' % rmse) 176 | 177 | avg_time_taken = training_time / iters 178 | print('avg iter time:', avg_time_taken) 179 | 180 | with open("output/ski_" + str(ind) + "_" + cpugpu + "_time.txt", "wb") as fp: 181 | pickle.dump(avg_time_taken, fp) 182 | with open("output/ski_" + str(ind) + "_" + cpugpu + "_nlpd.txt", "wb") as fp: 183 | pickle.dump(nlpd, fp) 184 | with open("output/ski_" + str(ind) + "_" + cpugpu + "_rmse.txt", "wb") as fp: 185 | pickle.dump(rmse, fp) 186 | 187 | if plot_final: 188 | plt.plot(posterior_mean) 189 | plt.show() 190 | -------------------------------------------------------------------------------- /experiments/aircraft/aircraft.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import bayesnewton 3 | import 
objax 4 | import numpy as np 5 | import pandas as pd 6 | import matplotlib.pyplot as plt 7 | import time 8 | from datetime import date 9 | import pickle 10 | 11 | plot_final = False 12 | plot_intermediate = False 13 | 14 | print('loading data ...') 15 | aircraft_accidents = pd.read_csv('aircraft_accidents.txt', sep='-', header=None).values 16 | 17 | num_data = aircraft_accidents.shape[0] 18 | xx = np.zeros([num_data, 1]) 19 | for j in range(num_data): 20 | xx[j] = date.toordinal(date(aircraft_accidents[j, 0], aircraft_accidents[j, 1], aircraft_accidents[j, 2])) + 366 21 | 22 | BIN_WIDTH = 1 23 | # Discretize the data 24 | x_min = np.floor(np.min(xx)) 25 | x_max = np.ceil(np.max(xx)) 26 | x_max_int = x_max-np.mod(x_max-x_min, BIN_WIDTH) 27 | x = np.linspace(x_min, x_max_int, num=int((x_max_int-x_min)/BIN_WIDTH+1)) 28 | x = np.concatenate([np.min(x)-np.linspace(61, 1, num=61), x]) # pad with zeros to reduce strange edge effects 29 | y, _ = np.histogram(xx, np.concatenate([[-1e10], x[1:]-np.diff(x)/2, [1e10]])) 30 | N = y.shape[0] 31 | print('N =', N) 32 | 33 | scale = 1 # scale inputs for stability 34 | x = scale * x 35 | 36 | np.random.seed(123) 37 | ind_shuffled = np.random.permutation(N) 38 | ind_split = np.stack(np.split(ind_shuffled, 10)) # 10 random batches of data indices 39 | 40 | np.random.seed(123) 41 | # meanval = np.log(len(disaster_timings)/num_time_bins) # TODO: incorporate mean 42 | 43 | if len(sys.argv) > 1: 44 | method = int(sys.argv[1]) 45 | fold = int(sys.argv[2]) 46 | else: 47 | method = 2 48 | fold = 0 49 | 50 | print('method number', method) 51 | print('batch number', fold) 52 | 53 | # Get training and test indices 54 | ind_test = ind_split[fold] # np.sort(ind_shuffled[:N//10]) 55 | ind_train = np.concatenate(ind_split[np.arange(10) != fold]) 56 | x_train = x[ind_train] # 90/10 train/test split 57 | x_test = x[ind_test] 58 | y_train = y[ind_train] 59 | y_test = y[ind_test] 60 | # N_batch = 2000 61 | M = 4000 62 | # z = np.linspace(701050, 737050, M) 63 | z = np.linspace(x[0], x[-1], M) 64 | 65 | if len(sys.argv) > 3: 66 | baseline = int(sys.argv[3]) 67 | else: 68 | baseline = 0 69 | 70 | kern_1 = bayesnewton.kernels.Matern52(variance=2., lengthscale=scale*5.5e4) 71 | kern_2 = bayesnewton.kernels.QuasiPeriodicMatern12(variance=1., 72 | lengthscale_periodic=scale*2., 73 | period=scale*365., 74 | lengthscale_matern=scale*1.5e4) 75 | kern_3 = bayesnewton.kernels.QuasiPeriodicMatern12(variance=1., 76 | lengthscale_periodic=scale*2., 77 | period=scale*7., 78 | lengthscale_matern=scale*30*365.) 79 | 80 | kern = bayesnewton.kernels.Sum([kern_1, kern_2, kern_3]) 81 | lik = bayesnewton.likelihoods.Poisson(binsize=scale*BIN_WIDTH) 82 | 83 | if baseline: 84 | if method == 0: 85 | model = bayesnewton.models.MarkovTaylorGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train) 86 | elif method == 1: 87 | model = bayesnewton.models.MarkovPosteriorLinearisationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train) 88 | elif method == 2: 89 | model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, power=1.) 
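# method index convention (as in audio_timings.py): 0 = Taylor, 1 = posterior linearisation, 2-4 = power EP (power 1.0, 0.5, 0.01), 5 = variational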
90 | elif method == 3: 91 | model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, power=0.5) 92 | elif method == 4: 93 | model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, power=0.01) 94 | elif method == 5: 95 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train) 96 | else: 97 | if method == 0: 98 | model = bayesnewton.models.SparseMarkovTaylorGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z) 99 | elif method == 1: 100 | model = bayesnewton.models.SparseMarkovPosteriorLinearisationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z) 101 | elif method == 2: 102 | model = bayesnewton.models.SparseMarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, power=1.) 103 | elif method == 3: 104 | model = bayesnewton.models.SparseMarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, power=0.5) 105 | elif method == 4: 106 | model = bayesnewton.models.SparseMarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, power=0.01) 107 | elif method == 5: 108 | model = bayesnewton.models.SparseMarkovVariationalGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z) 109 | 110 | 111 | lr_adam = 0.1 112 | lr_newton = 0.1 113 | iters = 200 114 | opt_hypers = objax.optimizer.Adam(model.vars()) 115 | energy = objax.GradValues(model.energy, model.vars()) 116 | 117 | 118 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 119 | def train_op(): 120 | model.inference(lr=lr_newton) # perform inference and update variational params 121 | dE, E = energy() # compute energy and its gradients w.r.t. hypers 122 | opt_hypers(lr_adam, dE) 123 | return E 124 | 125 | 126 | train_op = objax.Jit(train_op) 127 | 128 | t0 = time.time() 129 | for i in range(1, iters + 1): 130 | loss = train_op() 131 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 132 | t1 = time.time() 133 | print('optimisation time: %2.2f secs' % (t1-t0)) 134 | 135 | # calculate posterior predictive distribution via filtering and smoothing at train & test locations: 136 | print('calculating the posterior predictive distribution ...') 137 | t0 = time.time() 138 | nlpd = model.negative_log_predictive_density(X=x_test, Y=y_test) 139 | t1 = time.time() 140 | print('NLPD: %1.2f' % nlpd) 141 | 142 | if baseline: 143 | with open("output/baseline_" + str(method) + "_" + str(fold) + "_nlpd.txt", "wb") as fp: 144 | pickle.dump(nlpd, fp) 145 | else: 146 | with open("output/" + str(method) + "_" + str(fold) + "_nlpd.txt", "wb") as fp: 147 | pickle.dump(nlpd, fp) 148 | 149 | # with open("output/" + str(method) + "_" + str(fold) + "_nlpd.txt", "rb") as fp: 150 | # nlpd_show = pickle.load(fp) 151 | # print(nlpd_show) 152 | 153 | # plt.figure(1) 154 | # plt.plot(t_test, mu, 'b-') 155 | # plt.plot(z, inducing_mean[..., 0], 'b.', label='inducing mean', markersize=8) 156 | # plt.show() 157 | -------------------------------------------------------------------------------- /experiments/audio/audio_timings.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import bayesnewton 3 | import objax 4 | from bayesnewton.cubature import Unscented 5 | import numpy as np 6 | import time 7 | from scipy.io import loadmat 8 | from jax.lib import xla_bridge 9 | import pickle 10 | 11 | print('loading data ...') 12 | y = loadmat('speech_female')['y'] 13 | fs = 44100 # 
sampling rate (Hz) 14 | scale = 1000 # convert to milliseconds 15 | 16 | normaliser = 0.5 * np.sqrt(np.var(y)) 17 | yTrain = y / normaliser # rescale the data 18 | 19 | N = y.shape[0] 20 | x = np.linspace(0., N, num=N) / fs * scale # arbitrary evenly spaced inputs inputs 21 | # batch_size = 20000 22 | M = 3000 23 | z = np.linspace(x[0], x[-1], num=M) 24 | 25 | np.random.seed(123) 26 | # 10-fold cross-validation setup 27 | ind_shuffled = np.random.permutation(N) 28 | ind_split = np.stack(np.split(ind_shuffled, 10)) # 10 random batches of data indices 29 | 30 | if len(sys.argv) > 1: 31 | method = int(sys.argv[1]) 32 | # fold = int(sys.argv[2]) 33 | # plot_final = False 34 | # save_result = True 35 | else: 36 | method = 2 37 | # fold = 0 38 | # plot_final = True 39 | # save_result = False 40 | 41 | # if len(sys.argv) > 2: 42 | # baseline = bool(int(sys.argv[2])) 43 | # else: 44 | # baseline = True 45 | baseline = True 46 | 47 | if len(sys.argv) > 2: 48 | parallel = bool(int(sys.argv[2])) 49 | else: 50 | parallel = None 51 | 52 | if len(sys.argv) > 3: 53 | num_components = int(sys.argv[3]) 54 | else: 55 | num_components = 3 56 | 57 | time_steps = [5000, 10000, 15000, 20000] 58 | if len(sys.argv) > 4: 59 | num_time_steps_ind = int(sys.argv[4]) 60 | else: 61 | num_time_steps_ind = 3 62 | 63 | num_time_steps = time_steps[num_time_steps_ind] 64 | 65 | # if len(sys.argv) > 6: 66 | # iters = int(sys.argv[6]) 67 | # else: 68 | iters = 11 69 | 70 | print('method number:', method) 71 | # print('batch number:', fold) 72 | # print('baseline:', baseline) 73 | print('parallel:', parallel) 74 | print('num components:', num_components) 75 | print('num time steps:', num_time_steps) 76 | # print('num iterations:', iters) 77 | 78 | x_train = x[:num_time_steps] 79 | y_train = y[:num_time_steps] 80 | 81 | fundamental_freq = 220 # Hz 82 | radial_freq = 2 * np.pi * fundamental_freq / scale # radial freq = 2pi * f / scale 83 | 84 | subband_kernel = bayesnewton.kernels.SubbandMatern12 85 | modulator_kernel = bayesnewton.kernels.Matern52 86 | subband_frequencies = radial_freq * (np.arange(num_components) + 1) 87 | subband_lengthscales = 75. * np.ones(num_components) 88 | modulator_lengthscales = 10. 
* np.ones(num_components) 89 | modulator_variances = 0.5 * np.ones(num_components) 90 | 91 | kern = bayesnewton.kernels.SpectroTemporal( 92 | subband_lengthscales=subband_lengthscales, 93 | subband_frequencies=subband_frequencies, 94 | modulator_lengthscales=modulator_lengthscales, 95 | modulator_variances=modulator_variances, 96 | subband_kernel=subband_kernel, 97 | modulator_kernel=modulator_kernel 98 | ) 99 | 100 | lik = bayesnewton.likelihoods.AudioAmplitudeDemodulation(num_components=num_components, variance=0.3) 101 | 102 | 103 | if method == 0: 104 | inf = bayesnewton.inference.Taylor 105 | elif method == 1: 106 | inf = bayesnewton.inference.PosteriorLinearisation 107 | elif method in [2, 3, 4]: 108 | inf = bayesnewton.inference.ExpectationPropagation 109 | elif method == 5: 110 | inf = bayesnewton.inference.VariationalInference 111 | 112 | if baseline: 113 | mod = bayesnewton.basemodels.MarkovGP 114 | Mod = bayesnewton.build_model(mod, inf) 115 | model = Mod(kernel=kern, likelihood=lik, X=x_train, Y=y_train, parallel=parallel) 116 | else: 117 | mod = bayesnewton.basemodels.SparseMarkovGaussianProcess 118 | Mod = bayesnewton.build_model(mod, inf) 119 | model = Mod(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, parallel=parallel) 120 | 121 | unscented_transform = Unscented(dim=num_components) # 5th-order unscented transform 122 | 123 | if method == 2: 124 | inf_args = {"power": 1., "cubature": unscented_transform} 125 | elif method == 3: 126 | inf_args = {"power": 0.5, "cubature": unscented_transform} 127 | elif method == 4: 128 | inf_args = {"power": 0.01, "cubature": unscented_transform} 129 | else: 130 | inf_args = {"cubature": unscented_transform} 131 | 132 | 133 | lr_adam = 0.05 134 | lr_newton = 0.05 135 | opt = objax.optimizer.Adam(model.vars()) 136 | energy = objax.GradValues(model.energy, model.vars()) 137 | 138 | 139 | @objax.Function.with_vars(model.vars() + opt.vars()) 140 | def train_op(): 141 | model.inference(lr=lr_newton, **inf_args) # perform inference and update variational params 142 | dE, E = energy(**inf_args) # compute energy and its gradients w.r.t. hypers 143 | opt(lr_adam, dE) 144 | return E 145 | 146 | 147 | train_op = objax.Jit(train_op) 148 | 149 | 150 | t0 = time.time() 151 | for i in range(1, iters + 1): 152 | if i == 2: 153 | t2 = time.time() 154 | loss = train_op() 155 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 156 | t1 = time.time() 157 | t3 = time.time() 158 | print('optimisation time: %2.2f secs' % (t1-t0)) 159 | avg_time_taken = (t3-t2)/(iters-1) 160 | print('per-iteration time (excl. 
compile): %2.2f secs' % avg_time_taken) 161 | 162 | cpugpu = xla_bridge.get_backend().platform 163 | 164 | with open("output/" + str(method) + "_" + str(num_time_steps_ind) + "_" + str(num_components) + "_" 165 | + str(int(parallel)) + "_" + cpugpu + ".txt", "wb") as fp: 166 | pickle.dump(avg_time_taken, fp) 167 | -------------------------------------------------------------------------------- /experiments/audio/speech_female.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaltoML/BayesNewton/f72ae9a6ca69f9cce8e62135c9f78dda4825b4df/experiments/audio/speech_female.mat -------------------------------------------------------------------------------- /experiments/banana/banana.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import bayesnewton 3 | import objax 4 | import numpy as np 5 | import time 6 | import pickle 7 | 8 | print('loading banana data ...') 9 | np.random.seed(99) 10 | inputs = np.loadtxt('../../data/banana_large.csv', delimiter=',', skiprows=1) 11 | Xall = inputs[:, :1] # temporal inputs (x-axis) 12 | Rall = inputs[:, 1:2] # spatial inputs (y-axis) 13 | Yall = np.maximum(inputs[:, 2:], 0) # observations / labels 14 | 15 | N = Xall.shape[0] # number of training points 16 | M = 15 17 | Z = np.linspace(-3., 3., M) # inducing points 18 | 19 | ind_shuffled = np.random.permutation(N) 20 | ind_split = np.stack(np.split(ind_shuffled, 10)) # 10 random batches of data indices 21 | 22 | # Test points 23 | Xtest, Rtest = np.mgrid[-3.2:3.2:100j, -3.2:3.2:100j] 24 | 25 | if len(sys.argv) > 1: 26 | method = int(sys.argv[1]) 27 | fold = int(sys.argv[2]) 28 | else: 29 | method = 3 30 | fold = 0 31 | 32 | if len(sys.argv) > 3: 33 | baseline = bool(int(sys.argv[3])) 34 | else: 35 | baseline = True 36 | 37 | if len(sys.argv) > 4: 38 | parallel = bool(int(sys.argv[4])) 39 | else: 40 | parallel = None 41 | 42 | print('method number:', method) 43 | print('batch number:', fold) 44 | print('baseline:', baseline) 45 | print('parallel:', parallel) 46 | 47 | # Get training and test indices 48 | ind_test = ind_split[fold] # np.sort(ind_shuffled[:N//10]) 49 | ind_train = np.concatenate(ind_split[np.arange(10) != fold]) 50 | 51 | # Set training and test data 52 | X = Xall[ind_train] 53 | R = Rall[ind_train] 54 | Y = Yall[ind_train] 55 | XT = Xall[ind_test] 56 | RT = Rall[ind_test] 57 | YT = Yall[ind_test] 58 | 59 | var_f = 1. # GP variance 60 | len_time = 1. # temporal lengthscale 61 | len_space = 1. # spacial lengthscale 62 | 63 | kern = bayesnewton.kernels.SpatioTemporalMatern52(variance=var_f, lengthscale_time=len_time, lengthscale_space=len_space, 64 | z=np.linspace(-3, 3, M), sparse=True, opt_z=False, conditional='Full') 65 | lik = bayesnewton.likelihoods.Bernoulli(link='logit') 66 | 67 | 68 | if baseline: 69 | if method == 0: 70 | model = bayesnewton.models.MarkovTaylorGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, parallel=parallel) 71 | elif method == 1: 72 | model = bayesnewton.models.MarkovPosteriorLinearisationGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, 73 | parallel=parallel) 74 | elif method == 2: 75 | model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, 76 | parallel=parallel, power=1.) 
77 | elif method == 3: 78 | model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, 79 | parallel=parallel, power=0.5) 80 | elif method == 4: 81 | model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, 82 | parallel=parallel, power=0.01) 83 | elif method == 5: 84 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, 85 | parallel=parallel) 86 | else: 87 | if method == 0: 88 | model = bayesnewton.models.SparseMarkovTaylorGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, Z=Z, parallel=parallel) 89 | elif method == 1: 90 | model = bayesnewton.models.SparseMarkovPosteriorLinearisationGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, Z=Z, 91 | parallel=parallel) 92 | elif method == 2: 93 | model = bayesnewton.models.SparseMarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, Z=Z, 94 | parallel=parallel, power=1.) 95 | elif method == 3: 96 | model = bayesnewton.models.SparseMarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, Z=Z, 97 | parallel=parallel, power=0.5) 98 | elif method == 4: 99 | model = bayesnewton.models.SparseMarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, Z=Z, 100 | parallel=parallel, power=0.01) 101 | elif method == 5: 102 | model = bayesnewton.models.SparseMarkovVariationalGP(kernel=kern, likelihood=lik, X=X, R=R, Y=Y, Z=Z, 103 | parallel=parallel) 104 | 105 | 106 | lr_adam = 0.1 107 | lr_newton = 0.1 108 | iters = 500 109 | opt_hypers = objax.optimizer.Adam(model.vars()) 110 | energy = objax.GradValues(model.energy, model.vars()) 111 | 112 | 113 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 114 | def train_op(): 115 | model.inference(lr=lr_newton) # perform inference and update variational params 116 | dE, E = energy() # compute energy and its gradients w.r.t. hypers 117 | opt_hypers(lr_adam, dE) 118 | return E 119 | 120 | 121 | train_op = objax.Jit(train_op) 122 | 123 | t0 = time.time() 124 | for i in range(1, iters + 1): 125 | loss = train_op() 126 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 127 | t1 = time.time() 128 | print('optimisation time: %2.2f secs' % (t1-t0)) 129 | 130 | print('calculating the posterior predictive distribution ...') 131 | t0 = time.time() 132 | nlpd = model.negative_log_predictive_density(X=XT, R=RT, Y=YT) 133 | t1 = time.time() 134 | print('test NLPD: %1.2f' % nlpd) 135 | 136 | if baseline: 137 | with open("output/baseline_" + str(method) + "_" + str(fold) + "_nlpd.txt", "wb") as fp: 138 | pickle.dump(nlpd, fp) 139 | else: 140 | with open("output/" + str(method) + "_" + str(fold) + "_nlpd.txt", "wb") as fp: 141 | pickle.dump(nlpd, fp) 142 | -------------------------------------------------------------------------------- /experiments/binary/binary.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import bayesnewton 3 | import objax 4 | import numpy as np 5 | import time 6 | import pickle 7 | 8 | print('generating some data ...') 9 | np.random.seed(99) 10 | N = 10000 # number of points 11 | x = np.sort(70 * np.random.rand(N)) 12 | sn = 0.01 13 | f = lambda x_: 12. 
* np.sin(4 * np.pi * x_) / (0.25 * np.pi * x_ + 1) 14 | y_ = f(x) + np.sqrt(sn)*np.random.randn(x.shape[0]) 15 | y = np.sign(y_) 16 | y[y == -1] = 0 17 | 18 | ind_shuffled = np.random.permutation(N) 19 | ind_split = np.stack(np.split(ind_shuffled, 10)) # 10 random batches of data indices 20 | 21 | if len(sys.argv) > 1: 22 | method = int(sys.argv[1]) 23 | fold = int(sys.argv[2]) 24 | else: 25 | method = 4 26 | fold = 0 27 | 28 | if len(sys.argv) > 3: 29 | baseline = bool(int(sys.argv[3])) 30 | else: 31 | baseline = True 32 | 33 | if len(sys.argv) > 4: 34 | parallel = bool(int(sys.argv[4])) 35 | else: 36 | parallel = None 37 | 38 | print('method number:', method) 39 | print('batch number:', fold) 40 | print('baseline:', baseline) 41 | print('parallel:', parallel) 42 | 43 | # Get training and test indices 44 | ind_test = ind_split[fold] # np.sort(ind_shuffled[:N//10]) 45 | ind_train = np.concatenate(ind_split[np.arange(10) != fold]) 46 | 47 | x *= 100 48 | 49 | x_train = x[ind_train] # 90/10 train/test split 50 | x_test = x[ind_test] 51 | y_train = y[ind_train] 52 | y_test = y[ind_test] 53 | N = x_train.shape[0] # number of points 54 | batch_size = N # 2000 55 | M = 1000 56 | z = np.linspace(x[0], x[-1], M) 57 | 58 | if len(sys.argv) > 3: 59 | baseline = int(sys.argv[3]) 60 | else: 61 | baseline = 0 62 | 63 | # if baseline: 64 | # batch_size = N 65 | 66 | var_f = 1. # GP variance 67 | len_f = 25. # GP lengthscale 68 | 69 | kern = bayesnewton.kernels.Matern72(variance=var_f, lengthscale=len_f) 70 | lik = bayesnewton.likelihoods.Bernoulli(link='logit') 71 | 72 | if baseline: 73 | if method == 0: 74 | model = bayesnewton.models.MarkovTaylorGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, parallel=parallel) 75 | elif method == 1: 76 | model = bayesnewton.models.MarkovPosteriorLinearisationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, 77 | parallel=parallel) 78 | elif method == 2: 79 | model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, 80 | parallel=parallel, power=1.) 81 | elif method == 3: 82 | model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, 83 | parallel=parallel, power=0.5) 84 | elif method == 4: 85 | model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, 86 | parallel=parallel, power=0.01) 87 | elif method == 5: 88 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, 89 | parallel=parallel) 90 | else: 91 | if method == 0: 92 | model = bayesnewton.models.SparseMarkovTaylorGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, parallel=parallel) 93 | elif method == 1: 94 | model = bayesnewton.models.SparseMarkovPosteriorLinearisationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, 95 | parallel=parallel) 96 | elif method == 2: 97 | model = bayesnewton.models.SparseMarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, 98 | parallel=parallel, power=1.)
99 | elif method == 3: 100 | model = bayesnewton.models.SparseMarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, 101 | parallel=parallel, power=0.5) 102 | elif method == 4: 103 | model = bayesnewton.models.SparseMarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, 104 | parallel=parallel, power=0.01) 105 | elif method == 5: 106 | model = bayesnewton.models.SparseMarkovVariationalGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, 107 | parallel=parallel) 108 | 109 | 110 | lr_adam = 0.1 111 | lr_newton = 0.5 112 | iters = 500 113 | opt_hypers = objax.optimizer.Adam(model.vars()) 114 | energy = objax.GradValues(model.energy, model.vars()) 115 | 116 | 117 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 118 | def train_op(): 119 | batch = np.random.permutation(N)[:batch_size] 120 | model.inference(lr=lr_newton, batch_ind=batch) # perform inference and update variational params 121 | dE, E = energy(batch_ind=batch) # compute energy and its gradients w.r.t. hypers 122 | opt_hypers(lr_adam, dE) 123 | return E 124 | 125 | 126 | train_op = objax.Jit(train_op) 127 | 128 | 129 | t0 = time.time() 130 | for i in range(1, iters + 1): 131 | loss = train_op() 132 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 133 | t1 = time.time() 134 | print('optimisation time: %2.2f secs' % (t1-t0)) 135 | 136 | print('calculating the posterior predictive distribution ...') 137 | t0 = time.time() 138 | nlpd = model.negative_log_predictive_density(X=x_test, Y=y_test) 139 | t1 = time.time() 140 | print('nlpd: %2.3f' % nlpd) 141 | 142 | if baseline: 143 | with open("output/baseline_" + str(method) + "_" + str(fold) + "_nlpd.txt", "wb") as fp: 144 | pickle.dump(nlpd, fp) 145 | else: 146 | with open("output/" + str(method) + "_" + str(fold) + "_nlpd.txt", "wb") as fp: 147 | pickle.dump(nlpd, fp) 148 | -------------------------------------------------------------------------------- /experiments/coal/coal.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import bayesnewton 3 | import objax 4 | import numpy as np 5 | import pandas as pd 6 | import time 7 | import pickle 8 | 9 | plot_final = False 10 | plot_intermediate = False 11 | 12 | print('loading coal data ...') 13 | if plot_final: 14 | disaster_timings = pd.read_csv('../data/coal.txt', header=None).values[:, 0] 15 | cvind = np.loadtxt('cvind.csv').astype(int) 16 | # 10-fold cross-validation 17 | nt = np.floor(cvind.shape[0]/10).astype(int) 18 | cvind = np.reshape(cvind[:10*nt], (10, nt)) 19 | 20 | D = np.loadtxt('binned.csv') 21 | x = D[:, 0:1] 22 | y = D[:, 1:] 23 | N = D.shape[0] 24 | N_batch = 300 25 | M = 15 26 | z = np.linspace(np.min(x), np.max(x), M) 27 | num_time_bins = x.shape[0] 28 | binsize = (max(x) - min(x)) / num_time_bins 29 | 30 | np.random.seed(123) 31 | # meanval = np.log(len(disaster_timings)/num_time_bins) # TODO: incorporate mean 32 | 33 | if len(sys.argv) > 1: 34 | method = int(sys.argv[1]) 35 | fold = int(sys.argv[2]) 36 | else: 37 | method = 0 38 | fold = 0 39 | 40 | if len(sys.argv) > 3: 41 | baseline = bool(int(sys.argv[3])) 42 | else: 43 | baseline = True 44 | 45 | if len(sys.argv) > 4: 46 | parallel = bool(int(sys.argv[4])) 47 | else: 48 | parallel = None 49 | 50 | print('method number:', method) 51 | print('batch number:', fold) 52 | print('baseline:', baseline) 53 | print('parallel:', parallel) 54 | 55 | # Get training and test indices 56 | ind_test = cvind[fold, :] 57 | ind_train = 
np.setdiff1d(cvind, ind_test) 58 | 59 | x_train = x[ind_train, ...] # 90/10 train/test split 60 | x_test = x[ind_test, ...] 61 | y_train = y[ind_train, ...] 62 | y_test = y[ind_test, ...] 63 | 64 | var_f = 1.0 # GP variance 65 | len_f = 4.0 # GP lengthscale 66 | 67 | kern = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 68 | lik = bayesnewton.likelihoods.Poisson(binsize=binsize) 69 | 70 | if baseline: 71 | if method == 0: 72 | model = bayesnewton.models.MarkovTaylorGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, parallel=parallel) 73 | elif method == 1: 74 | model = bayesnewton.models.MarkovPosteriorLinearisationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, 75 | parallel=parallel) 76 | elif method == 2: 77 | model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, 78 | parallel=parallel, power=1.) 79 | elif method == 3: 80 | model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, 81 | parallel=parallel, power=0.5) 82 | elif method == 4: 83 | model = bayesnewton.models.MarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, 84 | parallel=parallel, power=0.01) 85 | elif method == 5: 86 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, 87 | parallel=parallel) 88 | else: 89 | if method == 0: 90 | model = bayesnewton.models.SparseMarkovTaylorGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, parallel=parallel) 91 | elif method == 1: 92 | model = bayesnewton.models.SparseMarkovPosteriorLinearisationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, 93 | parallel=parallel) 94 | elif method == 2: 95 | model = bayesnewton.models.SparseMarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, 96 | parallel=parallel, power=1.) 97 | elif method == 3: 98 | model = bayesnewton.models.SparseMarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, 99 | parallel=parallel, power=0.5) 100 | elif method == 4: 101 | model = bayesnewton.models.SparseMarkovExpectationPropagationGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, 102 | parallel=parallel, power=0.01) 103 | elif method == 5: 104 | model = bayesnewton.models.SparseMarkovVariationalGP(kernel=kern, likelihood=lik, X=x_train, Y=y_train, Z=z, 105 | parallel=parallel) 106 | 107 | 108 | lr_adam = 0.2 109 | lr_newton = .5 110 | iters = 500 111 | opt_hypers = objax.optimizer.Adam(model.vars()) 112 | energy = objax.GradValues(model.energy, model.vars()) 113 | 114 | 115 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 116 | def train_op(): 117 | model.inference(lr=lr_newton) # perform inference and update variational params 118 | dE, E = energy() # compute energy and its gradients w.r.t. 
hypers 119 | opt_hypers(lr_adam, dE) 120 | return E 121 | 122 | 123 | train_op = objax.Jit(train_op) 124 | 125 | t0 = time.time() 126 | for i in range(1, iters + 1): 127 | loss = train_op() 128 | print('iter %2d, energy: %1.4f' % (i, loss[0])) 129 | t1 = time.time() 130 | print('optimisation time: %2.2f secs' % (t1-t0)) 131 | 132 | t0 = time.time() 133 | nlpd = model.negative_log_predictive_density(X=x_test, Y=y_test) 134 | t1 = time.time() 135 | print('NLPD: %1.2f' % nlpd) 136 | 137 | if baseline: 138 | with open("output/baseline_" + str(method) + "_" + str(fold) + "_nlpd.txt", "wb") as fp: 139 | pickle.dump(nlpd, fp) 140 | else: 141 | with open("output/" + str(method) + "_" + str(fold) + "_nlpd.txt", "wb") as fp: 142 | pickle.dump(nlpd, fp) 143 | -------------------------------------------------------------------------------- /experiments/coal/cvind.csv: -------------------------------------------------------------------------------- 1 | 1.2200000e+02 2 | 9.8000000e+01 3 | 1.7300000e+02 4 | 3.3200000e+02 5 | 2.9300000e+02 6 | 2.4300000e+02 7 | 2.8000000e+02 8 | 3.1000000e+01 9 | 3.9000000e+01 10 | 2.1000000e+01 11 | 2.6000000e+02 12 | 3.0200000e+02 13 | 1.7400000e+02 14 | 3.3000000e+01 15 | 1.5100000e+02 16 | 9.1000000e+01 17 | 3.0000000e+02 18 | 1.6900000e+02 19 | 3.2400000e+02 20 | 3.0300000e+02 21 | 9.0000000e+01 22 | 1.4500000e+02 23 | 1.1800000e+02 24 | 1.8900000e+02 25 | 3.1800000e+02 26 | 1.1100000e+02 27 | 1.2600000e+02 28 | 2.4000000e+02 29 | 1.6400000e+02 30 | 3.0500000e+02 31 | 3.4000000e+01 32 | 5.0000000e+00 33 | 2.6400000e+02 34 | 1.2000000e+02 35 | 2.7000000e+02 36 | 2.2700000e+02 37 | 1.5900000e+02 38 | 2.3500000e+02 39 | 5.4000000e+01 40 | 1.4700000e+02 41 | 2.0000000e+00 42 | 9.5000000e+01 43 | 1.6500000e+02 44 | 3.1900000e+02 45 | 1.3500000e+02 46 | 2.6800000e+02 47 | 6.7000000e+01 48 | 1.5000000e+01 49 | 1.3900000e+02 50 | 1.3400000e+02 51 | 6.8000000e+01 52 | 3.0900000e+02 53 | 1.1400000e+02 54 | 1.0000000e+01 55 | 1.0000000e+02 56 | 5.3000000e+01 57 | 1.0400000e+02 58 | 2.9500000e+02 59 | 1.7500000e+02 60 | 2.1700000e+02 61 | 2.9000000e+01 62 | 3.2000000e+02 63 | 1.3200000e+02 64 | 2.9000000e+02 65 | 1.8500000e+02 66 | 1.4800000e+02 67 | 2.2300000e+02 68 | 4.4000000e+01 69 | 1.8300000e+02 70 | 2.1500000e+02 71 | 7.6000000e+01 72 | 2.7900000e+02 73 | 2.1000000e+02 74 | 2.3400000e+02 75 | 5.9000000e+01 76 | 2.1600000e+02 77 | 2.1800000e+02 78 | 1.1200000e+02 79 | 2.1300000e+02 80 | 2.5300000e+02 81 | 1.7000000e+02 82 | 2.4800000e+02 83 | 1.4600000e+02 84 | 2.9100000e+02 85 | 1.4900000e+02 86 | 1.6200000e+02 87 | 7.3000000e+01 88 | 7.7000000e+01 89 | 7.1000000e+01 90 | 6.1000000e+01 91 | 6.9000000e+01 92 | 2.2800000e+02 93 | 1.2800000e+02 94 | 2.6500000e+02 95 | 2.3100000e+02 96 | 2.4100000e+02 97 | 1.0600000e+02 98 | 1.3300000e+02 99 | 5.0000000e+01 100 | 3.2000000e+01 101 | 6.0000000e+00 102 | 8.5000000e+01 103 | 3.3000000e+02 104 | 1.8100000e+02 105 | 2.3600000e+02 106 | 2.1100000e+02 107 | 1.9500000e+02 108 | 2.2000000e+02 109 | 1.0200000e+02 110 | 3.7000000e+01 111 | 2.3700000e+02 112 | 2.7700000e+02 113 | 2.6600000e+02 114 | 9.9000000e+01 115 | 1.5600000e+02 116 | 5.7000000e+01 117 | 7.5000000e+01 118 | 2.0400000e+02 119 | 1.4200000e+02 120 | 8.0000000e+01 121 | 1.7100000e+02 122 | 2.5800000e+02 123 | 1.8600000e+02 124 | 1.5800000e+02 125 | 2.0100000e+02 126 | 8.8000000e+01 127 | 4.1000000e+01 128 | 1.6100000e+02 129 | 3.2100000e+02 130 | 2.7000000e+01 131 | 2.5700000e+02 132 | 3.2500000e+02 133 | 1.2700000e+02 134 | 1.4400000e+02 135 | 1.6300000e+02 136 | 
2.2900000e+02 137 | 3.2700000e+02 138 | 1.5000000e+02 139 | 1.6000000e+01 140 | 2.3800000e+02 141 | 2.2200000e+02 142 | 1.3000000e+02 143 | 3.3100000e+02 144 | 3.1600000e+02 145 | 2.1900000e+02 146 | 1.9300000e+02 147 | 4.0000000e+01 148 | 2.2600000e+02 149 | 1.1900000e+02 150 | 4.6000000e+01 151 | 1.9400000e+02 152 | 1.1000000e+02 153 | 1.7900000e+02 154 | 3.1500000e+02 155 | 2.4900000e+02 156 | 9.7000000e+01 157 | 2.1200000e+02 158 | 2.9900000e+02 159 | 7.9000000e+01 160 | 2.8300000e+02 161 | 1.3000000e+01 162 | 1.9200000e+02 163 | 2.4600000e+02 164 | 2.5400000e+02 165 | 1.5500000e+02 166 | 4.5000000e+01 167 | 2.9400000e+02 168 | 1.5400000e+02 169 | 2.7200000e+02 170 | 5.5000000e+01 171 | 2.8200000e+02 172 | 2.9800000e+02 173 | 6.2000000e+01 174 | 2.3900000e+02 175 | 1.9600000e+02 176 | 1.9700000e+02 177 | 1.4300000e+02 178 | 3.1100000e+02 179 | 2.5200000e+02 180 | 3.0400000e+02 181 | 3.2600000e+02 182 | 1.0300000e+02 183 | 9.2000000e+01 184 | 2.0300000e+02 185 | 1.1600000e+02 186 | 2.5100000e+02 187 | 7.0000000e+00 188 | 1.8000000e+02 189 | 6.6000000e+01 190 | 8.3000000e+01 191 | 1.3800000e+02 192 | 2.0700000e+02 193 | 8.9000000e+01 194 | 9.6000000e+01 195 | 1.6800000e+02 196 | 2.8900000e+02 197 | 2.4700000e+02 198 | 1.3700000e+02 199 | 8.2000000e+01 200 | 5.8000000e+01 201 | 2.0900000e+02 202 | 2.3000000e+02 203 | 1.0500000e+02 204 | 2.3200000e+02 205 | 2.8500000e+02 206 | 7.8000000e+01 207 | 2.8600000e+02 208 | 1.4100000e+02 209 | 2.0800000e+02 210 | 2.5500000e+02 211 | 1.8700000e+02 212 | 3.2900000e+02 213 | 4.0000000e+00 214 | 2.0000000e+02 215 | 4.7000000e+01 216 | 1.7800000e+02 217 | 3.1300000e+02 218 | 1.7600000e+02 219 | 2.7100000e+02 220 | 1.0700000e+02 221 | 5.2000000e+01 222 | 2.8000000e+01 223 | 2.0000000e+01 224 | 3.2800000e+02 225 | 3.1000000e+02 226 | 2.4000000e+01 227 | 2.5600000e+02 228 | 5.1000000e+01 229 | 2.6700000e+02 230 | 3.0100000e+02 231 | 1.8400000e+02 232 | 1.0800000e+02 233 | 3.6000000e+01 234 | 2.7800000e+02 235 | 6.3000000e+01 236 | 3.0000000e+01 237 | 4.8000000e+01 238 | 2.3300000e+02 239 | 2.9700000e+02 240 | 2.7400000e+02 241 | 2.6900000e+02 242 | 3.0800000e+02 243 | 2.4500000e+02 244 | 1.7700000e+02 245 | 2.6000000e+01 246 | 2.8100000e+02 247 | 1.8200000e+02 248 | 1.0900000e+02 249 | 6.0000000e+01 250 | 8.7000000e+01 251 | 4.9000000e+01 252 | 8.6000000e+01 253 | 2.5000000e+01 254 | 4.2000000e+01 255 | 1.2300000e+02 256 | 1.9100000e+02 257 | 2.7300000e+02 258 | 9.3000000e+01 259 | 1.8800000e+02 260 | 1.6000000e+02 261 | 1.8000000e+01 262 | 1.0100000e+02 263 | 1.9900000e+02 264 | 4.3000000e+01 265 | 2.6300000e+02 266 | 1.2900000e+02 267 | 1.4000000e+01 268 | 3.1400000e+02 269 | 2.4200000e+02 270 | 3.2300000e+02 271 | 2.8800000e+02 272 | 2.0200000e+02 273 | 7.2000000e+01 274 | 0.0000000e+00 275 | 1.2400000e+02 276 | 3.0700000e+02 277 | 1.9800000e+02 278 | 3.0600000e+02 279 | 1.7200000e+02 280 | 3.5000000e+01 281 | 3.1700000e+02 282 | 1.1500000e+02 283 | 8.1000000e+01 284 | 3.2200000e+02 285 | 7.0000000e+01 286 | 2.1400000e+02 287 | 2.2000000e+01 288 | 1.4000000e+02 289 | 2.8700000e+02 290 | 1.2500000e+02 291 | 1.3600000e+02 292 | 2.0600000e+02 293 | 2.6100000e+02 294 | 2.9200000e+02 295 | 6.4000000e+01 296 | 2.7600000e+02 297 | 1.5700000e+02 298 | 1.5200000e+02 299 | 2.7500000e+02 300 | 2.8400000e+02 301 | 2.2400000e+02 302 | 1.0000000e+00 303 | 1.3100000e+02 304 | 2.6200000e+02 305 | 1.1300000e+02 306 | 3.0000000e+00 307 | 1.7000000e+01 308 | 8.4000000e+01 309 | 2.2100000e+02 310 | 2.4400000e+02 311 | 7.4000000e+01 312 | 1.9000000e+02 313 | 2.3000000e+01 
314 | 9.4000000e+01 315 | 2.0500000e+02 316 | 1.6600000e+02 317 | 1.5300000e+02 318 | 3.8000000e+01 319 | 1.6700000e+02 320 | 1.2000000e+01 321 | 8.0000000e+00 322 | 6.5000000e+01 323 | 1.9000000e+01 324 | 5.6000000e+01 325 | 1.2100000e+02 326 | 2.5000000e+02 327 | 9.0000000e+00 328 | 1.1000000e+01 329 | 3.1200000e+02 330 | 2.9600000e+02 331 | 2.2500000e+02 332 | 2.5900000e+02 333 | 1.1700000e+02 334 | -------------------------------------------------------------------------------- /experiments/gprn/bn-ep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for (( approx=0; approx<4; approx++ )) 4 | do 5 | for (( fold=0; fold<4; fold++ )) 6 | do 7 | printf "$approx $fold \n" 8 | python3 bn_gprn.py 2 "$approx" "$fold" 9 | printf "\n" 10 | done 11 | done 12 | printf "\n" 13 | -------------------------------------------------------------------------------- /experiments/gprn/bn-newton.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for (( approx=0; approx<4; approx++ )) 4 | do 5 | for (( fold=0; fold<4; fold++ )) 6 | do 7 | printf "$approx $fold \n" 8 | python3 bn_gprn.py 0 "$approx" "$fold" 9 | printf "\n" 10 | done 11 | done 12 | printf "\n" 13 | -------------------------------------------------------------------------------- /experiments/gprn/bn-pl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for (( approx=0; approx<5; approx++ )) 4 | do 5 | for (( fold=0; fold<4; fold++ )) 6 | do 7 | printf "$approx $fold \n" 8 | python3 bn_gprn.py 3 "$approx" "$fold" 9 | printf "\n" 10 | done 11 | done 12 | printf "\n" 13 | -------------------------------------------------------------------------------- /experiments/gprn/bn-vi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for (( approx=0; approx<4; approx++ )) 4 | do 5 | for (( fold=0; fold<4; fold++ )) 6 | do 7 | printf "$approx $fold \n" 8 | python3 bn_gprn.py 1 "$approx" "$fold" 9 | printf "\n" 10 | done 11 | done 12 | printf "\n" 13 | -------------------------------------------------------------------------------- /experiments/motorcycle/bn-ep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for (( approx=0; approx<4; approx++ )) 4 | do 5 | for (( fold=0; fold<4; fold++ )) 6 | do 7 | printf "$approx $fold \n" 8 | python3 heteroscedastic_bn.py 2 "$approx" "$fold" 9 | printf "\n" 10 | done 11 | done 12 | printf "\n" 13 | -------------------------------------------------------------------------------- /experiments/motorcycle/bn-newton.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for (( approx=0; approx<4; approx++ )) 4 | do 5 | for (( fold=0; fold<4; fold++ )) 6 | do 7 | printf "$approx $fold \n" 8 | python3 heteroscedastic_bn.py 0 "$approx" "$fold" 9 | printf "\n" 10 | done 11 | done 12 | printf "\n" 13 | -------------------------------------------------------------------------------- /experiments/motorcycle/bn-pl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for (( approx=0; approx<5; approx++ )) 4 | do 5 | for (( fold=0; fold<4; fold++ )) 6 | do 7 | printf "$approx $fold \n" 8 | python3 heteroscedastic_bn.py 3 "$approx" "$fold" 9 | printf "\n" 10 | done 11 | done 12 | printf "\n" 13 | 
-------------------------------------------------------------------------------- /experiments/motorcycle/bn-vi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for (( approx=0; approx<4; approx++ )) 4 | do 5 | for (( fold=0; fold<4; fold++ )) 6 | do 7 | printf "$approx $fold \n" 8 | python3 heteroscedastic_bn.py 1 "$approx" "$fold" 9 | printf "\n" 10 | done 11 | done 12 | printf "\n" 13 | -------------------------------------------------------------------------------- /experiments/motorcycle/cvind.csv: -------------------------------------------------------------------------------- 1 | 1.2200000e+02 2 | 9.8000000e+01 3 | 3.1000000e+01 4 | 3.9000000e+01 5 | 2.1000000e+01 6 | 3.3000000e+01 7 | 9.1000000e+01 8 | 9.0000000e+01 9 | 1.1800000e+02 10 | 1.1100000e+02 11 | 1.2600000e+02 12 | 3.4000000e+01 13 | 5.0000000e+00 14 | 1.2000000e+02 15 | 5.4000000e+01 16 | 2.0000000e+00 17 | 9.5000000e+01 18 | 6.7000000e+01 19 | 1.5000000e+01 20 | 6.8000000e+01 21 | 1.1400000e+02 22 | 1.0000000e+01 23 | 1.0000000e+02 24 | 5.3000000e+01 25 | 1.0400000e+02 26 | 2.9000000e+01 27 | 1.3200000e+02 28 | 4.4000000e+01 29 | 7.6000000e+01 30 | 5.9000000e+01 31 | 1.1200000e+02 32 | 7.3000000e+01 33 | 7.7000000e+01 34 | 7.1000000e+01 35 | 6.1000000e+01 36 | 6.9000000e+01 37 | 1.2800000e+02 38 | 1.0600000e+02 39 | 5.0000000e+01 40 | 3.2000000e+01 41 | 6.0000000e+00 42 | 8.5000000e+01 43 | 1.0200000e+02 44 | 3.7000000e+01 45 | 9.9000000e+01 46 | 5.7000000e+01 47 | 7.5000000e+01 48 | 8.0000000e+01 49 | 8.8000000e+01 50 | 4.1000000e+01 51 | 2.7000000e+01 52 | 1.2700000e+02 53 | 1.6000000e+01 54 | 1.3000000e+02 55 | 4.0000000e+01 56 | 1.1900000e+02 57 | 4.6000000e+01 58 | 1.1000000e+02 59 | 9.7000000e+01 60 | 7.9000000e+01 61 | 1.3000000e+01 62 | 4.5000000e+01 63 | 5.5000000e+01 64 | 6.2000000e+01 65 | 1.0300000e+02 66 | 9.2000000e+01 67 | 1.1600000e+02 68 | 7.0000000e+00 69 | 6.6000000e+01 70 | 8.3000000e+01 71 | 8.9000000e+01 72 | 9.6000000e+01 73 | 8.2000000e+01 74 | 5.8000000e+01 75 | 1.0500000e+02 76 | 7.8000000e+01 77 | 4.0000000e+00 78 | 4.7000000e+01 79 | 1.0700000e+02 80 | 5.2000000e+01 81 | 2.8000000e+01 82 | 2.0000000e+01 83 | 2.4000000e+01 84 | 5.1000000e+01 85 | 1.0800000e+02 86 | 3.6000000e+01 87 | 6.3000000e+01 88 | 3.0000000e+01 89 | 4.8000000e+01 90 | 2.6000000e+01 91 | 1.0900000e+02 92 | 6.0000000e+01 93 | 8.7000000e+01 94 | 4.9000000e+01 95 | 8.6000000e+01 96 | 2.5000000e+01 97 | 4.2000000e+01 98 | 1.2300000e+02 99 | 9.3000000e+01 100 | 1.8000000e+01 101 | 1.0100000e+02 102 | 4.3000000e+01 103 | 1.2900000e+02 104 | 1.4000000e+01 105 | 7.2000000e+01 106 | 0.0000000e+00 107 | 1.2400000e+02 108 | 3.5000000e+01 109 | 1.1500000e+02 110 | 8.1000000e+01 111 | 7.0000000e+01 112 | 2.2000000e+01 113 | 1.2500000e+02 114 | 6.4000000e+01 115 | 1.0000000e+00 116 | 1.3100000e+02 117 | 1.1300000e+02 118 | 3.0000000e+00 119 | 1.7000000e+01 120 | 8.4000000e+01 121 | 7.4000000e+01 122 | 2.3000000e+01 123 | 9.4000000e+01 124 | 3.8000000e+01 125 | 1.2000000e+01 126 | 8.0000000e+00 127 | 6.5000000e+01 128 | 1.9000000e+01 129 | 5.6000000e+01 130 | 1.2100000e+02 131 | 9.0000000e+00 132 | 1.1000000e+01 133 | 1.1700000e+02 134 | -------------------------------------------------------------------------------- /experiments/nyc_crime/nyc_crime_bayesnewton.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import pickle 5 | import time 6 | import sys 7 | from 
scipy.cluster.vq import kmeans2 8 | from jax.lib import xla_bridge 9 | 10 | if len(sys.argv) > 1: 11 | ind = int(sys.argv[1]) 12 | else: 13 | ind = 0 14 | 15 | 16 | if len(sys.argv) > 2: 17 | mean_field = bool(int(sys.argv[2])) 18 | else: 19 | mean_field = False 20 | 21 | 22 | if len(sys.argv) > 3: 23 | parallel = bool(int(sys.argv[3])) 24 | else: 25 | parallel = None 26 | 27 | # ===========================Load Data=========================== 28 | train_data = pickle.load(open("data/train_data_" + str(ind) + ".pickle", "rb")) 29 | pred_data = pickle.load(open("data/pred_data_" + str(ind) + ".pickle", "rb")) 30 | 31 | X = train_data['X'] 32 | Y = train_data['Y'] 33 | 34 | X_t = pred_data['test']['X'] 35 | Y_t = pred_data['test']['Y'] 36 | 37 | bin_sizes = train_data['bin_sizes'] 38 | binsize = np.prod(bin_sizes) 39 | 40 | print('X: ', X.shape) 41 | 42 | num_z_space = 30 43 | 44 | grid = True 45 | print(Y.shape) 46 | print("num data points =", Y.shape[0]) 47 | 48 | if grid: 49 | # the gridded approach: 50 | t, R, Y = bayesnewton.utils.create_spatiotemporal_grid(X, Y) 51 | t_t, R_t, Y_t = bayesnewton.utils.create_spatiotemporal_grid(X_t, Y_t) 52 | else: 53 | # the sequential approach: 54 | t = X[:, :1] 55 | R = X[:, 1:] 56 | t_t = X_t[:, :1] 57 | R_t = X_t[:, 1:] 58 | Nt = t.shape[0] 59 | print("num time steps =", Nt) 60 | N = Y.shape[0] * Y.shape[1] * Y.shape[2] 61 | print("num data points =", N) 62 | 63 | var_f = 1. 64 | len_time = 0.001 65 | len_space = 0.1 66 | 67 | sparse = True 68 | opt_z = True # will be set to False if sparse=False 69 | 70 | if sparse: 71 | z = kmeans2(R[0, ...], num_z_space, minit="points")[0] 72 | else: 73 | z = R[0, ...] 74 | 75 | kern_time = bayesnewton.kernels.Matern32(variance=var_f, lengthscale=len_time) 76 | kern_space0 = bayesnewton.kernels.Matern32(variance=var_f, lengthscale=len_space) 77 | kern_space1 = bayesnewton.kernels.Matern32(variance=var_f, lengthscale=len_space) 78 | kern_space = bayesnewton.kernels.Separable([kern_space0, kern_space1]) 79 | 80 | kern = bayesnewton.kernels.SpatioTemporalKernel(temporal_kernel=kern_time, 81 | spatial_kernel=kern_space, 82 | z=z, 83 | sparse=sparse, 84 | opt_z=opt_z, 85 | conditional='Full') 86 | 87 | lik = bayesnewton.likelihoods.Poisson(binsize=binsize) 88 | 89 | if mean_field: 90 | model = bayesnewton.models.MarkovVariationalMeanFieldGP(kernel=kern, likelihood=lik, X=t, R=R, Y=Y, parallel=parallel) 91 | else: 92 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=t, R=R, Y=Y, parallel=parallel) 93 | 94 | lr_adam = 0.01 95 | lr_newton = 0.1 96 | iters = 500 97 | opt_hypers = objax.optimizer.Adam(model.vars()) 98 | energy = objax.GradValues(model.energy, model.vars()) 99 | 100 | 101 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 102 | def train_op(): 103 | model.inference(lr=lr_newton) # perform inference and update variational params 104 | dE, E = energy() # compute energy and its gradients w.r.t. 
hypers 105 | opt_hypers(lr_adam, dE) 106 | return E 107 | 108 | 109 | train_op = objax.Jit(train_op) 110 | 111 | t0 = time.time() 112 | for i in range(1, iters): 113 | loss = train_op() 114 | print('iter %2d: energy: %1.4f' % (i, loss[0])) 115 | t1 = time.time() 116 | # print('optimisation time: %2.2f secs' % (t1-t0)) 117 | avg_time_taken = (t1-t0)/iters 118 | print('average iter time: %2.2f secs' % avg_time_taken) 119 | 120 | posterior_mean, posterior_var = model.predict_y(X=t_t, R=R_t) 121 | nlpd = model.negative_log_predictive_density(X=t_t, R=R_t, Y=Y_t) 122 | rmse = np.sqrt(np.nanmean((np.squeeze(Y_t) - np.squeeze(posterior_mean))**2)) 123 | print('nlpd: %2.3f' % nlpd) 124 | print('rmse: %2.3f' % rmse) 125 | 126 | cpugpu = xla_bridge.get_backend().platform 127 | 128 | with open("output/" + str(int(mean_field)) + "_" + str(ind) + "_" + str(int(parallel)) + "_" + cpugpu + "_time.txt", "wb") as fp: 129 | pickle.dump(avg_time_taken, fp) 130 | with open("output/" + str(int(mean_field)) + "_" + str(ind) + "_" + str(int(parallel)) + "_" + cpugpu + "_nlpd.txt", "wb") as fp: 131 | pickle.dump(nlpd, fp) 132 | with open("output/" + str(int(mean_field)) + "_" + str(ind) + "_" + str(int(parallel)) + "_" + cpugpu + "_rmse.txt", "wb") as fp: 133 | pickle.dump(rmse, fp) 134 | -------------------------------------------------------------------------------- /experiments/nyc_crime/nyc_crime_gpflow.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import gpflow 3 | from gpflow.optimizers import NaturalGradient 4 | from gpflow.utilities import set_trainable, leaf_components 5 | import numpy as np 6 | import scipy as sp 7 | import time 8 | from scipy.cluster.vq import kmeans2 9 | from tqdm import tqdm 10 | import pickle 11 | import sys 12 | 13 | print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU'))) 14 | 15 | 16 | if len(sys.argv) > 1: 17 | ind = int(sys.argv[1]) 18 | else: 19 | ind = 0 20 | 21 | 22 | if len(sys.argv) > 2: 23 | num_z_ind = int(sys.argv[2]) 24 | else: 25 | num_z_ind = 0 26 | 27 | 28 | # ===========================Load Data=========================== 29 | train_data = pickle.load(open("data/train_data_" + str(ind) + ".pickle", "rb")) 30 | pred_data = pickle.load(open("data/pred_data_" + str(ind) + ".pickle", "rb")) 31 | 32 | X = train_data['X'] 33 | Y = train_data['Y'] 34 | 35 | X_t = pred_data['test']['X'] 36 | Y_t = pred_data['test']['Y'] 37 | 38 | bin_sizes = train_data['bin_sizes'] 39 | binsize = np.prod(bin_sizes) 40 | 41 | non_nan_idx = np.logical_not(np.isnan(np.squeeze(Y))) 42 | X = X[non_nan_idx, :] 43 | Y = Y[non_nan_idx, :] 44 | 45 | non_nan_idx_t = np.logical_not(np.isnan(np.squeeze(Y_t))) 46 | X_t = X_t[non_nan_idx_t, :] 47 | Y_t = Y_t[non_nan_idx_t, :] 48 | 49 | print('X: ', X.shape) 50 | 51 | kernel_lengthscales = [0.001, 0.1, 0.1] 52 | kernel_variances = 1.0 53 | train_z = True 54 | epochs = 500 55 | step_size = 0.01 56 | # jitter = 1e-4 57 | natgrad_step_size = 0.1 58 | # enforce_psd = False 59 | minibatch_size = [1500, 3000] 60 | num_z = [1500, 3000] 61 | 62 | 63 | def get_gpflow_params(m): 64 | params = {} 65 | leafs = leaf_components(m) 66 | for key in leafs.keys(): 67 | tf_vars = leafs[key].trainable_variables 68 | 69 | # check if variable exists 70 | if len(tf_vars) == 1: 71 | tf_var = tf_vars[0] 72 | 73 | params[key] = tf_var.numpy() 74 | 75 | return params 76 | 77 | 78 | N, D = X.shape 79 | 80 | print('num_z: ', num_z[num_z_ind]) 81 | Z_all = kmeans2(X, num_z[num_z_ind], 
minit="points")[0] 82 | 83 | kernel = gpflow.kernels.Matern32 84 | 85 | k = None 86 | for d in range(D): 87 | # print(d, kernel_lengthscales) 88 | if type(kernel_lengthscales) is list: 89 | k_ls = kernel_lengthscales[d] 90 | else: 91 | k_ls = kernel_lengthscales 92 | 93 | if type(kernel_variances) is list: 94 | k_var = kernel_variances[d] 95 | else: 96 | k_var = kernel_variances 97 | 98 | k_d = kernel( 99 | lengthscales=[k_ls], 100 | variance=k_var, 101 | active_dims=[d] 102 | ) 103 | 104 | # print(k_d) 105 | if k is None: 106 | k = k_d 107 | else: 108 | k = k * k_d 109 | 110 | init_as_cvi = True 111 | 112 | if init_as_cvi: 113 | M = Z_all.shape[0] 114 | jit = 1e-6 115 | 116 | Kzz = k(Z_all, Z_all) 117 | 118 | def inv(K): 119 | K_chol = sp.linalg.cholesky(K + jit * np.eye(M), lower=True) 120 | return sp.linalg.cho_solve((K_chol, True), np.eye(K.shape[0])) 121 | 122 | # manual q(u) decompositin 123 | nat1 = np.zeros([M, 1]) 124 | nat2 = -0.5 * inv(Kzz) 125 | 126 | lam1 = 1e-5 * np.ones([M, 1]) 127 | lam2 = -0.5 * np.eye(M) 128 | 129 | S = inv(-2 * (nat2 + lam2)) 130 | m = S @ (lam1 + nat1) 131 | 132 | S_chol = sp.linalg.cholesky(S + jit * np.eye(M), lower=True) 133 | S_flattened = S_chol[np.tril_indices(M, 0)] 134 | 135 | q_mu = m 136 | q_sqrt = np.array([S_chol]) 137 | else: 138 | q_mu = 1e-5 * np.ones([Z_all.shape[0], 1]) # match gpjax init 139 | q_sqrt = None 140 | 141 | lik = gpflow.likelihoods.Poisson(binsize=binsize) 142 | 143 | data = (X, Y) 144 | 145 | m = gpflow.models.SVGP( 146 | inducing_variable=Z_all, 147 | whiten=True, 148 | kernel=k, 149 | mean_function=None, 150 | likelihood=lik, 151 | q_mu=q_mu, 152 | q_sqrt=q_sqrt 153 | ) 154 | 155 | set_trainable(m.inducing_variable, True) 156 | 157 | # ===========================Train=========================== 158 | 159 | if minibatch_size[num_z_ind] is None or minibatch_size[num_z_ind] is 'none': 160 | training_loss = m.training_loss_closure( 161 | data 162 | ) 163 | else: 164 | print(N, minibatch_size[num_z_ind]) 165 | train_dataset = (tf.data.Dataset.from_tensor_slices(data).repeat().shuffle(N).batch(minibatch_size[num_z_ind])) 166 | train_iter = iter(train_dataset) 167 | training_loss = m.training_loss_closure(train_iter) 168 | 169 | 170 | # make it so adam does not train these 171 | set_trainable(m.q_mu, False) 172 | set_trainable(m.q_sqrt, False) 173 | 174 | natgrad_opt = NaturalGradient(gamma=natgrad_step_size) 175 | variational_params = [(m.q_mu, m.q_sqrt)] 176 | 177 | optimizer = tf.optimizers.Adam 178 | 179 | adam_opt_for_vgp = optimizer(step_size) 180 | 181 | loss_arr = [] 182 | 183 | bar = tqdm(total=epochs) 184 | 185 | # MINIBATCHING TRAINING 186 | t0 = time.time() 187 | for i in range(epochs): 188 | # NAT GRAD STEP 189 | natgrad_opt.minimize(training_loss, var_list=variational_params) 190 | 191 | # elbo = -m.elbo(data).numpy() 192 | 193 | # loss_arr.append(elbo) 194 | 195 | # ADAM STEP 196 | adam_opt_for_vgp.minimize(training_loss, var_list=m.trainable_variables) 197 | 198 | bar.update(1) 199 | t1 = time.time() 200 | avg_time_taken = (t1-t0)/epochs 201 | print('average iter time: %2.2f secs' % avg_time_taken) 202 | 203 | 204 | def _prediction_fn(X_, Y_): 205 | mu, var = m.predict_y(X_) 206 | log_pred_density = m.predict_log_density((X_, Y_)) 207 | return mu.numpy(), var.numpy(), log_pred_density.numpy() 208 | 209 | 210 | print('predicting...') 211 | posterior_mean, posterior_var, lpd = _prediction_fn(X_t, Y_t) 212 | # print(lpd.shape) 213 | # print(lpd) 214 | nlpd = np.mean(-lpd) 215 | rmse = 
np.sqrt(np.nanmean((np.squeeze(Y_t) - np.squeeze(posterior_mean))**2)) 216 | print('nlpd: %2.3f' % nlpd) 217 | print('rmse: %2.3f' % rmse) 218 | 219 | # prediction_fn = lambda X: utils.batch_predict(X, _prediction_fn, verbose=True) 220 | 221 | if len(tf.config.list_physical_devices('GPU')) > 0: 222 | cpugpu = 'gpu' 223 | else: 224 | cpugpu = 'cpu' 225 | 226 | with open("output/gpflow_" + str(ind) + "_" + str(num_z_ind) + "_" + cpugpu + "_time.txt", "wb") as fp: 227 | pickle.dump(avg_time_taken, fp) 228 | with open("output/gpflow_" + str(ind) + "_" + str(num_z_ind) + "_" + cpugpu + "_nlpd.txt", "wb") as fp: 229 | pickle.dump(nlpd, fp) 230 | with open("output/gpflow_" + str(ind) + "_" + str(num_z_ind) + "_" + cpugpu + "_rmse.txt", "wb") as fp: 231 | pickle.dump(rmse, fp) 232 | -------------------------------------------------------------------------------- /experiments/product/bn-ep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for (( approx=0; approx<4; approx++ )) 4 | do 5 | for (( fold=0; fold<4; fold++ )) 6 | do 7 | printf "$approx $fold \n" 8 | python3 bn_product.py 2 "$approx" "$fold" 9 | printf "\n" 10 | done 11 | done 12 | printf "\n" 13 | -------------------------------------------------------------------------------- /experiments/product/bn-newton.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for (( approx=0; approx<4; approx++ )) 4 | do 5 | for (( fold=0; fold<4; fold++ )) 6 | do 7 | printf "$approx $fold \n" 8 | python3 bn_product.py 0 "$approx" "$fold" 9 | printf "\n" 10 | done 11 | done 12 | printf "\n" 13 | -------------------------------------------------------------------------------- /experiments/product/bn-pl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for (( approx=0; approx<5; approx++ )) 4 | do 5 | for (( fold=0; fold<4; fold++ )) 6 | do 7 | printf "$approx $fold \n" 8 | python3 bn_product.py 3 "$approx" "$fold" 9 | printf "\n" 10 | done 11 | done 12 | printf "\n" 13 | -------------------------------------------------------------------------------- /experiments/product/bn-vi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for (( approx=0; approx<4; approx++ )) 4 | do 5 | for (( fold=0; fold<4; fold++ )) 6 | do 7 | printf "$approx $fold \n" 8 | python3 bn_product.py 1 "$approx" "$fold" 9 | printf "\n" 10 | done 11 | done 12 | printf "\n" 13 | -------------------------------------------------------------------------------- /experiments/rainforest/rainforest.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import time 5 | import pickle 6 | import sys 7 | 8 | print('loading rainforest data ...') 9 | data = np.loadtxt('../../data/TRI2TU-data.csv', delimiter=',') 10 | 11 | spatial_points = np.array([20, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500]) 12 | 13 | if len(sys.argv) > 1: 14 | model_type = int(sys.argv[1]) 15 | nr_ind = int(sys.argv[2]) 16 | fold = int(sys.argv[3]) 17 | parallel = bool(int(sys.argv[4])) 18 | else: 19 | model_type = 0 20 | nr_ind = 1 21 | # nr = 100 # spatial grid points (y-axis) 22 | fold = 0 23 | parallel = None 24 | 25 | nr = spatial_points[nr_ind] # spatial grid points (y-axis) 26 | nt = 200 # temporal grid points (x-axis) 27 | scale = 1000 / nt 28 | 29 | t, r, Y_ = 
bayesnewton.utils.discretegrid(data, [0, 1000, 0, 500], [nt, nr]) 30 | t_flat, r_flat, Y_flat = t.flatten(), r.flatten(), Y_.flatten() 31 | 32 | N = nr * nt # number of data points 33 | 34 | # sort out the train/test split 35 | np.random.seed(99) 36 | ind_shuffled = np.random.permutation(N) 37 | ind_split = np.stack(np.split(ind_shuffled, 10)) # 10 random batches of data indices 38 | test_ind = ind_split[fold] # test_ind = np.random.permutation(N)[:N//10] 39 | t_test = t_flat[test_ind] 40 | r_test = r_flat[test_ind] 41 | Y_test = Y_flat[test_ind] 42 | Y_flat[test_ind] = np.nan 43 | Y = Y_flat.reshape(nt, nr) 44 | 45 | # put test points on a grid to speed up prediction 46 | X_test = np.concatenate([t_test[:, None], r_test[:, None]], axis=1) 47 | t_test, r_test, Y_test = bayesnewton.utils.create_spatiotemporal_grid(X_test, Y_test) 48 | 49 | var_f = 1. # GP variance 50 | len_f = 10. # lengthscale 51 | 52 | kern = bayesnewton.kernels.SpatialMatern32(variance=var_f, lengthscale=len_f, z=r[0, ...], sparse=False) 53 | lik = bayesnewton.likelihoods.Poisson() 54 | if model_type == 0: 55 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=t, R=r, Y=Y, parallel=parallel) 56 | elif model_type == 1: 57 | model = bayesnewton.models.MarkovVariationalMeanFieldGP(kernel=kern, likelihood=lik, X=t, R=r, Y=Y, parallel=parallel) 58 | elif model_type == 2: 59 | model = bayesnewton.models.InfiniteHorizonVariationalGP(kernel=kern, likelihood=lik, X=t, R=r, Y=Y, parallel=parallel) 60 | 61 | print('num spatial pts:', nr) 62 | print('batch number:', fold) 63 | print('parallel:', parallel) 64 | print(model) 65 | 66 | lr_adam = 0.2 67 | lr_newton = 0.2 68 | iters = 11 69 | opt_hypers = objax.optimizer.Adam(model.vars()) 70 | energy = objax.GradValues(model.energy, model.vars()) 71 | 72 | unscented_transform = bayesnewton.cubature.Unscented(dim=1) # 5th-order unscented transform 73 | 74 | 75 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 76 | def train_op(): 77 | model.inference(lr=lr_newton, cubature=unscented_transform) # perform inference and update variational params 78 | dE, E = energy(cubature=unscented_transform) # compute energy and its gradients w.r.t. 
hypers 79 | opt_hypers(lr_adam, dE) 80 | return E 81 | 82 | 83 | train_op = objax.Jit(train_op) 84 | 85 | t0 = time.time() 86 | for i in range(1, iters + 1): 87 | if i == 2: 88 | t0 = time.time() 89 | loss = train_op() 90 | print('iter %2d: energy: %1.4f' % (i, loss[0])) 91 | t1 = time.time() 92 | # print('optimisation time: %2.2f secs' % (t1-t0)) 93 | avg_time_taken = (t1-t0)/(iters - 1) 94 | print('average iter time: %2.2f secs' % avg_time_taken) 95 | 96 | with open("output/" + str(model_type) + "_" + str(nr_ind) + "_" + str(int(parallel)) + "_time.txt", "wb") as fp: 97 | pickle.dump(avg_time_taken, fp) 98 | 99 | # calculate posterior predictive distribution via filtering and smoothing at train & test locations: 100 | print('calculating the posterior predictive distribution ...') 101 | t0 = time.time() 102 | nlpd = model.negative_log_predictive_density(X=t_test, R=r_test, Y=Y_test) 103 | t1 = time.time() 104 | print('prediction time: %2.2f secs' % (t1-t0)) 105 | print('nlpd: %2.3f' % nlpd) 106 | # 107 | # with open("output/" + str(model_type) + "_" + str(nr_ind) + "_" + str(fold) + "_nlpd.txt", "wb") as fp: 108 | # pickle.dump(nlpd, fp) 109 | -------------------------------------------------------------------------------- /experiments/shutters/shutters.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | from scipy.cluster.vq import kmeans2 5 | import matplotlib.pyplot as plt 6 | import pickle 7 | import time 8 | 9 | train_data = pickle.load(open(f'data/train_data_6_0.pickle', "rb")) 10 | pred_data = pickle.load(open(f'data/pred_data_6_0.pickle', "rb")) 11 | pred_data = pred_data['grid'] 12 | 13 | X = train_data['X'] 14 | Y = train_data['Y'] 15 | X_test = pred_data['X'] 16 | Y_test = pred_data['Y'] 17 | 18 | N = X.shape[0] # number of data points 19 | # print(N) 20 | 21 | np.random.seed(123) 22 | # print(Y.shape) 23 | # print(X.shape) 24 | # print(X) 25 | 26 | # put test points on a grid to speed up prediction 27 | t, r, Y = bayesnewton.utils.create_spatiotemporal_grid(X, Y) 28 | t_test, r_test, Y_test = bayesnewton.utils.create_spatiotemporal_grid(X_test, Y_test) 29 | print(t.shape) 30 | print(r.shape) 31 | print(Y.shape) 32 | # print(t) 33 | # print(r) 34 | # print(Y) 35 | 36 | # plt.imshow(Y, extent=(t[0, 0], t[-1, 0], r[0, 0], r[-1, 0])) 37 | # plt.imshow(Y[..., 0].T) 38 | # plt.show() 39 | 40 | var_f = 1. # GP variance 41 | # var_f = 2. 
# GP variance 42 | len_f_time = 0.1 # temporal lengthscale 43 | # len_f_time = 0.025 # temporal lengthscale 44 | len_f_space = 0.1 # spatial lengthscale 45 | var_y = 0.1 # observation noise variance 46 | # var_y = 0.3 # observation noise variance 47 | 48 | M = 6 49 | # z = np.linspace(np.min(r) + 0.05, np.max(r) - 0.05, M)[:, None] # inducing points 50 | # z = kmeans2(r_test[0], M, minit="points")[0] 51 | # print(z) 52 | z = np.array([0.0862069, 0.96551724, 0.5, 0.29310345, 0.84482759, 0.68965517]) 53 | kern = bayesnewton.kernels.SpatioTemporalMatern32(variance=var_f, lengthscale_time=len_f_time, lengthscale_space=len_f_space, 54 | z=z, sparse=True, opt_z=True, conditional='Full') 55 | lik = bayesnewton.likelihoods.Gaussian(variance=var_y) 56 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=t, R=r, Y=Y) 57 | # model = bayesnewton.models.InfiniteHorizonVariationalGP(kernel=kern, likelihood=lik, X=t, R=r, Y=Y) 58 | # model = bayesnewton.models.MarkovVariationalGPMeanField(kernel=kern, likelihood=lik, X=t, R=r, Y=Y) 59 | 60 | lr_adam = 0.01 61 | lr_newton = 1. 62 | iters = 250 63 | opt_hypers = objax.optimizer.Adam(model.vars()) 64 | energy = objax.GradValues(model.energy, model.vars()) 65 | 66 | 67 | @objax.Function.with_vars(model.vars() + opt_hypers.vars()) 68 | def train_op(): 69 | model.inference(lr=lr_newton) # perform inference and update variational params 70 | dE, E = energy() # compute energy and its gradients w.r.t. hypers 71 | opt_hypers(lr_adam, dE) 72 | return E 73 | 74 | 75 | train_op = objax.Jit(train_op) 76 | 77 | elbos = np.zeros(iters) 78 | t0 = time.time() 79 | for i in range(1, iters + 1): 80 | loss = train_op() 81 | print('iter %2d: energy: %1.4f' % (i, loss[0])) 82 | elbos[i-1] = loss[0] 83 | t1 = time.time() 84 | print('optimisation time: %2.2f secs' % (t1-t0)) 85 | 86 | print(model.likelihood.variance) 87 | print(model.kernel.variance) 88 | print(model.kernel.temporal_kernel.lengthscale) 89 | print(model.kernel.spatial_kernel.lengthscale) 90 | 91 | # calculate posterior predictive distribution via filtering and smoothing at train & test locations: 92 | print('calculating the posterior predictive distribution ...') 93 | t0 = time.time() 94 | posterior_mean, posterior_var = model.predict(X=t_test, R=r_test) 95 | nlpd = model.negative_log_predictive_density(X=t_test, R=r_test, Y=Y_test) 96 | t1 = time.time() 97 | print('prediction time: %2.2f secs' % (t1-t0)) 98 | print('nlpd: %2.3f' % nlpd) 99 | 100 | print('plotting ...') 101 | plt.figure(1) 102 | im = plt.imshow(np.squeeze(Y).T, extent=[np.min(t), np.max(t), np.min(r), np.max(r)], origin='lower') 103 | plt.colorbar(im, fraction=0.0235, pad=0.04) 104 | plt.figure(2) 105 | im = plt.imshow(np.squeeze(Y_test).T, extent=[np.min(t_test), np.max(t_test), np.min(r_test), np.max(r_test)], origin='lower') 106 | plt.colorbar(im, fraction=0.0235, pad=0.04) 107 | plt.figure(3) 108 | im = plt.imshow(posterior_mean.T, extent=[np.min(t_test), np.max(t_test), np.min(r_test), np.max(r_test)], origin='lower') 109 | plt.plot(np.min(t_test) * np.ones_like(model.kernel.z.value[:, 0]), model.kernel.z.value[:, 0], 'k>', markersize=6) 110 | plt.colorbar(im, fraction=0.0235, pad=0.04) 111 | plt.figure(4) 112 | plt.plot(np.arange(iters), elbos) 113 | plt.show() 114 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jax==0.4.14 2 | jaxlib==0.4.14 3 | objax==1.7.0 4 | numpy 5 | 
matplotlib 6 | scipy 7 | scikit-learn 8 | pandas 9 | tensorflow_probability==0.21 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | __version__ = "1.3.4" 4 | 5 | setup( 6 | name='bayesnewton', 7 | version=__version__, 8 | packages=find_packages(), 9 | python_requires='>=3.6', 10 | install_requires=[ 11 | "jax==0.4.14", 12 | "jaxlib==0.4.14", 13 | "objax==1.7.0", 14 | "tensorflow_probability==0.21", 15 | "numpy>=1.22" 16 | ], 17 | url='https://github.com/AaltoML/BayesNewton', 18 | license='Apache-2.0', 19 | ) 20 | -------------------------------------------------------------------------------- /tests/normaliser_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '../') 3 | import bayesnewton.kernels as kernels 4 | import numpy as np 5 | from bayesnewton.utils import transpose 6 | from jax import vmap 7 | import jax.numpy as jnp 8 | from jax.scipy.linalg import cho_factor 9 | 10 | np.random.seed(3) 11 | 12 | 13 | def wiggly_time_series(x_): 14 | noise_var = 0.15 # true observation noise 15 | return (np.cos(0.04*x_+0.33*np.pi) * np.sin(0.2*x_) + 16 | np.sqrt(noise_var) * np.random.normal(0, 1, x_.shape) + 17 | 0.0 * x_) # 0.02 * x_) 18 | 19 | 20 | print('generating some data ...') 21 | np.random.seed(12345) 22 | N = 10 23 | x = np.linspace(-17, 147, num=N) 24 | x = np.sort(x, axis=0) 25 | y = wiggly_time_series(x) 26 | 27 | x = x[:, None] 28 | 29 | var_f = 1.0 # GP variance 30 | len_f = 5.0 # GP lengthscale 31 | 32 | kernel = kernels.Matern32(variance=var_f, lengthscale=len_f) 33 | 34 | 35 | def prior_log_normaliser_gp(): 36 | """ compute logZ using kernel """ 37 | dim = 1 # TODO: implement multivariate case 38 | K = kernel.K(x, x) 39 | # (sign, logdet) = np.linalg.slogdet(K) 40 | C, low = cho_factor(K) 41 | logdet = 2 * jnp.sum(jnp.log(jnp.abs(jnp.diag(C)))) 42 | logZ_prior = -0.5 * x.shape[0] * dim * np.log(2 * np.pi) - 0.5 * logdet 43 | return logZ_prior 44 | 45 | 46 | def process_noise_covariance(A, Pinf): 47 | """ compute stationary noise cov, Q """ 48 | Q = Pinf - A @ Pinf @ transpose(A) 49 | return Q 50 | 51 | 52 | def diag(P): 53 | """ broadcastable version of jnp.diag """ 54 | return vmap(jnp.diag)(P) 55 | 56 | 57 | def prior_log_normaliser_markovgp(): 58 | """ compute logZ using state space model """ 59 | Pinf = kernel.stationary_covariance() 60 | dim = Pinf.shape[0] 61 | dt = jnp.diff(x[:, 0]) 62 | As = vmap(kernel.state_transition)(dt) 63 | Qs = np.concatenate([Pinf[None], process_noise_covariance(As, Pinf)]) 64 | Cs, low = cho_factor(Qs) 65 | logdet = 2 * jnp.sum(jnp.log(jnp.abs(diag(Cs)))) 66 | # logZ_prior = -0.5 * x.shape[0] * dim * np.log(2 * np.pi) - 0.5 * logdet 67 | logZ_prior = -0.5 * x.shape[0] * np.log(2 * np.pi) - 0.5 * logdet 68 | return logZ_prior 69 | 70 | 71 | logZ = prior_log_normaliser_gp() 72 | print(logZ) 73 | 74 | logZ_markov = prior_log_normaliser_markovgp() 75 | print(logZ_markov) 76 | -------------------------------------------------------------------------------- /tests/spatiotemporal_test.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | import time 5 | 6 | np.random.seed(3) 7 | 8 | 9 | def create_grid(x1, x2, y1, y2, n1=10, n2=10): 10 | y = np.linspace(y1, y2, n2) 11 | x = np.linspace(x1, x2, n1) 12 | 13 
| grid = [] 14 | for i in x: 15 | for j in y: 16 | grid.append([i, j]) 17 | 18 | return np.array(grid) 19 | 20 | 21 | Nt_train = 5 22 | Ns = 5 23 | X = create_grid(0, 1, 0, 1, Nt_train, Ns) 24 | t = np.linspace(0, 1, Nt_train, dtype=float) 25 | R = np.tile(np.linspace(0, 1, Ns, dtype=float)[None, ...], [Nt_train, 1]) 26 | 27 | N = X.shape[0] 28 | y = np.sin(10*X[:, 0]) + np.sin(10*X[:, 1]) + 0.01*np.random.randn(N) 29 | 30 | # Y = y[:, None] 31 | Y = y.reshape(Nt_train, Ns) 32 | 33 | # print(R.shape) 34 | # print(Y.shape) 35 | # print(R[0].shape) 36 | # print(X) 37 | # print(R) 38 | # print(R[0]) 39 | 40 | kernel_ls = [0.1, 0.2] 41 | kernel_var = [2.2, 0.4] 42 | likelihood_noise = 0.1 43 | 44 | lik = bayesnewton.likelihoods.Gaussian(variance=likelihood_noise) 45 | kern_time = bayesnewton.kernels.Matern32(variance=kernel_var[0], lengthscale=kernel_ls[0]) 46 | kern_space = bayesnewton.kernels.Matern32(variance=kernel_var[1], lengthscale=kernel_ls[1]) 47 | kern = bayesnewton.kernels.SpatioTemporalKernel(temporal_kernel=kern_time, 48 | spatial_kernel=kern_space, 49 | z=R[0], 50 | sparse=True, 51 | opt_z=False, 52 | conditional='Full') 53 | 54 | markov = True 55 | 56 | if markov: 57 | model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=t, R=R, Y=Y) 58 | # model = bayesnewton.models.MarkovVariationalGP(kernel=kern, likelihood=lik, X=X, Y=y) 59 | else: 60 | model = bayesnewton.models.VariationalGP(kernel=kern, likelihood=lik, X=X, Y=y) 61 | 62 | lr_adam = 0. 63 | lr_newton = 1. 64 | epochs = 2 65 | 66 | opt_hypers = objax.optimizer.Adam(model.vars()) 67 | energy = objax.GradValues(model.energy, model.vars()) 68 | 69 | 70 | def train_op(): 71 | model.inference(lr=lr_newton) # perform inference and update variational params 72 | grads, loss_ = energy() # compute energy and its gradients w.r.t. hypers 73 | # print(grads) 74 | for g, var_name in zip(grads, model.vars().keys()): # TODO: this gives wrong label to likelihood variance 75 | print(g, ' w.r.t. 
', var_name) 76 | # print(model.kernel.temporal_kernel.variance) 77 | opt_hypers(lr_adam, grads) 78 | return loss_[0] 79 | 80 | 81 | # train_op = objax.Jit(train_op, model.vars()) 82 | 83 | t0 = time.time() 84 | for i in range(1, epochs+1): 85 | loss = train_op() 86 | print('epoch %2d: loss: %1.4f' % (i, loss)) 87 | t1 = time.time() 88 | print('optimisation time: %2.2f secs' % (t1-t0)) 89 | -------------------------------------------------------------------------------- /tests/test_gp_vs_markovgp_class.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | from jax.config import config 5 | config.update("jax_enable_x64", True) 6 | import pytest 7 | 8 | 9 | def build_data(N): 10 | # np.random.seed(12345) 11 | x = 100 * np.random.rand(N) 12 | x = np.sort(x) # since MarkovGP sorts the inputs, they must also be sorted for GP 13 | f = lambda x_: 6 * np.sin(np.pi * x_ / 10.0) / (np.pi * x_ / 10.0 + 1) 14 | y_ = f(x) + np.sqrt(0.05) * np.random.randn(x.shape[0]) 15 | y = np.sign(y_) 16 | y[y == -1] = 0 17 | x = x[:, None] 18 | return x, y 19 | 20 | 21 | def initialise_gp_model(var_f, len_f, x, y): 22 | kernel = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 23 | likelihood = bayesnewton.likelihoods.Bernoulli() 24 | model = bayesnewton.models.VariationalGP(kernel=kernel, likelihood=likelihood, X=x, Y=y) 25 | return model 26 | 27 | 28 | def initialise_markovgp_model(var_f, len_f, x, y): 29 | kernel = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 30 | likelihood = bayesnewton.likelihoods.Bernoulli() 31 | model = bayesnewton.models.MarkovVariationalGP(kernel=kernel, likelihood=likelihood, X=x, Y=y) 32 | return model 33 | 34 | 35 | @pytest.mark.parametrize('var_f', [0.5, 1.5]) 36 | @pytest.mark.parametrize('len_f', [0.75, 2.5]) 37 | @pytest.mark.parametrize('N', [30, 60]) 38 | def test_initial_loss(var_f, len_f, N): 39 | """ 40 | test whether VI with newt's GP and MarkovGP give the same initial ELBO and posterior 41 | """ 42 | 43 | x, y = build_data(N) 44 | 45 | gp_model = initialise_gp_model(var_f, len_f, x, y) 46 | markovgp_model = initialise_markovgp_model(var_f, len_f, x, y) 47 | 48 | gp_model.update_posterior() 49 | loss_gp = gp_model.energy() 50 | print(loss_gp) 51 | 52 | markovgp_model.update_posterior() 53 | loss_markovgp = markovgp_model.energy() 54 | print(loss_markovgp) 55 | 56 | # print(posterior_mean - f_mean[:, 0]) 57 | 58 | np.testing.assert_allclose(gp_model.posterior_mean.value, markovgp_model.posterior_mean.value, rtol=1e-4) 59 | np.testing.assert_allclose(gp_model.posterior_variance.value, markovgp_model.posterior_variance.value, rtol=1e-4) 60 | np.testing.assert_almost_equal(loss_gp, loss_markovgp, decimal=2) 61 | 62 | 63 | @pytest.mark.parametrize('var_f', [0.5, 1.5]) 64 | @pytest.mark.parametrize('len_f', [0.75, 2.5]) 65 | @pytest.mark.parametrize('N', [30, 60]) 66 | def test_gradient_step(var_f, len_f, N): 67 | """ 68 | test whether VI with newt's GP and MarkovGP provide the same initial gradient step in the hyperparameters 69 | """ 70 | 71 | x, y = build_data(N) 72 | 73 | gp_model = initialise_gp_model(var_f, len_f, x, y) 74 | markovgp_model = initialise_markovgp_model(var_f, len_f, x, y) 75 | 76 | gv = objax.GradValues(gp_model.energy, gp_model.vars()) 77 | gv_markov = objax.GradValues(markovgp_model.energy, markovgp_model.vars()) 78 | 79 | lr_adam = 0.1 80 | lr_newton = 1. 
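# one Adam step per model: the energy gradients w.r.t. the hyperparameters of VariationalGP and MarkovVariationalGP should match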
81 | opt = objax.optimizer.Adam(gp_model.vars()) 82 | opt_markov = objax.optimizer.Adam(markovgp_model.vars()) 83 | 84 | gp_model.update_posterior() 85 | gp_grads, gp_value = gv() 86 | gp_loss_ = gp_value[0] 87 | opt(lr_adam, gp_grads) 88 | gp_hypers = np.array([gp_model.kernel.lengthscale, gp_model.kernel.variance]) 89 | print(gp_hypers) 90 | print(gp_grads) 91 | 92 | markovgp_model.update_posterior() 93 | markovgp_grads, markovgp_value = gv_markov() 94 | markovgp_loss_ = markovgp_value[0] 95 | opt_markov(lr_adam, markovgp_grads) 96 | markovgp_hypers = np.array([markovgp_model.kernel.lengthscale, markovgp_model.kernel.variance]) 97 | print(markovgp_hypers) 98 | print(markovgp_grads) 99 | 100 | np.testing.assert_allclose(gp_grads[0], markovgp_grads[0], rtol=1e-4) 101 | np.testing.assert_allclose(gp_grads[1], markovgp_grads[1], rtol=1e-4) 102 | 103 | 104 | @pytest.mark.parametrize('var_f', [0.5, 1.5]) 105 | @pytest.mark.parametrize('len_f', [0.75, 2.5]) 106 | @pytest.mark.parametrize('N', [30, 60]) 107 | def test_inference_step(var_f, len_f, N): 108 | """ 109 | test whether VI with newt's GP and MarkovGP give the same posterior after one natural gradient step 110 | """ 111 | 112 | x, y = build_data(N) 113 | 114 | gp_model = initialise_gp_model(var_f, len_f, x, y) 115 | markovgp_model = initialise_markovgp_model(var_f, len_f, x, y) 116 | 117 | lr_newton = 1. 118 | 119 | gp_model.inference(lr=lr_newton) # update variational params 120 | 121 | markovgp_model.inference(lr=lr_newton) # update variational params 122 | 123 | np.testing.assert_allclose(gp_model.posterior_mean.value, markovgp_model.posterior_mean.value, rtol=1e-4) 124 | np.testing.assert_allclose(gp_model.posterior_variance.value, markovgp_model.posterior_variance.value, rtol=1e-4) 125 | -------------------------------------------------------------------------------- /tests/test_gp_vs_markovgp_reg.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | import numpy as np 4 | from jax.config import config 5 | config.update("jax_enable_x64", True) 6 | import pytest 7 | 8 | 9 | def wiggly_time_series(x_): 10 | noise_var = 0.15 # true observation noise 11 | return (np.cos(0.04*x_+0.33*np.pi) * np.sin(0.2*x_) + 12 | np.sqrt(noise_var) * np.random.normal(0, 1, x_.shape)) 13 | 14 | 15 | def build_data(N): 16 | # np.random.seed(12345) 17 | x = np.random.permutation(np.linspace(-25.0, 150.0, num=N) + 0.5*np.random.randn(N)) # unevenly spaced 18 | x = np.sort(x) # since MarkovGP sorts the inputs, they must also be sorted for GP 19 | y = wiggly_time_series(x) 20 | # x_test = np.linspace(np.min(x)-15.0, np.max(x)+15.0, num=500) 21 | # y_test = wiggly_time_series(x_test) 22 | # x_plot = np.linspace(np.min(x)-20.0, np.max(x)+20.0, 200) 23 | 24 | x = x[:, None] 25 | # y = y[:, None] 26 | # x_plot = x_plot[:, None] 27 | return x, y 28 | 29 | 30 | def initialise_gp_model(var_f, len_f, var_y, x, y): 31 | kernel = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 32 | likelihood = bayesnewton.likelihoods.Gaussian(variance=var_y) 33 | model = bayesnewton.models.VariationalGP(kernel=kernel, likelihood=likelihood, X=x, Y=y) 34 | return model 35 | 36 | 37 | def initialise_markovgp_model(var_f, len_f, var_y, x, y): 38 | kernel = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 39 | likelihood = bayesnewton.likelihoods.Gaussian(variance=var_y) 40 | model = bayesnewton.models.MarkovVariationalGP(kernel=kernel, likelihood=likelihood, X=x, Y=y) 41 | return 
model 42 | 43 | 44 | @pytest.mark.parametrize('var_f', [0.5, 1.5]) 45 | @pytest.mark.parametrize('len_f', [0.75, 2.5]) 46 | @pytest.mark.parametrize('var_y', [0.1, 0.5]) 47 | @pytest.mark.parametrize('N', [30, 60]) 48 | def test_initial_loss(var_f, len_f, var_y, N): 49 | """ 50 | test whether VI with newt's GP and MarkovGP give the same initial ELBO and posterior 51 | """ 52 | 53 | x, y = build_data(N) 54 | 55 | gp_model = initialise_gp_model(var_f, len_f, var_y, x, y) 56 | markovgp_model = initialise_markovgp_model(var_f, len_f, var_y, x, y) 57 | 58 | gp_model.update_posterior() 59 | loss_gp = gp_model.energy() 60 | print(loss_gp) 61 | 62 | markovgp_model.update_posterior() 63 | loss_markovgp = markovgp_model.energy() 64 | print(loss_markovgp) 65 | 66 | # print(posterior_mean - f_mean[:, 0]) 67 | 68 | np.testing.assert_allclose(gp_model.posterior_mean.value, markovgp_model.posterior_mean.value, rtol=1e-4) 69 | np.testing.assert_allclose(gp_model.posterior_variance.value, markovgp_model.posterior_variance.value, rtol=1e-4) 70 | np.testing.assert_almost_equal(loss_gp, loss_markovgp, decimal=2) 71 | 72 | 73 | @pytest.mark.parametrize('var_f', [0.5, 1.5]) 74 | @pytest.mark.parametrize('len_f', [0.75, 2.5]) 75 | @pytest.mark.parametrize('var_y', [0.1, 0.5]) 76 | @pytest.mark.parametrize('N', [30, 60]) 77 | def test_gradient_step(var_f, len_f, var_y, N): 78 | """ 79 | test whether VI with newt's GP and MarkovGP provide the same initial gradient step in the hyperparameters 80 | """ 81 | 82 | x, y = build_data(N) 83 | 84 | gp_model = initialise_gp_model(var_f, len_f, var_y, x, y) 85 | markovgp_model = initialise_markovgp_model(var_f, len_f, var_y, x, y) 86 | 87 | gv = objax.GradValues(gp_model.energy, gp_model.vars()) 88 | gv_markov = objax.GradValues(markovgp_model.energy, markovgp_model.vars()) 89 | 90 | lr_adam = 0.1 91 | lr_newton = 1. 92 | opt = objax.optimizer.Adam(gp_model.vars()) 93 | opt_markov = objax.optimizer.Adam(markovgp_model.vars()) 94 | 95 | gp_model.update_posterior() 96 | gp_grads, gp_value = gv() 97 | gp_loss_ = gp_value[0] 98 | opt(lr_adam, gp_grads) 99 | gp_hypers = np.array([gp_model.kernel.lengthscale, gp_model.kernel.variance, gp_model.likelihood.variance]) 100 | print(gp_hypers) 101 | print(gp_grads) 102 | 103 | markovgp_model.update_posterior() 104 | markovgp_grads, markovgp_value = gv_markov() 105 | markovgp_loss_ = markovgp_value[0] 106 | opt_markov(lr_adam, markovgp_grads) 107 | markovgp_hypers = np.array([markovgp_model.kernel.lengthscale, markovgp_model.kernel.variance, 108 | markovgp_model.likelihood.variance]) 109 | print(markovgp_hypers) 110 | print(markovgp_grads) 111 | 112 | np.testing.assert_allclose(gp_grads[0], markovgp_grads[0], rtol=1e-4) 113 | np.testing.assert_allclose(gp_grads[1], markovgp_grads[1], rtol=1e-4) 114 | np.testing.assert_allclose(gp_grads[2], markovgp_grads[2], rtol=1e-4) 115 | 116 | 117 | @pytest.mark.parametrize('var_f', [0.5, 1.5]) 118 | @pytest.mark.parametrize('len_f', [0.75, 2.5]) 119 | @pytest.mark.parametrize('var_y', [0.1, 0.5]) 120 | @pytest.mark.parametrize('N', [30, 60]) 121 | def test_inference_step(var_f, len_f, var_y, N): 122 | """ 123 | test whether VI with newt's GP and MarkovGP give the same posterior after one natural gradient step 124 | """ 125 | 126 | x, y = build_data(N) 127 | 128 | gp_model = initialise_gp_model(var_f, len_f, var_y, x, y) 129 | markovgp_model = initialise_markovgp_model(var_f, len_f, var_y, x, y) 130 | 131 | lr_newton = 1. 
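# a single full natural-gradient step (lr_newton = 1.) on each model; the resulting posterior means and variances should coincide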
132 | 133 | gp_model.inference(lr=lr_newton) # update variational params 134 | 135 | markovgp_model.inference(lr=lr_newton) # update variational params 136 | 137 | np.testing.assert_allclose(gp_model.posterior_mean.value, markovgp_model.posterior_mean.value, rtol=1e-4) 138 | np.testing.assert_allclose(gp_model.posterior_variance.value, markovgp_model.posterior_variance.value, rtol=1e-4) 139 | -------------------------------------------------------------------------------- /tests/test_vs_exact_marg_lik.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import numpy as np 3 | from bayesnewton.utils import solve 4 | from jax.config import config 5 | config.update("jax_enable_x64", True) 6 | import pytest 7 | 8 | 9 | def wiggly_time_series(x_): 10 | noise_var = 0.15 # true observation noise 11 | return (np.cos(0.04*x_+0.33*np.pi) * np.sin(0.2*x_) + 12 | np.sqrt(noise_var) * np.random.normal(0, 1, x_.shape)) 13 | 14 | 15 | def build_data(N): 16 | # np.random.seed(12345) 17 | x = np.random.permutation(np.linspace(-25.0, 150.0, num=N) + 0.5*np.random.randn(N)) # unevenly spaced 18 | x = np.sort(x) # since MarkovGP sorts the inputs, they must also be sorted for GP 19 | y = wiggly_time_series(x) 20 | # x_test = np.linspace(np.min(x)-15.0, np.max(x)+15.0, num=500) 21 | # y_test = wiggly_time_series(x_test) 22 | # x_plot = np.linspace(np.min(x)-20.0, np.max(x)+20.0, 200) 23 | 24 | x = x[:, None] 25 | # y = y[:, None] 26 | # x_plot = x_plot[:, None] 27 | return x, y 28 | 29 | 30 | def initialise_gp_model(var_f, len_f, var_y, x, y): 31 | kernel = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 32 | likelihood = bayesnewton.likelihoods.Gaussian(variance=var_y) 33 | model = bayesnewton.models.VariationalGP(kernel=kernel, likelihood=likelihood, X=x, Y=y) 34 | return model 35 | 36 | 37 | @pytest.mark.parametrize('var_f', [0.5, 1.5]) 38 | @pytest.mark.parametrize('len_f', [0.75, 2.5]) 39 | @pytest.mark.parametrize('var_y', [0.1, 0.5]) 40 | @pytest.mark.parametrize('N', [30, 60]) 41 | def test_marg_lik(var_f, len_f, var_y, N): 42 | """ 43 | test whether VI with newt's GP and Gaussian likelihood gives the exact marginal likelihood 44 | """ 45 | 46 | x, y = build_data(N) 47 | 48 | gp_model = initialise_gp_model(var_f, len_f, var_y, x, y) 49 | 50 | gp_model.inference(lr=1.) 
# update variational params 51 | loss_gp = gp_model.energy() 52 | print(loss_gp) 53 | 54 | K_X = gp_model.kernel(x, x) 55 | K_Y = K_X + var_y * np.eye(K_X.shape[0]) 56 | L_Y = np.linalg.cholesky(K_Y) 57 | exact_marg_lik = ( 58 | -0.5 * y.T @ solve(K_Y, y) 59 | - np.sum(np.log(np.diag(L_Y))) 60 | - 0.5 * y.shape[0] * np.log(2 * np.pi) 61 | ) 62 | 63 | print(exact_marg_lik) 64 | 65 | np.testing.assert_almost_equal(loss_gp, -exact_marg_lik, decimal=4) 66 | -------------------------------------------------------------------------------- /tests/test_vs_gpflow_class.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | from bayesnewton.utils import inv 4 | import numpy as np 5 | from jax.config import config 6 | config.update("jax_enable_x64", True) 7 | import pytest 8 | import tensorflow as tf 9 | import gpflow 10 | 11 | # TODO: ------- FIX -------- 12 | 13 | 14 | def build_data(N): 15 | # np.random.seed(12345) 16 | x = 100 * np.random.rand(N) 17 | f = lambda x_: 6 * np.sin(np.pi * x_ / 10.0) / (np.pi * x_ / 10.0 + 1) 18 | y_ = f(x) + np.sqrt(0.05) * np.random.randn(x.shape[0]) 19 | y = np.sign(y_) 20 | y[y == -1] = 0 21 | x = x[:, None] 22 | return x, y 23 | 24 | 25 | def initialise_newt_model(var_f, len_f, x, y): 26 | kernel = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 27 | likelihood = bayesnewton.likelihoods.Bernoulli() 28 | model = bayesnewton.models.VariationalGP(kernel=kernel, likelihood=likelihood, X=x, Y=y) 29 | return model 30 | 31 | 32 | def initialise_gpflow_model(var_f, len_f, x, y): 33 | N = x.shape[0] 34 | k = gpflow.kernels.Matern52(lengthscales=[len_f], variance=var_f, name='matern') 35 | 36 | # find the m and S that correspond to the same natural parameters used by CVI 37 | K_xx = np.array(k(x, x)) 38 | K_xx_inv = inv(K_xx) 39 | 40 | S = inv(K_xx_inv + 1e-2 * np.eye(N)) 41 | S_chol = np.linalg.cholesky(S) 42 | S_chol_init = np.array([S_chol]) 43 | # S_chol_flattened_init = np.array(S_chol[np.tril_indices(N, 0)]) 44 | 45 | lambda_init = np.zeros((N, 1)) 46 | m_init = S @ lambda_init 47 | 48 | lik = gpflow.likelihoods.Bernoulli() 49 | 50 | # data = (x, y) 51 | model = gpflow.models.SVGP( 52 | inducing_variable=x, 53 | whiten=False, 54 | kernel=k, 55 | mean_function=None, 56 | likelihood=lik, 57 | q_mu=m_init, 58 | q_sqrt=S_chol_init 59 | ) 60 | gpflow.utilities.set_trainable(model.inducing_variable.Z, False) 61 | gpflow.utilities.set_trainable(model.q_mu, False) 62 | gpflow.utilities.set_trainable(model.q_sqrt, False) 63 | return model 64 | 65 | 66 | @pytest.mark.parametrize('var_f', [0.5, 1.5]) 67 | @pytest.mark.parametrize('len_f', [2.5, 5.]) 68 | @pytest.mark.parametrize('N', [30, 60]) 69 | def test_initial_loss(var_f, len_f, N): 70 | """ 71 | test whether newt's VI and gpflow's SVGP (Z=X) give the same initial ELBO and posterior 72 | """ 73 | 74 | x, y = build_data(N) 75 | 76 | newt_model = initialise_newt_model(var_f, len_f, x, y) 77 | gpflow_model = initialise_gpflow_model(var_f, len_f, x, y) 78 | 79 | newt_model.update_posterior() 80 | loss_newt = newt_model.energy() 81 | # _, _, expected_density = newt_model.inference(newt_model) 82 | print(loss_newt) 83 | # print(expected_density) 84 | 85 | data = (x, y[:, None]) 86 | f_mean, f_var = gpflow_model.predict_f(x) 87 | var_exp = np.sum(gpflow_model.likelihood.variational_expectations(f_mean, f_var, y[:, None])) 88 | loss_gpflow = -gpflow_model.elbo(data) 89 | print(loss_gpflow.numpy()) 90 | # print(var_exp) 91 | 92 | # 
print(posterior_mean - f_mean[:, 0]) 93 | 94 | np.testing.assert_allclose(np.squeeze(newt_model.posterior_mean.value), f_mean[:, 0], rtol=1e-4) 95 | np.testing.assert_allclose(np.squeeze(newt_model.posterior_variance.value), f_var[:, 0], rtol=1e-4) 96 | np.testing.assert_almost_equal(loss_newt, loss_gpflow.numpy(), decimal=2) 97 | 98 | 99 | @pytest.mark.parametrize('var_f', [0.5, 1.5]) 100 | @pytest.mark.parametrize('len_f', [2.5, 5.]) 101 | @pytest.mark.parametrize('N', [30, 60]) 102 | def test_gradient_step(var_f, len_f, N): 103 | """ 104 | test whether newt's VI and gpflow's SVGP (Z=X) provide the same initial gradient step in the hyperparameters 105 | """ 106 | 107 | x, y = build_data(N) 108 | 109 | newt_model = initialise_newt_model(var_f, len_f, x, y) 110 | gpflow_model = initialise_gpflow_model(var_f, len_f, x, y) 111 | 112 | gv = objax.GradValues(newt_model.energy, newt_model.vars()) 113 | 114 | lr_adam = 0.1 115 | lr_newton = 1. 116 | opt = objax.optimizer.Adam(newt_model.vars()) 117 | 118 | newt_model.update_posterior() 119 | newt_grads, value = gv() # , lr=lr_newton) 120 | loss_ = value[0] 121 | opt(lr_adam, newt_grads) 122 | newt_hypers = np.array([newt_model.kernel.lengthscale, newt_model.kernel.variance]) 123 | print(newt_hypers) 124 | print(newt_grads) 125 | 126 | adam_opt = tf.optimizers.Adam(lr_adam) 127 | data = (x, y[:, None]) 128 | with tf.GradientTape() as tape: 129 | loss = -gpflow_model.elbo(data) 130 | _vars = gpflow_model.trainable_variables 131 | gpflow_grads = tape.gradient(loss, _vars) 132 | 133 | loss_fn = gpflow_model.training_loss_closure(data) 134 | adam_vars = gpflow_model.trainable_variables 135 | adam_opt.minimize(loss_fn, adam_vars) 136 | gpflow_hypers = np.array([gpflow_model.kernel.lengthscales.numpy()[0], gpflow_model.kernel.variance.numpy()]) 137 | print(gpflow_hypers) 138 | print(gpflow_grads) 139 | 140 | np.testing.assert_allclose(newt_grads[0], gpflow_grads[0], rtol=1e-2) 141 | np.testing.assert_allclose(newt_grads[1], gpflow_grads[1], rtol=1e-2) 142 | 143 | 144 | @pytest.mark.parametrize('var_f', [0.5, 1.5]) 145 | @pytest.mark.parametrize('len_f', [2.5, 5.]) 146 | @pytest.mark.parametrize('N', [30, 60]) 147 | def test_inference_step(var_f, len_f, N): 148 | """ 149 | test whether newt's VI and gpflow's SVGP (Z=X) give the same posterior after one natural gradient step 150 | """ 151 | 152 | x, y = build_data(N) 153 | 154 | newt_model = initialise_newt_model(var_f, len_f, x, y) 155 | gpflow_model = initialise_gpflow_model(var_f, len_f, x, y) 156 | 157 | lr_newton = 1. 
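# one bayesnewton CVI step vs one gpflow NaturalGradient(gamma=1) step on q(u) with Z=X; the predictive moments are compared below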
158 | 159 | newt_model.inference(lr=lr_newton) # update variational params 160 | 161 | data = (x, y[:, None]) 162 | with tf.GradientTape() as tape: 163 | loss = -gpflow_model.elbo(data) 164 | 165 | variational_vars = [(gpflow_model.q_mu, gpflow_model.q_sqrt)] 166 | natgrad_opt = gpflow.optimizers.NaturalGradient(gamma=lr_newton) 167 | loss_fn = gpflow_model.training_loss_closure(data) 168 | natgrad_opt.minimize(loss_fn, variational_vars) 169 | 170 | f_mean, f_var = gpflow_model.predict_f(x) 171 | 172 | # print(post_mean_) 173 | # print(f_mean[:, 0]) 174 | 175 | np.testing.assert_allclose(np.squeeze(newt_model.posterior_mean.value), f_mean[:, 0], rtol=5e-3) 176 | np.testing.assert_allclose(np.squeeze(newt_model.posterior_variance.value), f_var[:, 0], rtol=5e-3) 177 | -------------------------------------------------------------------------------- /tests/test_vs_gpflow_reg.py: -------------------------------------------------------------------------------- 1 | import bayesnewton 2 | import objax 3 | from bayesnewton.utils import inv 4 | import numpy as np 5 | from jax.config import config 6 | config.update("jax_enable_x64", True) 7 | import pytest 8 | import tensorflow as tf 9 | import gpflow 10 | 11 | 12 | def wiggly_time_series(x_): 13 | noise_var = 0.15 # true observation noise 14 | return (np.cos(0.04*x_+0.33*np.pi) * np.sin(0.2*x_) + 15 | np.sqrt(noise_var) * np.random.normal(0, 1, x_.shape)) 16 | 17 | 18 | def build_data(N): 19 | # np.random.seed(12345) 20 | x = np.random.permutation(np.linspace(-25.0, 150.0, num=N) + 0.5*np.random.randn(N)) # unevenly spaced 21 | y = wiggly_time_series(x) 22 | # x_test = np.linspace(np.min(x)-15.0, np.max(x)+15.0, num=500) 23 | # y_test = wiggly_time_series(x_test) 24 | # x_plot = np.linspace(np.min(x)-20.0, np.max(x)+20.0, 200) 25 | 26 | x = x[:, None] 27 | # y = y[:, None] 28 | # x_plot = x_plot[:, None] 29 | return x, y 30 | 31 | 32 | def initialise_newt_model(var_f, len_f, var_y, x, y): 33 | kernel = bayesnewton.kernels.Matern52(variance=var_f, lengthscale=len_f) 34 | likelihood = bayesnewton.likelihoods.Gaussian(variance=var_y) 35 | model = bayesnewton.models.VariationalGP(kernel=kernel, likelihood=likelihood, X=x, Y=y) 36 | return model 37 | 38 | 39 | def initialise_gpflow_model(var_f, len_f, var_y, x, y): 40 | N = x.shape[0] 41 | k = gpflow.kernels.Matern52(lengthscales=[len_f], variance=var_f, name='matern') 42 | 43 | # find the m and S that correspond to the same natural parameters used by CVI 44 | K_xx = np.array(k(x, x)) 45 | K_xx_inv = inv(K_xx) 46 | 47 | S = inv(K_xx_inv + 1e-2 * np.eye(N)) 48 | S_chol = np.linalg.cholesky(S) 49 | S_chol_init = np.array([S_chol]) 50 | # S_chol_flattened_init = np.array(S_chol[np.tril_indices(N, 0)]) 51 | 52 | lambda_init = np.zeros((N, 1)) 53 | m_init = S @ lambda_init 54 | 55 | lik = gpflow.likelihoods.Gaussian(variance=var_y) 56 | 57 | # data = (x, y) 58 | model = gpflow.models.SVGP( 59 | inducing_variable=x, 60 | whiten=False, 61 | kernel=k, 62 | mean_function=None, 63 | likelihood=lik, 64 | q_mu=m_init, 65 | q_sqrt=S_chol_init 66 | ) 67 | gpflow.utilities.set_trainable(model.inducing_variable.Z, False) 68 | gpflow.utilities.set_trainable(model.q_mu, False) 69 | gpflow.utilities.set_trainable(model.q_sqrt, False) 70 | return model 71 | 72 | 73 | @pytest.mark.parametrize('var_f', [0.5, 1.5]) 74 | @pytest.mark.parametrize('len_f', [0.75, 2.5]) 75 | @pytest.mark.parametrize('var_y', [0.1, 0.5]) 76 | @pytest.mark.parametrize('N', [30, 60]) 77 | def test_initial_loss(var_f, len_f, var_y, N): 78 | """ 79 | 
test whether newt's VI and gpflow's SVGP (Z=X) give the same initial ELBO and posterior 80 | """ 81 | 82 | x, y = build_data(N) 83 | 84 | newt_model = initialise_newt_model(var_f, len_f, var_y, x, y) 85 | gpflow_model = initialise_gpflow_model(var_f, len_f, var_y, x, y) 86 | 87 | newt_model.update_posterior() 88 | loss_newt = newt_model.energy() 89 | # _, _, expected_density = newt_model.inference(newt_model) 90 | print(loss_newt) 91 | # print(expected_density) 92 | 93 | data = (x, y[:, None]) 94 | f_mean, f_var = gpflow_model.predict_f(x) 95 | var_exp = np.sum(gpflow_model.likelihood.variational_expectations(f_mean, f_var, y[:, None])) 96 | loss_gpflow = -gpflow_model.elbo(data) 97 | print(loss_gpflow.numpy()) 98 | # print(var_exp) 99 | 100 | # print(posterior_mean - f_mean[:, 0]) 101 | 102 | np.testing.assert_allclose(np.squeeze(newt_model.posterior_mean.value), f_mean[:, 0], rtol=1e-4) 103 | np.testing.assert_allclose(np.squeeze(newt_model.posterior_variance.value), f_var[:, 0], rtol=1e-4) 104 | np.testing.assert_almost_equal(loss_newt, loss_gpflow.numpy(), decimal=2) 105 | 106 | 107 | @pytest.mark.parametrize('var_f', [0.5, 1.5]) 108 | @pytest.mark.parametrize('len_f', [0.75, 2.5]) 109 | @pytest.mark.parametrize('var_y', [0.1, 0.5]) 110 | @pytest.mark.parametrize('N', [30, 60]) 111 | def test_gradient_step(var_f, len_f, var_y, N): 112 | """ 113 | test whether newt's VI and gpflow's SVGP (Z=X) provide the same initial gradient step in the hyperparameters 114 | """ 115 | 116 | x, y = build_data(N) 117 | 118 | newt_model = initialise_newt_model(var_f, len_f, var_y, x, y) 119 | gpflow_model = initialise_gpflow_model(var_f, len_f, var_y, x, y) 120 | 121 | gv = objax.GradValues(newt_model.energy, newt_model.vars()) 122 | 123 | lr_adam = 0.1 124 | lr_newton = 1. 
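# hyperparameter gradients: objax GradValues on the bayesnewton energy vs tf.GradientTape on the negative gpflow ELBO, followed by one Adam step each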
125 | opt = objax.optimizer.Adam(newt_model.vars()) 126 | 127 | newt_model.update_posterior() 128 | newt_grads, value = gv() # , lr=lr_newton) 129 | loss_ = value[0] 130 | opt(lr_adam, newt_grads) 131 | newt_hypers = np.array([newt_model.kernel.lengthscale, newt_model.kernel.variance, newt_model.likelihood.variance]) 132 | print(newt_hypers) 133 | print(newt_grads) 134 | 135 | adam_opt = tf.optimizers.Adam(lr_adam) 136 | data = (x, y[:, None]) 137 | with tf.GradientTape() as tape: 138 | loss = -gpflow_model.elbo(data) 139 | _vars = gpflow_model.trainable_variables 140 | gpflow_grads = tape.gradient(loss, _vars) 141 | 142 | loss_fn = gpflow_model.training_loss_closure(data) 143 | adam_vars = gpflow_model.trainable_variables 144 | adam_opt.minimize(loss_fn, adam_vars) 145 | gpflow_hypers = np.array([gpflow_model.kernel.lengthscales.numpy()[0], gpflow_model.kernel.variance.numpy(), 146 | gpflow_model.likelihood.variance.numpy()]) 147 | print(gpflow_hypers) 148 | print(gpflow_grads) 149 | 150 | np.testing.assert_allclose(newt_grads[0], gpflow_grads[0], atol=1e-2) # use atol since values are so small 151 | np.testing.assert_allclose(newt_grads[1], gpflow_grads[1], rtol=1e-2) 152 | np.testing.assert_allclose(newt_grads[2], gpflow_grads[2], rtol=1e-2) 153 | 154 | 155 | @pytest.mark.parametrize('var_f', [0.5, 1.5]) 156 | @pytest.mark.parametrize('len_f', [0.75, 2.5]) 157 | @pytest.mark.parametrize('var_y', [0.1, 0.5]) 158 | @pytest.mark.parametrize('N', [30, 60]) 159 | def test_inference_step(var_f, len_f, var_y, N): 160 | """ 161 | test whether newt's VI and gpflow's SVGP (Z=X) give the same posterior after one natural gradient step 162 | """ 163 | 164 | x, y = build_data(N) 165 | 166 | newt_model = initialise_newt_model(var_f, len_f, var_y, x, y) 167 | gpflow_model = initialise_gpflow_model(var_f, len_f, var_y, x, y) 168 | 169 | lr_newton = 1. 
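# as in the classification test: one full natural-gradient step each, then compare the predictive means and variances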
170 | 171 | newt_model.inference(lr=lr_newton) # update variational params 172 | 173 | data = (x, y[:, None]) 174 | with tf.GradientTape() as tape: 175 | loss = -gpflow_model.elbo(data) 176 | 177 | variational_vars = [(gpflow_model.q_mu, gpflow_model.q_sqrt)] 178 | natgrad_opt = gpflow.optimizers.NaturalGradient(gamma=lr_newton) 179 | loss_fn = gpflow_model.training_loss_closure(data) 180 | natgrad_opt.minimize(loss_fn, variational_vars) 181 | 182 | f_mean, f_var = gpflow_model.predict_f(x) 183 | 184 | # print(post_mean_) 185 | # print(f_mean[:, 0]) 186 | 187 | np.testing.assert_allclose(np.squeeze(newt_model.posterior_mean.value), f_mean[:, 0], rtol=1e-3) 188 | np.testing.assert_allclose(np.squeeze(newt_model.posterior_variance.value), f_var[:, 0], rtol=1e-3) 189 | -------------------------------------------------------------------------------- /tests/variational_expectations.py: -------------------------------------------------------------------------------- 1 | import objax 2 | import jax.numpy as np 3 | from jax import grad, jacrev 4 | from jax.scipy.linalg import cholesky 5 | from bayesnewton.likelihoods import Likelihood 6 | from bayesnewton.utils import softplus, softplus_inv, sigmoid, sigmoid_diff 7 | from bayesnewton.cubature import expected_conditional_mean_cubature, gauss_hermite 8 | 9 | 10 | class Positive(Likelihood): 11 | """ 12 | """ 13 | def __init__(self, variance=0.1): 14 | """ 15 | param hyp: observation noise 16 | """ 17 | self.transformed_variance = objax.TrainVar(np.array(softplus_inv(variance))) 18 | super().__init__() 19 | self.name = 'Positive' 20 | self.link_fn = pos_map 21 | self.dlink_fn = dpos_map # derivative of the link function 22 | self.d2link_fn = d2pos_map # 2nd derivative of the link function 23 | 24 | @property 25 | def variance(self): 26 | return softplus(self.transformed_variance.value) 27 | 28 | def evaluate_likelihood(self, y, f): 29 | """ 30 | Evaluate the likelihood 31 | """ 32 | mu, var = self.conditional_moments(f) 33 | return (2 * np.pi * var) ** -0.5 * np.exp(-0.5 * (y - mu) ** 2 / var) 34 | 35 | def evaluate_log_likelihood(self, y, f): 36 | """ 37 | Evaluate the log-likelihood 38 | """ 39 | mu, var = self.conditional_moments(f) 40 | return np.squeeze(-0.5 * np.log(2 * np.pi * var) - 0.5 * (y - mu) ** 2 / var) 41 | 42 | def conditional_moments(self, f): 43 | """ 44 | """ 45 | return self.link_fn(f), np.array([[self.variance]]) 46 | 47 | def log_likelihood_gradients(self, y, f): 48 | log_lik, J, H = self.log_likelihood_gradients_(y, f) 49 | return log_lik, J, H 50 | 51 | def expected_conditional_mean(self, mean, cov, cubature=None): 52 | return expected_conditional_mean_cubature(self, mean, cov, cubature) 53 | 54 | def expected_conditional_mean_dm(self, mean, cov, cubature=None): 55 | """ 56 | """ 57 | dmu_dm, _ = grad(self.expected_conditional_mean, argnums=0, has_aux=True)(mean, cov, cubature) 58 | return np.squeeze(dmu_dm) 59 | 60 | def expected_conditional_mean_dm2(self, mean, cov, cubature=None): 61 | """ 62 | """ 63 | d2mu_dm2 = jacrev(self.expected_conditional_mean_dm, argnums=0)(mean, cov, cubature) 64 | return d2mu_dm2 65 | 66 | def statistical_linear_regression(self, mean, cov, cubature=None): 67 | mu, omega = self.expected_conditional_mean(mean, cov, cubature) 68 | dmu_dm = self.expected_conditional_mean_dm(mean, cov, cubature) 69 | d2mu_dm2 = self.expected_conditional_mean_dm2(mean, cov, cubature) 70 | return mu.reshape(-1, 1), omega, dmu_dm[None, None], d2mu_dm2[None] 71 | 72 | 73 | def _gaussian_expected_log_lik(y, post_mean, 
post_cov, var): 74 | post_mean = post_mean.reshape(-1, 1) 75 | post_cov = post_cov.reshape(-1, 1) 76 | y = y.reshape(-1, 1) 77 | var = var.reshape(-1, 1) 78 | # version which computes individual parts and outputs vector 79 | exp_log_lik = ( 80 | -0.5 * np.log(2 * np.pi) 81 | - 0.5 * np.log(var) 82 | - 0.5 * ((y - post_mean) ** 2 + post_cov) / var 83 | ) 84 | return exp_log_lik 85 | 86 | 87 | def _gaussian_expected_log_lik_positive(y, post_mean, post_cov, var): 88 | post_mean = post_mean.reshape(-1, 1) 89 | post_cov = post_cov.reshape(-1, 1) 90 | y = y.reshape(-1, 1) 91 | var = var.reshape(-1, 1) 92 | 93 | x, w = gauss_hermite(post_mean.shape[0], 20) 94 | sigma_points = cholesky(post_cov) @ np.atleast_2d(x) + post_mean 95 | conditional_expectation = np.sum(w * pos_map(sigma_points)) 96 | 97 | square_error_cubature = ((y - conditional_expectation) ** 2 + post_cov) / var 98 | 99 | # version which computes individual parts and outputs vector 100 | exp_log_lik = ( 101 | -0.5 * np.log(2 * np.pi) 102 | - 0.5 * np.log(var) 103 | - 0.5 * square_error_cubature 104 | ) 105 | return exp_log_lik 106 | 107 | 108 | def _gaussian_expected_log_lik_positive_attempt(y, post_mean, post_cov, var): 109 | post_mean = post_mean.reshape(-1, 1) 110 | post_cov = post_cov.reshape(-1, 1) 111 | y = y.reshape(-1, 1) 112 | var = var.reshape(-1, 1) 113 | 114 | x, w = gauss_hermite(post_mean.shape[0], 20) 115 | sigma_points = cholesky(post_cov) @ np.atleast_2d(x) + post_mean 116 | conditional_expectation = np.sum(w * pos_map(sigma_points)) 117 | 118 | def cov_term(mean): 119 | return (conditional_expectation - pos_map(mean)) ** 2 120 | 121 | final_term = np.sum(w * cov_term(sigma_points)) 122 | 123 | square_error_cubature = ((y - conditional_expectation) ** 2 + final_term) / var 124 | 125 | # version which computes individual parts and outputs vector 126 | exp_log_lik = ( 127 | -0.5 * np.log(2 * np.pi) 128 | - 0.5 * np.log(var) 129 | - 0.5 * square_error_cubature 130 | ) 131 | return exp_log_lik 132 | 133 | 134 | def var_exp_positive(y, post_mean, post_cov, var): 135 | post_mean = post_mean.reshape(-1, 1) 136 | post_cov = post_cov.reshape(-1, 1) 137 | y = y.reshape(-1, 1) 138 | var = var.reshape(-1, 1) 139 | 140 | def square_error(mean): 141 | return (y - pos_map(mean)) ** 2 * var ** -1 142 | 143 | x, w = gauss_hermite(post_mean.shape[0], 20) 144 | sigma_points = cholesky(post_cov) @ np.atleast_2d(x) + post_mean 145 | 146 | square_error_cubature = np.sum(w * square_error(sigma_points)) 147 | 148 | # version which computes individual parts and outputs vector 149 | exp_log_lik = ( 150 | -0.5 * np.log(2 * np.pi) 151 | - 0.5 * np.log(var) 152 | - 0.5 * square_error_cubature 153 | ) 154 | return exp_log_lik 155 | 156 | 157 | def pos_map(q): 158 | return softplus(q) 159 | 160 | 161 | def dpos_map(q): 162 | return sigmoid(q) 163 | 164 | 165 | def d2pos_map(q): 166 | return sigmoid_diff(q) 167 | 168 | 169 | y, m, v, obs_var = np.array(0.2), np.array(0.1), np.array(0.1), np.array(0.5) 170 | 171 | # var_exp = _gaussian_expected_log_lik(y, m, v, obs_var) 172 | var_exp_pos = _gaussian_expected_log_lik_positive(y, m, v, obs_var) 173 | var_exp_pos_attempt = _gaussian_expected_log_lik_positive_attempt(y, m, v, obs_var) 174 | var_exp_pos_true = var_exp_positive(y, m, v, obs_var) 175 | 176 | # lik = Positive(variance=obs_var) 177 | # var_exp_pos_true_ = lik.variational_expectation_(y[None], m[None, None], v[None, None]) 178 | 179 | # print(var_exp) 180 | print(var_exp_pos) 181 | print(var_exp_pos_attempt) 182 | print(var_exp_pos_true) 183 
| # print(var_exp_pos_true_) 184 | --------------------------------------------------------------------------------