├── .gitignore ├── LICENSE.txt ├── README.rst ├── bars ├── deepbars-sbn.py └── param-bars.py ├── caltech ├── dsbn-dsbn.py ├── nade-nade.py ├── run-caltech ├── sbn-nade.py └── sbn-sbn.py ├── data └── download.sh ├── learning ├── __init__.py ├── dataset.py ├── datasets │ ├── __init__.py │ ├── caltech.py │ ├── mnist.py │ ├── tests │ │ └── tests_datasets.py │ └── tfd.py ├── experiment.py ├── hyperbase.py ├── model.py ├── models │ ├── __init__.py │ ├── darn.py │ ├── dsbn.py │ ├── nade.py │ ├── rws.py │ ├── sbn.py │ └── tests │ │ ├── test_darn.py │ │ ├── test_dsbn.py │ │ ├── test_model.py │ │ ├── test_nade.py │ │ ├── test_rws.py │ │ └── test_sbn.py ├── monitor │ ├── __init__.py │ └── bootstrap.py ├── preproc.py ├── termination.py ├── tests │ ├── __init__.py │ ├── test_hyperbase.py │ ├── test_monitor.py │ ├── test_termination.py │ ├── test_training.py │ ├── testing.py │ └── toys.py ├── training.py └── utils │ ├── __init__.py │ ├── autotable.py │ ├── datalog.py │ ├── test_autotable.py │ ├── test_datalog.py │ ├── test_unrolled_scan.py │ └── unrolled_scan.py ├── mnist ├── param-mnist-darn-200.py ├── param-nade-nade-200.py ├── rerun-monitors.py └── run-mnist ├── notebooks └── MNIST-Analyze.ipynb ├── requirements.txt ├── run-exp.py ├── shippable.yml ├── show-W0.py ├── show-layerwise.py ├── show-ll.py ├── show-param-stats.py ├── show-param-trajectory.py ├── show-samples.py ├── theanorc └── uci └── run-uci /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .*.swp 3 | *.h5 4 | *.state 5 | data/*.h5 6 | data/*.pkl.gz 7 | notebooks/.ipynb_checkpoints/ 8 | */output/* 9 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://img.shields.io/shippable/557c82e6edd7f2c05214d9b0/master.svg 2 | :target: https://app.shippable.com/projects/557c82e6edd7f2c05214d9b0/builds/latest 3 | 4 | .. image:: https://requires.io/github/jbornschein/reweighted-ws/requirements.svg?branch=master 5 | :target: https://requires.io/github/jbornschein/reweighted-ws/requirements/?branch=master 6 | :alt: Requirements Status 7 | 8 | .. image:: https://img.shields.io/github/license/jbornschein/reweighted-ws.svg 9 | :target: http://choosealicense.com/licenses/agpl-3.0/ 10 | :alt: AGPLv3 11 | 12 | 13 | Reweighted Wake-Sleep 14 | ===================== 15 | 16 | This repository contains the implementation of the machine learning 17 | method described in http://arxiv.org/abs/1406.2751 . 18 | 19 | *Note: There is an alternative implementation based on Blocks/Theano in https://github.com/jbornschein/bihm* 20 | 21 | Installation & Requirements 22 | --------------------------- 23 | 24 | This implementation in written in Python and uses Theano. 
To automatically 25 | install all dependencies run 26 | 27 | pip install -r requirements.txt 28 | 29 | In order to reproduce the experiments in the paper you need to download about 30 | 500 MB of training data: 31 | 32 | cd data 33 | sh download.sh 34 | 35 | -------------------------------------------------------------------------------- /bars/deepbars-sbn.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | from learning.dataset import BarsData, FromH5 5 | from learning.training import Trainer 6 | from learning.termination import LogLikelihoodIncrease, EarlyStopping 7 | from learning.monitor import MonitorLL, DLogModelParams 8 | 9 | from learning.rws import LayerStack 10 | from learning.sbn import SBN, SBNTop 11 | from learning.darn import DARN, DARNTop 12 | from learning.nade import NADE, NADETop 13 | 14 | n_vis = 5*5 15 | n_hid = 15 16 | n_qhid = 2*n_hid 17 | 18 | dataset = FromH5(fname="deep-bars-5x5-a.h5", n_datapoints=5000) 19 | valiset = FromH5(fname="deep-bars-5x5-a.h5", n_datapoints=1000, offset=5000) 20 | testset = FromH5(fname="deep-bars-5x5-a.h5", n_datapoints=5000, offset=6000) 21 | 22 | p_layers=[ 23 | SBN( 24 | n_X=n_vis, 25 | n_Y=15, 26 | ), 27 | SBN( 28 | n_X=15, 29 | n_Y=7, 30 | ), 31 | SBNTop( 32 | n_X=7, 33 | ) 34 | ] 35 | 36 | q_layers=[ 37 | SBN( 38 | n_X=15, 39 | n_Y=25, 40 | ), 41 | SBN( 42 | n_X=7, 43 | n_Y=15, 44 | ), 45 | ] 46 | 47 | model = LayerStack( 48 | p_layers=p_layers, 49 | q_layers=q_layers, 50 | ) 51 | 52 | trainer = Trainer( 53 | n_samples=5, 54 | learning_rate_p=3e-2, 55 | learning_rate_q=3e-2, 56 | learning_rate_s=3e-2, 57 | layer_discount=1.00, 58 | batch_size=25, 59 | dataset=dataset, 60 | model=model, 61 | termination=EarlyStopping(), 62 | #monitor_nth_step=100, 63 | #step_monitors=[ 64 | # MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100]) 65 | #], 66 | epoch_monitors=[ 67 | DLogModelParams(), 68 | MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100]), 69 | MonitorLL(name="testset", data=testset, n_samples=[1, 5, 25, 100]), 70 | ], 71 | final_monitors=[ 72 | MonitorLL(name="final-valiset", data=valiset, n_samples=[1, 5, 25, 100]), 73 | MonitorLL(name="final-testset", data=testset, n_samples=[1, 5, 25, 100]), 74 | SampleFromP(data=valiset, n_samples=100), 75 | ], 76 | ) 77 | -------------------------------------------------------------------------------- /bars/param-bars.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | from learning.dataset import BarsData, FromModel, MNIST 5 | from learning.training import Trainer 6 | from learning.termination import LogLikelihoodIncrease, EarlyStopping 7 | from learning.monitor import MonitorLL, DLogModelParams, SampleFromP 8 | from learning.monitor.bootstrap import BootstrapLL 9 | 10 | from learning.rws import LayerStack 11 | from learning.sbn import SBN, SBNTop 12 | from learning.darn import DARN, DARNTop 13 | from learning.nade import NADE, NADETop 14 | 15 | n_vis = 5*5 16 | n_hid = 15 17 | n_qhid = 2*n_hid 18 | 19 | dataset = BarsData(which_set='train', n_datapoints=10000) 20 | valiset = BarsData(which_set='valid', n_datapoints=1000) 21 | testset = BarsData(which_set='test' , n_datapoints=10000) 22 | 23 | p_layers=[ 24 | SBN( 25 | n_X=n_vis, 26 | n_Y=n_hid, 27 | ), 28 | SBNTop( 29 | n_X=n_hid, 30 | ) 31 | ] 32 | 33 | q_layers=[ 34 | SBN( 35 | n_X=n_hid, 36 | n_Y=n_vis, 37 | # n_hid=n_qhid, 38 | # unroll_scan=1 39 | ) 40 | ] 41 | 42 | model = 
LayerStack( 43 | p_layers=p_layers, 44 | q_layers=q_layers, 45 | ) 46 | 47 | trainer = Trainer( 48 | n_samples=10, 49 | learning_rate_p=1e-1, 50 | learning_rate_q=1e-1, 51 | learning_rate_s=1e-1, 52 | batch_size=10, 53 | dataset=dataset, 54 | model=model, 55 | termination=EarlyStopping(), 56 | #monitor_nth_step=100, 57 | #step_monitors=[ 58 | # MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100]) 59 | #], 60 | epoch_monitors=[ 61 | DLogModelParams(), 62 | MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100]), 63 | MonitorLL(name="testset", data=testset, n_samples=[1, 5, 25, 100]), 64 | #BootstrapLL(name="valiset-bootstrap", data=valiset, n_samples=[1, 5, 25, 100]) 65 | ], 66 | final_monitors=[ 67 | MonitorLL(name="final-valiset", data=valiset, n_samples=[1, 5, 25, 100]), 68 | MonitorLL(name="final-testset", data=testset, n_samples=[1, 5, 25, 100]), 69 | SampleFromP(data=valiset, n_samples=100), 70 | ], 71 | ) 72 | 73 | -------------------------------------------------------------------------------- /caltech/dsbn-dsbn.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | from learning.dataset import CalTechSilhouettes 5 | from learning.termination import LogLikelihoodIncrease, EarlyStopping 6 | from learning.monitor import MonitorLL, DLogModelParams, SampleFromP 7 | from learning.monitor.bootstrap import BootstrapLL 8 | from learning.training import Trainer 9 | 10 | from learning.rws import LayerStack 11 | from learning.sbn import SBN, SBNTop 12 | from learning.dsbn import DSBN 13 | from learning.darn import DARN, DARNTop 14 | from learning.nade import NADE, NADETop 15 | 16 | n_vis = 28*28 17 | 18 | dataset = CalTechSilhouettes(which_set='train') 19 | valiset = CalTechSilhouettes(which_set='valid') 20 | testset = CalTechSilhouettes(which_set='test') 21 | 22 | p_layers=[ 23 | DSBN( 24 | n_X=n_vis, 25 | n_Y=500, 26 | ), 27 | DSBN( 28 | n_X=500, 29 | n_Y=300, 30 | ), 31 | DSBN( 32 | n_X=300, 33 | n_Y=100, 34 | ), 35 | DSBN( 36 | n_X=100, 37 | n_Y=50, 38 | ), 39 | DSBN( 40 | n_X=50, 41 | n_Y=10, 42 | ), 43 | SBNTop( 44 | n_X=10, 45 | ) 46 | ] 47 | 48 | q_layers=[ 49 | DSBN( 50 | n_Y=n_vis, 51 | n_X=500, 52 | ), 53 | DSBN( 54 | n_Y=500, 55 | n_X=300, 56 | ), 57 | DSBN( 58 | n_Y=300, 59 | n_X=100, 60 | ), 61 | DSBN( 62 | n_Y=100, 63 | n_X=50, 64 | ), 65 | DSBN( 66 | n_Y=50, 67 | n_X=10, 68 | ) 69 | ] 70 | 71 | model = LayerStack( 72 | p_layers=p_layers, 73 | q_layers=q_layers, 74 | ) 75 | 76 | trainer = Trainer( 77 | n_samples=10, 78 | learning_rate_p=1e-4, 79 | learning_rate_q=1e-4, 80 | learning_rate_s=1e-4, 81 | layer_discount=1.0, 82 | batch_size=100, 83 | dataset=dataset, 84 | model=model, 85 | termination=EarlyStopping(), 86 | #step_monitors=[MonitorLL(data=smallset, n_samples=[1, 5, 25, 100])], 87 | epoch_monitors=[ 88 | DLogModelParams(), 89 | MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100]), 90 | SampleFromP(n_samples=100) 91 | ], 92 | final_monitors=[ 93 | MonitorLL(name="final-valiset", data=valiset, n_samples=[1, 5, 25, 100, 500, 1000]), 94 | MonitorLL(name="final-testset", data=testset, n_samples=[1, 5, 25, 100, 500, 1000]), 95 | ], 96 | monitor_nth_step=100, 97 | ) 98 | -------------------------------------------------------------------------------- /caltech/nade-nade.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | from learning.dataset import CalTechSilhouettes 5 | from learning.preproc import 
PermuteColumns 6 | from learning.termination import LogLikelihoodIncrease, EarlyStopping 7 | from learning.monitor import MonitorLL, DLogModelParams, SampleFromP 8 | from learning.training import Trainer 9 | 10 | from learning.models.rws import LayerStack 11 | from learning.models.sbn import SBN, SBNTop 12 | from learning.models.darn import DARN, DARNTop 13 | from learning.models.nade import NADE, NADETop 14 | 15 | n_vis = 28*28 16 | 17 | preproc = PermuteColumns() 18 | 19 | dataset = CalTechSilhouettes(which_set='train', preproc=[preproc]) 20 | valiset = CalTechSilhouettes(which_set='valid', preproc=[preproc]) 21 | testset = CalTechSilhouettes(which_set='test', preproc=[preproc]) 22 | 23 | p_layers=[ 24 | NADE( 25 | n_X=n_vis, 26 | n_Y=150, 27 | ), 28 | NADETop( 29 | n_X=150, 30 | ), 31 | ] 32 | 33 | q_layers=[ 34 | NADE( 35 | n_Y=n_vis, 36 | n_X=150, 37 | ), 38 | ] 39 | 40 | model = LayerStack( 41 | p_layers=p_layers, 42 | q_layers=q_layers, 43 | ) 44 | 45 | trainer = Trainer( 46 | n_samples=5, 47 | learning_rate_p=1e-3, 48 | learning_rate_q=1e-3, 49 | learning_rate_s=1e-3, 50 | layer_discount=1.0, 51 | batch_size=25, 52 | dataset=dataset, 53 | model=model, 54 | termination=EarlyStopping(), 55 | #step_monitors=[MonitorLL(data=smallset, n_samples=[1, 5, 25, 100])], 56 | epoch_monitors=[ 57 | DLogModelParams(), 58 | MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100]), 59 | SampleFromP(n_samples=100) 60 | ], 61 | final_monitors=[ 62 | MonitorLL(name="final-valiset", data=valiset, n_samples=[1, 5, 25, 100, 500, 1000]), 63 | MonitorLL(name="final-testset", data=testset, n_samples=[1, 5, 25, 100, 500, 1000]), 64 | ], 65 | monitor_nth_step=100, 66 | ) 67 | -------------------------------------------------------------------------------- /caltech/run-caltech: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | from __future__ import division 5 | 6 | import sys 7 | sys.path.append("../") 8 | 9 | import logging 10 | from time import time 11 | import cPickle as pickle 12 | 13 | import numpy as np 14 | 15 | logger = logging.getLogger() 16 | 17 | 18 | def run_experiment(args): 19 | from learning.experiment import Experiment 20 | from learning.training import Trainer 21 | from learning.termination import EarlyStopping 22 | from learning.monitor import MonitorLL, DLogModelParams, SampleFromP 23 | from learning.dataset import CalTechSilhouettes 24 | from learning.preproc import PermuteColumns 25 | 26 | from learning.models.rws import LayerStack 27 | from learning.models.sbn import SBN, SBNTop 28 | from learning.models.dsbn import DSBN 29 | from learning.models.darn import DARN, DARNTop 30 | from learning.models.nade import NADE, NADETop 31 | 32 | np.set_printoptions(precision=2) 33 | 34 | logger.debug("Arguments %s" % args) 35 | tags = [] 36 | 37 | np.random.seed(23) 38 | 39 | # Layer models 40 | layer_models = { 41 | "sbn" : (SBN, SBNTop), 42 | "dsbn": (DSBN, SBNTop), 43 | "darn": (DARN, DARNTop), 44 | "nade": (NADE, NADETop), 45 | } 46 | 47 | if not args.p_model in layer_models: 48 | raise ValueError("Unknown P-layer model %s" % args.p_model) 49 | p_layer, p_top = layer_models[args.p_model] 50 | 51 | if not args.q_model in layer_models: 52 | raise ValueError("Unknown Q-layer model %s" % args.q_model) 53 | q_layer, q_top = layer_models[args.q_model] 54 | 55 | # Layer sizes 56 | layer_sizes = [int(s) for s in args.layer_sizes.split(",")] 57 | 58 | n_X = 28*28 59 | 60 | p_layers = [] 61 | q_layers = [] 62 | 63 | for ls in layer_sizes: 64 | n_Y = ls
65 | p_layers.append( 66 | p_layer(n_X=n_X, n_Y=n_Y) 67 | ) 68 | q_layers.append( 69 | q_layer(n_X=n_Y, n_Y=n_X) 70 | ) 71 | n_X = n_Y 72 | p_layers.append( p_top(n_X=n_X) ) 73 | 74 | 75 | model = LayerStack( 76 | p_layers=p_layers, 77 | q_layers=q_layers 78 | ) 79 | model.setup() 80 | 81 | # Learning rate 82 | def lr_tag(value, prefix): 83 | if value == 0.0: 84 | return "00" 85 | exp = np.floor(np.log10(value)) 86 | leading = ("%e"%value)[0] 87 | return ["%s%s%d" % (prefix, leading, -exp)] 88 | 89 | lr_base = args.lr 90 | tags += lr_tag(lr_base, prefix="lr") 91 | lr_p = args.lr_p 92 | lr_q = args.lr_q 93 | lr_s = args.lr_s 94 | if lr_p is None: 95 | lr_p = lr_base 96 | else: 97 | tags += lr_tag(lr_p, prefix="lp") 98 | if lr_q is None: 99 | lr_q = lr_base 100 | else: 101 | tags += lr_tag(lr_q, prefix="lq") 102 | if lr_s is None: 103 | lr_s = lr_base 104 | else: 105 | tags += lr_tag(lr_s, prefix="ls") 106 | 107 | 108 | if args.ldiscount != 1.0: 109 | tags += ["ld"] 110 | 111 | # Samples 112 | n_samples = args.samples 113 | tags += ["spl%d"%n_samples] 114 | 115 | # Batch size 116 | batch_size = args.batchsize 117 | tags += ["bs%d"%batch_size] 118 | 119 | # Sleep interleave 120 | sleep_interleave = args.sleep_interleave 121 | tags += ["si%d"%sleep_interleave] 122 | 123 | # Dataset 124 | if args.shuffle: 125 | np.random.seed(23) 126 | preproc = [PermuteColumns()] 127 | tags += ["shuffle"] 128 | else: 129 | preproc = [] 130 | 131 | dataset = CalTechSilhouettes(which_set='train') 132 | valiset = CalTechSilhouettes(which_set='valid') 133 | testset = CalTechSilhouettes(which_set='test') 134 | 135 | 136 | if args.lookahead != 10: 137 | tags += ["lah%d" % args.lookahead] 138 | 139 | tags.sort() 140 | expname = "%s-%s-%s-%s"% ("-".join(tags), args.p_model, args.q_model, "-".join([str(s) for s in layer_sizes])) 141 | 142 | logger.info("Running %s" % expname) 143 | 144 | 145 | trainer = Trainer( 146 | batch_size=batch_size, 147 | n_samples=n_samples, 148 | sleep_interleave=sleep_interleave, 149 | learning_rate_p=lr_p, 150 | learning_rate_q=lr_q, 151 | learning_rate_s=lr_s, 152 | layer_discount=args.ldiscount, 153 | dataset=dataset, 154 | model=model, 155 | termination=EarlyStopping(lookahead=args.lookahead, min_epochs=10), 156 | epoch_monitors=[ 157 | DLogModelParams(), 158 | SampleFromP(n_samples=100), 159 | MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100]), 160 | ], 161 | final_monitors=[ 162 | MonitorLL(name="final-valiset", data=valiset, n_samples=[1, 5, 10, 25, 100, 500, 1000, 10000, 100000]), 163 | MonitorLL(name="final-testset", data=testset, n_samples=[1, 5, 10, 25, 100, 500, 1000, 10000, 100000]), 164 | ], 165 | ) 166 | 167 | experiment = Experiment() 168 | experiment.trainer = trainer 169 | experiment.setup_output_dir(expname) 170 | experiment.print_summary() 171 | experiment.setup_logging() 172 | 173 | if args.cont is None: 174 | experiment.run_experiment() 175 | else: 176 | logger.info("Continuing experiment %s ...." % args.cont) 177 | experiment.continue_experiment(args.cont+"/results.h5", row=-1) 178 | 179 | logger.info("Finished. 
Wrinting metadata") 180 | 181 | experiment.print_summary() 182 | 183 | #============================================================================= 184 | if __name__ == "__main__": 185 | import argparse 186 | 187 | parser = argparse.ArgumentParser() 188 | parser.add_argument('--verbose', '-v', action='count') 189 | parser.add_argument('--shuffle', action='store_true', default=False) 190 | parser.add_argument('--cont', nargs='?', default=None, 191 | help="Continue a previous in result_dir") 192 | parser.add_argument('--samples', default=10, type=int, 193 | help="Number of training samples (default: 10)") 194 | parser.add_argument('--batchsize', default=100, type=int, 195 | help="Mini batch size (default: 100)") 196 | parser.add_argument('--sleep-interleave', '--si', default=2, type=int, 197 | help="Sleep interleave (default: 2)") 198 | parser.add_argument('--lr', default=1e-4, type=float, help="Learning rate (default: 1e-4)") 199 | parser.add_argument('--lr_p', default=None, type=float, help="p learning rate") 200 | parser.add_argument('--lr_q', default=None, type=float, help="wake-q-learing rate") 201 | parser.add_argument('--lr_s', default=None, type=float, help="sleep-q-learning rate") 202 | parser.add_argument('--ldiscount', default=1., type=float, help="layer_discount") 203 | parser.add_argument('--lookahead', default=10, type=int, 204 | help="Termination criteria: # epochs without LL increase") 205 | parser.add_argument('p_model', default="SBN", 206 | help="SBN, DARN or NADE (default: SBN") 207 | parser.add_argument('q_model', default="SBN", 208 | help="SBN, DARN or NADE (default: SBN") 209 | parser.add_argument('layer_sizes', default="500,300,100,50,10", 210 | help="Comma seperated list of sizes. Layer cosest to the data comes first") 211 | args = parser.parse_args() 212 | 213 | FORMAT = '[%(asctime)s] %(name)-15s %(message)s' 214 | DATEFMT = "%H:%M:%S" 215 | logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.INFO) 216 | 217 | run_experiment(args) 218 | -------------------------------------------------------------------------------- /caltech/sbn-nade.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | from learning.dataset import CalTechSilhouettes 5 | from learning.termination import LogLikelihoodIncrease, EarlyStopping 6 | from learning.monitor import MonitorLL, DLogModelParams, SampleFromP 7 | from learning.training import Trainer 8 | 9 | from learning.models.rws import LayerStack 10 | from learning.models.sbn import SBN, SBNTop 11 | from learning.models.darn import DARN, DARNTop 12 | from learning.models.nade import NADE, NADETop 13 | 14 | n_vis = 28*28 15 | 16 | dataset = CalTechSilhouettes(which_set='train') 17 | valiset = CalTechSilhouettes(which_set='valid') 18 | testset = CalTechSilhouettes(which_set='test') 19 | 20 | p_layers=[ 21 | SBN( 22 | n_X=n_vis, 23 | n_Y=300, 24 | ), 25 | SBN( 26 | n_X=300, 27 | n_Y=100, 28 | ), 29 | SBN( 30 | n_X=100, 31 | n_Y=50, 32 | ), 33 | SBN( 34 | n_X=50, 35 | n_Y=10, 36 | ), 37 | SBNTop( 38 | n_X=10, 39 | ) 40 | ] 41 | 42 | q_layers=[ 43 | NADE( 44 | n_Y=n_vis, 45 | n_X=300, 46 | ), 47 | NADE( 48 | n_Y=300, 49 | n_X=100, 50 | ), 51 | NADE( 52 | n_Y=100, 53 | n_X=50, 54 | ), 55 | NADE( 56 | n_Y=50, 57 | n_X=10, 58 | ) 59 | ] 60 | 61 | model = LayerStack( 62 | p_layers=p_layers, 63 | q_layers=q_layers, 64 | ) 65 | 66 | trainer = Trainer( 67 | n_samples=5, 68 | learning_rate_p=1e-3, 69 | learning_rate_q=1e-3, 70 | learning_rate_s=1e-3, 71 | layer_discount=1.0, 
72 | batch_size=25, 73 | dataset=dataset, 74 | model=model, 75 | termination=EarlyStopping(), 76 | #step_monitors=[MonitorLL(data=smallset, n_samples=[1, 5, 25, 100])], 77 | epoch_monitors=[ 78 | DLogModelParams(), 79 | MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100]), 80 | SampleFromP(n_samples=100) 81 | ], 82 | final_monitors=[ 83 | MonitorLL(name="final-valiset", data=valiset, n_samples=[1, 5, 25, 100, 500, 1000]), 84 | MonitorLL(name="final-testset", data=testset, n_samples=[1, 5, 25, 100, 500, 1000]), 85 | ], 86 | monitor_nth_step=100, 87 | ) 88 | -------------------------------------------------------------------------------- /caltech/sbn-sbn.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | from learning.dataset import CalTechSilhouettes 5 | from learning.termination import LogLikelihoodIncrease, EarlyStopping 6 | from learning.monitor import MonitorLL, DLogModelParams, SampleFromP 7 | from learning.monitor.bootstrap import BootstrapLL 8 | from learning.training import Trainer 9 | 10 | from learning.models.rws import LayerStack 11 | from learning.models.sbn import SBN, SBNTop 12 | from learning.models.darn import DARN, DARNTop 13 | from learning.models.nade import NADE, NADETop 14 | 15 | n_vis = 28*28 16 | 17 | dataset = CalTechSilhouettes(which_set='train') 18 | valiset = CalTechSilhouettes(which_set='valid') 19 | testset = CalTechSilhouettes(which_set='test') 20 | 21 | p_layers=[ 22 | SBN( 23 | n_X=n_vis, 24 | n_Y=500, 25 | ), 26 | SBN( 27 | n_X=500, 28 | n_Y=300, 29 | ), 30 | SBN( 31 | n_X=300, 32 | n_Y=100, 33 | ), 34 | SBN( 35 | n_X=100, 36 | n_Y=50, 37 | ), 38 | SBN( 39 | n_X=50, 40 | n_Y=10, 41 | ), 42 | SBNTop( 43 | n_X=10, 44 | ) 45 | ] 46 | 47 | q_layers=[ 48 | SBN( 49 | n_Y=n_vis, 50 | n_X=500, 51 | ), 52 | SBN( 53 | n_Y=500, 54 | n_X=300, 55 | ), 56 | SBN( 57 | n_Y=300, 58 | n_X=100, 59 | ), 60 | SBN( 61 | n_Y=100, 62 | n_X=50, 63 | ), 64 | SBN( 65 | n_Y=50, 66 | n_X=10, 67 | ) 68 | ] 69 | 70 | model = LayerStack( 71 | p_layers=p_layers, 72 | q_layers=q_layers, 73 | ) 74 | 75 | trainer = Trainer( 76 | n_samples=5, 77 | learning_rate_p=1e-3, 78 | learning_rate_q=1e-3, 79 | learning_rate_s=1e-3, 80 | layer_discount=1.0, 81 | batch_size=100, 82 | dataset=dataset, 83 | model=model, 84 | termination=EarlyStopping(), 85 | #step_monitors=[MonitorLL(data=smallset, n_samples=[1, 5, 25, 100])], 86 | epoch_monitors=[ 87 | DLogModelParams(), 88 | MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100]), 89 | SampleFromP(n_samples=100) 90 | ], 91 | final_monitors=[ 92 | MonitorLL(name="final-valiset", data=valiset, n_samples=[1, 5, 25, 100, 500, 1000]), 93 | MonitorLL(name="final-testset", data=testset, n_samples=[1, 5, 25, 100, 500, 1000]), 94 | ], 95 | monitor_nth_step=100, 96 | ) 97 | -------------------------------------------------------------------------------- /data/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | 4 | DOWN=wget 5 | #DOWN=curl 6 | 7 | $DOWN http://www.capsec.org/datasets/adult.h5 8 | $DOWN http://www.capsec.org/datasets/connect4.h5 9 | $DOWN http://www.capsec.org/datasets/dna.h5 10 | $DOWN http://www.capsec.org/datasets/mnist.pkl.gz 11 | $DOWN http://www.capsec.org/datasets/mnist_salakhutdinov.pkl.gz 12 | $DOWN http://www.capsec.org/datasets/mushrooms.h5 13 | $DOWN http://www.capsec.org/datasets/ocr_letters.h5 14 | $DOWN http://www.capsec.org/datasets/rcv1.h5 15 | $DOWN 
http://www.capsec.org/datasets/web.h5 16 | 17 | -------------------------------------------------------------------------------- /learning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jbornschein/reweighted-ws/e96414719d09ab4941dc77bab4cf4847acc6a8e7/learning/__init__.py -------------------------------------------------------------------------------- /learning/dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file exist for backward compatibility and presets various 3 | classes under their old name. 4 | """ 5 | 6 | from __future__ import division 7 | 8 | from learning.datasets import * 9 | from learning.datasets.mnist import MNIST 10 | from learning.datasets.caltech import CalTechSilhouettes 11 | -------------------------------------------------------------------------------- /learning/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes representing datasets. 3 | """ 4 | 5 | from __future__ import division 6 | 7 | import os 8 | import abc 9 | import logging 10 | import cPickle as pickle 11 | import os.path as path 12 | import gzip 13 | import h5py 14 | 15 | import numpy as np 16 | 17 | import theano 18 | import theano.tensor as T 19 | 20 | from learning.preproc import Preproc 21 | 22 | _logger = logging.getLogger(__name__) 23 | 24 | floatX = theano.config.floatX 25 | 26 | #----------------------------------------------------------------------------- 27 | 28 | def datapath(fname): 29 | """ Try to find *fname* in the dataset directory and return 30 | a absolute path. 31 | """ 32 | candidates = [ 33 | path.abspath(path.join(path.dirname(__file__), "../../data")), 34 | path.abspath("."), 35 | path.abspath("data"), 36 | ] 37 | if 'DATASET_PATH' in os.environ: 38 | candidates.append(os.environ['DATASET_PATH']) 39 | 40 | for c in candidates: 41 | c = path.join(c, fname) 42 | if path.exists(c): 43 | return c 44 | 45 | raise IOError("Could not find %s" % fname) 46 | 47 | #----------------------------------------------------------------------------- 48 | # Dataset base class 49 | 50 | class DataSet(object): 51 | __metaclass__ = abc.ABCMeta 52 | 53 | def __init__(self, preproc=[]): 54 | self._preprocessors = [] 55 | self.add_preproc(preproc) 56 | 57 | def add_preproc(self, preproc): 58 | """ Add the given preprocessors to the list of preprocessors to be used 59 | 60 | Parameters 61 | ---------- 62 | preproc : {Preproc, list of Preprocessors} 63 | """ 64 | if isinstance(preproc, Preproc): 65 | preproc = [preproc,] 66 | 67 | for p in preproc: 68 | assert isinstance(p, Preproc) 69 | 70 | self._preprocessors += preproc 71 | 72 | 73 | def preproc(self, X, Y): 74 | """ Statically preprocess data. 
75 | 76 | Parameters 77 | ---------- 78 | X, Y : ndarray 79 | 80 | Returns 81 | ------- 82 | X, Y : ndarray 83 | """ 84 | for p in self._preprocessors: 85 | X, Y = p.preproc(X, Y) 86 | return X, Y 87 | 88 | def late_preproc(self, X, Y): 89 | """ Preprocess a batch of data 90 | 91 | Parameters 92 | ---------- 93 | X, Y : theano.tensor 94 | 95 | Returns 96 | ------- 97 | X, Y : theano.tensor 98 | """ 99 | for p in self._preprocessors: 100 | X, Y = p.late_preproc(X, Y) 101 | return X, Y 102 | 103 | #----------------------------------------------------------------------------- 104 | 105 | class ToyData(DataSet): 106 | def __init__(self, which_set='train', preproc=[]): 107 | super(ToyData, self).__init__(preproc) 108 | 109 | self.which_set = which_set 110 | 111 | X = np.array( 112 | [[1., 1., 1., 1., 0., 0., 0., 0.], 113 | [0., 0., 0., 0., 1., 1., 1., 1.]], dtype=floatX) 114 | Y = np.array([[1., 0.], [0., 1.]], dtype=floatX) 115 | 116 | if which_set == 'train': 117 | self.X = np.concatenate([X]*10) 118 | self.Y = np.concatenate([Y]*10) 119 | elif which_set == 'valid': 120 | self.X = np.concatenate([X]*2) 121 | self.Y = np.concatenate([Y]*2) 122 | elif which_set == 'test': 123 | self.X = np.concatenate([X]*2) 124 | self.Y = np.concatenate([Y]*2) 125 | else: 126 | raise ValueError("Unknown dataset %s" % which_set) 127 | 128 | self.n_datapoints = self.X.shape[0] 129 | 130 | #----------------------------------------------------------------------------- 131 | 132 | class BarsData(DataSet): 133 | def __init__(self, which_set='train', n_datapoints=1000, D=5, preproc=[]): 134 | super(BarsData, self).__init__(preproc) 135 | 136 | n_vis = D**2 137 | n_hid = 2*D 138 | bar_prob = 1./n_hid 139 | 140 | X = np.zeros((n_datapoints, D, D), dtype=floatX) 141 | Y = (np.random.uniform(size=(n_datapoints, n_hid)) < bar_prob).astype(floatX) 142 | 143 | for n in xrange(n_datapoints): 144 | for d in xrange(D): 145 | if Y[n, d] > 0.5: 146 | X[n, d, :] = 1.0 147 | if Y[n, D+d] > 0.5: 148 | X[n, :, d] = 1.0 149 | 150 | self.X = X.reshape((n_datapoints, n_vis)) 151 | self.Y = Y 152 | self.n_datapoints = n_datapoints 153 | 154 | #----------------------------------------------------------------------------- 155 | 156 | class FromModel(DataSet): 157 | def __init__(self, model, n_datapoints, preproc=[]): 158 | super(FromModel, self).__init__(preproc) 159 | 160 | batch_size = 100 161 | 162 | # Compile a Theano function to draw samples from the model 163 | n_samples = T.iscalar('n_samples') 164 | n_samples.tag.test_value = 10 165 | 166 | X, _ = model.sample_p(n_samples) 167 | 168 | do_sample = theano.function( 169 | inputs=[n_samples], 170 | outputs=X[0], 171 | name='sample_p') 172 | 173 | model.setup() 174 | n_vis = model.n_X 175 | #n_hid = model.n_hid 176 | 177 | X = np.empty((n_datapoints, n_vis), dtype=floatX) 178 | #Y = np.empty((n_datapoints, n_hid), dtype=np.floatX) 179 | 180 | for b in xrange(n_datapoints//batch_size): 181 | first = b*batch_size 182 | last = first + batch_size 183 | X[first:last] = do_sample(batch_size) 184 | remain = n_datapoints % batch_size 185 | if remain > 0: 186 | X[last:] = do_sample(remain) 187 | 188 | self.n_datapoints = n_datapoints 189 | self.X = X 190 | self.Y = None 191 | 192 | #----------------------------------------------------------------------------- 193 | 194 | class FromH5(DataSet): 195 | def __init__(self, fname, n_datapoints=None, offset=0, table_X="X", table_Y="Y"): 196 | """ Load a dataset from an HDF5 file. 
""" 197 | super(FromH5, self).__init__() 198 | 199 | if not path.exists(fname): 200 | fname = datapath(fname) 201 | 202 | with h5py.File(fname, "r") as h5: 203 | # 204 | if not table_X in h5.keys(): 205 | _logger.error("H5 file %s does not contain a table named %s" % (fname, table_X)) 206 | raise ArgumentError() 207 | 208 | N_total, D = h5[table_X].shape 209 | if n_datapoints is None: 210 | n_datapoints = N_total-offset 211 | 212 | X = h5[table_X][offset:(offset+n_datapoints)] 213 | X = X.astype(floatX) 214 | if table_Y in h5.keys(): 215 | Y = h5[table_Y][offset:(offset+n_datapoints)] 216 | Y = Y.astype(floatX) 217 | else: 218 | Y = None 219 | Y = X[:,0] 220 | 221 | self.X = X 222 | self.Y = Y 223 | self.n_datapoints = self.X.shape[0] 224 | 225 | -------------------------------------------------------------------------------- /learning/datasets/caltech.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Access to various CalTech datasets. 4 | 5 | """ 6 | 7 | from __future__ import division 8 | 9 | import os 10 | import logging 11 | 12 | import numpy as np 13 | 14 | import theano 15 | import theano.tensor as T 16 | 17 | from learning.datasets import DataSet, datapath 18 | 19 | _logger = logging.getLogger(__name__) 20 | 21 | floatX = theano.config.floatX 22 | 23 | #----------------------------------------------------------------------------- 24 | 25 | class CalTechSilhouettes(DataSet): 26 | def __init__(self, which_set='train', n_datapoints=-1, path="caltech-silhouettes", preproc=[]): 27 | super(CalTechSilhouettes, self).__init__(preproc) 28 | 29 | _logger.info("Loading CalTech 101 Silhouettes data (28x28)") 30 | path = datapath(path) 31 | 32 | test_x = np.load(path+"/test_data.npy") 33 | test_y = np.load(path+"/test_labels.npy") 34 | 35 | if which_set == 'train': 36 | X = np.load(path+"/train_data.npy") 37 | Y = np.load(path+"/train_labels.npy") 38 | elif which_set == 'valid': 39 | X = np.load(path+"/val_data.npy") 40 | Y = np.load(path+"/val_labels.npy") 41 | elif which_set == 'test': 42 | X = np.load(path+"/test_data.npy") 43 | Y = np.load(path+"/test_labels.npy") 44 | else: 45 | raise ValueError("Unknown dataset %s" % which_set) 46 | 47 | if n_datapoints > 0: 48 | X = X[:n_datapoints] 49 | Y = Y[:n_datapoints] 50 | else: 51 | n_datapoints = X.shape[0] 52 | 53 | X = X.astype(floatX) 54 | 55 | self.n_datapoints = n_datapoints 56 | self.X = X 57 | self.Y = Y 58 | 59 | -------------------------------------------------------------------------------- /learning/datasets/mnist.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Access to the MNIST dataset of handwritten digits. 
4 | 5 | """ 6 | 7 | from __future__ import division 8 | 9 | import os 10 | import logging 11 | import cPickle as pickle 12 | import gzip 13 | 14 | import numpy as np 15 | 16 | import theano 17 | import theano.tensor as T 18 | 19 | from learning.datasets import DataSet, datapath 20 | 21 | _logger = logging.getLogger(__name__) 22 | 23 | floatX = theano.config.floatX 24 | 25 | #----------------------------------------------------------------------------- 26 | 27 | class MNIST(DataSet): 28 | def __init__(self, which_set='train', n_datapoints=None, fname="mnist.pkl.gz", preproc=[]): 29 | super(MNIST, self).__init__(preproc) 30 | 31 | _logger.info("Loading MNIST data") 32 | fname = datapath(fname) 33 | 34 | if fname[-3:] == ".gz": 35 | open_func = gzip.open 36 | else: 37 | open_func = open 38 | 39 | with open_func(fname) as f: 40 | (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = pickle.load(f) 41 | 42 | if which_set == 'train': 43 | self.X, self.Y = self.prepare(train_x, train_y, n_datapoints) 44 | elif which_set == 'valid': 45 | self.X, self.Y = self.prepare(valid_x, valid_y, n_datapoints) 46 | elif which_set == 'test': 47 | self.X, self.Y = self.prepare(test_x, test_y, n_datapoints) 48 | elif which_set == 'salakhutdinov_train': 49 | train_x = np.concatenate([train_x, valid_x]) 50 | train_y = np.concatenate([train_y, valid_y]) 51 | self.X, self.Y = self.prepare(train_x, train_y, n_datapoints) 52 | elif which_set == 'salakhutdinov_valid': 53 | train_x = np.concatenate([train_x, valid_x])[::-1] 54 | train_y = np.concatenate([train_y, valid_y])[::-1] 55 | self.X, self.Y = self.prepare(train_x, train_y, n_datapoints) 56 | else: 57 | raise ValueError("Unknown dataset %s" % which_set) 58 | 59 | self.n_datapoints = self.X.shape[0] 60 | 61 | def prepare(self, x, y, n_datapoints): 62 | N = x.shape[0] 63 | assert N == y.shape[0] 64 | 65 | if n_datapoints is not None: 66 | N = n_datapoints 67 | 68 | x = x[:N] 69 | y = y[:N] 70 | 71 | one_hot = np.zeros((N, 10), dtype=floatX) 72 | for n in xrange(N): 73 | one_hot[n, y[n]] = 1. 
74 | 75 | return x.astype(floatX), one_hot.astype(floatX) 76 | 77 | -------------------------------------------------------------------------------- /learning/datasets/tests/tests_datasets.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from learning.models.rws import LayerStack 6 | from learning.models.sbn import SBN, SBNTop 7 | 8 | # unit under test 9 | from learning.datasets import * 10 | from learning.datasets.mnist import MNIST 11 | from learning.datasets.caltech import CalTechSilhouettes 12 | from learning.datasets.tfd import TorontoFaceDataset 13 | 14 | def skip_check(reason): 15 | raise unittest.SkipTest(reason) 16 | 17 | def check_dtype(d): 18 | assert d.X.dtype == np.float32, "Failed for %s" % d 19 | assert d.X.ndim == 2 20 | #if d.Y is not None: 21 | # assert d.Y.dtype == np.float32, "Failed for %s" % d 22 | # assert d.Y.ndim == 2 23 | 24 | def check_same_N(d): 25 | N = d.n_datapoints 26 | assert d.X.shape[0] == N, "Failed for %s" % d 27 | if d.Y is not None: 28 | assert d.Y.shape[0] == N, "Failed for %s" % d 29 | 30 | def check_range(d): 31 | assert d.X.min() >= 0., "Failed for %s" % d 32 | assert d.X.max() <= 1., "Failed for %s" % d 33 | #if d.Y is not None: 34 | # assert d.Y.min() >= 0., "Failed for %s" % d 35 | # assert d.Y.max() <= 1., "Failed for %s" % d 36 | 37 | #----------------------------------------------------------------------------- 38 | 39 | test_matrix = { 40 | (ToyData, 'train') : (check_dtype, check_same_N, check_range), 41 | (ToyData, 'valid') : (check_dtype, check_same_N, check_range), 42 | (ToyData, 'test' ) : (check_dtype, check_same_N, check_range), 43 | (BarsData, 'train') : (check_dtype, check_same_N, check_range), 44 | (BarsData, 'valid') : (check_dtype, check_same_N, check_range), 45 | (BarsData, 'test' ) : (check_dtype, check_same_N, check_range), 46 | (MNIST, 'train') : (check_dtype, check_same_N, check_range), 47 | (MNIST, 'valid') : (check_dtype, check_same_N, check_range), 48 | (MNIST, 'test' ) : (check_dtype, check_same_N, check_range), 49 | (TorontoFaceDataset, 'train') : (check_dtype, check_same_N, check_range), 50 | (TorontoFaceDataset, 'valid') : (check_dtype, check_same_N, check_range), 51 | (TorontoFaceDataset, 'test' ) : (check_dtype, check_same_N, check_range), 52 | (CalTechSilhouettes, 'train') : (check_dtype, check_same_N, check_range), 53 | (CalTechSilhouettes, 'valid') : (check_dtype, check_same_N, check_range), 54 | (CalTechSilhouettes, 'test' ) : (check_dtype, check_same_N, check_range), 55 | } 56 | 57 | def test_datasets(): 58 | for (ds_class, which_set), tests in test_matrix.iteritems(): 59 | try: 60 | data = ds_class(which_set=which_set) 61 | except IOError: 62 | data = None 63 | 64 | for a_check in tests: 65 | if data is None: 66 | yield skip_check, ("Could not load %s - IOError" % ds_class) 67 | else: 68 | yield a_check, data 69 | 70 | #----------------------------------------------------------------------------- 71 | 72 | def test_FromModel(): 73 | D = 5 74 | n_vis = D**2 75 | n_hid = 2*D 76 | 77 | # Ground truth params 78 | W_bars = np.zeros([n_hid, D, D]) 79 | for d in xrange(D): 80 | W_bars[ d, d, :] = 4. 81 | W_bars[D+d, :, d] = 4. 82 | W_bars = W_bars.reshape( (n_hid, n_vis) ) 83 | P_a = -np.log(D/2-1)*np.ones(n_hid) 84 | P_b = -2*np.ones(n_vis) 85 | 86 | # Instantiate model... 
87 | p_layers = [ 88 | SBN( 89 | n_X=n_vis, 90 | n_Y=n_hid, 91 | ), 92 | SBNTop( 93 | n_X=n_hid 94 | ) 95 | ] 96 | q_layers = [ 97 | SBN( 98 | n_X=n_hid, 99 | n_Y=n_vis, 100 | ) 101 | ] 102 | p_layers[0].set_model_param('W', W_bars) 103 | p_layers[0].set_model_param('b', P_b) 104 | p_layers[1].set_model_param('a', P_a) 105 | 106 | model = LayerStack( 107 | p_layers=p_layers, 108 | q_layers=q_layers 109 | ) 110 | 111 | # ...and generate data 112 | n_datapoints = 1000 113 | data = FromModel(model=model, n_datapoints=n_datapoints) 114 | 115 | assert data.X.shape == (n_datapoints, n_vis) 116 | 117 | yield check_dtype, data 118 | yield check_range, data 119 | 120 | -------------------------------------------------------------------------------- /learning/datasets/tfd.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Access to the Totonto Face Dataset 4 | 5 | """ 6 | 7 | from __future__ import division 8 | 9 | import os 10 | import logging 11 | 12 | import numpy as np 13 | from scipy.io import loadmat 14 | 15 | import theano 16 | import theano.tensor as T 17 | 18 | 19 | from learning.datasets import DataSet, datapath 20 | 21 | _logger = logging.getLogger(__name__) 22 | 23 | floatX = theano.config.floatX 24 | 25 | #----------------------------------------------------------------------------- 26 | 27 | class TorontoFaceDataset(DataSet): 28 | def __init__(self, which_set='train', size=48, fold=0, n_datapoints=-1, path="TFD", preproc=[]): 29 | super(TorontoFaceDataset, self).__init__(preproc) 30 | 31 | _logger.info("Loading Toronto Face Dataset (48x48)") 32 | 33 | fname = datapath(path) 34 | 35 | if size == 48: 36 | fname += "/TFD_48x48.mat" 37 | elif size == 96: 38 | fname += "/TFD_96x96.mat" 39 | else: 40 | raise ValueError("Unknown size %s. Allowerd options 48 or 96." 
% size) 41 | 42 | assert 0 <= fold and fold <= 4 43 | 44 | # Load dataset 45 | data = loadmat(fname) 46 | 47 | if which_set == 'unlabeled': 48 | idx = (data['folds'][:,fold] == 0) 49 | elif which_set == 'train': 50 | idx = (data['folds'][:,fold] == 1) 51 | elif which_set == 'unlabeled+train': 52 | idx = (data['folds'][:,fold] == 0) 53 | idx += (data['folds'][:,fold] == 1) 54 | elif which_set == 'valid': 55 | idx = (data['folds'][:,fold] == 2) 56 | elif which_set == 'test': 57 | idx = (data['folds'][:,fold] == 3) 58 | else: 59 | raise ValueError("Unknown dataset %s" % which_set) 60 | 61 | X = data['images'][idx,:,:] 62 | #Y = data['labs_id'][idx,:] 63 | 64 | if n_datapoints > 0: 65 | X = X[:n_datapoints] 66 | Y = Y[:n_datapoints] 67 | else: 68 | n_datapoints = X.shape[0] 69 | 70 | # Normalize to 0..1 71 | X = (X / 255.).astype(floatX) 72 | 73 | # Flatten images 74 | X = X.reshape([n_datapoints, -1]) 75 | 76 | self.n_datapoints = n_datapoints 77 | self.X = X 78 | self.Y = None 79 | 80 | -------------------------------------------------------------------------------- /learning/experiment.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import division 3 | 4 | import sys 5 | sys.path.append("../lib") 6 | 7 | import logging 8 | import time 9 | import cPickle as pickle 10 | import os 11 | import os.path 12 | import errno 13 | from six import iteritems 14 | from shutil import copyfile 15 | 16 | import numpy as np 17 | import h5py 18 | 19 | import theano 20 | import theano.tensor as T 21 | 22 | from utils.datalog import dlog, StoreToH5, TextPrinter 23 | 24 | from dataset import DataSet 25 | from model import Model 26 | from training import TrainerBase 27 | from termination import Termination 28 | from monitor import DLogModelParams 29 | 30 | class Experiment(object): 31 | @classmethod 32 | def from_param_file(cls, fname): 33 | experiment = cls() 34 | experiment.load_param_file(fname) 35 | return experiment 36 | 37 | @classmethod 38 | def from_results(cls, path, row=-1): 39 | param_fname = path + "/paramfile.py" 40 | results_fname = path + "/results.h5" 41 | 42 | experiment = cls() 43 | experiment.load_param_file(param_fname) 44 | 45 | model = experiment.params['model'] 46 | with h5py.File(results_fname, "r") as h5: 47 | model.model_params_from_h5(h5, row, basekey="mode.") 48 | 49 | return experiment 50 | 51 | #------------------------------------------------------------------------- 52 | 53 | def __init__(self): 54 | self.params = {} 55 | self.param_fname = None 56 | self.out_dir = None 57 | self.logger = logging.getLogger("experiment") 58 | 59 | def load_param_file(self, fname): 60 | self.param_fname = fname 61 | execfile(fname, self.params) 62 | 63 | self.set_trainer(self.params['trainer']) 64 | 65 | def setup_output_dir(self, exp_name=None, with_suffix=True): 66 | if exp_name is None: 67 | # Determine experiment name 68 | if self.param_fname: 69 | exp_name = self.param_fname 70 | else: 71 | exp_name = sys.argv[0] 72 | 73 | if with_suffix: 74 | # Determine suffix 75 | if 'PBS_JOBID' in os.environ: 76 | job_no = os.environ['PBS_JOBID'].split('.')[0] # Job Number 77 | suffix = "j"+job_no 78 | elif 'SLURM_JOBID' in os.environ: 79 | job_no = os.environ['SLURM_JOBID'] 80 | suffix = "j"+job_no 81 | else: 82 | suffix = time.strftime("%Y-%m-%d-%H-%M") 83 | 84 | if not with_suffix: 85 | suffix = "-" 86 | 87 | suffix_counter = 0 88 | dirname = "output/%s.%s" % (exp_name, suffix) 89 | while True: 90 | try: 91 | os.makedirs(dirname) 92 | except 
OSError, e: 93 | if e.errno != errno.EEXIST: 94 | raise e 95 | suffix_counter += 1 96 | dirname = "output/%s.%s+%d" % (exp_name, suffix, suffix_counter) 97 | else: 98 | break 99 | else: 100 | dirname = "output/%s" % (exp_name) 101 | try: 102 | os.makedirs(dirname) 103 | except OSError, e: 104 | if e.errno != errno.EEXIST: 105 | raise e 106 | 107 | out_dir = dirname+"/" 108 | self.out_dir = out_dir 109 | 110 | if self.param_fname: 111 | copyfile(self.param_fname, os.path.join(self.out_dir, "paramfile.py")) 112 | 113 | def setup_logging(self): 114 | assert self.out_dir 115 | 116 | results_fname = os.path.join(self.out_dir, "results.h5") 117 | dlog.set_handler("*", StoreToH5, results_fname) 118 | 119 | #FORMAT = '[%(asctime)s] %(module)-15s %(message)s' 120 | FORMAT = '[%(asctime)s] %(name)-15s %(message)s' 121 | DATEFMT = "%H:%M:%S" 122 | 123 | formatter = logging.Formatter(FORMAT, DATEFMT) 124 | 125 | logger_fname = os.path.join(self.out_dir, "logfile.txt") 126 | fh = logging.FileHandler(logger_fname) 127 | fh.setLevel(logging.INFO) 128 | fh.setFormatter(formatter) 129 | 130 | root_logger = logging.getLogger("") 131 | root_logger.addHandler(fh) 132 | 133 | def print_summary(self): 134 | logger = self.logger 135 | 136 | logger.info("Parameter file: %s" % self.param_fname) 137 | logger.info("Output directory: %s" % self.out_dir) 138 | logger.info("-- Trainer hyperparameter --") 139 | for k, v in iteritems(self.trainer.get_hyper_params()): 140 | if not isinstance(v, (int, float)): 141 | continue 142 | logger.info(" %20s: %s" % (k, v)) 143 | logger.info("-- Model hyperparameter --") 144 | model = self.trainer.model 145 | 146 | 147 | desc = [str(layer.n_X) for layer in model.p_layers] 148 | logger.info(" %20s: %s" % ("layer sizes", "-".join(desc))) 149 | desc = [str(layer.__class__) for layer in model.p_layers] 150 | logger.info(" %20s: %s" % ("p-layers", " - ".join(desc))) 151 | desc = [str(layer.__class__) for layer in model.q_layers] 152 | logger.info(" %20s: %s" % ("q-layers", " - ".join(desc))) 153 | 154 | 155 | #for pl, ql in zip(model.p_layers[:-1], model.q_layers): 156 | # logger.info(" %s" % l.__class__) 157 | # for k, v in l.get_hyper_params().iteritems(): 158 | # logger.info(" %20s: %s" % (k, v)) 159 | #logger.info("Total runtime: %f4.1 h" % runtime) 160 | 161 | def run_experiment(self): 162 | self.sanity_check() 163 | 164 | self.trainer.load_data() 165 | self.trainer.compile() 166 | 167 | self.trainer.perform_learning() 168 | 169 | def continue_experiment(self, results_h5, row=-1, keep_orig_data=True): 170 | logger = self.logger 171 | self.sanity_check() 172 | 173 | # Never copy these keys from original .h5 174 | skip_orig_keys = ( 175 | "trainer.psleep_L", 176 | "trainer.pstep_L" 177 | ) 178 | 179 | logger.info("Copying results from %s" % results_h5) 180 | with h5py.File(results_h5, "r") as h5: 181 | if keep_orig_data: 182 | for key in h5.keys(): 183 | if key in skip_orig_keys: 184 | continue 185 | n_rows = h5[key].shape[0] 186 | if row > -1: 187 | n_rows = min(n_rows, row) 188 | for r in xrange(n_rows): 189 | dlog.append("orig."+key, h5[key][r]) 190 | 191 | # Identify last row without NaN's 192 | #LL100 = h5['learning.monitor.100.LL'] 193 | #row = max(np.where(np.isfinite(LL100))[0])-1 194 | logger.info("Continuing from row %d" % row) 195 | 196 | self.trainer.load_data() 197 | self.trainer.compile() 198 | self.trainer.model.model_params_from_h5(h5, row=row) 199 | 200 | self.trainer.perform_learning() 201 | 202 | #--------------------------------------------------------------- 203 | 
def sanity_check(self): 204 | if not isinstance(self.trainer, TrainerBase): 205 | raise ValueError("Trainer not set properly") 206 | 207 | if not any( [isinstance(m, DLogModelParams) for m in self.trainer.epoch_monitors] ): 208 | self.logger.warn("DLogModelParams is not set up as an epoch_monitor. Model parameters won't be saved. Adding default DLogModelParams()") 209 | self.trainer.epoch_monitors += [DLogModelParams()] 210 | 211 | def set_trainer(self, trainer): 212 | assert isinstance(trainer, TrainerBase) 213 | self.trainer = trainer 214 | 215 | -------------------------------------------------------------------------------- /learning/hyperbase.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import logging 6 | from inspect import isfunction 7 | from collections import OrderedDict 8 | from recordtype import recordtype 9 | from six import iteritems 10 | 11 | import numpy as np 12 | 13 | import theano 14 | import theano.tensor as T 15 | 16 | _logger = logging.getLogger(__name__) 17 | 18 | 19 | #------------------------------------------------------------------------------ 20 | 21 | HyperParam = recordtype('HyperParam', 'value name default help') 22 | 23 | #------------------------------------------------------------------------------ 24 | 25 | class HyperBase(object): 26 | initialized = False 27 | 28 | def __init__(self, **hyper_params): 29 | self._hyper_params = OrderedDict() 30 | self.initialized = True 31 | 32 | def _ensure_init(self): 33 | if not self.initialized: 34 | raise ValueError("HyperBase base class not initialized yet! " 35 | "Call HyperBase.__init__() before doing anything else!") 36 | 37 | def register_hyper_param(self, key, default=None, help=None): 38 | self._ensure_init() 39 | if self._hyper_params.has_key(key): 40 | raise ValueError('A hyper parameter named "%s" already exists' % key) 41 | 42 | self._hyper_params[key] = HyperParam(name=key, value=None, default=default, help=help) 43 | 44 | #-------------------------------------------------------------------------- 45 | 46 | def get_hyper_param(self, key): 47 | """ Return the value of a predefined hyper parameter.
""" 48 | param = self._hyper_params.get(key, None) 49 | if param is None: 50 | raise ValueError('Trying to access unknown hyper parameter "%s"' % key) 51 | if param.value is None: 52 | if isfunction(param.default): 53 | self.set_hyper_param(key, param.default()) 54 | else: 55 | self.set_hyper_param(key, param.default) 56 | return param.value 57 | 58 | def get_hyper_params(self, keys=None): 59 | """ """ 60 | if keys is None: 61 | keys = self._hyper_params.keys() 62 | return {k: self.get_hyper_param(k) for k in keys} 63 | else: 64 | return [self.get_hyper_param(k) for k in keys] 65 | 66 | def set_hyper_param(self, key, val=None): 67 | param = self._hyper_params.get(key, None) 68 | if param is None: 69 | raise ValueError('Trying to set unknown hyper parameter "%s"' % key) 70 | param.value = val 71 | 72 | def set_hyper_params(self, d): 73 | for key, val in iteritems(d): 74 | self.set_hyper_param(key, val) 75 | 76 | #------------------------------------------------------------------------ 77 | 78 | def __getattr__(self, name): 79 | if not self.initialized: 80 | raise AttributeError("'%s' object has no attribute '%s'" % (repr(self), name)) 81 | 82 | if name in self._hyper_params: 83 | return self.get_hyper_param(name) 84 | raise AttributeError("'%s' object has no attribute '%s'" % (repr(self), name)) 85 | 86 | def __setattr__(self, name, value): 87 | if not self.initialized: 88 | return object.__setattr__(self, name, value) 89 | 90 | if name in self._hyper_params: 91 | return self.set_hyper_param(name, value) 92 | return object.__setattr__(self, name, value) 93 | 94 | #------------------------------------------------------------------------------ 95 | 96 | -------------------------------------------------------------------------------- /learning/model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import logging 6 | from six import iteritems 7 | from inspect import isfunction 8 | from collections import OrderedDict 9 | from recordtype import recordtype 10 | 11 | import numpy as np 12 | 13 | import theano 14 | import theano.tensor as T 15 | 16 | _logger = logging.getLogger(__name__) 17 | 18 | floatX = theano.config.floatX 19 | 20 | #------------------------------------------------------------------------------ 21 | 22 | def default_weights(n_in, n_out): 23 | """ Return a n_in * n_out shaped matrix with uniformly sampled elements 24 | between - and + sqrt(6)/sqrt(n_in+n_out). 25 | """ 26 | scale = np.sqrt(6) / np.sqrt(n_in+n_out) 27 | return scale*(2*np.random.uniform(size=(n_in, n_out))-1) / n_in 28 | 29 | #------------------------------------------------------------------------------ 30 | 31 | HyperParam = recordtype('HyperParam', 'value name default help') 32 | ModelParam = recordtype('ModelParam', 'value name default help') 33 | 34 | #------------------------------------------------------------------------------ 35 | 36 | class Model(object): 37 | initialized = False 38 | 39 | def __init__(self, **hyper_params): 40 | self._model_params = OrderedDict() 41 | self._hyper_params = OrderedDict() 42 | self.initialized = True 43 | 44 | def _ensure_init(self): 45 | if not self.initialized: 46 | raise ValueError("Model base class not initialized yet!" 
47 | "Call Model.__init__() before doing anything else!") 48 | 49 | def register_hyper_param(self, key, default=None, help=None): 50 | self._ensure_init() 51 | if self._hyper_params.has_key(key): 52 | raise ValueError('A hyper parameter named "%s" already exists' % key) 53 | if self._model_params.has_key(key): 54 | raise ValueError('A model parameter named "%s" already exists' % key) 55 | 56 | self._hyper_params[key] = HyperParam(name=key, value=None, default=default, help=help) 57 | 58 | def register_model_param(self, key, default=None, help=None): 59 | self._ensure_init() 60 | if self._hyper_params.has_key(key): 61 | raise ValueError('A hyper parameter named "%s" already exists' % key) 62 | if self._model_params.has_key(key): 63 | raise ValueError('A model parameter named "%s" already exists' % key) 64 | 65 | self._model_params[key] = ModelParam(name=key, value=None, default=default, help=help) 66 | 67 | #-------------------------------------------------------------------------- 68 | 69 | def get_hyper_param(self, key): 70 | """ Return the value of a predefined hyper parameter. """ 71 | param = self._hyper_params.get(key, None) 72 | if param is None: 73 | raise ValueError('Trying to access unknown hyper parameter "%s"' % key) 74 | if param.value is None: 75 | if isfunction(param.default): 76 | self.set_hyper_param(key, param.default()) 77 | else: 78 | self.set_hyper_param(key, param.default) 79 | return param.value 80 | 81 | def get_hyper_params(self, keys=None): 82 | """ """ 83 | if keys is None: 84 | keys = self._hyper_params.keys() 85 | return {k: self.get_hyper_param(k) for k in keys} 86 | else: 87 | return [self.get_hyper_param(k) for k in keys] 88 | 89 | def set_hyper_param(self, key, val=None): 90 | param = self._hyper_params.get(key, None) 91 | if param is None: 92 | raise ValueError('Trying to set unknown hyper parameter "%s"' % key) 93 | param.value = val 94 | 95 | def set_hyper_params(self, d): 96 | for key, val in iteritems(d): 97 | self.set_hyper_param(key, val) 98 | 99 | #------------------------------------------------------------------------ 100 | 101 | def get_model_param(self, key): 102 | """ Return the value of a predefined model parameter. 
""" 103 | param = self._model_params.get(key, None) 104 | if param is None: 105 | raise ValueError('Trying to access unknown model parameter "%s"' % key) 106 | if param.value is None: 107 | if isfunction(param.default): 108 | self.set_model_param(key, param.default()) 109 | else: 110 | self.set_model_param(key, param.default) 111 | return param.value 112 | 113 | def get_model_params(self, keys=None): 114 | """ """ 115 | if keys is None: 116 | keys = self._model_params.keys() 117 | return OrderedDict( [(k, self.get_model_param(k)) for k in keys] ) 118 | else: 119 | return [self.get_model_param(k) for k in keys] 120 | 121 | def set_model_param(self, key, val=None): 122 | param = self._model_params.get(key, None) 123 | if param is None: 124 | raise ValueError('Trying to set unknown model parameter "%s"' % key) 125 | if not isinstance(val, T.sharedvar.SharedVariable): 126 | if not isinstance(val, np.ndarray): 127 | val = np.asarray(val) 128 | if val.dtype == np.float: 129 | val = np.asarray(val, dtype=floatX) 130 | val = theano.shared(val, key) 131 | val.tag.test_value = val 132 | param.value = val 133 | 134 | def set_model_params(self, d): 135 | for key, val in iteritems(d): 136 | self.set_model_param(key, val) 137 | 138 | #------------------------------------------------------------------------ 139 | 140 | def __getattr__(self, name): 141 | if not self.initialized: 142 | raise AttributeError("'%s' object has no attribute '%s'" % (repr(self), name)) 143 | 144 | if name in self._model_params: 145 | return self.get_model_param(name) 146 | if name in self._hyper_params: 147 | return self.get_hyper_param(name) 148 | raise AttributeError("'%s' object has no attribute '%s'" % (repr(self), name)) 149 | 150 | def __setattr__(self, name, value): 151 | if not self.initialized: 152 | return object.__setattr__(self, name, value) 153 | 154 | if name in self._model_params: 155 | return self.set_model_param(name, value) 156 | if name in self._hyper_params: 157 | return self.set_hyper_param(name, value) 158 | return object.__setattr__(self, name, value) 159 | 160 | #------------------------------------------------------------------------ 161 | 162 | def model_params_from_dlog(self, dlog, row=-1): 163 | """ Load the model params form an open H5 file """ 164 | for key, param in iteritems(self._model_params): 165 | assert isinstance(param, ModelParam) 166 | value = dlog.load(key, row) 167 | shvar = para.value 168 | shvar.set_value(value) 169 | 170 | def model_params_to_dlog(self, dlog): 171 | """ Append all model params to dlog """ 172 | for key, param in iteritems(self._model_params): 173 | assert isinstance(param, HyperParam) 174 | shvar = param.value 175 | value = shvar.get_value() 176 | dlog.append(key, value) 177 | 178 | def hyper_params_from_dlog(self, dlog, row=-1): 179 | """ Load the hyper params form an open H5 file """ 180 | for key, param in iteritems(self._hyper_params): 181 | assert isinstance(param, HyperParam) 182 | value = dlog.load(key, row) 183 | self.set_hyper_param(key, value) 184 | 185 | def hyper_params_to_dlog(self, dlog): 186 | """ Append all hyper params to dlog """ 187 | for key, param in iteritems(self._hyper_params): 188 | assert isinstance(param, ModelParam) 189 | shvar = param.value 190 | value = shvar.get_value() 191 | dlog.append(key, value) 192 | 193 | #------------------------------------------------------------------------------ 194 | 195 | -------------------------------------------------------------------------------- /learning/models/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jbornschein/reweighted-ws/e96414719d09ab4941dc77bab4cf4847acc6a8e7/learning/models/__init__.py -------------------------------------------------------------------------------- /learning/models/darn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import logging 6 | 7 | import numpy as np 8 | 9 | import theano 10 | import theano.tensor as T 11 | from theano.printing import Print 12 | 13 | from learning.model import default_weights 14 | from learning.models.rws import TopModule, Module, theano_rng 15 | from learning.utils.unrolled_scan import unrolled_scan 16 | 17 | _logger = logging.getLogger(__name__) 18 | floatX = theano.config.floatX 19 | 20 | class DARNTop(TopModule): 21 | def __init__(self, **hyper_params): 22 | super(DARNTop, self).__init__() 23 | 24 | # Hyper parameters 25 | self.register_hyper_param('n_X', help='no. binary variables') 26 | self.register_hyper_param('unroll_scan', default=1) 27 | 28 | # Model parameters 29 | self.register_model_param('b', help='sigmoid(b)-bias ', default=lambda: np.zeros(self.n_X)) 30 | self.register_model_param('W', help='weights (triangular)', default=lambda: 0.5*default_weights(self.n_X, self.n_X) ) 31 | 32 | self.set_hyper_params(hyper_params) 33 | 34 | def log_prob(self, X): 35 | """ Evaluate the log-probability for the given samples. 36 | 37 | Parameters 38 | ---------- 39 | X: T.tensor 40 | samples from X 41 | 42 | Returns 43 | ------- 44 | log_p: T.tensor 45 | log-probabilities for the samples in X 46 | """ 47 | n_X, = self.get_hyper_params(['n_X']) 48 | b, W = self.get_model_params(['b', 'W']) 49 | 50 | W = T.tril(W, k=-1) 51 | 52 | prob_X = self.sigmoid(T.dot(X, W) + b) 53 | log_prob = X*T.log(prob_X) + (1-X)*T.log(1-prob_X) 54 | log_prob = T.sum(log_prob, axis=1) 55 | 56 | return log_prob 57 | 58 | 59 | def sample(self, n_samples): 60 | """ Sample from this toplevel module and return X ~ P(X), log(P(X)) 61 | 62 | Parameters 63 | ---------- 64 | n_samples: 65 | number of samples to drawn 66 | 67 | Returns 68 | ------- 69 | X: T.tensor 70 | samples from this module 71 | log_p: T.tensor 72 | log-probabilities for the samples returned in X 73 | """ 74 | n_X, = self.get_hyper_params(['n_X']) 75 | b, W = self.get_model_params(['b', 'W']) 76 | 77 | #------------------------------------------------------------------ 78 | 79 | a_init = T.zeros([n_samples, n_X]) + T.shape_padleft(b) 80 | post_init = T.zeros([n_samples], dtype=floatX) 81 | x_init = T.zeros([n_samples], dtype=floatX) 82 | rand = theano_rng.uniform((n_X, n_samples), nstreams=512) 83 | 84 | def one_iter(i, Wi, rand_i, a, X, post): 85 | pi = self.sigmoid(a[:,i]) 86 | xi = T.cast(rand_i <= pi, floatX) 87 | post = post + T.log(pi*xi + (1-pi)*(1-xi)) 88 | a = a + T.outer(xi, Wi) 89 | return a, xi, post 90 | 91 | [a, X, post], updates = unrolled_scan( 92 | fn=one_iter, 93 | sequences=[T.arange(n_X), W, rand], 94 | outputs_info=[a_init, x_init, post_init], 95 | unroll=self.unroll_scan 96 | ) 97 | assert len(updates) == 0 98 | return X.T, post[-1,:] 99 | 100 | 101 | class DARN(Module): 102 | def __init__(self, **hyper_params): 103 | super(DARN, self).__init__() 104 | 105 | # Hyper parameters 106 | self.register_hyper_param('n_X', help='no. binary variables') 107 | self.register_hyper_param('n_Y', help='no. 
conditioning binary variables') 108 | self.register_hyper_param('unroll_scan', default=1) 109 | 110 | # Model parameters 111 | self.register_model_param('b', help='sigmoid(b)-bias ', default=lambda: np.zeros(self.n_X)) 112 | self.register_model_param('W', help='weights (triangular)', default=lambda: default_weights(self.n_X, self.n_X) ) 113 | self.register_model_param('U', help='cond. weights U', default=lambda: default_weights(self.n_Y, self.n_X) ) 114 | 115 | self.set_hyper_params(hyper_params) 116 | 117 | 118 | def log_prob(self, X, Y): 119 | """ Evaluate the log-probability for the given samples. 120 | 121 | Parameters 122 | ---------- 123 | Y: T.tensor 124 | samples from the upper layer 125 | X: T.tensor 126 | samples from the lower layer 127 | 128 | Returns 129 | ------- 130 | log_p: T.tensor 131 | log-probabilities for the samples in X and Y 132 | """ 133 | n_X, n_Y = self.get_hyper_params(['n_X', 'n_Y']) 134 | b, W, U = self.get_model_params(['b', 'W', 'U']) 135 | 136 | W = T.tril(W, k=-1) 137 | 138 | prob_X = self.sigmoid(T.dot(X, W) + T.dot(Y, U) + T.shape_padleft(b)) 139 | log_prob = X*T.log(prob_X) + (1-X)*T.log(1-prob_X) 140 | log_prob = T.sum(log_prob, axis=1) 141 | 142 | return log_prob 143 | 144 | 145 | def sample(self, Y): 146 | """ Evaluate the log-probability for the given samples. 147 | 148 | Parameters 149 | ---------- 150 | Y: T.tensor 151 | samples from the upper layer 152 | 153 | Returns 154 | ------- 155 | X: T.tensor 156 | samples from the lower layer 157 | log_p: T.tensor 158 | log-probabilities for the samples in X and Y 159 | """ 160 | n_X, n_Y = self.get_hyper_params(['n_X', 'n_Y']) 161 | b, W, U = self.get_model_params(['b', 'W', 'U']) 162 | 163 | batch_size = Y.shape[0] 164 | 165 | #------------------------------------------------------------------ 166 | 167 | a_init = T.dot(Y, U) + T.shape_padleft(b) # shape (batch, n_vis) 168 | post_init = T.zeros([batch_size], dtype=floatX) 169 | x_init = T.zeros([batch_size], dtype=floatX) 170 | rand = theano_rng.uniform((n_X, batch_size), nstreams=512) 171 | 172 | def one_iter(i, Wi, rand_i, a, X, post): 173 | pi = self.sigmoid(a[:,i]) 174 | xi = T.cast(rand_i <= pi, floatX) 175 | post = post + T.log(pi*xi + (1-pi)*(1-xi)) 176 | a = a + T.outer(xi, Wi) 177 | return a, xi, post 178 | 179 | [a, X, post], updates = unrolled_scan( 180 | fn=one_iter, 181 | sequences=[T.arange(n_X), W, rand], 182 | outputs_info=[a_init, x_init, post_init], 183 | unroll=self.unroll_scan 184 | ) 185 | assert len(updates) == 0 186 | return X.T, post[-1,:] 187 | 188 | 189 | -------------------------------------------------------------------------------- /learning/models/dsbn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import logging 6 | 7 | import numpy as np 8 | 9 | import theano 10 | import theano.tensor as T 11 | from theano.printing import Print 12 | 13 | from learning.model import default_weights 14 | from learning.models.rws import TopModule, Module, theano_rng 15 | 16 | _logger = logging.getLogger(__name__) 17 | floatX = theano.config.floatX 18 | 19 | 20 | class DSBN(Module): 21 | """ SigmoidBeliefNet with a deterministic layers """ 22 | def __init__(self, **hyper_params): 23 | super(DSBN, self).__init__() 24 | 25 | self.register_hyper_param('n_X', help='no. lower-layer binary variables') 26 | self.register_hyper_param('n_Y', help='no. upper-layer binary variables') 27 | 28 | self.register_hyper_param('n_D', help='no. 
deterministic units')
29 |         self.register_hyper_param('non_lin', default='sigmoid', help="nonlinearity for deterministic layer")
30 | 
31 |         # Sigmoid Belief Layer
32 |         self.register_model_param('a', help='deterministic bias', default=lambda: -np.ones(self.n_D))
33 |         self.register_model_param('U', help='deterministic weights', default=lambda: default_weights(self.n_Y, self.n_D) )
34 | 
35 |         self.register_model_param('b', help='stochastic bias', default=lambda: -np.ones(self.n_X))
36 |         self.register_model_param('W', help='stochastic weights', default=lambda: default_weights(self.n_D, self.n_X) )
37 | 
38 |         self.set_hyper_params(hyper_params)
39 | 
40 | 
41 |     def non_linearity(self, arr):
42 |         """ Elementwise non-linearity according to self.non_lin """
43 |         non_lin = self.get_hyper_param('non_lin')
44 | 
45 |         if non_lin == 'tanh':
46 |             return T.tanh(arr)
47 |         elif non_lin == 'sigmoid':
48 |             return self.sigmoid(arr)
49 |         else:
50 |             raise ValueError("Unknown non_lin")
51 | 
52 |     def setup(self):
53 |         if self.n_D is None:
54 |             self.n_D = self.n_Y + self.n_X // 2   # integer division: n_D is a layer size
55 | 
56 |     def log_prob(self, X, Y):
57 |         """ Evaluate the log-probability for the given samples.
58 | 
59 |         Parameters
60 |         ----------
61 |         Y: T.tensor
62 |             samples from the upper layer
63 |         X: T.tensor
64 |             samples from the lower layer
65 | 
66 |         Returns
67 |         -------
68 |         log_p: T.tensor
69 |             log-probabilities for the samples in X and Y
70 |         """
71 |         U, a = self.get_model_params(['U', 'a'])
72 |         W, b = self.get_model_params(['W', 'b'])
73 | 
74 |         # posterior P(X|Y)
75 |         D = self.non_linearity(T.dot(Y, U) + a)
76 | 
77 |         prob_X = self.sigmoid(T.dot(D, W) + b)
78 |         log_prob = X*T.log(prob_X) + (1-X)*T.log(1-prob_X)
79 |         log_prob = T.sum(log_prob, axis=1)
80 | 
81 |         return log_prob
82 | 
83 |     def sample(self, Y):
84 |         """ Given samples from the upper layer Y, sample values from X
85 |             and return them together with their log probability.
86 | 87 | Parameters 88 | ---------- 89 | Y: T.tensor 90 | samples from the upper layer 91 | 92 | Returns 93 | ------- 94 | X: T.tensor 95 | samples from the lower layer 96 | log_p: T.tensor 97 | log-posterior for the samples returned in X 98 | """ 99 | n_X, = self.get_hyper_params(['n_X']) 100 | U, a = self.get_model_params(['U', 'a']) 101 | W, b = self.get_model_params(['W', 'b']) 102 | 103 | n_samples = Y.shape[0] 104 | 105 | # sample X given Y 106 | D = self.non_linearity(T.dot(Y, U) + a) 107 | 108 | prob_X = self.sigmoid(T.dot(D, W) + b) 109 | U = theano_rng.uniform((n_samples, n_X), nstreams=512) 110 | X = T.cast(U <= prob_X, dtype=floatX) 111 | 112 | log_prob = X*T.log(prob_X) + (1-X)*T.log(1-prob_X) 113 | log_prob = log_prob.sum(axis=1) 114 | 115 | return X, log_prob 116 | 117 | def sample_expected(self, Y): 118 | """ Given samples from the upper layer Y, return 119 | the probability for the individual X elements 120 | 121 | Parameters 122 | ---------- 123 | Y: T.tensor 124 | samples from the upper layer 125 | 126 | Returns 127 | ------- 128 | X: T.tensor 129 | """ 130 | U, a = self.get_model_params(['U', 'a']) 131 | W, b = self.get_model_params(['W', 'b']) 132 | 133 | D = self.non_linearity(T.dot(Y, U) + a) 134 | prob_X = self.sigmoid(T.dot(D, W) + b) 135 | 136 | return prob_X 137 | 138 | -------------------------------------------------------------------------------- /learning/models/nade.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import logging 6 | 7 | import numpy as np 8 | 9 | import theano 10 | import theano.tensor as T 11 | from theano.printing import Print 12 | 13 | from learning.model import default_weights 14 | from learning.models.rws import TopModule, Module, theano_rng 15 | from learning.utils.unrolled_scan import unrolled_scan 16 | 17 | _logger = logging.getLogger(__name__) 18 | floatX = theano.config.floatX 19 | 20 | #------------------------------------------------------------------------------ 21 | 22 | class NADETop(TopModule): 23 | """ Top Level NADE """ 24 | def __init__(self, **hyper_params): 25 | super(NADETop, self).__init__() 26 | 27 | self.register_hyper_param('n_X', help='no. observed binary variables') 28 | self.register_hyper_param('n_hid', help='no. latent binary variables') 29 | self.register_hyper_param('unroll_scan', default=1) 30 | 31 | self.register_model_param('b', help='visible bias', default=lambda: np.zeros(self.n_X)) 32 | self.register_model_param('c', help='hidden bias' , default=lambda: np.zeros(self.n_hid)) 33 | self.register_model_param('W', help='encoder weights', default=lambda: default_weights(self.n_X, self.n_hid) ) 34 | self.register_model_param('V', help='decoder weights', default=lambda: default_weights(self.n_hid, self.n_X) ) 35 | 36 | self.set_hyper_params(hyper_params) 37 | 38 | def setup(self): 39 | _logger.info("setup") 40 | if self.n_hid is None: 41 | self.n_hid = self.n_X 42 | 43 | def log_prob(self, X): 44 | """ Evaluate the log-probability for the given samples. 
45 | 46 | Parameters 47 | ---------- 48 | X: T.tensor 49 | samples from X 50 | 51 | Returns 52 | ------- 53 | log_p: T.tensor 54 | log-probabilities for the samples in X 55 | """ 56 | n_X, n_hid = self.get_hyper_params(['n_X', 'n_hid']) 57 | b, c, W, V = self.get_model_params(['b', 'c', 'W', 'V']) 58 | 59 | batch_size = X.shape[0] 60 | vis = X 61 | 62 | #------------------------------------------------------------------ 63 | 64 | a_init = T.zeros([batch_size, n_hid]) + T.shape_padleft(c) 65 | post_init = T.zeros([batch_size], dtype=floatX) 66 | 67 | def one_iter(vis_i, Wi, Vi, bi, a, post): 68 | hid = self.sigmoid(a) 69 | pi = self.sigmoid(T.dot(hid, Vi) + bi) 70 | post = post + T.log(pi*vis_i + (1-pi)*(1-vis_i)) 71 | a = a + T.outer(vis_i, Wi) 72 | return a, post 73 | 74 | [a, post], updates = unrolled_scan( 75 | fn=one_iter, 76 | sequences=[vis.T, W, V.T, b], 77 | outputs_info=[a_init, post_init], 78 | unroll=self.unroll_scan 79 | ) 80 | assert len(updates) == 0 81 | return post[-1,:] 82 | 83 | def sample(self, n_samples): 84 | """ Sample from this toplevel module and return X ~ P(X), log(P(X)) 85 | 86 | Parameters 87 | ---------- 88 | n_samples: 89 | number of samples to drawn 90 | 91 | Returns 92 | ------- 93 | X: T.tensor 94 | samples from this module 95 | log_p: T.tensor 96 | log-probabilities for the samples returned in X 97 | """ 98 | n_X, n_hid = self.get_hyper_params(['n_X', 'n_hid']) 99 | b, c, W, V = self.get_model_params(['b', 'c', 'W', 'V']) 100 | 101 | #------------------------------------------------------------------ 102 | 103 | a_init = T.zeros([n_samples, n_hid]) + T.shape_padleft(c) 104 | post_init = T.zeros([n_samples], dtype=floatX) 105 | vis_init = T.zeros([n_samples], dtype=floatX) 106 | rand = theano_rng.uniform((n_X, n_samples), nstreams=512) 107 | 108 | def one_iter(Wi, Vi, bi, rand_i, a, vis_i, post): 109 | hid = self.sigmoid(a) 110 | pi = self.sigmoid(T.dot(hid, Vi) + bi) 111 | vis_i = T.cast(rand_i <= pi, floatX) 112 | post = post + T.log(pi*vis_i + (1-pi)*(1-vis_i)) 113 | a = a + T.outer(vis_i, Wi) 114 | return a, vis_i, post 115 | 116 | [a, vis, post], updates = unrolled_scan( 117 | fn=one_iter, 118 | sequences=[W, V.T, b, rand], 119 | outputs_info=[a_init, vis_init, post_init], 120 | unroll=self.unroll_scan 121 | ) 122 | assert len(updates) == 0 123 | return vis.T, post[-1,:] 124 | 125 | #---------------------------------------------------------------------------- 126 | 127 | class NADE(Module): 128 | """ Conditional NADE """ 129 | def __init__(self, **hyper_params): 130 | super(NADE, self).__init__() 131 | 132 | self.register_hyper_param('n_X', help='no. observed binary variables') 133 | self.register_hyper_param('n_Y', help='no. conditioning binary variables') 134 | self.register_hyper_param('n_hid', help='no. latent binary variables') 135 | self.register_hyper_param('unroll_scan', default=1) 136 | 137 | self.register_model_param('b', help='visible bias', default=lambda: np.zeros(self.n_X)) 138 | self.register_model_param('c', help='hidden bias' , default=lambda: np.zeros(self.n_hid)) 139 | self.register_model_param('Ub', help='cond. weights Ub', default=lambda: default_weights(self.n_Y, self.n_X) ) 140 | self.register_model_param('Uc', help='cond. 
weights Uc', default=lambda: default_weights(self.n_Y, self.n_hid) ) 141 | self.register_model_param('W', help='encoder weights', default=lambda: default_weights(self.n_X, self.n_hid) ) 142 | self.register_model_param('V', help='decoder weights', default=lambda: default_weights(self.n_hid, self.n_X) ) 143 | 144 | self.set_hyper_params(hyper_params) 145 | 146 | def setup(self): 147 | if self.n_hid is None: 148 | self.n_hid = min(self.n_X, self.n_Y) 149 | 150 | def log_prob(self, X, Y): 151 | """ Evaluate the log-probability for the given samples. 152 | 153 | Parameters 154 | ---------- 155 | Y: T.tensor 156 | samples from the upper layer 157 | X: T.tensor 158 | samples from the lower layer 159 | 160 | Returns 161 | ------- 162 | log_p: T.tensor 163 | log-probabilities for the samples in X and Y 164 | """ 165 | n_X, n_Y, n_hid = self.get_hyper_params(['n_X', 'n_Y', 'n_hid']) 166 | b, c, W, V, Ub, Uc = self.get_model_params(['b', 'c', 'W', 'V', 'Ub', 'Uc']) 167 | 168 | batch_size = X.shape[0] 169 | vis = X 170 | cond = Y 171 | 172 | #------------------------------------------------------------------ 173 | b_cond = b + T.dot(cond, Ub) # shape (batch, n_vis) 174 | c_cond = c + T.dot(cond, Uc) # shape (batch, n_hid) 175 | 176 | a_init = c_cond 177 | post_init = T.zeros([batch_size], dtype=floatX) 178 | 179 | def one_iter(vis_i, Wi, Vi, bi, a, post): 180 | hid = self.sigmoid(a) 181 | pi = self.sigmoid(T.dot(hid, Vi) + bi) 182 | post = post + T.log(pi*vis_i + (1-pi)*(1-vis_i)) 183 | a = a + T.outer(vis_i, Wi) 184 | return a, post 185 | 186 | [a, post], updates = unrolled_scan( 187 | fn=one_iter, 188 | sequences=[vis.T, W, V.T, b_cond.T], 189 | outputs_info=[a_init, post_init], 190 | unroll=self.unroll_scan 191 | ) 192 | assert len(updates) == 0 193 | return post[-1,:] 194 | 195 | def sample(self, Y): 196 | """ Evaluate the log-probability for the given samples. 
197 | 198 | Parameters 199 | ---------- 200 | Y: T.tensor 201 | samples from the upper layer 202 | 203 | Returns 204 | ------- 205 | X: T.tensor 206 | samples from the lower layer 207 | log_p: T.tensor 208 | log-probabilities for the samples in X and Y 209 | """ 210 | n_X, n_Y, n_hid = self.get_hyper_params(['n_X', 'n_Y', 'n_hid']) 211 | b, c, W, V, Ub, Uc = self.get_model_params(['b', 'c', 'W', 'V', 'Ub', 'Uc']) 212 | 213 | batch_size = Y.shape[0] 214 | cond = Y 215 | 216 | #------------------------------------------------------------------ 217 | b_cond = b + T.dot(cond, Ub) # shape (batch, n_vis) 218 | c_cond = c + T.dot(cond, Uc) # shape (batch, n_hid) 219 | 220 | a_init = c_cond 221 | post_init = T.zeros([batch_size], dtype=floatX) 222 | vis_init = T.zeros([batch_size], dtype=floatX) 223 | rand = theano_rng.uniform((n_X, batch_size), nstreams=512) 224 | 225 | def one_iter(Wi, Vi, bi, rand_i, a, vis_i, post): 226 | hid = self.sigmoid(a) 227 | pi = self.sigmoid(T.dot(hid, Vi) + bi) 228 | vis_i = T.cast(rand_i <= pi, floatX) 229 | post = post + T.log(pi*vis_i + (1-pi)*(1-vis_i)) 230 | a = a + T.outer(vis_i, Wi) 231 | return a, vis_i, post 232 | 233 | [a, vis, post], updates = unrolled_scan( 234 | fn=one_iter, 235 | sequences=[W, V.T, b_cond.T, rand], 236 | outputs_info=[a_init, vis_init, post_init], 237 | unroll=self.unroll_scan 238 | ) 239 | assert len(updates) == 0 240 | return vis.T, post[-1,:] 241 | 242 | -------------------------------------------------------------------------------- /learning/models/sbn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import logging 6 | 7 | import numpy as np 8 | 9 | import theano 10 | import theano.tensor as T 11 | from theano.printing import Print 12 | 13 | from learning.models.rws import TopModule, Module, theano_rng 14 | from learning.model import default_weights 15 | 16 | _logger = logging.getLogger(__name__) 17 | floatX = theano.config.floatX 18 | 19 | 20 | class SBNTop(TopModule): 21 | """ FactoizedBernoulliTop top layer """ 22 | def __init__(self, **hyper_params): 23 | super(SBNTop, self).__init__() 24 | 25 | # Hyper parameters 26 | self.register_hyper_param('n_X', help='no. binary variables') 27 | 28 | # Model parameters 29 | self.register_model_param('a', help='sigmoid(a) prior', 30 | default=lambda: -np.ones(self.n_X)) 31 | 32 | self.set_hyper_params(hyper_params) 33 | 34 | def log_prob(self, X): 35 | """ Evaluate the log-probability for the given samples. 
36 | 37 | Parameters 38 | ---------- 39 | X: T.tensor 40 | samples from X 41 | 42 | Returns 43 | ------- 44 | log_p: T.tensor 45 | log-probabilities for the samples in X 46 | """ 47 | n_X, = self.get_hyper_params(['n_X']) 48 | a, = self.get_model_params(['a']) 49 | 50 | # Calculate log-bernoulli 51 | prob_X = self.sigmoid(a) 52 | log_prob = X*T.log(prob_X) + (1-X)*T.log(1-prob_X) 53 | log_prob = log_prob.sum(axis=1) 54 | 55 | return log_prob 56 | 57 | def sample(self, n_samples): 58 | """ Sample from this toplevel module and return X ~ P(X), log(P(X)) 59 | 60 | Parameters 61 | ---------- 62 | n_samples: 63 | number of samples to drawn 64 | 65 | Returns 66 | ------- 67 | X: T.tensor 68 | samples from this module 69 | log_p: T.tensor 70 | log-probabilities for the samples returned in X 71 | """ 72 | n_X, = self.get_hyper_params(['n_X']) 73 | a, = self.get_model_params(['a']) 74 | 75 | # sample hiddens 76 | prob_X = self.sigmoid(a) 77 | U = theano_rng.uniform((n_samples, n_X), nstreams=512) 78 | X = T.cast(U <= prob_X, dtype=floatX) 79 | 80 | return X, self.log_prob(X) 81 | 82 | #---------------------------------------------------------------------------- 83 | 84 | class SBN(Module): 85 | """ SigmoidBeliefLayer """ 86 | def __init__(self, **hyper_params): 87 | super(SBN, self).__init__() 88 | 89 | self.register_hyper_param('n_X', help='no. lower-layer binary variables') 90 | self.register_hyper_param('n_Y', help='no. upper-layer binary variables') 91 | 92 | # Sigmoid Belief Layer 93 | self.register_model_param('b', help='P lower-layer bias', default=lambda: -np.ones(self.n_X)) 94 | self.register_model_param('W', help='P weights', default=lambda: default_weights(self.n_Y, self.n_X) ) 95 | 96 | self.set_hyper_params(hyper_params) 97 | 98 | def log_prob(self, X, Y): 99 | """ Evaluate the log-probability for the given samples. 100 | 101 | Parameters 102 | ---------- 103 | Y: T.tensor 104 | samples from the upper layer 105 | X: T.tensor 106 | samples from the lower layer 107 | 108 | Returns 109 | ------- 110 | log_p: T.tensor 111 | log-probabilities for the samples in X and Y 112 | """ 113 | W, b = self.get_model_params(['W', 'b']) 114 | 115 | # posterior P(X|Y) 116 | prob_X = self.sigmoid(T.dot(Y, W) + b) 117 | log_prob = X*T.log(prob_X) + (1-X)*T.log(1-prob_X) 118 | log_prob = T.sum(log_prob, axis=1) 119 | 120 | return log_prob 121 | 122 | def sample(self, Y): 123 | """ Given samples from the upper layer Y, sample values from X 124 | and return then together with their log probability. 
125 | 126 | Parameters 127 | ---------- 128 | Y: T.tensor 129 | samples from the upper layer 130 | 131 | Returns 132 | ------- 133 | X: T.tensor 134 | samples from the lower layer 135 | log_p: T.tensor 136 | log-posterior for the samples returned in X 137 | """ 138 | n_X, = self.get_hyper_params(['n_X']) 139 | W, b = self.get_model_params(['W', 'b']) 140 | 141 | n_samples = Y.shape[0] 142 | 143 | # sample X given Y 144 | prob_X = self.sigmoid(T.dot(Y, W) + b) 145 | U = theano_rng.uniform((n_samples, n_X), nstreams=512) 146 | X = T.cast(U <= prob_X, dtype=floatX) 147 | 148 | log_prob = X*T.log(prob_X) + (1-X)*T.log(1-prob_X) 149 | log_prob = log_prob.sum(axis=1) 150 | 151 | return X, log_prob 152 | 153 | def sample_expected(self, Y): 154 | """ Given samples from the upper layer Y, return 155 | the probability for the individual X elements 156 | 157 | Parameters 158 | ---------- 159 | Y: T.tensor 160 | samples from the upper layer 161 | 162 | Returns 163 | ------- 164 | X: T.tensor 165 | """ 166 | W, b = self.get_model_params(['W', 'b']) 167 | 168 | prob_X = self.sigmoid(T.dot(Y, W) + b) 169 | 170 | return prob_X 171 | 172 | -------------------------------------------------------------------------------- /learning/models/tests/test_darn.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | import theano 6 | import theano.tensor as T 7 | 8 | import learning.tests as testing 9 | from test_rws import RWSLayerTest, RWSTopLayerTest 10 | 11 | # Unit Under Test 12 | from learning.models.darn import DARN, DARNTop 13 | 14 | 15 | #----------------------------------------------------------------------------- 16 | 17 | class TestDARNTop(RWSTopLayerTest, unittest.TestCase): 18 | def setUp(self): 19 | self.n_samples = 10 20 | self.layer = DARNTop( 21 | n_X=8, 22 | ) 23 | self.layer.setup() 24 | 25 | 26 | class TestDARN(RWSLayerTest, unittest.TestCase): 27 | def setUp(self): 28 | self.n_samples = 10 29 | self.layer = DARN( 30 | n_X=16, 31 | n_Y=8, 32 | ) 33 | self.layer.setup() 34 | 35 | -------------------------------------------------------------------------------- /learning/models/tests/test_dsbn.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | import theano 6 | import theano.tensor as T 7 | 8 | from test_rws import RWSLayerTest, RWSTopLayerTest 9 | 10 | # Unit Under Test 11 | from learning.models.dsbn import DSBN 12 | 13 | #----------------------------------------------------------------------------- 14 | 15 | class TestDSBN(RWSLayerTest, unittest.TestCase): 16 | def setUp(self): 17 | self.n_samples = 10 18 | self.layer = DSBN( 19 | n_X=16, 20 | n_Y=8, 21 | n_D=12, 22 | ) 23 | self.layer.setup() 24 | -------------------------------------------------------------------------------- /learning/models/tests/test_model.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from collections import OrderedDict 6 | 7 | # Unit Under Test 8 | from learning.model import * 9 | 10 | class ExampleModel(Model): 11 | def __init__(self, **hyper_params): 12 | super(ExampleModel, self).__init__(**hyper_params) 13 | 14 | self.register_hyper_param("hyper_a") 15 | self.register_hyper_param("hyper_b", default=23) 16 | self.register_hyper_param("hyper_c", default=lambda: 2*21, help="help") 17 | 18 | self.register_model_param("model_a") 19 | 
self.register_model_param("model_b") 20 | self.register_model_param("model_c", help="help") 21 | 22 | self.set_hyper_params(hyper_params) 23 | 24 | 25 | def test_constructor(): 26 | model = ExampleModel(hyper_a=0) 27 | 28 | hyper_a = model.get_hyper_param('hyper_a') 29 | assert hyper_a == 0 30 | 31 | def test_hyper_defaults(): 32 | model = ExampleModel() 33 | 34 | assert model.get_hyper_param('hyper_b') == 23, model.get_hyper_param('hyper_b') 35 | assert model.get_hyper_param('hyper_c') == 42, model.get_hyper_param('hyper_c') 36 | 37 | def test_hyper_setget(): 38 | model = ExampleModel() 39 | 40 | model.set_hyper_param('hyper_b', 1) 41 | model.set_hyper_param('hyper_c', 2) 42 | assert model.get_hyper_param('hyper_b') == 1 43 | assert model.get_hyper_params(['hyper_b', 'hyper_c']) == [1, 2] 44 | 45 | model.set_hyper_params({'hyper_b': 23, 'hyper_c': 42}) 46 | assert model.get_hyper_param('hyper_b') == 23 47 | assert model.get_hyper_params(['hyper_b', 'hyper_c']) == [23, 42] 48 | 49 | def test_model_setget(): 50 | model = ExampleModel() 51 | 52 | model.set_model_param('model_b', 1) 53 | model.set_model_param('model_c', 2) 54 | assert model.get_model_param('model_b').get_value() == 1 55 | assert model.get_model_param('model_c').get_value() == 2 56 | 57 | model.set_model_params({'model_b': 23, 'model_c': 42}) 58 | assert model.get_model_param('model_b').get_value() == 23 59 | #assert model.get_model_params(['model_b', 'model_c']) == [23, 42] 60 | 61 | def test_get_all_model_params(): 62 | model = ExampleModel() 63 | 64 | model.set_model_param('model_a', 1) 65 | all_params = model.get_model_params() 66 | 67 | assert type(all_params) == OrderedDict 68 | assert len(all_params) == 3 69 | 70 | def test_hyper_attr(): 71 | model = ExampleModel() 72 | 73 | 74 | assert model.hyper_b == 23 75 | assert model.hyper_c == 42 76 | 77 | model.hyper_a = 11 78 | assert model.hyper_a == 11 79 | 80 | def test_model_attr(): 81 | model = ExampleModel() 82 | 83 | model.model_a = 23.5 84 | assert np.allclose(model.model_a.get_value(), 23.5) 85 | -------------------------------------------------------------------------------- /learning/models/tests/test_nade.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | import theano 6 | import theano.tensor as T 7 | 8 | from test_rws import RWSLayerTest, RWSTopLayerTest 9 | 10 | # Unit Under Test 11 | from learning.models.nade import NADE, NADETop 12 | 13 | 14 | #----------------------------------------------------------------------------- 15 | 16 | class TestNADETop(RWSTopLayerTest, unittest.TestCase): 17 | def setUp(self): 18 | self.n_samples = 10 19 | self.layer = NADETop( 20 | n_X=8, 21 | n_hid=8, 22 | ) 23 | self.layer.setup() 24 | 25 | class TestNADE(RWSLayerTest, unittest.TestCase): 26 | def setUp(self): 27 | self.n_samples = 10 28 | self.layer = NADE( 29 | n_X=16, 30 | n_Y=8, 31 | n_hid=8, 32 | ) 33 | self.layer.setup() 34 | -------------------------------------------------------------------------------- /learning/models/tests/test_rws.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | import theano 6 | import theano.tensor as T 7 | 8 | import learning.tests as testing 9 | 10 | # Unit Under Test 11 | from learning.models.rws import * 12 | from learning.models.sbn import SBN, SBNTop 13 | 14 | #----------------------------------------------------------------------------- 15 | 16 | class 
RWSTopLayerTest(object): 17 | def test_basic_log_prob(self): 18 | n_samples = self.n_samples 19 | layer = self.layer 20 | 21 | X, X_ = testing.fmatrix( (n_samples, layer.n_X), name="X") 22 | 23 | log_prob = layer.log_prob(X) 24 | do_log_prob = theano.function([X], log_prob, name="log_prob") 25 | 26 | log_prob_ = do_log_prob(X_) 27 | assert log_prob_.shape == (n_samples,) 28 | assert not np.isnan(log_prob_).any() 29 | 30 | def test_basic_sample(self): 31 | n_samples = self.n_samples 32 | layer = self.layer 33 | 34 | X, log_prob = layer.sample(n_samples) 35 | do_sample_p = theano.function([], [X, log_prob], name="sample") 36 | 37 | X_, log_prob_ = do_sample_p() 38 | assert X_.shape == (n_samples, layer.n_X ) 39 | assert log_prob_.shape == (n_samples,) 40 | assert not np.isnan(log_prob_).any() 41 | 42 | def test_sample_expected(self): 43 | n_samples = self.n_samples 44 | layer = self.layer 45 | 46 | if getattr(layer, "sample_expected", None) is None: 47 | raise unittest.SkipTest("sample_expected not implemented") 48 | 49 | X, log_prob = layer.sample_expected(n_samples) 50 | do_sample_p = theano.function([], [X, log_prob], name="sample_expected") 51 | 52 | X_, log_prob_ = do_sample_p() 53 | assert X_.shape == (n_samples, layer.n_X ) 54 | assert log_prob_.shape == (n_samples,) 55 | assert not np.isnan(log_prob_).any() 56 | 57 | 58 | 59 | class RWSLayerTest(object): 60 | def test_basic_log_prob(self): 61 | n_samples = self.n_samples 62 | layer = self.layer 63 | 64 | X, X_ = testing.fmatrix( (n_samples, layer.n_X), name="X") 65 | Y, Y_ = testing.fmatrix( (n_samples, layer.n_Y), name="H") 66 | 67 | log_prob = layer.log_prob(X, Y) 68 | do_log_prob = theano.function([X, Y], log_prob, name="log_prob") 69 | 70 | log_prob_ = do_log_prob(X_, Y_) 71 | assert log_prob_.shape == (n_samples,) 72 | assert not np.isnan(log_prob_).any() 73 | 74 | 75 | def test_basic_sample(self): 76 | n_samples = self.n_samples 77 | layer = self.layer 78 | 79 | Y, Y_ = testing.fmatrix( (n_samples, layer.n_Y), name="Y") 80 | 81 | X, log_prob = layer.sample(Y) 82 | do_sample_p = theano.function([Y], [X, log_prob], name="sample") 83 | 84 | X_, log_prob_ = do_sample_p(Y_) 85 | assert X_.shape == (n_samples, layer.n_X ) 86 | assert log_prob_.shape == (n_samples,) 87 | assert not np.isnan(log_prob_).any() 88 | 89 | 90 | def test_sample_expected(self): 91 | n_samples = self.n_samples 92 | layer = self.layer 93 | 94 | if getattr(layer, "sample_expected", None) is None: 95 | raise unittest.SkipTest("sample_expected not implemented") 96 | 97 | Y, Y_ = testing.fmatrix( (n_samples, layer.n_Y), name="Y") 98 | 99 | X, log_prob = layer.sample(Y) 100 | do_sample_p = theano.function([Y], [X, log_prob], name="sample") 101 | 102 | X_, log_prob_ = do_sample_p(Y_) 103 | assert X_.shape == (n_samples, layer.n_X ) 104 | assert log_prob_.shape == (n_samples,) 105 | assert not np.isnan(log_prob_).any() 106 | 107 | 108 | #----------------------------------------------------------------------------- 109 | 110 | class TestLayerStack(unittest.TestCase): 111 | n_samples = 25 112 | n_vis = 8 113 | n_hid = 16 114 | n_qhid = 32 115 | 116 | def setUp(self): 117 | p_layers=[ 118 | SBN( 119 | n_X=self.n_vis, 120 | n_Y=self.n_hid, 121 | ), 122 | SBN( 123 | n_X=self.n_hid, 124 | n_Y=self.n_hid, 125 | ), 126 | SBNTop( 127 | n_X=self.n_hid 128 | ) 129 | ] 130 | q_layers=[ 131 | SBN( 132 | n_Y=self.n_vis, 133 | n_X=self.n_hid, 134 | ), 135 | SBN( 136 | n_Y=self.n_hid, 137 | n_X=self.n_hid, 138 | ) 139 | ] 140 | self.stack = LayerStack(p_layers=p_layers, 
q_layers=q_layers) 141 | self.stack.setup() 142 | 143 | def test_layer_sizes(self): 144 | stack = self.stack 145 | p_layers = stack.p_layers 146 | n_layers = len(p_layers) 147 | 148 | for l in xrange(n_layers-1): 149 | assert p_layers[l].n_Y == p_layers[l+1].n_X 150 | 151 | def test_sample_p(self): 152 | stack = self.stack 153 | 154 | n_samples, n_samples_ = testing.iscalar('n_samples') 155 | X, log_P = stack.sample_p(n_samples=n_samples) 156 | do_sample = theano.function([n_samples], [X[0], log_P], name="do_sample") 157 | 158 | X0_, log_P_ = do_sample(n_samples_) 159 | 160 | assert X0_.shape == (n_samples_, self.n_vis) 161 | assert log_P_.shape == (n_samples_, ) 162 | 163 | def test_log_likelihood(self): 164 | batch_size = 20 165 | stack = self.stack 166 | 167 | X, X_ = testing.fmatrix((batch_size, self.n_vis), 'X') 168 | n_samples, n_samples_ = testing.iscalar('n_samples') 169 | n_samples_ = self.n_samples 170 | 171 | log_PX, w, log_P, log_Q, KL, Hp, Hq = stack.log_likelihood(X, n_samples=n_samples) 172 | do_log_likelihood = theano.function( 173 | [X, n_samples], 174 | [log_PX, log_P, log_Q, w] 175 | ) 176 | 177 | log_PX_, log_P_, log_Q_, w_ = do_log_likelihood(X_, n_samples_) 178 | 179 | print "log_P.shape", log_P_.shape 180 | print "log_Q.shape", log_Q_.shape 181 | print "log_PX.shape", log_PX_.shape 182 | print "w.shape", w_.shape 183 | 184 | assert log_PX_.shape == (batch_size,) 185 | assert log_P_.shape == (batch_size, n_samples_) 186 | assert log_Q_.shape == (batch_size, n_samples_) 187 | assert w_.shape == (batch_size, n_samples_) 188 | 189 | n_layers = len(stack.p_layers) 190 | 191 | assert len(KL) == n_layers 192 | assert len(Hp) == n_layers 193 | assert len(Hq) == n_layers 194 | 195 | 196 | def test_gradients(self): 197 | batch_size = 20 198 | stack = self.stack 199 | n_layers = len(stack.p_layers) 200 | 201 | X, X_ = testing.fmatrix((batch_size, self.n_vis), 'X') 202 | n_samples, n_samples_ = testing.iscalar('n_samples') 203 | n_samples_ = self.n_samples 204 | 205 | lr_p = np.ones(n_layers) 206 | lr_q = np.ones(n_layers) 207 | 208 | log_PX, gradients = stack.get_gradients(X, None, 209 | lr_p=lr_p, lr_q=lr_q, n_samples=n_samples_) 210 | 211 | def test_sleep_gradients(self): 212 | pass 213 | 214 | # def test_ll_grad(self): 215 | 216 | # learning_rate = 1e-3 217 | # batch_size = 20 218 | # stack = self.stack 219 | 220 | # X, X_ = testing.fmatrix((batch_size, self.n_vis), 'X') 221 | # n_samples, n_samples_ = testing.iscalar('n_samples') 222 | # n_samples_ = self.n_samples 223 | 224 | # log_PX, w, log_P, log_Q, KL, Hp, Hq = stack.log_likelihood(X, n_samples=n_samples) 225 | 226 | # cost_p = T.sum(T.sum(log_P*w, axis=1)) 227 | # cost_q = T.sum(T.sum(log_Q*w, axis=1)) 228 | 229 | # updates = OrderedDict() 230 | # for pname, shvar in stack.get_p_params().iteritems(): 231 | # print "Calculating gradient dP/d%s" % pname 232 | # updates[shvar] = T.grad(cost_p, shvar, consider_constant=[w]) 233 | 234 | # for pname, shvar in stack.get_q_params().iteritems(): 235 | # print "Calculating gradient dQ/d%s" % pname 236 | # updates[shvar] = T.grad(cost_q, shvar, consider_constant=[w]) 237 | 238 | 239 | # do_sgd_step = theano.function( 240 | # inputs=[X, n_samples], 241 | # outputs=[log_PX, cost_p, cost_q], 242 | # updates=updates, 243 | # name="sgd_step", 244 | # ) 245 | 246 | # log_PX_, cost_p_, cost_q_, = do_sgd_step(X_, n_samples_) 247 | 248 | # assert log_PX_.shape == (batch_size,) 249 | 250 | #----------------------------------------------------------------------------- 251 | 252 | def 
test_replicate_batch(): 253 | Av = np.array( [[1., 2., 3.], 254 | [2., 3., 4.]]).astype(np.float32) 255 | A = T.fmatrix('A') 256 | A.tag.test_value = Av 257 | 258 | B = f_replicate_batch(A, 10) 259 | do_replicate = theano.function([A], B, name="f_replicate", allow_input_downcast=True) 260 | Bv = do_replicate(Av) 261 | 262 | assert Bv.shape == (20, 3) 263 | assert Bv[0 , 0] == 1. 264 | assert Bv[10, 0] == 2. 265 | 266 | -------------------------------------------------------------------------------- /learning/models/tests/test_sbn.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | import theano 6 | import theano.tensor as T 7 | 8 | from test_rws import RWSLayerTest, RWSTopLayerTest 9 | 10 | # Unit Under Test 11 | from learning.models.sbn import SBN, SBNTop 12 | 13 | #----------------------------------------------------------------------------- 14 | 15 | class TestSBNTop(RWSTopLayerTest, unittest.TestCase): 16 | def setUp(self): 17 | self.n_samples = 10 18 | self.layer = SBNTop( 19 | n_X=8 20 | ) 21 | self.layer.setup() 22 | 23 | class TestSBN(RWSLayerTest, unittest.TestCase): 24 | def setUp(self): 25 | self.n_samples = 10 26 | self.layer = SBN( 27 | n_X=16, 28 | n_Y=8, 29 | ) 30 | self.layer.setup() 31 | -------------------------------------------------------------------------------- /learning/monitor/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import abc 6 | import logging 7 | 8 | import numpy as np 9 | 10 | import theano 11 | import theano.tensor as T 12 | 13 | from learning.dataset import DataSet 14 | from learning.model import Model 15 | from learning.hyperbase import HyperBase 16 | import learning.utils.datalog as datalog 17 | 18 | _logger = logging.getLogger("learning.monitor") 19 | 20 | #----------------------------------------------------------------------------- 21 | class Monitor(HyperBase): 22 | """ Abtract base class to monitor stuff """ 23 | __metaclass__ = abc.ABCMeta 24 | 25 | def __init__(self, name=None): 26 | """ 27 | 28 | Parameters 29 | ---------- 30 | name: str 31 | dlog channel to use 32 | """ 33 | if name is None: 34 | name = self.__class__.__name__ 35 | 36 | self.dlog = datalog.getLogger(name) 37 | self.logger = logging.getLogger(name) 38 | 39 | def compile(self): 40 | pass 41 | 42 | def on_init(self, model): 43 | """ Called after the model has been initialized; directly before 44 | the first learning epoch will be performed 45 | """ 46 | pass 47 | 48 | @abc.abstractmethod 49 | def on_iter(self, model): 50 | """ Called whenever a full training epoch has been performed 51 | """ 52 | pass 53 | 54 | 55 | #----------------------------------------------------------------------------- 56 | class DLogHyperParams(Monitor): 57 | def __init__(self, name=None): 58 | if name is None: 59 | name="hyper" 60 | super(DLogHyperParams, self).__init__(name) 61 | 62 | def on_iter(self, model, name=None): 63 | model.hyper_params_to_dlog(self.dlog) 64 | 65 | 66 | #----------------------------------------------------------------------------- 67 | class DLogModelParams(Monitor): 68 | """ 69 | Write all model parameters to a DataLogger called "model_params". 
70 | """ 71 | def __init__(self, name=None): 72 | if name is None: 73 | name="model" 74 | super(DLogModelParams, self).__init__(name) 75 | 76 | def on_iter(self, model): 77 | self.logger.info("Saving model parameters") 78 | model.model_params_to_dlog(self.dlog) 79 | 80 | 81 | #----------------------------------------------------------------------------- 82 | class MonitorLL(Monitor): 83 | """ Monitor the LL after each training epoch on an arbitrary 84 | test or validation data set 85 | """ 86 | def __init__(self, data, n_samples, name=None): 87 | super(MonitorLL, self).__init__(name) 88 | 89 | assert isinstance(data, DataSet) 90 | self.dataset = data 91 | 92 | if isinstance(n_samples, int): 93 | n_samples = [n_samples] 94 | self.n_samples = n_samples 95 | 96 | def compile(self, model): 97 | assert isinstance(model, Model) 98 | self.model = model 99 | 100 | dataset = self.dataset 101 | X, Y = dataset.preproc(dataset.X, dataset.Y) 102 | self.X = theano.shared(X, "X") 103 | self.Y = theano.shared(Y, "Y") 104 | 105 | self.logger.info("compiling do_loglikelihood") 106 | n_samples = T.iscalar("n_samples") 107 | batch_idx = T.iscalar("batch_idx") 108 | batch_size = T.iscalar("batch_size") 109 | 110 | first = batch_idx*batch_size 111 | last = first + batch_size 112 | X_batch, Y_batch = dataset.late_preproc(self.X[first:last], self.Y[first:last]) 113 | 114 | log_PX, _, _, _, KL, Hp, Hq = model.log_likelihood(X_batch, n_samples=n_samples) 115 | batch_L = T.sum(log_PX) 116 | batch_L2 = T.sum(log_PX**2) 117 | batch_KL = [T.sum(kl) for kl in KL] 118 | batch_Hp = [T.sum(hp) for hp in Hp] 119 | batch_Hq = [T.sum(hq) for hq in Hq] 120 | 121 | self.do_loglikelihood = theano.function( 122 | inputs=[batch_idx, batch_size, n_samples], 123 | outputs=[batch_L, batch_L2] + batch_KL + batch_Hp + batch_Hq, 124 | name="do_likelihood") 125 | 126 | def on_init(self, model): 127 | self.compile(model) 128 | 129 | def on_iter(self, model): 130 | n_samples = self.n_samples 131 | n_datapoints = self.dataset.n_datapoints 132 | 133 | # 134 | for K in n_samples: 135 | if K <= 10: 136 | batch_size = 100 137 | elif K <= 100: 138 | batch_size = 10 139 | else: 140 | batch_size = 1 141 | 142 | n_layers = len(model.p_layers) 143 | 144 | L = 0 145 | L2 = 0 146 | KL = np.zeros(n_layers) 147 | Hp = np.zeros(n_layers) 148 | Hq = np.zeros(n_layers) 149 | 150 | # Iterate over dataset 151 | for batch_idx in xrange(n_datapoints//batch_size): 152 | outputs = self.do_loglikelihood(batch_idx, batch_size, K) 153 | batch_L , outputs = outputs[0], outputs[1:] 154 | batch_L2, outputs = outputs[0], outputs[1:] 155 | batch_KL, outputs = outputs[:n_layers], outputs[n_layers:] 156 | batch_Hp, outputs = outputs[:n_layers], outputs[n_layers:] 157 | batch_Hq = outputs[:n_layers] 158 | 159 | L += batch_L 160 | L2 += batch_L2 161 | KL += np.array(batch_KL) 162 | Hp += np.array(batch_Hp) 163 | Hq += np.array(batch_Hq) 164 | 165 | L_se = np.sqrt((L2 - (L*L)/n_datapoints) / (n_datapoints - 1)) 166 | L_se *= 1.96 / np.sqrt(n_datapoints) 167 | 168 | L /= n_datapoints 169 | KL /= n_datapoints 170 | Hp /= n_datapoints 171 | Hq /= n_datapoints 172 | 173 | global validation_LL 174 | validation_LL = L 175 | 176 | self.logger.info("(%d datpoints, %d samples): LL=%5.2f +-%3.2f; Hp=%s" % (n_datapoints, K, L, L_se, Hp)) 177 | 178 | prefix = "spl%d." 
% K 179 | self.dlog.append_all({ 180 | prefix+"LL": L, 181 | prefix+"KL": KL, 182 | prefix+"Hp": Hp, 183 | prefix+"Hq": Hq, 184 | }) 185 | 186 | 187 | #----------------------------------------------------------------------------- 188 | class SampleFromP(Monitor): 189 | """ Draw a number of samples from the P-Model """ 190 | def __init__(self, n_samples=100, data=None): 191 | super(SampleFromP, self).__init__() 192 | 193 | self.n_samples = n_samples 194 | 195 | def compile(self, model): 196 | assert isinstance(model, Model) 197 | 198 | self.logger.info("compiling do_sample") 199 | 200 | n_samples = T.iscalar('n_samples') 201 | n_samples.tag.test_value = self.n_samples 202 | samples, log_p = model.sample_p(n_samples) 203 | 204 | try: 205 | expected_samples = [model.p_layers[0].sample_expected(samples[1])] 206 | self.support_sample_expected = True 207 | except: 208 | expected_samples = [] 209 | self.support_sample_expected = False 210 | 211 | self.do_sample = theano.function( 212 | inputs=[n_samples], 213 | outputs=[log_p] + samples + expected_samples, 214 | name="do_sample") 215 | 216 | def on_init(self, model): 217 | self.compile(model) 218 | 219 | def on_iter(self, model): 220 | n_samples = self.n_samples 221 | n_layers = len(model.p_layers) 222 | 223 | self.logger.info("SampleFromP(n_samples=%d)" % n_samples) 224 | 225 | outputs = self.do_sample(n_samples) 226 | log_p, outputs = outputs[0], outputs[1:] 227 | samples, outputs = outputs[0:n_layers], outputs[n_layers:] 228 | 229 | self.dlog.append("log_p", log_p) 230 | for l in xrange(n_layers): 231 | prefix = "L%d" % l 232 | self.dlog.append(prefix, samples[l]) 233 | 234 | if self.support_sample_expected: 235 | expected_samples = outputs[0] 236 | self.dlog.append("L0_expected", expected_samples) 237 | 238 | -------------------------------------------------------------------------------- /learning/monitor/bootstrap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import abc 6 | import logging 7 | 8 | import numpy as np 9 | 10 | import theano 11 | import theano.tensor as T 12 | 13 | from learning.dataset import DataSet 14 | from learning.model import Model 15 | from learning.monitor import Monitor 16 | from learning.models.rws import f_replicate_batch, f_logsumexp 17 | import learning.utils.datalog as datalog 18 | 19 | from theano.tensor.shared_randomstreams import RandomStreams 20 | 21 | floatX = theano.config.floatX 22 | theano_rng = RandomStreams(seed=2341) 23 | 24 | _logger = logging.getLogger(__name__) 25 | 26 | #----------------------------------------------------------------------------- 27 | def batch_bootstrap(data, bootstrap_size, n_bootstraps, bootstrap_func): 28 | """ 29 | """ 30 | def scan_func(prev_res, prev_res2, data, bootstrap_size): 31 | high = data.shape[1] 32 | idx = theano_rng.random_integers(size=(bootstrap_size,), low=0, high=(high-1)) 33 | data_ = data[:,idx] 34 | 35 | res = bootstrap_func(data_) 36 | 37 | # Reduce 38 | next_res = prev_res + T.sum(res) 39 | next_res2 = prev_res2 + T.sum(res**2) 40 | return next_res, next_res2 41 | #return T.sum(res), T.sum(res**2) 42 | 43 | result, updates = theano.scan(fn=scan_func, 44 | outputs_info=[0., 0.], 45 | non_sequences=[data, bootstrap_size], 46 | n_steps=n_bootstraps, 47 | ) 48 | 49 | res, res2 = result 50 | return res[-1], res2[-1] 51 | 52 | #----------------------------------------------------------------------------- 53 | 54 | class BootstrapLL(Monitor): 55 | """ 
Monitor the LL after each training epoch on an arbitrary 56 | test or validation data set 57 | """ 58 | def __init__(self, data, n_samples, n_bootstraps=None, name=None): 59 | super(BootstrapLL, self).__init__(name) 60 | 61 | assert isinstance(data, DataSet) 62 | self.dataset = data 63 | 64 | if isinstance(n_samples, int): 65 | n_samples = [n_samples] 66 | 67 | self.n_samples = n_samples 68 | self.max_samples = max(n_samples) 69 | 70 | # n_bootstraps 71 | if n_bootstraps is None: 72 | n_bootstraps = int(self.max_samples) 73 | self.n_bootstraps = n_bootstraps 74 | 75 | # max_samples 76 | if self.max_samples <= 10: 77 | self.batch_size = 100 78 | elif self.max_samples <= 100: 79 | self.batch_size = 10 80 | else: 81 | self.batch_size = 1 82 | 83 | 84 | def compile(self, model): 85 | assert isinstance(model, Model) 86 | self.model = model 87 | 88 | p_layers = model.p_layers 89 | q_layers = model.q_layers 90 | n_layers = len(p_layers) 91 | 92 | dataset = self.dataset 93 | X, Y = dataset.preproc(dataset.X, dataset.Y) 94 | self.X = theano.shared(X, "X") 95 | self.Y = theano.shared(Y, "Y") 96 | 97 | batch_idx = T.iscalar('batch_idx') 98 | n_bootstraps = T.iscalar('n_bootstraps') 99 | batch_size = self.batch_size 100 | n_samples = self.n_samples 101 | max_samples = self.max_samples 102 | 103 | self.logger.info("compiling do_loglikelihood") 104 | 105 | first = batch_idx*batch_size 106 | last = first + batch_size 107 | 108 | X_batch, Y_batch = dataset.late_preproc(self.X[first:last], self.Y[first:last]) 109 | X_batch = f_replicate_batch(X_batch, max_samples) 110 | samples, log_p, log_q = model.sample_q(X_batch) 111 | 112 | # Reshape and sum 113 | log_p_all = T.zeros((batch_size, max_samples)) 114 | log_q_all = T.zeros((batch_size, max_samples)) 115 | for l in xrange(n_layers): 116 | samples[l] = samples[l].reshape((batch_size, max_samples, p_layers[l].n_X)) 117 | log_p[l] = log_p[l].reshape((batch_size, max_samples)) 118 | log_q[l] = log_q[l].reshape((batch_size, max_samples)) 119 | log_p_all += log_p[l] # agregate all layers 120 | log_q_all += log_q[l] # agregate all layers 121 | log_pq = log_p_all - log_q_all 122 | 123 | def bootstrap_func(log_pq): 124 | # log_pg has shape (batch_size, samples) 125 | K = log_pq.shape[1] 126 | #K = 1 127 | log_px = f_logsumexp(log_pq, axis=1) - T.cast(T.log(K), 'float32') 128 | return log_px 129 | 130 | outputs = [] 131 | for bootstrap_size in n_samples: 132 | log_px, log_px2 = batch_bootstrap(log_pq, bootstrap_size, n_bootstraps, bootstrap_func) 133 | outputs += [log_px, log_px2] 134 | 135 | self.do_loglikelihood = theano.function( 136 | inputs=[batch_idx, n_bootstraps], 137 | outputs=outputs, 138 | name="do_likelihood") 139 | 140 | #log_PX, _, _, _, KL, Hp, Hq = model.log_likelihood(X_batch, n_samples=n_samples) 141 | #batch_log_PX = T.sum(log_PX) 142 | #batch_KL = [T.sum(kl) for kl in KL] 143 | #batch_Hp = [T.sum(hp) for hp in Hp] 144 | #batch_Hq = [T.sum(hq) for hq in Hq] 145 | 146 | def on_init(self, model): 147 | self.compile(model) 148 | 149 | def on_iter(self, model): 150 | n_samples = self.n_samples 151 | batch_size = self.batch_size 152 | n_bootstraps = self.n_bootstraps 153 | n_datapoints = self.dataset.n_datapoints 154 | 155 | n_layers = len(model.p_layers) 156 | 157 | # Iterate over dataset 158 | log_px = [0.] * len(n_samples) 159 | log_px2 = [0.] 
* len(n_samples)
160 |         for batch_idx in xrange(n_datapoints//batch_size):
161 |             outputs = self.do_loglikelihood(batch_idx, n_bootstraps)
162 | 
163 |             for i, K in enumerate(n_samples):
164 |                 log_px[i]  += outputs[0]
165 |                 log_px2[i] += outputs[1]
166 |                 outputs = outputs[2:]
167 | 
168 |         # Calculate final results and display/store
169 |         for i, K in enumerate(n_samples):
170 |             n = n_datapoints*n_bootstraps
171 |             LL = log_px[i] / n
172 |             LLse = np.sqrt( (log_px2[i] - (log_px[i]**2/n)) / (n-1))
173 |             LLse *= 1.96 / np.sqrt(n)
174 | 
175 |             self.logger.info("(%d datapoints, %d samples, %d bootstraps): LL=%5.2f +-%4.2f" % (n_datapoints, K, n_bootstraps, LL, LLse))
176 | 
177 |             prefix = "spl%d." % K
178 |             self.dlog.append_all({
179 |                 prefix+"LL": LL,
180 |                 prefix+"LL_se": LLse,
181 |             })
182 | 
183 |         global validation_LL
184 |         validation_LL = LL
185 | 
--------------------------------------------------------------------------------
/learning/preproc.py:
--------------------------------------------------------------------------------
1 | """ Preprocessors applied to datasets: binarization and column permutation. """
2 | 
3 | 
4 | from __future__ import division
5 | 
6 | import abc
7 | import logging
8 | import cPickle as pickle
9 | import gzip
10 | import h5py
11 | 
12 | import numpy as np
13 | 
14 | import theano
15 | import theano.tensor as T
16 | from theano.tensor.shared_randomstreams import RandomStreams
17 | 
18 | import utils.datalog as dlog
19 | 
20 | _logger = logging.getLogger(__name__)
21 | 
22 | 
23 | floatX = theano.config.floatX
24 | 
25 | theano_rng = RandomStreams(seed=2341)
26 | 
27 | #-----------------------------------------------------------------------------
28 | # Base class for preprocessors
29 | class Preproc(object):
30 |     __metaclass__ = abc.ABCMeta
31 | 
32 |     def preproc(self, X, Y):
33 |         """ Preprocess data and return an (X, Y) tuple.
34 | 
35 |         Parameters
36 |         ----------
37 |         X, Y : ndarray
38 | 
39 |         Returns
40 |         -------
41 |         X, Y : ndarray
42 |         """
43 |         return X, Y
44 | 
45 |     def late_preproc(self, X, Y):
46 |         """ Preprocess data and return an (X, Y) tuple.
47 | 
48 |         Parameters
49 |         ----------
50 |         X, Y : theano.tensor
51 | 
52 |         Returns
53 |         -------
54 |         X, Y : theano.tensor
55 |         """
56 |         return X, Y
57 | 
58 | 
59 | #-----------------------------------------------------------------------------
60 | class Binarize(Preproc):
61 |     def __init__(self, threshold=None, late=True):
62 |         """
63 |         Binarize data in X; assumes that the input data satisfies 0 <= X <= 1.
64 | 
65 |         Parameters
66 |         ----------
67 |         threshold : {float, None}
68 |             Threshold at or above which an input is considered 1; if None, the value in
69 |             X determines the probability of the binarized element being a 1.
70 | 
71 |         late : bool
72 |             Should binarization be performed statically or each time the data
73 |             is used?
74 | """ 75 | if late and not threshold is None: 76 | _logger.warning("Using a static threshold and late preprocessing does not make much sense; forcing late=False") 77 | late = False 78 | 79 | self.threshold = threshold 80 | self.late = late 81 | 82 | def preproc(self, X, Y): 83 | """ Binarize X """ 84 | if self.late: # skip static processing when late == True 85 | return X, Y 86 | 87 | assert (Y >= 0.0).all() 88 | assert (X <= 1.0).all() 89 | 90 | threshold = self.threshold 91 | if threshold is None: 92 | threshold = np.random.uniform(size=X.shape) 93 | 94 | X = (X >= threshold).astype(floatX) 95 | return X, Y 96 | 97 | def late_preproc(self, X, Y): 98 | """ Binarize X """ 99 | if not self.late: 100 | return X, Y 101 | 102 | threshold = theano_rng.uniform(size=X.shape, ndim=2, low=0.0, high=1.0) 103 | X = (X >= threshold) 104 | return X, Y 105 | 106 | #----------------------------------------------------------------------------- 107 | class PermuteColumns(Preproc): 108 | def __init__(self): 109 | """ 110 | Create a random permutation and permute each feature-vector X of each 111 | datapoint with it. 112 | """ 113 | self.dlog = dlog.getLogger("preproc.permute_columns") 114 | self.permutation = None 115 | 116 | def set_permutation(self, permutation): 117 | self.permutation = permutation 118 | 119 | self.dlog.append("permutation", self.permutation) 120 | self.dlog.append("permutation_inv", np.argsort(self.permutation)) 121 | 122 | def preproc(self, X, Y): 123 | """ Permute X """ 124 | _, n_vis = X.shape 125 | 126 | if self.permutation is None: 127 | permutation = np.random.permutation(n_vis) 128 | self.set_permutation(permutation) 129 | 130 | assert self.permutation.size == n_vis 131 | 132 | X = X[:, self.permutation] 133 | 134 | return X, Y 135 | -------------------------------------------------------------------------------- /learning/termination.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import sys 6 | 7 | import abc 8 | import logging 9 | 10 | import numpy as np 11 | 12 | import theano 13 | import theano.tensor as T 14 | 15 | import monitor 16 | 17 | _logger = logging.getLogger("termination") 18 | 19 | class Termination(object): 20 | __metaclass__ = abc.ABCMeta 21 | 22 | @abc.abstractmethod 23 | def reset(self): 24 | pass 25 | 26 | @abc.abstractmethod 27 | def continue_learning(self, L): 28 | pass 29 | 30 | #----------------------------------------------------------------------------- 31 | class LogLikelihoodIncrease(Termination): 32 | def __init__(self, min_increase=0.001, lookahead=5, max_epochs=1000, min_epochs=10): 33 | super(LogLikelihoodIncrease, self).__init__() 34 | 35 | self.min_epochs = min_epochs 36 | self.max_epochs = max_epochs 37 | self.lookahead = lookahead 38 | self.min_increase = min_increase 39 | 40 | self.reset() 41 | 42 | def reset(self): 43 | self.epochs = 0 44 | self.L = [-np.inf]*self.lookahead 45 | 46 | def continue_learning(self, L): 47 | self.epochs += 1 48 | self.L.append(L) 49 | 50 | best_in_lookahead = np.array(self.L[-self.lookahead:]).max() 51 | best_before_lookahead = np.array(self.L[:-self.lookahead]).max() 52 | increase = (best_in_lookahead-best_before_lookahead)/(np.abs(best_before_lookahead)) 53 | 54 | if np.isnan(increase): 55 | increase = +np.inf 56 | 57 | _logger.info("LL increased by %f %%" % (100*increase)) 58 | 59 | cont = (self.epochs < self.min_epochs) or (increase >= self.min_increase) 60 | cont = cont and (self.epochs <= 
self.max_epochs) 61 | return cont 62 | 63 | #----------------------------------------------------------------------------- 64 | class EarlyStopping(Termination): 65 | def __init__(self, lookahead=10, min_epochs=10, max_epochs=1000): 66 | super(EarlyStopping, self).__init__() 67 | 68 | self.lookahead = lookahead 69 | self.min_epochs = min_epochs 70 | self.max_epochs = max_epochs 71 | self.epochs = 0 72 | self.fails = 0 73 | self.best_LL = -np.inf 74 | 75 | def reset(self): 76 | self.epochs = 0 77 | self.fails = 0 78 | self.best_LL = -np.inf 79 | 80 | def continue_learning(self, L): 81 | self.epochs += 1 82 | L = monitor.validation_LL 83 | assert isinstance(L, float) 84 | 85 | if self.epochs <= self.min_epochs: 86 | self.fails = 0 87 | 88 | increase = (L-self.best_LL)/(np.abs(self.best_LL)) 89 | if L > self.best_LL: 90 | self.best_LL = L 91 | self.fails = 0 92 | _logger.info("Validation LL=%5.2f (increased by %4.2f %%)" % (L, 100*increase)) 93 | else: 94 | self.fails += 1 95 | _logger.info("Validation LL=%5.2f stagnated (%dth)" % (L, self.fails)) 96 | 97 | if self.epochs > self.max_epochs: 98 | return False 99 | 100 | return self.fails < self.lookahead 101 | 102 | -------------------------------------------------------------------------------- /learning/tests/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | import theano 5 | import theano.tensor as T 6 | 7 | theano.config.exception_verbosity = 'high' 8 | theano.config.compute_test_value = 'warn' 9 | 10 | floatX = theano.config.floatX 11 | 12 | 13 | def iscalar(name=None): 14 | Av = 1 15 | A = T.iscalar(name=name) 16 | A.tag.test_value = Av 17 | return A, Av 18 | 19 | def fscalar(name=None): 20 | Av = 1.23 21 | A = T.fscalar(name=name) 22 | A.tag.test_value = Av 23 | return A, Av 24 | 25 | def ivector(size, name=None): 26 | Av = np.zeros(size, dtype=np.int) 27 | A = T.iscalar(name=name) 28 | A.tag.test_value = Av 29 | return A, Av 30 | 31 | def fvector(size, name=None): 32 | Av = np.zeros(size, dtype=floatX) 33 | A = T.fscalar(name=name) 34 | A.tag.test_value = Av 35 | return A, Av 36 | 37 | def imatrix(shape, name=None): 38 | Av = np.zeros(shape, dtype=np.int) 39 | A = T.imatrix(name=name) 40 | A.tag.test_value = Av 41 | return A, Av 42 | 43 | def fmatrix(shape, name=None): 44 | Av = np.zeros(shape, dtype=floatX) 45 | A = T.fmatrix(name=name) 46 | A.tag.test_value = Av 47 | return A, Av 48 | 49 | -------------------------------------------------------------------------------- /learning/tests/test_hyperbase.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | # Unit Under Test 6 | from learning.hyperbase import * 7 | 8 | class ExampleThing(HyperBase): 9 | def __init__(self, **hyper_params): 10 | super(ExampleThing, self).__init__(**hyper_params) 11 | 12 | self.register_hyper_param("hyper_a") 13 | self.register_hyper_param("hyper_b", default=23) 14 | self.register_hyper_param("hyper_c", default=lambda: 2*21, help="help") 15 | 16 | self.set_hyper_params(hyper_params) 17 | 18 | 19 | def test_constructor(): 20 | model = ExampleThing(hyper_a=0) 21 | 22 | hyper_a = model.get_hyper_param('hyper_a') 23 | assert hyper_a == 0 24 | 25 | def test_hyper_defaults(): 26 | model = ExampleThing() 27 | 28 | assert model.get_hyper_param('hyper_b') == 23, model.get_hyper_param('hyper_b') 29 | assert model.get_hyper_param('hyper_c') == 42, model.get_hyper_param('hyper_c') 30 | 31 | def 
test_hyper_setget(): 32 | model = ExampleThing() 33 | 34 | model.set_hyper_param('hyper_b', 1) 35 | model.set_hyper_param('hyper_c', 2) 36 | assert model.get_hyper_param('hyper_b') == 1 37 | assert model.get_hyper_params(['hyper_b', 'hyper_c']) == [1, 2] 38 | 39 | model.set_hyper_params({'hyper_b': 23, 'hyper_c': 42}) 40 | assert model.get_hyper_param('hyper_b') == 23 41 | assert model.get_hyper_params(['hyper_b', 'hyper_c']) == [23, 42] 42 | 43 | def test_hyper_attr(): 44 | model = ExampleThing() 45 | 46 | 47 | assert model.hyper_b == 23 48 | assert model.hyper_c == 42 49 | 50 | model.hyper_a = 11 51 | assert model.hyper_a == 11 52 | 53 | -------------------------------------------------------------------------------- /learning/tests/test_monitor.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | import theano 6 | import theano.tensor as T 7 | 8 | from learning.tests.toys import * 9 | 10 | # Unit Under Test 11 | from learning.monitor import * 12 | from learning.monitor.bootstrap import * 13 | 14 | def test_MonitorLL(): 15 | dataset = get_toy_data() 16 | model = get_toy_model() 17 | n_samples = (1, 5, 25, 100, 500) 18 | monitor = MonitorLL(dataset, n_samples) 19 | monitor.compile(model) 20 | 21 | 22 | def test_BootstrapLL(): 23 | dataset = get_toy_data() 24 | model = get_toy_model() 25 | n_samples = (1, 5, 25, 100, 500) 26 | monitor = BootstrapLL(dataset, n_samples) 27 | monitor.compile(model) 28 | 29 | 30 | -------------------------------------------------------------------------------- /learning/tests/test_termination.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | import theano 6 | import theano.tensor as T 7 | 8 | # Unit Under Test 9 | from learning.termination import * 10 | 11 | def test_ll_maxepochs(): 12 | termination = LogLikelihoodIncrease(min_increase=0.0, max_epochs=10) 13 | 14 | L = -100. 15 | epoch = 0 16 | while termination.continue_learning(L) and (epoch < 12): 17 | epoch += 1 18 | L += 1. 
19 | 20 | print "Epochs perfrmed: ", epoch 21 | assert epoch == 10 22 | 23 | -------------------------------------------------------------------------------- /learning/tests/test_training.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from learning.training import Trainer 6 | from learning.tests.toys import * 7 | 8 | def test_complete(): 9 | t = Trainer( 10 | dataset=get_toy_data(), 11 | model=get_toy_model(), 12 | ) 13 | 14 | t.load_data() 15 | t.compile() 16 | t.perform_epoch() 17 | 18 | -------------------------------------------------------------------------------- /learning/tests/testing.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | import theano 5 | import theano.tensor as T 6 | 7 | theano.config.exception_verbosity = 'high' 8 | theano.config.compute_test_value = 'warn' 9 | 10 | floatX = theano.config.floatX 11 | 12 | 13 | def iscalar(name=None): 14 | Av = 1 15 | A = T.iscalar(name=name) 16 | A.tag.test_value = Av 17 | return A, Av 18 | 19 | def fscalar(name=None): 20 | Av = 1.23 21 | A = T.fscalar(name=name) 22 | A.tag.test_value = Av 23 | return A, Av 24 | 25 | def ivector(size, name=None): 26 | Av = np.zeros(size, dtype=np.int) 27 | A = T.iscalar(name=name) 28 | A.tag.test_value = Av 29 | return A, Av 30 | 31 | def fvector(size, name=None): 32 | Av = np.zeros(size, dtype=floatX) 33 | A = T.fscalar(name=name) 34 | A.tag.test_value = Av 35 | return A, Av 36 | 37 | def imatrix(shape, name=None): 38 | Av = np.zeros(shape, dtype=np.int) 39 | A = T.imatrix(name=name) 40 | A.tag.test_value = Av 41 | return A, Av 42 | 43 | def fmatrix(shape, name=None): 44 | Av = np.zeros(shape, dtype=floatX) 45 | A = T.fmatrix(name=name) 46 | A.tag.test_value = Av 47 | return A, Av 48 | 49 | -------------------------------------------------------------------------------- /learning/tests/toys.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | 4 | from __future__ import division 5 | 6 | 7 | def get_toy_data(): 8 | from learning.dataset import BarsData 9 | return BarsData(which_set="train", n_datapoints=500) 10 | 11 | def get_toy_model(): 12 | from learning.models.rws import LayerStack 13 | from learning.models.sbn import SBN, SBNTop 14 | 15 | p_layers = [ 16 | SBN( 17 | n_X=25, 18 | n_Y=10 19 | ), 20 | SBNTop( 21 | n_X=10, 22 | ) 23 | ] 24 | q_layers = [ 25 | SBN( 26 | n_X=10, 27 | n_Y=25, 28 | ) 29 | ] 30 | model = LayerStack( 31 | p_layers=p_layers, 32 | q_layers=q_layers, 33 | ) 34 | return model 35 | 36 | -------------------------------------------------------------------------------- /learning/training.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import sys 6 | 7 | import abc 8 | import logging 9 | from six import iteritems 10 | from collections import OrderedDict 11 | from time import time 12 | 13 | import numpy as np 14 | import progressbar as pbar 15 | 16 | import theano 17 | import theano.tensor as T 18 | from theano.tensor.shared_randomstreams import RandomStreams 19 | 20 | import utils.datalog as dlog 21 | 22 | from hyperbase import HyperBase 23 | from termination import Termination 24 | from dataset import DataSet 25 | from model import Model 26 | 27 | theano_rng = RandomStreams(seed=2341) 28 | floatX = theano.config.floatX 29 | 30 | 
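# Usage sketch (illustrative; it mirrors learning/tests/test_training.py and the
# mnist/param-*.py files rather than adding any new API):
#
#     trainer = Trainer(model=model, dataset=dataset, termination=EarlyStopping())
#     trainer.load_data()           # move preprocessed training data into shared variables
#     trainer.compile()             # build the Theano functions do_step / do_sleep_step
#     trainer.perform_learning()    # loop over epochs until the Termination object stops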
#============================================================================= 31 | # Trainer base class 32 | class TrainerBase(HyperBase): 33 | __metaclass__ = abc.ABCMeta 34 | 35 | def __init__(self, **hyper_params): 36 | super(TrainerBase, self).__init__() 37 | 38 | self.logger = logging.getLogger("trainer") 39 | self.dlog = dlog.getLogger("trainer") 40 | 41 | self.step = 0 42 | 43 | self.register_hyper_param("model", default=None, help="") 44 | self.register_hyper_param("dataset", default=None, help="") 45 | self.register_hyper_param("termination", default=None, help="") 46 | self.register_hyper_param("final_monitors", default=[], help="") 47 | self.register_hyper_param("epoch_monitors", default=[], help="") 48 | self.register_hyper_param("step_monitors", default=[], help="") 49 | self.register_hyper_param("first_epoch_step_monitors", default=[], help="") 50 | self.register_hyper_param("monitor_nth_step", default=1, help="") 51 | 52 | self.shvar = {} 53 | self.shvar_update_fnc = {} 54 | 55 | self.set_hyper_params(hyper_params) 56 | 57 | def mk_shvar(self, name, init, update_fnc=None): 58 | if update_fnc is None: 59 | update_fnc = lambda self: self.get_hyper_param(name) 60 | value = init 61 | if isinstance(value, np.ndarray): 62 | if (value.dtype == np.float32) or (value.dtype == np.float64): 63 | value = value.astype(floatX) 64 | elif isinstance(value, float): 65 | value = np.asarray(value, dtype=floatX) 66 | elif isinstance(value, int): 67 | pass 68 | else: 69 | raise ValueError('Unknown datatype') 70 | self.shvar[name] = theano.shared(value, name=name, allow_downcast=True) 71 | self.shvar_update_fnc[name] = update_fnc 72 | 73 | def update_shvars(self): 74 | for key, shvar in iteritems(self.shvar): 75 | value = self.shvar_update_fnc[key](self) 76 | if isinstance(value, np.ndarray): 77 | if (value.dtype == np.float32) or (value.dtype == np.float64): 78 | value = value.astype(floatX) 79 | shvar.set_value(value) 80 | 81 | def load_data(self): 82 | dataset = self.dataset 83 | assert isinstance(dataset, DataSet) 84 | 85 | n_datapoints = dataset.n_datapoints 86 | assert n_datapoints == dataset.X.shape[0] 87 | 88 | X, Y = dataset.preproc(dataset.X, dataset.Y) 89 | self.train_X = theano.shared(X, "train_X") 90 | self.train_Y = theano.shared(Y, "train_Y") 91 | 92 | self.train_perm = theano.shared(np.random.permutation(n_datapoints)) 93 | 94 | def shuffle_train_data(self): 95 | n_datapoints = self.dataset.n_datapoints 96 | self.train_perm.set_value(np.random.permutation(n_datapoints)) 97 | 98 | @abc.abstractmethod 99 | def compile(self): 100 | pass 101 | 102 | 103 | #============================================================================= 104 | # BatchedSGD trainer 105 | class Trainer(TrainerBase): 106 | def __init__(self, **hyper_params): 107 | super(Trainer, self).__init__() 108 | 109 | self.register_hyper_param("learning_rate_p", default=1e-2, help="Learning rate") 110 | self.register_hyper_param("learning_rate_q", default=1e-2, help="Learning rate") 111 | self.register_hyper_param("learning_rate_s", default=1e-2, help="Learning rate") 112 | self.register_hyper_param("lr_decay", default=1.0, help="Learning rate decay per epoch") 113 | self.register_hyper_param("beta", default=0.95, help="Momentum factor") 114 | self.register_hyper_param("weight_decay", default=0.0, help="Weight decay") 115 | self.register_hyper_param("batch_size", default=100, help="") 116 | self.register_hyper_param("sleep_interleave", default=5, help="") 117 | self.register_hyper_param("layer_discount",
default=1.0, help="Reduce LR for each successive layer by this factor") 118 | self.register_hyper_param("n_samples", default=10, help="No. samples used during training") 119 | 120 | self.mk_shvar('n_samples', 100) 121 | self.mk_shvar('batch_size', 100) 122 | self.mk_shvar('permutation', np.zeros(10), lambda self: np.zeros(10)) 123 | self.mk_shvar('beta', 1.0) 124 | self.mk_shvar('lr_p', np.zeros(2), lambda self: self.calc_learning_rates(self.learning_rate_p)) 125 | self.mk_shvar('lr_q', np.zeros(2), lambda self: self.calc_learning_rates(self.learning_rate_q)) 126 | self.mk_shvar('lr_s', np.zeros(2), lambda self: self.calc_learning_rates(self.learning_rate_s)) 127 | self.mk_shvar('weight_decay', 0.0) 128 | 129 | self.set_hyper_params(hyper_params) 130 | 131 | def calc_learning_rates(self, base_rate): 132 | n_layers = len(self.model.p_layers) 133 | rng = np.arange(n_layers) 134 | return base_rate * self.layer_discount ** rng 135 | 136 | def compile(self): 137 | """ Theano-compile neccessary functions """ 138 | model = self.model 139 | 140 | assert isinstance(model, Model) 141 | 142 | model.setup() 143 | self.update_shvars() 144 | 145 | #--------------------------------------------------------------------- 146 | self.logger.info("compiling do_step") 147 | 148 | lr_p = self.shvar['lr_p'] 149 | lr_q = self.shvar['lr_q'] 150 | beta = self.shvar['beta'] 151 | weight_decay = self.shvar['weight_decay'] 152 | batch_size = self.shvar['batch_size'] 153 | n_samples = self.shvar['n_samples'] 154 | 155 | batch_idx = T.iscalar('batch_idx') 156 | batch_idx.tag.test_value = 0 157 | 158 | first = batch_idx*batch_size 159 | last = first + batch_size 160 | X_batch = self.train_X[self.train_perm[first:last]] 161 | #Y_batch = self.train_Y[self.train_perm[first:last]] 162 | 163 | X_batch, _ = self.dataset.late_preproc(X_batch, None) 164 | 165 | batch_log_PX, gradients = model.get_gradients( 166 | X_batch, None, 167 | lr_p=lr_p, lr_q=lr_q, 168 | n_samples=n_samples 169 | ) 170 | batch_log_PX = batch_log_PX / batch_size 171 | 172 | # Initialize momentum variables 173 | gradients_old = {} 174 | for shvar, value in iteritems(gradients): 175 | name = value.name 176 | gradients_old[shvar] = theano.shared(shvar.get_value()*0., name=("%s_old"%name)) 177 | 178 | updates = OrderedDict() 179 | for shvar, value in iteritems(gradients): 180 | gradient_old = gradients_old[shvar] 181 | 182 | dTheta = T.switch(T.isnan(value), 183 | gradient_old, 184 | beta*gradient_old + (1.-beta)*value 185 | ) 186 | 187 | updates[gradient_old] = dTheta 188 | updates[shvar] = shvar + dTheta - weight_decay*(shvar+dTheta) 189 | 190 | self.do_step = theano.function( 191 | inputs=[batch_idx], 192 | outputs=batch_log_PX, #, Lp, Lq, w], 193 | updates=updates, 194 | name="do_step") 195 | 196 | #--------------------------------------------------------------------- 197 | self.logger.info("compiling do_sleep_step") 198 | n_dreams = T.iscalar('n_dreams') 199 | n_dreams.tag.test_value = 10 200 | 201 | beta = self.shvar['beta'] 202 | lr_s = self.shvar['lr_s'] 203 | 204 | log_PX, gradients = model.get_sleep_gradients(lr_s, n_dreams) 205 | log_PX = T.sum(log_PX) 206 | 207 | updates = OrderedDict() 208 | for shvar, value in iteritems(gradients): 209 | gradient_old = gradients_old[shvar] 210 | 211 | dTheta = T.switch(T.isnan(value), 212 | gradient_old, 213 | beta*gradient_old + (1.-beta)*value 214 | ) 215 | 216 | updates[gradient_old] = dTheta 217 | updates[shvar] = shvar + dTheta - weight_decay*(shvar+dTheta) 218 | 219 | self.do_sleep_step = theano.function( 
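# Sleep phase: do_sleep_step trains on n_dreams samples drawn from the model itself (see model.get_sleep_gradients above); perform_step() calls it every `sleep_interleave` parameter updates when learning_rate_s > 0.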
220 | inputs=[n_dreams], 221 | outputs=log_PX, 222 | updates=updates, 223 | name="do_sleep_step") 224 | 225 | def perform_learning(self): 226 | self.update_shvars() 227 | 228 | termination = self.termination 229 | model = self.model 230 | 231 | assert isinstance(termination, Termination) 232 | assert isinstance(model, Model) 233 | 234 | # Print information 235 | n_datapoints = self.dataset.n_datapoints 236 | n_batches = n_datapoints // self.batch_size 237 | 238 | self.logger.info("Dataset contains %d datapoints in %d mini-batches (%d datapoints per mini-batch)" % 239 | (n_datapoints, n_batches, self.batch_size)) 240 | self.logger.info("Using %d training samples" % self.n_samples) 241 | self.logger.info("lr_p=%3.1e, lr_q=%3.1e, lr_s=%3.1e, lr_decay=%5.1e layer_discount=%4.2f" % 242 | (self.learning_rate_p, self.learning_rate_q, self.learning_rate_s, self.lr_decay, self.layer_discount)) 243 | 244 | epoch = 0 245 | # Perform first epoch 246 | saved_step_monitors = self.step_monitors 247 | self.step_monitors = self.first_epoch_step_monitors + self.step_monitors 248 | 249 | for m in self.step_monitors + self.epoch_monitors: 250 | m.on_init(model) 251 | m.on_iter(model) 252 | 253 | 254 | self.logger.info("Starting epoch 0...") 255 | L = self.perform_epoch() 256 | self.step_monitors = saved_step_monitors 257 | 258 | # remaining epochs... 259 | termination.reset() 260 | while termination.continue_learning(L): 261 | epoch = epoch + 1 262 | self.logger.info("Starting epoch %d..." % epoch) 263 | L = self.perform_epoch() 264 | 265 | # run final_monitors after lerning converged... 266 | self.logger.info("Calling final_monitors...") 267 | for m in self.final_monitors: 268 | m.on_init(model) 269 | m.on_iter(model) 270 | 271 | #----------------------------------------------------------------------- 272 | def perform_epoch(self): 273 | n_datapoints = self.dataset.n_datapoints 274 | batch_size = self.batch_size 275 | n_batches = n_datapoints // batch_size 276 | epoch = self.step // n_batches 277 | LL_epoch = 0 278 | 279 | self.update_shvars() 280 | self.shuffle_train_data() 281 | 282 | # Update learning rated 283 | self.shvar['lr_p'].set_value((self.calc_learning_rates(self.learning_rate_p / self.lr_decay**epoch)).astype(floatX)) 284 | self.shvar['lr_q'].set_value((self.calc_learning_rates(self.learning_rate_q / self.lr_decay**epoch)).astype(floatX)) 285 | self.shvar['lr_s'].set_value((self.calc_learning_rates(self.learning_rate_s / self.lr_decay**epoch)).astype(floatX)) 286 | 287 | widgets = ["Epoch %d, step "%(epoch+1), pbar.Counter(), ' (', pbar.Percentage(), ') ', pbar.Bar(), ' ', pbar.Timer(), ' ', pbar.ETA()] 288 | bar = pbar.ProgressBar(widgets=widgets, maxval=n_batches).start() 289 | 290 | t0 = time() 291 | while True: 292 | LL = self.perform_step(update=False) 293 | LL_epoch += LL 294 | 295 | batch_idx = self.step % n_batches 296 | bar.update(batch_idx) 297 | 298 | if self.step % n_batches == 0: 299 | break 300 | t = time()-t0 301 | bar.finish() 302 | 303 | LL_epoch /= n_batches 304 | 305 | self.logger.info("Completed epoch %d in %.1fs (%.1fms/step). Calling epoch_monitors..." 
% (epoch+1, t, t/n_batches*1000)) 306 | for m in self.epoch_monitors: 307 | m.on_iter(self.model) 308 | 309 | self.dlog.append_all({ 310 | 'timing.epoch': t, 311 | 'timing.step': t/n_batches 312 | }) 313 | return LL_epoch 314 | 315 | def perform_step(self, update=True): 316 | n_batches = self.dataset.n_datapoints // self.batch_size 317 | batch_idx = self.step % n_batches 318 | 319 | # Do we need to update shared variables/parameters? 320 | if update: 321 | self.update_shvars() 322 | 323 | LL = self.do_step(batch_idx) 324 | 325 | # 326 | self.step = self.step + 1 327 | epoch = self.step // n_batches 328 | batch_idx = self.step % n_batches 329 | 330 | self.dlog.append("pstep_L", LL) 331 | 332 | if (self.step % self.sleep_interleave == 0) and (self.learning_rate_s > 0.0): 333 | self.logger.debug("Epoch %d, step %d (%d steps total): Performing sleep cycle\x1b[K" % (epoch+1, batch_idx, self.step)) 334 | n_dreams = self.sleep_interleave * self.batch_size 335 | sleep_LL = self.do_sleep_step(n_dreams) 336 | else: 337 | sleep_LL = np.nan 338 | self.dlog.append("psleep_L", sleep_LL) 339 | 340 | if (self.step % self.monitor_nth_step == 0) and (len(self.step_monitors) > 0): 341 | self.logger.info("Epoch %d, step %d (%d steps total): Calling step_monitors...\x1b[K" % (epoch+1, batch_idx, self.step)) 342 | for m in self.step_monitors: 343 | m.on_iter(self.model) 344 | 345 | return LL 346 | -------------------------------------------------------------------------------- /learning/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jbornschein/reweighted-ws/e96414719d09ab4941dc77bab4cf4847acc6a8e7/learning/utils/__init__.py -------------------------------------------------------------------------------- /learning/utils/autotable.py: -------------------------------------------------------------------------------- 1 | # 2 | # Author: Jorg Bornschein , ) 14 | * in pure C or C++: libhdf5 (http://www.hdfgroup.org/HDF5/) 15 | 16 | Basic example:: 17 | 18 | import numpy as np 19 | from pulp.utils import autotable 20 | 21 | tbl = autotable.AutoTable('~/testhdf.h5') 22 | for t in range(10): 23 | tbl.append('t', t) 24 | tbl.append('D', np.random.randn(20)) 25 | 26 | This code creates the file :file:`~/testhdf.h5` with two tables, each having 10 27 | rows: The table *t* will store a single integer within each row, where the 28 | table *D* will store a 20 element vector with gaussian distributed random 29 | numbers in each row. 30 | """ 31 | 32 | import numpy as np 33 | import h5py 34 | 35 | class AutoTable: 36 | """Store data into HDF5 files""" 37 | def __init__(self, fname=None, compression_level=1): 38 | """ 39 | Create a new autotable object which will write data into a file called 40 | fname. 41 | 42 | If fname is not specified (or is None), fname will be derived from 43 | sys.argv[0] by striping the extension and adding ".h5". As a result, the 44 | file will be named like the creating program. 45 | 46 | Compression specifies the compression_level that should be applied when storing data. 47 | 48 | .. note:: If a file named fname existed previously, its content will be deleted! 49 | """ 50 | self.warnings = True 51 | if fname is None: 52 | fname = self._guess_fname() 53 | self.h5 = h5py.File(fname, "w") 54 | self.compression_level = compression_level 55 | self.tables = {} 56 | 57 | def close(self): 58 | """ 59 | Close the HDF file behind this AutoTable instance. 
60 | """ 61 | self.h5.close() 62 | 63 | def append(self, name, value): 64 | """ 65 | Append the dataset *values* into a table called *name*. If a specified 66 | table name does not exist, a new table will be created. 67 | 68 | Example:. 69 | 70 | tbl.append("T", temp) 71 | tbl.append("image", np.zeros((256,256)) ) 72 | """ 73 | if type(value)==str: 74 | return self._appendstr(name, value) 75 | 76 | if np.isscalar(value): 77 | value = np.asarray(value) 78 | 79 | if not isinstance(value, np.ndarray): 80 | raise TypeError("Don't know how to handle values of type '%s'", type(value)) 81 | 82 | # Check if we need to create a new table 83 | if not self.tables.has_key(name): 84 | self._create_table(name, value) 85 | 86 | table = self.tables[name] 87 | current_shape = table.shape 88 | new_shape = (current_shape[0]+1, ) + current_shape[1:] 89 | if new_shape[1:] != value.shape: 90 | raise TypeError('Trying to append shape "%s" for %s shaped field "%s"' % (value.shape, current_shape[1:], name)) 91 | try: 92 | table.resize(new_shape) 93 | table[-1] = value 94 | except ValueError: 95 | raise TypeError('Wrong datatype "%s" for "%s" field' % (value.dtype, name)) 96 | self.h5.flush() 97 | 98 | def append_all(self, valdict): 99 | """ 100 | Append the given data to the table. 101 | 102 | *valdict* must be a dictionary containig key value pairs, where key 103 | is a string and specifies the table name. The corresponding value must be 104 | an arbitrary numpy compatible value. If a specified table name does not 105 | exist, a a new table will be created. 106 | 107 | Example:: 108 | 109 | tbl.append( { 't':0.23 , 'D':np.zeros((10,10)) ) 110 | """ 111 | for name, value in valdict.items(): 112 | self.append(name, value) 113 | 114 | def _delete_table(self, name): 115 | """ 116 | Delete a node from the h5-table together with all dictionary entries 117 | that has been created with the node. 118 | """ 119 | del self.tables[name] 120 | raise NotImplemented() 121 | 122 | def _create_table(self, name, example): 123 | """ 124 | Create a new table within the HDF file, where the tables shape and its 125 | datatype are determined by *example*. 126 | """ 127 | if isinstance(example, np.ndarray): 128 | h5_shape = (0,) + example.shape 129 | h5_maxshape = (None,) + example.shape 130 | 131 | h5 = self.h5 132 | self.tables[name] = h5.create_dataset(name, h5_shape, dtype=example.dtype, maxshape=h5_maxshape) 133 | else: 134 | raise NotImplemented() 135 | 136 | def _guess_fname(self): 137 | """ 138 | Derive an fname from sys.argv[0] by striping the extension and adding ".h5". 139 | As a result, the table will be named just like the executing programm. 
140 | """ 141 | import sys 142 | import os.path as path 143 | 144 | base, _ = path.splitext(sys.argv[0]) 145 | return base+".h5" 146 | 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /learning/utils/datalog.py: -------------------------------------------------------------------------------- 1 | # 2 | # Author: Jorg Bornschein hanlders lookups 244 | 245 | def _lookup(self, tblname): 246 | """ Return a list of handlers to be used for tblname """ 247 | if tblname in self._lookup_cache: 248 | return self._lookup_cache[tblname] 249 | 250 | handlers = [] 251 | for (a_tblname, a_handler) in self.policy: 252 | if a_tblname == tblname or a_tblname == "*": # XXX wildcard matching XXX 253 | handlers.append(a_handler) 254 | self._lookup_cache[tblname] = handlers 255 | return handlers 256 | 257 | def progress(self, message, completed=None): 258 | """ Append some progress message """ 259 | if self.comm.rank != 0: 260 | return 261 | 262 | if completed == None: 263 | print "[%s] %s" % (strftime("%H:%M:%S"), message) 264 | else: 265 | totlen = 65-len(message) 266 | barlen = int(totlen*completed) 267 | spacelen = totlen-barlen 268 | print "[%s] %s [%s%s]" % (strftime("%H:%M:%S"), message, "*"*barlen, "-"*spacelen) 269 | 270 | def append(self, tblname, value): 271 | """ Append the given value and call all the configured DataHandlers.""" 272 | if self.comm.rank != 0: 273 | return 274 | 275 | for h in self._lookup(tblname): 276 | h.append(tblname, value) 277 | 278 | def append_all(self, valdict): 279 | """ 280 | Append the given values and call all the consigured DataHandlers 281 | 282 | *valdict* is expected to be a dictionary of key-value pairs. 283 | """ 284 | if self.comm.rank != 0: 285 | return 286 | 287 | # Construct a set with all handlers to be called 288 | all_handlers = set() 289 | for tblname, val in valdict.items(): 290 | hl = self._lookup(tblname) 291 | all_handlers = all_handlers.union(hl) 292 | 293 | # Call all handlers but create a personalized version 294 | # of valdict with oble the values this particular handler 295 | # is interested in 296 | for handler in all_handlers: 297 | argdict = {} 298 | for tblname, val in valdict.items(): 299 | hl = self._lookup(tblname) 300 | 301 | if handler in hl: 302 | argdict[tblname] = val 303 | 304 | handler.append_all(argdict) 305 | 306 | def ignored(self, tblname): 307 | """ 308 | Returns True, then the given *name* is neither stored onto disk, 309 | nor visualized or triggered upon. When *ignored('something')* returns 310 | True, it will make no difference if you *append* a value to table *tblname* or not. 311 | 312 | This can be especially useful when running a (MPI-)parallel programs and collecting 313 | the value to be logged is an expensive operation. 314 | 315 | Example:: 316 | 317 | if not dlog.ignored('summed_data'): 318 | summed_data = np.empty_like(data) 319 | mpicomm.Reduce((data, MPI.DOUBLE), (summed_data, MPI_DOUBLE), MPI.SUM) 320 | dlog.append('summed_data', summed_data) 321 | 322 | [..] 
323 | """ 324 | return self._lookup(tblname) == [] 325 | 326 | def set_handler(self, tblname, handler_class, *args, **kargs): 327 | """ Set the specifies handler for all data stored under the name *tblname* """ 328 | if self.comm.rank != 0: 329 | return 330 | 331 | if not issubclass(handler_class, DataHandler): 332 | raise TypeError("handler_class must be a subclass of DataHandler ") 333 | 334 | # if not, instantiate it now 335 | handler = handler_class(*args, **kargs) # instantiate handler 336 | handler.register(tblname) 337 | 338 | if isinstance(tblname, str): 339 | self.policy.append( (tblname, handler) ) # append to policy 340 | elif hasattr(tblname, '__iter__'): 341 | for t in tblname: 342 | self.policy.append( (t, handler) ) # append to policy 343 | else: 344 | raise TypeError('Table-name must be a string (or a list of strings)') 345 | return handler 346 | 347 | def remove_handler(self, handler): 348 | """ Remove specified handler so that data is no longer stored there. """ 349 | if self.comm.rank != 0: 350 | return 351 | 352 | if isinstance(handler, DataHandler): 353 | for a_tblname, a_handler in self.policy[:]: 354 | if a_handler == handler: 355 | self.policy.remove((a_tblname, a_handler)) 356 | handler.close() 357 | self._lookup_cache = {} 358 | else: 359 | raise ValueError("Please provide valid DataHandler object.") 360 | 361 | def close(self): 362 | """ Reset the datalog and close all registered DataHandlers """ 363 | if self.comm.rank != 0: 364 | return 365 | 366 | for (tblname, handler) in self.policy: 367 | handler.close() 368 | 369 | def getChild(self, name): 370 | return ChildLogger(self, name) 371 | 372 | def getLogger(name=''): 373 | global dlog 374 | return dlog.getChild(name) 375 | 376 | #============================================================================= 377 | # Create global default data logger 378 | 379 | dlog = RootLogger() 380 | -------------------------------------------------------------------------------- /learning/utils/test_autotable.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Testcases for the pulp.utils package 5 | """ 6 | 7 | import os.path 8 | import tempfile 9 | import shutil 10 | import numpy as np 11 | import unittest 12 | 13 | import autotable 14 | 15 | #============================================================================= 16 | # AutoTable tests 17 | 18 | class TestAutotable(unittest.TestCase): 19 | def setUp(self): 20 | self.dirname = tempfile.mkdtemp() 21 | self.fname = os.path.join(self.dirname, "autotable-test.h5") 22 | self.at = autotable.AutoTable(self.fname) 23 | 24 | def tearDown(self): 25 | self.at.close() 26 | shutil.rmtree(self.dirname) 27 | 28 | def test_float(self): 29 | vals = {'testFloat': 2.42} 30 | self.at.append_all( vals ) 31 | 32 | def test_nparray(self): 33 | a = np.ones( (10,10) ) 34 | vals = {'testNPArray': a} 35 | self.at.append_all( vals ) 36 | 37 | def test_wrongShape(self): 38 | a = np.ones( (10,10) ) 39 | self.at.append_all( {'testWrongType': a} ) 40 | 41 | b = np.ones( (10) ) 42 | self.assertRaises(TypeError, lambda: self.at.append_all( {'testWrongType': b} ) ) 43 | 44 | -------------------------------------------------------------------------------- /learning/utils/test_datalog.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | sys.path.insert(0, '../..') 5 | 6 | import os.path 7 | import tempfile 8 | import shutil 9 | import numpy as np 10 | 
import unittest 11 | import h5py 12 | 13 | import datalog as datalog 14 | 15 | #============================================================================= 16 | # DatLog tests 17 | 18 | class TestDataLog(unittest.TestCase): 19 | def setUp(self): 20 | self.dirname = tempfile.mkdtemp() 21 | self.fname = os.path.join(self.dirname, "autotable-test.h5") 22 | 23 | def tearDown(self): 24 | shutil.rmtree(self.dirname) 25 | 26 | def append_content(self, dlog): 27 | dlog.append("T", 0.) 28 | dlog.append("T", 1.) 29 | dlog.append("T", 2.) 30 | 31 | def append_all_content(self, dlog): 32 | vals = {"T": 0., "A": 0.} 33 | dlog.append_all(vals) 34 | vals = {"T": 1., "A": 1.} 35 | dlog.append_all(vals) 36 | vals = {"T": 2., "A": 2.} 37 | dlog.append_all(vals) 38 | 39 | def check_content(self, fname): 40 | with h5py.File(fname, 'r') as h5: 41 | T = h5['T'] 42 | self.assertAlmostEqual(T[0], 0.) 43 | self.assertAlmostEqual(T[1], 1.) 44 | self.assertAlmostEqual(T[2], 2.) 45 | 46 | #------------------------------------------------------------------------ 47 | 48 | def test_default_dlog(self): 49 | dlog = datalog.getLogger() 50 | dlog.ignored("test") 51 | 52 | @unittest.skip("Failing since h5py conversion") 53 | def test_default_handler(self): 54 | dlog = datalog.getLogger() 55 | dlog.set_handler("*", datalog.StoreToH5, self.fname) 56 | self.append_content(dlog) 57 | dlog.close() 58 | 59 | self.check_content(self.fname) 60 | 61 | @unittest.skip("Failing since h5py conversion") 62 | def test_storage_handler(self): 63 | dlog = datalog.getLogger() 64 | dlog.set_handler('T', datalog.StoreToH5, self.fname) 65 | self.append_content(dlog) 66 | dlog.close() 67 | 68 | self.check_content(self.fname) 69 | 70 | def test_append_all(self): 71 | dlog = datalog.getLogger() 72 | dlog.set_handler('T', datalog.StoreToH5, self.fname) 73 | self.append_all_content(dlog) 74 | dlog.close() 75 | 76 | self.check_content(self.fname) 77 | 78 | def test_progress(self): 79 | dlog = datalog.getLogger() 80 | dlog.set_handler('T', datalog.StoreToH5, self.fname) 81 | dlog.progress("Hello, Test") 82 | dlog.close() 83 | 84 | -------------------------------------------------------------------------------- /learning/utils/test_unrolled_scan.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | import theano 6 | import theano.tensor as T 7 | 8 | # unit under test 9 | from unrolled_scan import * 10 | 11 | def test_unroll1(): 12 | i = T.arange(100) 13 | A = theano.shared(np.random.normal(size=(10,10))) 14 | 15 | def fn1(seq, acc): 16 | return T.dot(acc, A) 17 | 18 | # Unrolled scan 19 | outputs, updates = unrolled_scan(fn1, name='fn1', 20 | sequences=[i], outputs_info=[T.ones_like(A)], 21 | unroll=1 22 | ) 23 | f_fn1 = theano.function([], outputs[-1], name='fn1') 24 | 25 | def test_last_out_only(): 26 | i = T.arange(100) 27 | A = theano.shared(np.random.normal(size=(10,10))) 28 | 29 | def fn1(seq, acc): 30 | return T.dot(acc, A) 31 | 32 | # Normal Theano scan 33 | outputs, updates = theano.scan(fn1, name='fn1', 34 | sequences=[i], outputs_info=[T.ones_like(A)] 35 | ) 36 | f_fn1 = theano.function([], outputs[-1], name='fn1') 37 | res_normal = f_fn1() 38 | 39 | # Unrolled scan 40 | outputs, updates = unrolled_scan(fn1, name='fn1', 41 | sequences=[i], outputs_info=[T.ones_like(A)], 42 | unroll=10 43 | ) 44 | f_fn1 = theano.function([], outputs[-1], name='fn1') 45 | res_unrolled = f_fn1() 46 | 47 | assert np.allclose(res_normal, res_unrolled) 48 | 49 | 
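# Note: for unroll > 1, unrolled_scan (defined in learning/utils/unrolled_scan.py, next file) reshapes each sequence to (length//unroll, unroll, ...), so the sequence length should be divisible by `unroll`; both tests above use a length-100 range with unroll=1 and unroll=10.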
-------------------------------------------------------------------------------- /learning/utils/unrolled_scan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | 6 | import numpy as np 7 | 8 | import theano 9 | import theano.tensor as T 10 | 11 | def unrolled_scan(fn, sequences=None, outputs_info=None, non_sequences=None, 12 | n_steps=None, truncate_gradient=-1, go_backwards=False, 13 | mode=None, name=None, profile=False, unroll=8): 14 | """ Unrolling version of theano.scan """ 15 | if unroll == 1: 16 | return theano.scan(fn, sequences=sequences, 17 | outputs_info=outputs_info, 18 | non_sequences=non_sequences, 19 | n_steps=n_steps, truncate_gradient=truncate_gradient, 20 | go_backwards= go_backwards, mode=mode, name=name, 21 | profile=profile) 22 | 23 | if sequences is None: 24 | sequences = [] 25 | if outputs_info is None: 26 | outputs_info = [] 27 | if non_sequences is None: 28 | non_sequences = [] 29 | 30 | n_seq = len(sequences) 31 | n_out = len(outputs_info) 32 | n_nseq = len(non_sequences) 33 | 34 | def unrolled_fn(*args): 35 | if len(args) != (n_seq+n_out+n_nseq): 36 | raise ValueError('Scan function %s takes %d arguments but expeted to receive %d' 37 | % (fn, len(args), (n_seq+n_out+n_nseq))) 38 | 39 | seq_args , args = args[:n_seq], args[n_seq:] 40 | out_args , args = args[:n_out], args[n_out:] 41 | nseq_args, args = args[:n_nseq], args[n_nseq:] 42 | assert len(args) == 0 43 | 44 | 45 | for i in xrange(unroll): 46 | seq_args_i = [arg[i] for arg in seq_args] 47 | all_args = list(seq_args_i)+list(out_args)+list(nseq_args) 48 | out_args = fn(*all_args) 49 | 50 | if not isinstance(out_args, (tuple, list)): 51 | out_args = (out_args,) 52 | assert len(out_args) == n_out 53 | if len(out_args) == 1: 54 | out_args = out_args[0] 55 | return out_args 56 | 57 | def reshape_arg(arg): 58 | new_shape = [arg.shape[0]//unroll, unroll]+[arg.shape[i] for i in xrange(1, arg.ndim)] 59 | return arg.reshape(new_shape) 60 | #return arg.reshape( [arg.shape[0]//unroll, unroll] ) # +arg.shape[1:], ndim=arg.ndim+1 ) 61 | #return arg.reshape( [arg.shape[0]//unroll, unroll]+arg.shape[1:], ndim=arg.ndim+1 ) 62 | sequences = [reshape_arg(arg) for arg in sequences] 63 | 64 | if len(sequences) == 0: 65 | sequences = None 66 | if len(outputs_info) == 0: 67 | outputs_info = None 68 | if len(non_sequences) == 0: 69 | non_sequences = None 70 | 71 | return theano.scan(unrolled_fn, sequences=sequences, 72 | outputs_info=outputs_info, 73 | non_sequences=non_sequences, 74 | n_steps=n_steps, truncate_gradient=truncate_gradient, 75 | go_backwards= go_backwards, mode=mode, name=name, 76 | profile=profile) 77 | 78 | 79 | #----------------------------------------------------------------------------- 80 | if __name__ == "__main__": 81 | import logging 82 | from time import time 83 | import ipdb 84 | theano.config.exception_verbosity = 'high' 85 | 86 | def benchmark(fn, tries=4, iterations=100): 87 | t_best = np.inf 88 | t_worst = 0. 
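# Time `iterations` calls of fn per try and keep the best and worst mean per-call time over all tries.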
89 | 90 | for t in xrange(tries): 91 | t0 = time() 92 | for i in xrange(iterations): 93 | fn() 94 | t = (time()-t0) / iterations 95 | t_best = min(t_best, t) 96 | t_worst = max(t_worst, t) 97 | print " t_best = %f ms t_worst = %f ms" %(t_best*1000, t_worst*1000) 98 | 99 | #------------------------------------------------------------------------- 100 | 101 | i = T.arange(100) 102 | A = theano.shared(np.random.normal(size=(10,10))) 103 | 104 | def fn1(seq, acc): 105 | return T.dot(acc, A) 106 | 107 | print "-"*78 108 | print "Unrolled SCAN:" 109 | outputs, updates = unrolled_scan(fn1, name='fn1', 110 | sequences=[i], outputs_info=[T.ones_like(A)], 111 | unroll=10 112 | ) 113 | f_fn1 = theano.function([], outputs[-1], name='fn1') 114 | 115 | res = f_fn1() 116 | print res.shape 117 | print res 118 | benchmark(f_fn1) 119 | 120 | print "-"*78 121 | print "Normal SCAN:" 122 | outputs, updates = theano.scan(fn1, name='fn1', 123 | sequences=[i], outputs_info=[T.ones_like(A)] 124 | ) 125 | f_fn1 = theano.function([], outputs[-1], name='fn1') 126 | 127 | res = f_fn1() 128 | print res.shape 129 | print res 130 | benchmark(f_fn1) 131 | 132 | -------------------------------------------------------------------------------- /mnist/param-mnist-darn-200.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | from learning.dataset import BarsData, FromModel, MNIST 5 | from learning.preproc import Binarize 6 | from learning.training import Trainer 7 | from learning.termination import LogLikelihoodIncrease, EarlyStopping 8 | from learning.monitor import MonitorLL, DLogModelParams, SampleFromP 9 | 10 | from learning.models.rws import LayerStack 11 | from learning.models.sbn import SBN, SBNTop 12 | from learning.models.darn import DARN, DARNTop 13 | from learning.models.nade import NADE, NADETop 14 | 15 | n_vis = 28*28 16 | 17 | dataset = MNIST(fname="data/mnist_salakhutdinov.pkl.gz", which_set='salakhutdinov_train', n_datapoints=59000) 18 | smallset = MNIST(fname="data/mnist_salakhutdinov.pkl.gz", which_set='salakhutdinov_valid', n_datapoints=100) 19 | valiset = MNIST(fname="data/mnist_salakhutdinov.pkl.gz", which_set='salakhutdinov_valid', n_datapoints=1000) 20 | testset = MNIST(fname="data/mnist_salakhutdinov.pkl.gz", which_set='test', n_datapoints=10000) 21 | 22 | p_layers=[ 23 | DARN( 24 | n_X=n_vis, 25 | n_Y=200, 26 | ), 27 | DARNTop( 28 | n_X=200, 29 | ), 30 | ] 31 | 32 | q_layers=[ 33 | CNADE( 34 | n_Y=n_vis, 35 | n_X=200, 36 | n_hid=200, 37 | ), 38 | ] 39 | 40 | model = LayerStack( 41 | p_layers=p_layers, 42 | q_layers=q_layers, 43 | ) 44 | 45 | trainer = Trainer( 46 | n_samples=5, 47 | learning_rate_p=1e-3, 48 | learning_rate_q=1e-3, 49 | learning_rate_s=1e-3, 50 | layer_discount=1.0, 51 | batch_size=25, 52 | dataset=dataset, 53 | model=model, 54 | termination=EarlyStopping(), 55 | #step_monitors=[MonitorLL(data=smallset, n_samples=[1, 5, 25, 100])], 56 | epoch_monitors=[MonitorLL(data=valiset, n_samples=[100]), DLogModelParams(), SampleFromP(n_samples=100)], 57 | final_monitors=[MonitorLL(data=testset, n_samples=[1, 5, 10, 25, 100, 500])], 58 | monitor_nth_step=100, 59 | ) 60 | -------------------------------------------------------------------------------- /mnist/param-nade-nade-200.py: -------------------------------------------------------------------------------- 1 | # 2 | 3 | import numpy as np 4 | 5 | from learning.dataset import BarsData, FromModel, MNIST 6 | from learning.preproc import PermuteColumns 7 | from learning.training 
import Trainer 8 | from learning.termination import LogLikelihoodIncrease, EarlyStopping 9 | from learning.monitor import MonitorLL, DLogModelParams, SampleFromP 10 | 11 | from learning.models.rws import LayerStack 12 | from learning.models.sbn import SBN, SBNTop 13 | from learning.models.darn import DARN, DARNTop 14 | from learning.models.nade import NADE, NADETop 15 | 16 | n_vis = 28*28 17 | 18 | permute = PermuteColumns() 19 | dataset = MNIST(fname="mnist_salakhutdinov.pkl.gz", which_set='salakhutdinov_train', preproc=[permute], n_datapoints=50000) 20 | valiset = MNIST(fname="mnist_salakhutdinov.pkl.gz", which_set='salakhutdinov_valid', preproc=[permute], n_datapoints=1000) 21 | testset = MNIST(fname="mnist_salakhutdinov.pkl.gz", which_set='test', preproc=[permute], n_datapoints=10000) 22 | 23 | p_layers=[ 24 | NADE( 25 | n_X=n_vis, 26 | n_Y=200, 27 | clamp_sigmoid=True, 28 | ), 29 | NADETop( 30 | n_X=200, 31 | clamp_sigmoid=True, 32 | ), 33 | ] 34 | 35 | q_layers=[ 36 | NADE( 37 | n_Y=n_vis, 38 | n_X=200, 39 | clamp_sigmoid=True, 40 | ) 41 | ] 42 | 43 | model = LayerStack( 44 | p_layers=p_layers, 45 | q_layers=q_layers, 46 | ) 47 | 48 | trainer = Trainer( 49 | n_samples=5, 50 | learning_rate_p=1e-3, 51 | learning_rate_q=1e-3, 52 | learning_rate_s=1e-3, 53 | layer_discount=1.0, 54 | batch_size=25, 55 | dataset=dataset, 56 | model=model, 57 | termination=EarlyStopping(min_epochs=250, max_epochs=250), 58 | #step_monitors=[MonitorLL(data=smallset, n_samples=[1, 5, 25, 100])], 59 | epoch_monitors=[ 60 | DLogModelParams(), 61 | SampleFromP(n_samples=100), 62 | MonitorLL(name="valiset", data=valiset, n_samples=[100]), 63 | ], 64 | final_monitors=[ 65 | MonitorLL(name="final-valiset", data=valiset, n_samples=[1, 5, 10, 25, 100, 500, 1000]), 66 | MonitorLL(name="final-testset", data=testset, n_samples=[1, 5, 10, 25, 100, 500, 1000]), 67 | ], 68 | #monitor_nth_step=100, 69 | ) 70 | -------------------------------------------------------------------------------- /mnist/rerun-monitors.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division, print_function 4 | 5 | import sys 6 | sys.path.append("../") 7 | 8 | import os 9 | import os.path 10 | import logging 11 | from time import time 12 | import cPickle as pickle 13 | 14 | import numpy as np 15 | 16 | logger = logging.getLogger() 17 | 18 | 19 | def find_LL_channel(h5): 20 | """ Find and load some LL dataset that we want to use.
21 | 22 | Returns 23 | ------- 24 | datast name [str] or raise a ValueError 25 | """ 26 | LL_candidates = [ 27 | 'valiset.spl100.LL', 'valiset.spl25.LL', 'valiset.spl10.LL', 'valiset.spl5.LL', 28 | 'dataset.spl100.LL', 'dataset.spl25.LL', 'dataset.spl10.LL', 'dataset.spl5.LL' 29 | ] 30 | for name in LL_candidates: 31 | if name in h5: 32 | return name 33 | raise ValueError("Could not find LL dataset") 34 | 35 | 36 | def run_monitors(model, monitors): 37 | for m in monitors: 38 | m.on_iter(model) 39 | 40 | def rerun_monitors(args): 41 | from learning.utils.datalog import dlog, StoreToH5, TextPrinter 42 | 43 | from learning.experiment import Experiment 44 | from learning.monitor import MonitorLL, DLogModelParams, SampleFromP 45 | from learning.monitor.llbound import LLBound 46 | from learning.dataset import MNIST 47 | from learning.preproc import PermuteColumns 48 | 49 | from learning.rws import LayerStack 50 | from learning.sbn import SBN, SBNTop 51 | from learning.darn import DARN, DARNTop 52 | from learning.nade import NADE, NADETop 53 | 54 | import h5py 55 | 56 | 57 | logger.debug("Arguments %s" % args) 58 | tags = [] 59 | 60 | 61 | # Layer models 62 | layer_models = { 63 | "sbn" : (SBN, SBNTop), 64 | "darn": (DARN, DARNTop), 65 | "nade": (NADE, NADETop), 66 | } 67 | 68 | if not args.p_model in layer_models: 69 | raise "Unknown P-layer model %s" % args.p_model 70 | p_layer, p_top = layer_models[args.p_model] 71 | 72 | if not args.q_model in layer_models: 73 | raise "Unknown P-layer model %s" % args.p_model 74 | q_layer, q_top = layer_models[args.q_model] 75 | 76 | # Layer sizes 77 | layer_sizes = [int(s) for s in args.layer_sizes.split(",")] 78 | 79 | n_X = 28*28 80 | 81 | p_layers = [] 82 | q_layers = [] 83 | 84 | for ls in layer_sizes: 85 | n_Y = ls 86 | p_layers.append( 87 | p_layer(n_X=n_X, n_Y=n_Y, clamp_sigmoid=True) 88 | ) 89 | q_layers.append( 90 | q_layer(n_X=n_Y, n_Y=n_X) 91 | ) 92 | n_X = n_Y 93 | p_layers.append( p_top(n_X=n_X, clamp_sigmoid=True) ) 94 | 95 | 96 | model = LayerStack( 97 | p_layers=p_layers, 98 | q_layers=q_layers 99 | ) 100 | model.setup() 101 | 102 | # Dataset 103 | if args.shuffle: 104 | np.random.seed(23) 105 | preproc = [PermuteColumns()] 106 | tags += ["shuffle"] 107 | else: 108 | preproc = [] 109 | 110 | tags.sort() 111 | 112 | expname = args.cont 113 | if expname[-1] == "/": 114 | expname = expname[:-1] 115 | 116 | logger.info("Loading dataset...") 117 | testset = MNIST(fname="mnist_salakhutdinov.pkl.gz", which_set='test', preproc=preproc, n_datapoints=10000) 118 | 119 | #----------------------------------------------------------------------------- 120 | logger.info("Setting up monitors...") 121 | #monitors = [MonitorLL(data=testset, n_samples=[1, 5, 10, 25, 100, 500, 1000, 10000, 100000])] 122 | #monitors = [MonitorLL(data=testset, n_samples=[1, 5, 10, 25, 100, 500, 1000]), LLBound(data=testset, n_samples=[1, 5, 10, 25, 100, 500, 1000])] 123 | monitors = [MonitorLL(data=testset, n_samples=[1, 5, 10, 25, 100]), LLBound(data=testset, n_samples=[1, 5, 10, 25, 100])] 124 | #monitors = [BootstrapLL(data=testset, n_samples=[1, 5, 10, 25, 100, 500, 1000])] 125 | #monitors = [MonitorLL(data=testset, n_samples=[500,])] 126 | #monitors = [SampleFromP(n_samples=200)] 127 | 128 | #----------------------------------------------------------------------------- 129 | result_dir = "reruns/%s" % os.path.basename(expname) 130 | results_fname = result_dir+"/results.h5" 131 | logger.info("Output logging to %s" % result_dir) 132 | os.makedirs(result_dir) 133 | 
dlog.set_handler("*", StoreToH5, results_fname) 134 | 135 | fname = args.cont + "/results.h5" 136 | logger.info("Loading from %s" % fname) 137 | with h5py.File(fname, "r") as h5: 138 | 139 | # Find a validation LL to report... 140 | LL_dataset = find_LL_channel(h5) 141 | LL = h5[LL_dataset][:] 142 | 143 | best_rows = list(np.argsort(-LL)[:args.best]) 144 | 145 | logger.info("Read LL from '%s'" % LL_dataset) 146 | logger.info("Final validation LL: %5.2f (iteration %d)" % (LL[-1], LL.shape[0])) 147 | for row in best_rows: 148 | logger.info(" validation LL: %5.2f (iteration %d)" % (LL[row], row)) 149 | 150 | for m in monitors: 151 | m.on_init(model) 152 | 153 | rows = [-1] + best_rows 154 | for row in rows: 155 | logger.info("Loading model (row %d)..." % -1) 156 | logger.info("LL on validation set: %f5.2" % LL[-1]) 157 | model.model_params_from_h5(h5, row=-1) 158 | run_monitors(model, monitors) 159 | 160 | logger.info("Finished.") 161 | 162 | #experiment.print_summary() 163 | 164 | #============================================================================= 165 | if __name__ == "__main__": 166 | import argparse 167 | 168 | parser = argparse.ArgumentParser() 169 | parser.add_argument('--verbose', '-v', action='count') 170 | parser.add_argument('--shuffle', action='store_true', default=False) 171 | parser.add_argument('--best', type=int, default=1, 172 | help="Scan for the N best iterations additionally to the last one") 173 | parser.add_argument('cont', 174 | help="Continue a previous in result_dir") 175 | parser.add_argument('p_model', default="SBN", 176 | help="SBN, DARN or NADE (default: SBN") 177 | parser.add_argument('q_model', default="SBN", 178 | help="SBN, DARN or NADE (default: SBN") 179 | parser.add_argument('layer_sizes', default="200,200,10", 180 | help="Comma seperated list of sizes. 
Layer cosest to the data comes first") 181 | args = parser.parse_args() 182 | 183 | FORMAT = '[%(asctime)s] %(module)-15s %(message)s' 184 | DATEFMT = "%H:%M:%S" 185 | logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.INFO) 186 | 187 | rerun_monitors(args) 188 | -------------------------------------------------------------------------------- /mnist/run-mnist: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | from __future__ import division 5 | 6 | import sys 7 | sys.path.append("../") 8 | 9 | import logging 10 | from time import time 11 | import cPickle as pickle 12 | 13 | import numpy as np 14 | 15 | logger = logging.getLogger() 16 | 17 | 18 | def run_experiment(args): 19 | from learning.experiment import Experiment 20 | from learning.training import Trainer 21 | from learning.termination import EarlyStopping 22 | from learning.monitor import MonitorLL, DLogModelParams, SampleFromP 23 | from learning.monitor.bootstrap import BootstrapLL 24 | from learning.dataset import MNIST 25 | from learning.preproc import PermuteColumns, Binarize 26 | 27 | from learning.models.rws import LayerStack 28 | from learning.models.sbn import SBN, SBNTop 29 | from learning.models.dsbn import DSBN 30 | from learning.models.darn import DARN, DARNTop 31 | from learning.models.nade import NADE, NADETop 32 | 33 | np.set_printoptions(precision=2) 34 | 35 | logger.debug("Arguments %s" % args) 36 | tags = [] 37 | 38 | np.random.seed(23) 39 | 40 | # Layer models 41 | layer_models = { 42 | "sbn" : (SBN, SBNTop), 43 | "dsbn": (DSBN, SBNTop), 44 | "darn": (DARN, DARNTop), 45 | "nade": (NADE, NADETop), 46 | } 47 | 48 | if not args.p_model in layer_models: 49 | raise "Unknown P-layer model %s" % args.p_model 50 | p_layer, p_top = layer_models[args.p_model] 51 | 52 | if not args.q_model in layer_models: 53 | raise "Unknown P-layer model %s" % args.p_model 54 | q_layer, q_top = layer_models[args.q_model] 55 | 56 | # n_samples to evaluate model 57 | n_samples_epoch = [1, 5, 25, 100] 58 | n_samples_final = [1, 5, 10, 25, 100, 500, 1000, 10000, 100000] 59 | if (args.p_model in ['darn', 'nade']) or (args.q_model in ['darn', 'nade']): 60 | n_samples_epoch = [1, 5, 25] 61 | n_samples_final = [1, 5, 10, 25, 100, 500] 62 | 63 | 64 | 65 | # Layer sizes 66 | layer_sizes = [int(s) for s in args.layer_sizes.split(",")] 67 | 68 | n_X = 28*28 69 | 70 | p_layers = [] 71 | q_layers = [] 72 | 73 | for ls in layer_sizes: 74 | n_Y = ls 75 | p_layers.append( 76 | p_layer(n_X=n_X, n_Y=n_Y) 77 | ) 78 | q_layers.append( 79 | q_layer(n_X=n_Y, n_Y=n_X) 80 | ) 81 | n_X = n_Y 82 | p_layers.append( p_top(n_X=n_X) ) 83 | 84 | 85 | model = LayerStack( 86 | p_layers=p_layers, 87 | q_layers=q_layers 88 | ) 89 | model.setup() 90 | 91 | # Learning rate 92 | def lr_tag(value): 93 | """ Convert a float into a short tag-usable string representation. 
E.g.: 94 | 0.1 -> 11 95 | 0.01 -> 12 96 | 0.001 -> 13 97 | 0.005 -> 53 98 | """ 99 | if value == 0.0: 100 | return "00" 101 | exp = np.floor(np.log10(value)) 102 | leading = ("%e"%value)[0] 103 | return "%s%d" % (leading, -exp) 104 | 105 | lr_base = args.lr 106 | tags += ["lr"+lr_tag(lr_base)] 107 | lr_p = args.lr_p 108 | lr_q = args.lr_q 109 | lr_s = args.lr_s 110 | if lr_p is None: 111 | lr_p = lr_base 112 | else: 113 | tags += ["lp"+lr_tag(lr_p)] 114 | if lr_q is None: 115 | lr_q = lr_base 116 | else: 117 | tags += ["lq"+lr_tag(lr_q)] 118 | if lr_s is None: 119 | lr_s = lr_base 120 | else: 121 | tags += ["ls"+lr_tag(lr_s)] 122 | 123 | # Layer discount 124 | if args.ldiscount != 1.0: 125 | tags += ["ldiscount"] 126 | 127 | # LR decay 128 | if args.lrdecay != 1.0: 129 | tags += ["lrdecay"+lr_tag(args.lrdecay-1.)] 130 | 131 | # Samples 132 | n_samples = args.samples 133 | tags += ["spl%d"%n_samples] 134 | 135 | # Batch size 136 | batch_size = args.batchsize 137 | tags += ["bs%d"%batch_size] 138 | 139 | # Sleep interleave 140 | sleep_interleave = args.sleep_interleave 141 | tags += ["si%d"%sleep_interleave] 142 | 143 | # Dataset 144 | if args.shuffle: 145 | np.random.seed(23) 146 | preproc = [PermuteColumns()] 147 | tags += ["shuffle"] 148 | else: 149 | preproc = [] 150 | 151 | if args.rebinarize: 152 | binarize_preproc = preproc + [Binarize(late=True)] 153 | dataset = MNIST(which_set='train', preproc=binarize_preproc, n_datapoints=50000) 154 | valiset = MNIST(which_set='valid', preproc=binarize_preproc, n_datapoints=10000) 155 | testset = MNIST(fname="mnist_salakhutdinov.pkl.gz", which_set='test', preproc=preproc, n_datapoints=10000) 156 | tags += ["rb"] 157 | else: 158 | dataset = MNIST(fname="mnist_salakhutdinov.pkl.gz", which_set='train', preproc=preproc, n_datapoints=50000) 159 | valiset = MNIST(fname="mnist_salakhutdinov.pkl.gz", which_set='valid', preproc=preproc, n_datapoints=10000) 160 | testset = MNIST(fname="mnist_salakhutdinov.pkl.gz", which_set='test', preproc=preproc, n_datapoints=10000) 161 | 162 | if args.lookahead != 10: 163 | tags += ["lah%d" % args.lookahead] 164 | 165 | tags.sort() 166 | expname = "%s-%s-%s-%s"% ("-".join(tags), args.p_model, args.q_model, "-".join([str(s) for s in layer_sizes])) 167 | 168 | logger.info("Running %s" % expname) 169 | 170 | 171 | trainer = Trainer( 172 | batch_size=batch_size, 173 | n_samples=n_samples, 174 | sleep_interleave=sleep_interleave, 175 | learning_rate_p=lr_p, 176 | learning_rate_q=lr_q, 177 | learning_rate_s=lr_s, 178 | layer_discount=args.ldiscount, 179 | lr_decay=args.lrdecay, 180 | dataset=dataset, 181 | model=model, 182 | termination=EarlyStopping(lookahead=args.lookahead, min_epochs=10), 183 | epoch_monitors=[ 184 | DLogModelParams(), 185 | SampleFromP(n_samples=100), 186 | MonitorLL(name="valiset", data=valiset, n_samples=n_samples_epoch), 187 | ], 188 | final_monitors=[ 189 | MonitorLL(name="final-valiset", data=valiset, n_samples=n_samples_final), 190 | MonitorLL(name="final-testset", data=testset, n_samples=n_samples_final), 191 | ], 192 | ) 193 | 194 | experiment = Experiment() 195 | experiment.trainer = trainer 196 | experiment.setup_output_dir(expname) 197 | experiment.print_summary() 198 | experiment.setup_logging() 199 | 200 | if args.cont is None: 201 | experiment.run_experiment() 202 | else: 203 | logger.info("Continuing experiment %s ...." % args.cont) 204 | experiment.continue_experiment(args.cont+"/results.h5", row=-1) 205 | 206 | logger.info("Finished. 
Wrinting metadata") 207 | 208 | experiment.print_summary() 209 | 210 | #============================================================================= 211 | if __name__ == "__main__": 212 | import argparse 213 | 214 | parser = argparse.ArgumentParser() 215 | parser.add_argument('--verbose', '-v', action='count') 216 | parser.add_argument('--shuffle', action='store_true', default=False) 217 | parser.add_argument('--cont', nargs='?', default=None, 218 | help="Continue a previous in result_dir") 219 | parser.add_argument('--samples', default=10, type=int, 220 | help="Number of training samples (default: 10)") 221 | parser.add_argument('--batchsize', default=100, type=int, 222 | help="Mini batch size (default: 100)") 223 | parser.add_argument('--sleep-interleave', '--si', default=2, type=int, 224 | help="Sleep interleave (default: 2)") 225 | parser.add_argument('--lr', default=1e-3, type=float, help="Learning rate (default: 1e-3)") 226 | parser.add_argument('--lr_p', default=None, type=float, help="p learning rate") 227 | parser.add_argument('--lr_q', default=None, type=float, help="wake-q-learing rate") 228 | parser.add_argument('--lr_s', default=None, type=float, help="sleep-q-learning rate") 229 | parser.add_argument('--lrdecay', default=1., type=float, help="learning rate decay") 230 | parser.add_argument('--ldiscount', default=1., type=float, help="layer_discount") 231 | parser.add_argument('--rebinarize', default=False, action="store_true", 232 | help="Resample binary MNIST from orig. dataset during traaining"); 233 | parser.add_argument('--lookahead', default=10, type=int, 234 | help="Termination criteria: # epochs without LL increase") 235 | parser.add_argument('p_model', default="SBN", 236 | help="SBN, DARN or NADE (default: SBN") 237 | parser.add_argument('q_model', default="SBN", 238 | help="SBN, DARN or NADE (default: SBN") 239 | parser.add_argument('layer_sizes', default="200,200,10", 240 | help="Comma seperated list of sizes. 
Layer cosest to the data comes first") 241 | args = parser.parse_args() 242 | 243 | FORMAT = '[%(asctime)s] %(name)-15s %(message)s' 244 | DATEFMT = "%H:%M:%S" 245 | logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.INFO) 246 | 247 | run_experiment(args) 248 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | h5py >= 2.0 2 | nose >= 1.2 3 | numpy >= 1.8.0 4 | progressbar2 >= 2.6.0 5 | recordtype >= 1.0 6 | scipy >= 0.14.0 7 | six >= 1.1.0 8 | theano >= 0.5 9 | -------------------------------------------------------------------------------- /run-exp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import sys 6 | 7 | import logging 8 | import numpy as np 9 | 10 | #============================================================================= 11 | if __name__ == "__main__": 12 | import argparse 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--verbose', '-v', action='count') 18 | parser.add_argument('--overwrite', action='store_true') 19 | parser.add_argument('--name', "-n", default=None) 20 | parser.add_argument('param_file') 21 | parser.add_argument('result_dir', nargs='?', default=None, 22 | help="Continue a previous in result_dir") 23 | args = parser.parse_args() 24 | 25 | import theano 26 | import theano.tensor as T 27 | 28 | from learning.utils.datalog import dlog, StoreToH5, TextPrinter 29 | from learning.experiment import Experiment 30 | 31 | FORMAT = '[%(asctime)s] %(name)-15s %(message)s' 32 | DATEFMT = "%H:%M:%S" 33 | logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.INFO) 34 | 35 | if args.name is None: 36 | out_name = args.param_file 37 | else: 38 | out_name = args.name 39 | 40 | np.random.seed(23) 41 | experiment = Experiment.from_param_file(args.param_file) 42 | experiment.setup_output_dir(out_name, with_suffix=(not args.overwrite)) 43 | experiment.setup_logging() 44 | experiment.print_summary() 45 | 46 | if args.result_dir is None: 47 | experiment.run_experiment() 48 | else: 49 | experiment.continue_experiment(args.result_dir+"/results.h5") 50 | 51 | logger.info("Finished. 
Exiting") 52 | experiment.print_summary() 53 | -------------------------------------------------------------------------------- /shippable.yml: -------------------------------------------------------------------------------- 1 | build_image: jbornschein/theano 2 | 3 | language: python 4 | 5 | python: 6 | - "2.7" 7 | 8 | install: 9 | - pip install -r requirements.txt 10 | 11 | script: 12 | - nosetests -v 13 | -------------------------------------------------------------------------------- /show-W0.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import sys 6 | 7 | import logging 8 | from time import time 9 | import cPickle as pickle 10 | 11 | import numpy as np 12 | import h5py 13 | 14 | 15 | import pylab 16 | #import theano 17 | #import theano.tensor as T 18 | 19 | _logger = logging.getLogger() 20 | 21 | #============================================================================= 22 | if __name__ == "__main__": 23 | import argparse 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument('--verbose', '-v', action="store_true", default=False) 29 | parser.add_argument('--param', default="model.L0.P.W") 30 | parser.add_argument('--transpose', '-T', action="store_true", default=False) 31 | parser.add_argument('--shape', default="28,28", 32 | help="Shape for each samples (default: 28,28)") 33 | parser.add_argument('--row', default=-1, type=int, 34 | help="Iteration to visualize") 35 | parser.add_argument('out_dir', nargs=1) 36 | args = parser.parse_args() 37 | 38 | if args.verbose: 39 | level = logging.DEBUG 40 | else: 41 | level = logging.INFO 42 | 43 | FORMAT = '[%(asctime)s] %(message)s' 44 | DATEFMT = "%H:%M:%S" 45 | logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=level) 46 | 47 | fname = args.out_dir[0]+"/results.h5" 48 | param = args.param 49 | try: 50 | with h5py.File(fname, "r") as h5: 51 | 52 | logger.debug("Keys:") 53 | for k, v in h5.iteritems(): 54 | logger.debug(" %-30s %s" % (k, v.shape)) 55 | 56 | row = args.row 57 | total_rows = h5[param].shape[0] 58 | logger.info("Visualizing row %d of %d..." 
% (args.row, total_rows)) 59 | 60 | W0 = h5[param][row,:,:] 61 | if args.transpose: 62 | W0 = W0.T 63 | H, D = W0.shape 64 | 65 | if 'preproc.permute_columns.permutation_inv' in h5: 66 | logger.debug("Experiment used PermuteColumns preproc -- loading inv_perm") 67 | perm_inv = h5['preproc.permute_columns.permutation_inv'][:] 68 | else: 69 | perm_inv = np.arange(D) 70 | 71 | except KeyError, e: 72 | logger.info("Failed to read data from %s: %s" % (fname, e)) 73 | exit(1) 74 | 75 | except IOError, e: 76 | logger.info("Failed to open %s: %s" % (fname, e)) 77 | exit(1) 78 | 79 | shape = tuple([int(s) for s in args.shape.split(",")]) 80 | 81 | width = int(np.sqrt(H)) 82 | height = width 83 | if width*height < H: 84 | width = width + 1 85 | if width*height < H: 86 | height = height + 1 87 | 88 | logger.debug("Using shape: %s -- %s" % (args.shape, shape)) 89 | assert len(shape) == 2 90 | 91 | pylab.figure() 92 | for h in xrange(H): 93 | pylab.subplot(width, height, h+1) 94 | pylab.imshow( W0[h,perm_inv].reshape(shape), interpolation='nearest') 95 | pylab.gray() 96 | pylab.axis('off') 97 | 98 | pylab.legend(loc="lower right") 99 | pylab.show(block=True) 100 | 101 | -------------------------------------------------------------------------------- /show-layerwise.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import sys 6 | 7 | import logging 8 | from time import time 9 | import cPickle as pickle 10 | 11 | import numpy as np 12 | import h5py 13 | 14 | 15 | import pylab 16 | #import theano 17 | #import theano.tensor as T 18 | 19 | _logger = logging.getLogger() 20 | 21 | #============================================================================= 22 | if __name__ == "__main__": 23 | import argparse 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument('--verbose', '-v', action="store_true", default=False) 29 | parser.add_argument('--dataset', '-d', default="valiset") 30 | parser.add_argument('--samples', '-s', default=100) 31 | parser.add_argument('--stacked', action="store_true", default=False) 32 | parser.add_argument('out_dir', nargs=1) 33 | args = parser.parse_args() 34 | 35 | if args.verbose: 36 | level = logging.DEBUG 37 | else: 38 | level = logging.INFO 39 | 40 | FORMAT = '[%(asctime)s] %(message)s' 41 | DATEFMT = "%H:%M:%S" 42 | logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=level) 43 | 44 | fname = args.out_dir[0]+"/results.h5" 45 | table = "%s.spl%d.Hp" % (args.dataset, args.samples) 46 | 47 | try: 48 | with h5py.File(fname, "r") as h5: 49 | Hp = h5[table][:] 50 | 51 | except KeyError, e: 52 | logger.info("Failed to read data from %s: %s" % (fname, e)) 53 | exit(1) 54 | 55 | except IOError, e: 56 | logger.info("Failed to open %s fname: %s" % (fname, e)) 57 | exit(1) 58 | 59 | epochs = Hp.shape[0] 60 | n_layers = Hp.shape[1] 61 | 62 | if args.stacked: 63 | ylim = 2*Hp[-1].sum() 64 | pylab.ylim([ylim, 0]) 65 | pylab.stackplot(np.arange(epochs), Hp[:,::-1].T) 66 | else: 67 | ylim = 2*Hp[-1].min() 68 | pylab.ylim([ylim, 0]) 69 | pylab.plot(Hp) 70 | 71 | #pylab.figsize(12, 8) 72 | pylab.xlabel("Epochs") 73 | #pylab.ylabel("avg_{x~testdata} log( E_{h~q}[p(x,h)/q(h|x)]") 74 | pylab.legend(["layer %d" % i for i in xrange(n_layers)], loc="lower right") 75 | pylab.show(block=True) 76 | 77 | -------------------------------------------------------------------------------- /show-ll.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import sys 6 | 7 | import logging 8 | from time import time 9 | import cPickle as pickle 10 | 11 | import numpy as np 12 | import h5py 13 | 14 | 15 | import pylab 16 | #import theano 17 | #import theano.tensor as T 18 | 19 | _logger = logging.getLogger() 20 | 21 | #============================================================================= 22 | if __name__ == "__main__": 23 | import argparse 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument('--verbose', '-v', action="store_true", default=False) 29 | parser.add_argument('--dataset', '-d', default="valiset") 30 | parser.add_argument('--samples', '-s', default=100, type=int) 31 | parser.add_argument('out_dir', nargs='+') 32 | args = parser.parse_args() 33 | 34 | if args.verbose: 35 | level = logging.DEBUG 36 | else: 37 | level = logging.INFO 38 | 39 | FORMAT = '[%(asctime)s] %(message)s' 40 | DATEFMT = "%H:%M:%S" 41 | logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=level) 42 | 43 | ylim = +np.inf 44 | for out_dir in args.out_dir: 45 | fname = out_dir+"/results.h5" 46 | 47 | table = "%s.spl%d.LL" % (args.dataset, args.samples) 48 | 49 | try: 50 | with h5py.File(fname, "r") as h5: 51 | 52 | print "==== %s ====" % out_dir 53 | logger.debug("Keys:") 54 | for k, v in h5.iteritems(): 55 | logger.debug(" %-30s %s" % (k, v.shape)) 56 | 57 | 58 | LL = h5[table][:] 59 | LL = LL[np.isfinite(LL)] # filter NaNs and INFs 60 | LL_final = LL[-1] 61 | ylim = min(ylim, 2*LL_final) 62 | 63 | pylab.plot(LL, label=out_dir[-20:]) 64 | print "Final LL [%d samples]: %.2f" % (args.samples, LL_final) 65 | 66 | for spl in [10000, 5000, 1000, 500, 250, 100, 50, 25, 10]: 67 | try: 68 | final_valiset = h5["final-valiset.spl%d.LL"%spl][-1] 69 | final_testset = h5["final-testset.spl%d.LL"%spl][-1] 70 | 71 | print "valiset-final [%d samples]: %.2f" % (spl, final_valiset) 72 | print "testset-final [%d samples]: %.2f" % (spl, final_testset) 73 | break 74 | except KeyError as e: 75 | continue 76 | 77 | 78 | except KeyError as e: 79 | logger.info("Failed to read data from %s: %s" % (fname, e)) 80 | 81 | except IOError as e: 82 | logger.info("Failed to open %s fname: %s" % (fname, e)) 83 | 84 | 85 | #pylab.figsize(12, 8) 86 | if ylim < 0: 87 | pylab.ylim([ylim, 0]) 88 | else: 89 | pylab.ylim([0, ylim]) 90 | pylab.xlabel("Epochs") 91 | pylab.ylabel("avg_{x~testdata} log( E_{h~q}[p(x,h)/q(h|x)]") 92 | pylab.legend(loc="lower right") 93 | pylab.show(block=True) 94 | 95 | -------------------------------------------------------------------------------- /show-param-stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import sys 6 | 7 | import logging 8 | from time import time 9 | import cPickle as pickle 10 | 11 | import numpy as np 12 | import h5py 13 | 14 | 15 | import pylab 16 | #import theano 17 | #import theano.tensor as T 18 | 19 | _logger = logging.getLogger() 20 | 21 | #============================================================================= 22 | if __name__ == "__main__": 23 | import argparse 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument('--verbose', '-v', action="store_true", default=False) 29 | parser.add_argument('--filter', type=str, default=None) 30 | 
parser.add_argument('results', nargs='+') 31 | args = parser.parse_args() 32 | 33 | if args.verbose: 34 | level = logging.DEBUG 35 | else: 36 | level = logging.INFO 37 | 38 | FORMAT = '[%(asctime)s] %(message)s' 39 | DATEFMT = "%H:%M:%S" 40 | logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=level) 41 | 42 | ylim = +np.inf 43 | for result_dir in args.results: 44 | fname = result_dir+"/results.h5" 45 | 46 | try: 47 | with h5py.File(fname, "r") as h5: 48 | 49 | print "==== %s ====" % result_dir 50 | logger.debug("Keys:") 51 | for k, v in h5.iteritems(): 52 | logger.debug(" %-30s %s" % (k, v.shape)) 53 | 54 | for k,v in h5.iteritems(): 55 | if not 'model.' in k: 56 | continue 57 | 58 | if not args.filter is None: 59 | if args.filter not in k: 60 | continue 61 | 62 | values = v[:] 63 | iterations = v.shape[0] 64 | values = values.reshape( [iterations, -1] ) 65 | 66 | v_min = np.min(values, axis=1) 67 | v_max = np.max(values, axis=1) 68 | v_mean = np.mean(values, axis=1) 69 | 70 | pylab.errorbar(np.arange(iterations), v_mean, yerr=[v_max-v_mean, v_mean-v_min], label=k) 71 | 72 | except KeyError as e: 73 | logger.info("Failed to read data from %s: %s" % (fname, e)) 74 | 75 | except IOError as e: 76 | logger.info("Failed to open %s fname: %s" % (fname, e)) 77 | 78 | 79 | #pylab.figsize(12, 8) 80 | pylab.xlabel("Epochs") 81 | pylab.ylabel("") 82 | pylab.legend(loc="lower right") 83 | pylab.show(block=True) 84 | 85 | -------------------------------------------------------------------------------- /show-param-trajectory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | 3 | from __future__ import division, print_function 4 | 5 | import logging 6 | import h5py 7 | import numpy as np 8 | import tsne 9 | import pylab 10 | 11 | #x2 = tsne.bh_sne(x) 12 | 13 | 14 | if __name__ == "__main__": 15 | import sys 16 | import argparse 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--verbose', '-v', action="store_true", default=False) 22 | parser.add_argument('--param', type=str, default="L0.P.W_mu") 23 | parser.add_argument('result_dir', nargs='+') 24 | args = parser.parse_args() 25 | 26 | if args.verbose: 27 | level = logging.DEBUG 28 | else: 29 | level = logging.INFO 30 | 31 | FORMAT = '[%(asctime)s] %(message)s' 32 | DATEFMT = "%H:%M:%S" 33 | logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=level) 34 | 35 | P_all = None 36 | N_iter = [] 37 | for i, d in enumerate(args.result_dir): 38 | fname = d+"/results.h5" 39 | with h5py.File(fname, 'r') as h5: 40 | logger.debug("Keys:") 41 | for k, v in h5.iteritems(): 42 | logger.debug(" %-30s %s" % (k, v.shape)) 43 | 44 | key = "model." 
+ args.param 45 | P = h5[key][:] 46 | 47 | n_iter = P.shape[0] 48 | P = P.reshape([n_iter, -1]) 49 | 50 | mask = np.isfinite(P).all(axis=1) 51 | P = P[mask] 52 | logger.info("%s: loaded %d iterations (%d contained NaNs)" % (d, mask.sum(), n_iter-mask.sum())) 53 | N_iter.append(P.shape[0]) 54 | 55 | if P_all is None: 56 | P_all = P 57 | else: 58 | P_all = np.concatenate([P_all, P]) 59 | 60 | P_all = P_all.astype(np.float) 61 | logger.info("Running T-SNE on %s" % str(P_all.shape)) 62 | 63 | P2_all = tsne.bh_sne(P_all, pca_d=None, perplexity=10, theta=0.5) 64 | 65 | for n_iter in N_iter: 66 | P2 = P2_all[:n_iter] 67 | P2_all = P2_all[n_iter:] 68 | 69 | c = np.linspace(0, 1, n_iter) 70 | pylab.scatter(P2[:,0], P2[:,1], c=c) 71 | pylab.show(block=True) 72 | 73 | -------------------------------------------------------------------------------- /show-samples.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import division 4 | 5 | import sys 6 | 7 | import logging 8 | from time import time 9 | import cPickle as pickle 10 | 11 | import numpy as np 12 | import h5py 13 | 14 | 15 | import pylab 16 | #import theano 17 | #import theano.tensor as T 18 | 19 | _logger = logging.getLogger() 20 | 21 | #============================================================================= 22 | if __name__ == "__main__": 23 | import argparse 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument('--verbose', '-v', action="store_true", default=False) 29 | parser.add_argument('--shape', default="28,28", 30 | help="Shape for each samples (default: 28,28)") 31 | parser.add_argument('--nsamples', '-n', default=100, 32 | help="Number of samples to show") 33 | parser.add_argument('--sort', default=False, action="store_true", 34 | help="Sort samples according to their probability") 35 | parser.add_argument('--expected', default=False, action="store_true", 36 | help="Show per-pixel expectation rather than sampled values") 37 | parser.add_argument('out_dir', nargs=1) 38 | args = parser.parse_args() 39 | 40 | if args.verbose: 41 | level = logging.DEBUG 42 | else: 43 | level = logging.INFO 44 | 45 | FORMAT = '[%(asctime)s] %(message)s' 46 | DATEFMT = "%H:%M:%S" 47 | logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=level) 48 | 49 | fname = args.out_dir[0]+"/results.h5" 50 | try: 51 | with h5py.File(fname, "r") as h5: 52 | 53 | logger.debug("Keys:") 54 | for k, v in h5.iteritems(): 55 | logger.debug(" %-30s %s" % (k, v.shape)) 56 | 57 | if args.expected: 58 | samples = h5['SampleFromP.L0_expected'][-1,:,:] 59 | else: 60 | samples = h5['SampleFromP.L0'][-1,:,:] 61 | log_p = h5['SampleFromP.log_p'][-1,:] 62 | _, D = samples.shape 63 | 64 | if 'preproc.permute_columns.permutation_inv' in h5: 65 | logger.debug("Experiment used PermuteColumns preproc -- loading inv_perm") 66 | perm_inv = h5['preproc.permute_columns.permutation_inv'][:] 67 | else: 68 | perm_inv = np.arange(D) 69 | 70 | 71 | 72 | except KeyError, e: 73 | logger.info("Failed to read data from %s: %s" % (fname, e)) 74 | exit(1) 75 | 76 | except IOError, e: 77 | logger.info("Failed to open %s: %s" % (fname, e)) 78 | exit(1) 79 | 80 | shape = tuple([int(s) for s in args.shape.split(",")]) 81 | logger.debug("Using shape: %s -- %s" % (args.shape, shape)) 82 | assert len(shape) == 2 83 | 84 | if args.sort: 85 | idx = np.argsort(log_p)[::-1] 86 | samples = samples[idx] 87 | log_p = log_p[idx] 88 | 89 | pylab.figure() 90 | for i 
in xrange(args.nsamples): 91 | pylab.subplot(10, 10, i+1) 92 | pylab.imshow( samples[i,perm_inv].reshape(shape), interpolation='nearest') 93 | pylab.gray() 94 | pylab.axis('off') 95 | 96 | pylab.legend(loc="lower right") 97 | pylab.show(block=True) 98 | 99 | -------------------------------------------------------------------------------- /theanorc: -------------------------------------------------------------------------------- 1 | [global] 2 | floatX = float32 3 | 4 | [nvcc] 5 | fastmath = True 6 | 7 | [blas] 8 | ldflags = -lblas -lgfortran 9 | 10 | -------------------------------------------------------------------------------- /uci/run-uci: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | from __future__ import division 5 | 6 | import sys 7 | sys.path.append("../") 8 | 9 | import logging 10 | from time import time 11 | import cPickle as pickle 12 | 13 | import numpy as np 14 | 15 | _logger = logging.getLogger() 16 | 17 | 18 | def run_experiment(args): 19 | from learning.experiment import Experiment 20 | from learning.training import Trainer 21 | from learning.termination import EarlyStopping 22 | from learning.monitor import MonitorLL, DLogModelParams, SampleFromP 23 | from learning.dataset import FromH5 24 | from learning.preproc import PermuteColumns 25 | 26 | from learning.models.rws import LayerStack 27 | from learning.models.sbn import SBN, SBNTop 28 | from learning.models.darn import DARN, DARNTop 29 | from learning.models.nade import NADE, NADETop 30 | 31 | 32 | _logger.debug("Arguments %s" % args) 33 | 34 | tags = [] 35 | 36 | np.random.seed(23) 37 | 38 | # Dataset 39 | _logger.info("Dataset %s" % args.dataset) 40 | 41 | fname = "%s.h5" % args.dataset 42 | 43 | preproc = PermuteColumns() 44 | #dataset = FromH5(fname=fname, preproc=[preproc], table_X="train") 45 | #valiset = FromH5(fname=fname, preproc=[preproc], table_X="valid") 46 | #testset = FromH5(fname=fname, preproc=[preproc], table_X="test") 47 | dataset = FromH5(fname=fname, table_X="train") 48 | valiset = FromH5(fname=fname, table_X="valid") 49 | testset = FromH5(fname=fname, table_X="test") 50 | 51 | # Layer models 52 | layer_models = { 53 | "sbn" : (SBN, SBNTop), 54 | "darn": (DARN, DARNTop), 55 | "nade": (NADE, NADETop), 56 | } 57 | 58 | if args.p_model not in layer_models: 59 | raise ValueError("Unknown P-layer model %s" % args.p_model) 60 | p_layer, p_top = layer_models[args.p_model] 61 | 62 | if args.q_model not in layer_models: 63 | raise ValueError("Unknown Q-layer model %s" % args.q_model) 64 | q_layer, q_top = layer_models[args.q_model] 65 | 66 | # Layer sizes 67 | layer_sizes = [int(s) for s in args.layer_sizes.split(",")] 68 | 69 | n_X = dataset.X.shape[1] 70 | 71 | p_layers = [] 72 | q_layers = [] 73 | 74 | for ls in layer_sizes: 75 | n_Y = ls 76 | p_layers.append( 77 | p_layer(n_X=n_X, n_Y=n_Y, clamp_sigmoid=True) 78 | ) 79 | q_layers.append( 80 | q_layer(n_X=n_Y, n_Y=n_X) 81 | ) 82 | n_X = n_Y 83 | p_layers.append( p_top(n_X=n_X, clamp_sigmoid=True) ) 84 | 85 | 86 | model = LayerStack( 87 | p_layers=p_layers, 88 | q_layers=q_layers 89 | ) 90 | model.setup() 91 | 92 | # Learning rate 93 | def lr_tag(value, prefix): 94 | exp = np.floor(np.log10(value)) 95 | leading = ("%e"%value)[0] 96 | return ["%s%s%d" % (prefix, leading, -exp)] 97 | 98 | lr_base = args.lr 99 | tags += lr_tag(lr_base, prefix="lr") 100 | lr_p = args.lr_p 101 | lr_q = args.lr_q 102 | lr_s = args.lr_s 103 | if lr_p is None: 104 | lr_p = lr_base 105 | else: 106 | tags += lr_tag(lr_p, prefix="lp") 
107 | if lr_q is None: 108 | lr_q = lr_base 109 | else: 110 | tags += lr_tag(lr_q, prefix="lq") 111 | if lr_s is None: 112 | lr_s = lr_base 113 | else: 114 | tags += lr_tag(lr_s, prefix="ls") 115 | 116 | # Samples 117 | n_samples = args.samples 118 | tags += ["spl%d"%n_samples] 119 | 120 | # Batch size 121 | batch_size = args.batchsize 122 | tags += ["bs%d"%batch_size] 123 | 124 | # Sleep interleave 125 | sleep_interleave = args.sleep_interleave 126 | tags += ["si%d"%sleep_interleave] 127 | 128 | tags.sort() 129 | expname = "%s-%s-%s-%s-%s"% (args.dataset, "-".join(tags), args.p_model, args.q_model, "-".join([str(s) for s in layer_sizes])) 130 | 131 | _logger.info("Running %s" % expname) 132 | 133 | trainer = Trainer( 134 | batch_size=batch_size, 135 | n_samples=n_samples, 136 | sleep_interleave=sleep_interleave, 137 | learning_rate_p=lr_p, 138 | learning_rate_q=lr_q, 139 | learning_rate_s=lr_s, 140 | layer_discount=1.0, 141 | anneal=1., 142 | dataset=dataset, 143 | model=model, 144 | termination=EarlyStopping(lookahead=5, min_epochs=160), 145 | epoch_monitors=[ 146 | DLogModelParams(), 147 | SampleFromP(), 148 | MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100]), 149 | ], 150 | final_monitors=[ 151 | MonitorLL(name="final-valiset", data=valiset, n_samples=[1, 5, 10, 25, 100, 500, 1000]), 152 | MonitorLL(name="final-testset", data=testset, n_samples=[1, 5, 10, 25, 100, 500, 1000]), 153 | ], 154 | ) 155 | 156 | experiment = Experiment() 157 | experiment.trainer = trainer 158 | experiment.setup_output_dir(expname) 159 | experiment.print_summary() 160 | experiment.setup_logging() 161 | 162 | if args.cont is None: 163 | experiment.run_experiment() 164 | else: 165 | _logger.info("Continuing experiment %s ...." % args.cont) 166 | experiment.continue_experiment(args.cont+"/results.h5") 167 | 168 | experiment.print_summary() 169 | 170 | #============================================================================= 171 | if __name__ == "__main__": 172 | import argparse 173 | 174 | parser = argparse.ArgumentParser() 175 | parser.add_argument('--verbose', '-v', action='count') 176 | parser.add_argument('--cont', nargs='?', default=None, 177 | help="Continue a previous run in result_dir") 178 | parser.add_argument('--samples', default=5, type=int, 179 | help="Number of training samples (default: 5)") 180 | parser.add_argument('--batchsize', default=25, type=int, 181 | help="Mini batch size (default: 25)") 182 | parser.add_argument('--sleep-interleave', '--si', default=2, type=int, 183 | help="Sleep interleave (default: 2)") 184 | parser.add_argument('--dataset', default="adult", type=str, 185 | help="Dataset to use") 186 | parser.add_argument('--lr', default=1e-3, type=float, help="Learning rate (default: 1e-3)") 187 | parser.add_argument('--lr_p', default=None, type=float, help="p learning rate") 188 | parser.add_argument('--lr_q', default=None, type=float, help="wake-q-learning rate") 189 | parser.add_argument('--lr_s', default=None, type=float, help="sleep-q-learning rate") 190 | parser.add_argument('p_model', default="SBN", 191 | help="SBN, DARN or NADE (default: SBN)") 192 | parser.add_argument('q_model', default="SBN", 193 | help="SBN, DARN or NADE (default: SBN)") 194 | parser.add_argument('layer_sizes', default="200,200,10", 195 | help="Comma separated list of sizes. 
Layer closest to the data comes first") 196 | args = parser.parse_args() 197 | 198 | FORMAT = '[%(asctime)s] %(name)-15s %(message)s' 199 | DATEFMT = "%H:%M:%S" 200 | logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.INFO) 201 | 202 | run_experiment(args) 203 | --------------------------------------------------------------------------------
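
Usage sketch: the show-*.py plotting scripts above all read the same results.h5 layout, in which each MonitorLL instance writes one table per sample count, keyed as <dataset>.spl<n_samples>.<quantity> (for example "valiset.spl100.LL" or "final-testset.spl1000.LL") with one row per logged epoch. As a minimal, hypothetical illustration of that convention (not a script from this tree; the output path below is purely illustrative), the final validation log-likelihood could be read back with h5py like this:

    import h5py

    # Hypothetical results file written by run-exp.py / run-uci;
    # point this at an actual output directory.
    fname = "output/some-experiment/results.h5"
    with h5py.File(fname, "r") as h5:
        ll = h5["valiset.spl100.LL"][:]   # one LL estimate per epoch, 100 samples
        print("Epochs logged: %d" % ll.shape[0])
        print("Final LL (100 samples): %.2f" % ll[-1])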