├── .gitignore
├── README.md
├── __init__.py
├── chopped.py
├── datasets
│   ├── __init__.py
│   ├── amazon.py
│   ├── pca_amazon.py
│   └── preprocess_amazon.py
├── experiments
│   ├── analyze.py
│   ├── check.py
│   ├── compare_size.py
│   ├── extract_ppf_data.py
│   ├── plot.py
│   ├── plot_all.py
│   ├── plot_all_ppf.py
│   ├── plot_best.py
│   ├── plot_best_l2.py
│   ├── plot_pff.py
│   ├── plot_ppf_dropout.py
│   ├── random_search_dropout_lwta_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_dropout_lwta_mnist
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_dropout_lwta_mnist_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_dropout_maxout_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── mnist_0.yaml
│   │   ├── mnist_1.yaml
│   │   ├── task1_nested.yaml
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   ├── task_2_template.yaml
│   │   ├── task_3_template.yaml
│   │   └── worker.sh
│   ├── random_search_dropout_maxout_cifar
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   ├── task_2_template.yaml
│   │   └── worker.sh
│   ├── random_search_dropout_maxout_mnist
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_dropout_maxout_mnist_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── local_launch.sh
│   │   ├── local_launch2.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_dropout_relu_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_dropout_relu_mnist
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_dropout_relu_mnist_025
│   │   ├── configure.py
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   └── task_1_template.yaml
│   ├── random_search_dropout_relu_mnist_075
│   │   ├── configure.py
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   └── task_1_template.yaml
│   ├── random_search_dropout_relu_mnist_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── local_launch.sh
│   │   ├── local_launch2.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_dropout_sigmoid_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_dropout_sigmoid_mnist
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_dropout_sigmoid_mnist_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── local_launch.sh
│   │   ├── local_launch2.sh
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_sgd_lwta_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_sgd_lwta_mnist
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_sgd_lwta_mnist_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_sgd_maxout_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_sgd_maxout_mnist
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_sgd_maxout_mnist_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_sgd_relu_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_sgd_relu_mnist
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_sgd_relu_mnist_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── local_launch.sh
│   │   ├── local_launch2.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_sgd_sigmoid_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_sgd_sigmoid_mnist
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── make_launch.py
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   ├── random_search_sgd_sigmoid_mnist_amazon
│   │   ├── configure.py
│   │   ├── launch.sh
│   │   ├── local_launch.sh
│   │   ├── local_launch2.sh
│   │   ├── task_0_template.yaml
│   │   ├── task_1_template.yaml
│   │   └── worker.sh
│   └── size_plot.py
├── lwta.py
├── ppf.py
├── scratch
│   ├── README
│   ├── mnist_pi_60k.yaml
│   ├── sgd_relu_mnist_task_0.yaml
│   ├── sgd_relu_mnist_task_1.yaml
│   ├── sgd_relu_mnist_task_1a.yaml
│   └── sgd_relu_mnist_task_1b.yaml
└── test_lwta.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.py[cod]

# C extensions
*.so

# Packages
*.egg
*.egg-info
dist
build
eggs
parts
bin
var
sdist
develop-eggs
.installed.cfg
lib
lib64
__pycache__

# Installer logs
pip-log.txt

# Unit test / coverage reports
.coverage
.tox
nosetests.xml

# Translations
*.mo

# Mr Developer
.mr.developer.cfg
.project
.pydevproject

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
forgetting
==========

Repository of code for the experiments in the ICLR submission "An Empirical Investigation of Catastrophic Forgetting in Gradient-Based Neural Networks"

--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
__author__ = "Ian Goodfellow"

import numpy as np

from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix

def re_initialize(mlp, re_initialize):

    rng = np.random.RandomState([1, 2, 3])

    if not hasattr(mlp, 'monitor_stack'):
        mlp.monitor_stack = [mlp.monitor]
    else:
        mlp.monitor_stack.append(mlp.monitor)
    del mlp.monitor

    for idx in re_initialize:
        layer = mlp.layers[idx]
        for param in layer.get_params():
            if param.ndim == 2:
                value = param.get_value()
                value = rng.uniform(-layer.irange, layer.irange, value.shape)
                param.set_value(value.astype(param.dtype))
            else:
                assert param.ndim == 1
                value = param.get_value()
                value *= 0
                value += layer.bias_hid
                param.set_value(value.astype(param.dtype))

    return mlp

class permute_and_flip(object):

    def __init__(self, flip = True):
        self.flip = flip

    def apply(self, dataset, can_fit=False):

        X = dataset.X
        if X is None:
            print '!!!!!!!!!!!!!!!!!permute_and_flip does nothing because no data!!!!!!!!!!!!!!!'
            return

        rng = np.random.RandomState([17., 35., 19.])
        n = X.shape[1]

        for i in xrange(X.shape[1]):
            j = rng.randint(n)
            tmp = X[:,i].copy()
            X[:,i] = X[:,j].copy()
            X[:,j] = tmp.copy()

        if self.flip:
            dataset.X = 1. - X

class LimitClass(object):
    def __init__(self, include_classes, size = None):
        self.include_classes = include_classes
        self.size = size

    def apply(self, dataset, can_fit = False):
        indexes = []
        for i in xrange(dataset.y.shape[0]):
            if np.argmax(dataset.y[i]) in self.include_classes:
                indexes.append(i)

        dataset.X = dataset.X[indexes]
        y = dataset.y[indexes]

        if self.size is not None:
            index = range(self.size)
            dataset.rng.shuffle(index)
            dataset.X = dataset.X[index]
            y = y[index]

        # make it one_hot again
        one_hot = np.zeros((y.shape[0], len(self.include_classes)), dtype='float32')
        for i in xrange(y.shape[0]):
            one_hot[i, self.include_classes.index(np.argmax(y[i]))] = 1.
        dataset.y = one_hot
        dataset.data_specs[0].components[1].dim = len(self.include_classes)

def concat(datasets):
    Xs = map(lambda x : x.X, datasets)
    ys = map(lambda x: x.y, datasets)
    X = np.concatenate(Xs, axis=0)
    y = np.concatenate(ys, axis=0)
    return DenseDesignMatrix(X=X, y=y)
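
A minimal usage sketch for the helpers above (hypothetical, not a file in this
repo; it assumes pylearn2's MNIST dataset class is importable and its data path
is configured). permute_and_flip builds the "new task" inputs and concat pools
validation data from both tasks:

    # Sketch only: the split indices here are illustrative.
    from pylearn2.datasets.mnist import MNIST
    from forgetting import permute_and_flip, concat

    old_task = MNIST(which_set='train', one_hot=1, start=50000, stop=60000)
    new_task = MNIST(which_set='train', one_hot=1, start=50000, stop=60000)
    # Deterministic: the RNG inside apply() is seeded, so every job sees the
    # same permuted-and-flipped pixels.
    permute_and_flip(flip=True).apply(new_task)

    valid_both = concat([old_task, new_task])  # one DenseDesignMatrix over both tasks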
--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/goodfeli/forgetting/8a56685ffdd3b23b53080e1820d47e6a2d856f97/datasets/__init__.py

--------------------------------------------------------------------------------
/datasets/pca_amazon.py:
--------------------------------------------------------------------------------
"""
This script makes a dataset of Amazon 5000 reduced to 784 dimensions with PCA.

"""

from pylearn2.utils import serial
from pylearn2.datasets import preprocessing
from pylearn2.utils import string_utils
from sklearn.decomposition import PCA
from forgetting.datasets.amazon import AmazonSmall

data_dir = string_utils.preprocess('${PYLEARN2_DATA_PATH}/multi_domain_sentiment_analysis/acl_07/numpy-data')

#preprocessor = preprocessing.PCA(num_components = 784)

#for category in AmazonSmall.valid_categories:
    #train = AmazonSmall(which_set = 'train', category = category)
    #print train.X.shape
    #train.apply_preprocessor(preprocessor = preprocessor, can_fit = True)
    #train.use_design_loc(data_dir+'/design_loc_train_{}.npy'.format(category))
    #serial.save(data_dir+ '/in-domain-train-{}-784-x.npy'.format(category), train.X)
    #serial.save(data_dir+ '/in-domain-train-{}-784-y.npy'.format(category), train.y)

    #test = AmazonSmall(which_set = 'test', category = category)
    #test.apply_preprocessor(preprocessor = preprocessor, can_fit = False)
    #test.use_design_loc(data_dir+'/design_loc_test_{}.npy'.format(category))
    #serial.save(data_dir+ '/in-domain-test-{}-784-x.npy'.format(category), test.X)
    #serial.save(data_dir+ '/in-domain-test-{}-784-y.npy'.format(category), test.y)

    #serial.save(data_dir + '/{}-preprocessor.pkl'.format(category),preprocessor)

    #print train.X.shape
    #print test.X.shape



def transform(train, test, n_comp):
    pca = PCA(n_components = n_comp)
    pca.fit(train.X)
    train.X = pca.transform(train.X)
    test.X = pca.transform(test.X)
    return train, test



for category in AmazonSmall.valid_categories:
    train = AmazonSmall(which_set = 'train', category = category)
    test = AmazonSmall(which_set = 'test', category = category)
    train, test = transform(train, test, 784)
    serial.save(data_dir+ '/in-domain-train-{}-784-x.npy'.format(category), train.X)
    serial.save(data_dir+ '/in-domain-train-{}-784-y.npy'.format(category), train.y)
    serial.save(data_dir+ '/in-domain-test-{}-784-x.npy'.format(category), test.X)
    serial.save(data_dir+ '/in-domain-test-{}-784-y.npy'.format(category), test.y)
--------------------------------------------------------------------------------
/datasets/preprocess_amazon.py:
--------------------------------------------------------------------------------
import numpy as np
import ipdb

PATH = "/data/lisa/data/multi_domain_sentiment_analysis/acl_07/pylibsvm-data/"
SAVE_PATH = "/data/lisa/data/multi_domain_sentiment_analysis/acl_07/numpy-data/"
NUM_FEAT = 5000

def preprocess(file_path, num_feat):
    """
    Reads Xavier's preprocessed data into binary format and
    saves it as an npy file
    """

    def dict_vec(data, num_feat):
        rval = np.zeros(num_feat)
        for i in xrange(num_feat):
            if data.has_key(i):
                rval[i] = data[i]

        return rval

    f = open(file_path, 'r')
    feats = []
    for line in f:
        line = line.rstrip(' \n').split(' ')
        dic_feat = ({int(key) - 1: int(value) for (key, value) in \
                [item.split(':') for item in line]})
        feats.append(dict_vec(dic_feat, num_feat))
    return np.concatenate([item[np.newaxis, :] for item in feats])

def label(file_path):
    ipdb.set_trace()

if __name__ == "__main__":

    for set in ['train', 'test']:
        for cat in ['kitchen', 'dvd', 'books', 'electronics']:
            path = "{}in-domain-{}-{}-{}.vec".format(PATH, set, cat, NUM_FEAT)
            x = preprocess(path, NUM_FEAT)
            path = "{}in-domain-{}-{}-{}.lab".format(PATH, set, cat, NUM_FEAT)
            y = np.loadtxt(path)

            path = "{}in-domain-{}-{}-{}-x.npy".format(SAVE_PATH, set, cat, NUM_FEAT)
            np.save(path, x)
            path = "{}in-domain-{}-{}-{}-y.npy".format(SAVE_PATH, set, cat, NUM_FEAT)
            np.save(path, y)

        #all
        path = "{}all-domain-{}-{}.vec".format(PATH, set, NUM_FEAT)
        x = preprocess(path, NUM_FEAT)
        path = "{}all-domain-{}-{}.lab".format(PATH, set, NUM_FEAT)
        y = np.loadtxt(path)

        path = "{}in-domain-{}-{}-{}-x.npy".format(SAVE_PATH, set, 'all', NUM_FEAT)
        np.save(path, x)
        path = "{}in-domain-{}-{}-{}-y.npy".format(SAVE_PATH, set, 'all', NUM_FEAT)
        np.save(path, y)
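
For reference, a sketch of the data flow in preprocess() above (the feature
values are made up; only the format is taken from the code). Each line of a
.vec file holds sparse 1-indexed index:count pairs, which dict_vec expands
into a dense length-NUM_FEAT vector after the "int(key) - 1" shift:

    # Hypothetical input line:  "3:1 17:2 5000:4\n"
    # Parsed dict:              {2: 1, 16: 2, 4999: 4}
    # dict_vec(parsed, 5000) -> dense vector with rval[2] == 1., rval[16] == 2.,
    #                           rval[4999] == 4., and zeros elsewhere.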
--------------------------------------------------------------------------------
/experiments/analyze.py:
--------------------------------------------------------------------------------
__author__ = "Ian Goodfellow"

import gc
import numpy as np
import os
import sys
import pickle

from pylearn2.utils import serial

_, d, name = sys.argv

fs = os.listdir(d)

best = np.inf

results = {'name' : [], 'test_old' : [], 'test_new' : []}
for f in fs:
    try:
        model = serial.load(os.path.join(d, f, 'task_1_best.pkl'))
    except Exception:
        print f, ' not to task 1 yet'
        continue
    monitor = model.monitor
    channels = monitor.channels
    def read_channel(s):
        return float(channels[s].val_record[-1])
    task_0_model = serial.load(os.path.join(d, f, 'task_0_best.pkl'))
    monitor = task_0_model.monitor
    v = float(monitor.channels['valid_y_misclass'].val_record[-1])
    print 'job#, orig valid, valid both, new test, old test'
    vb, tn, to = map(read_channel, ['valid_both_y_misclass', 'test_y_misclass', 'test_old_y_misclass'])
    results['name'].append(os.path.join(d, f, 'task_1_best.pkl'))
    results['test_old'].append(to)
    results['test_new'].append(tn)
    if vb < best:
        best = vb
        print '!', [f, v] + [vb, tn, to]
    else:
        print [f, v] + [vb, tn, to]
    gc.collect()

with open("results/{}.pkl".format(name), 'w') as outf:
    pickle.dump(results, outf)

--------------------------------------------------------------------------------
/experiments/check.py:
--------------------------------------------------------------------------------
__author__ = "Ian Goodfellow"

import gc
import numpy as np
import os
import sys

from pylearn2.utils import serial

_, d = sys.argv

fs = os.listdir(d)

best = np.inf

for f in fs:
    try:
        model = serial.load(os.path.join(d, f, 'task_1_best.pkl'))
    except Exception:
        print f, ' not to task 1 yet'
        continue
    try:
        finished_model = serial.load(os.path.join(d, f, 'task_1.pkl'))
    except Exception:
        print f, 'task 1 produced no post-validation output'
        # Without this continue, finished_model is undefined below
        continue
    if not finished_model.monitor.training_succeeded:
        print f, 'task 1 had a problem'
        continue
    monitor = model.monitor
    channels = monitor.channels
    def read_channel(s):
        return float(channels[s].val_record[-1])
    task_0_model = serial.load(os.path.join(d, f, 'task_0_best.pkl'))
    monitor = task_0_model.monitor
    v = float(monitor.channels['valid_y_misclass'].val_record[-1])
    print 'job#, orig valid, valid both, new test, old test'
    vb, tn, to = map(read_channel, ['valid_both_y_misclass', 'test_y_misclass', 'test_old_y_misclass'])
    if vb < best:
        best = vb
        print '!', [f, v] + [vb, tn, to]
    else:
        print [f, v] + [vb, tn, to]
    gc.collect()
--------------------------------------------------------------------------------
/experiments/compare_size.py:
--------------------------------------------------------------------------------
__author__ = "Ian Goodfellow"

import gc
import numpy as np
import os
import sys
from pylearn2.utils import serial
import ipdb



i = 0


def get_val(main_dir):
    d = os.path.join(main_dir, 'exp')
    fs = os.listdir(d)

    x = []
    for f in fs:
        model = serial.load(os.path.join(d, f, 'task_0_best.pkl'))
        x.append(float(model.monitor.channels['test_y_misclass'].val_record[-1]))

        gc.collect()

    return x, fs


def do_stuff(main_path, exp, ds):
    # sgd
    path = os.path.join(main_path, "random_search_sgd_{}_{}/".format(exp, ds))
    x, fs = get_val(path)
    print "sgd", fs[np.argmin(x)]

    path = os.path.join(main_path, "random_search_dropout_{}_{}/".format(exp, ds))
    x, fs = get_val(path)
    print "dropout", fs[np.argmin(x)]



if __name__ == "__main__":
    path = sys.argv[1]
    exp = sys.argv[2]
    ds = sys.argv[3]
    do_stuff(path, exp, ds)

--------------------------------------------------------------------------------
/experiments/extract_ppf_data.py:
--------------------------------------------------------------------------------
__author__ = "Ian Goodfellow"

import gc
import numpy as np
import os
import sys

from pylearn2.utils import serial

from forgetting.ppf import cloud_to_ppf


i = 0

colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', '#555555']
markers = ['d', 'x', '+', 'o', '<', 'v', '^', 's']


data = {'name' : [], 'new' : [], 'old' : []}

for main_dir in sys.argv[1:]:
    print main_dir
    d = os.path.join(main_dir, 'exp')

    fs = os.listdir(d)

    x = []
    y = []
    for f in fs:
        """
        try:
            model = serial.load(os.path.join(d, f, 'task_1_best.pkl'))
        except Exception:
            print f, ' not to task 1 yet'
            continue
        """
        try:
            finished_model = serial.load(os.path.join(d, f, 'task_1.pkl'))
        except Exception:
            print f, 'task 1 produced no post-validation output'
            continue
        if not finished_model.monitor.training_succeeded:
            print f, 'task 1 had a problem'
            continue
        """
        monitor = model.monitor
        channels = monitor.channels
        def read_channel(s):
            return float(channels[s].val_record[-1])
        """

        x += finished_model.monitor.channels['test_old_y_misclass'].val_record
        y += finished_model.monitor.channels['test_y_misclass'].val_record

        gc.collect()
    old, new = cloud_to_ppf(x, y, False)
    data['name'].append(main_dir)
    data['new'].append(new)
    data['old'].append(old)

    i += 1

serial.save('dr_ppf.pkl', data)
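
ppf.py itself is not reproduced in this dump, so the exact behavior of
cloud_to_ppf is an assumption here: from the way its output is plotted, it
plausibly reduces the (old-task error, new-task error) cloud to its Pareto
frontier. A sketch under that assumption (the third argument's meaning is
unknown and kept as an opaque flag):

    def cloud_to_ppf_sketch(x, y, flag=False):
        # Assumed behavior: keep only the Pareto-optimal points, i.e. points
        # for which no other point is at least as good on both tasks.
        pts = sorted(zip(x, y))          # sort by old-task error, ascending
        frontier = []
        best_new = float('inf')
        for old_err, new_err in pts:
            if new_err < best_new:       # strictly improves the new-task error
                frontier.append((old_err, new_err))
                best_new = new_err
        old_out = [p[0] for p in frontier]
        new_out = [p[1] for p in frontier]
        return old_out, new_out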
--------------------------------------------------------------------------------
/experiments/plot.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from pylearn2.utils import serial


EXPS = ['dropout_lwta_amazon', 'sgd_lwta_amazon', 'sgd_relu_amazon', 'dropout_relu_amazon', 'dropout_maxout_amazon', 'sgd_maxout_amazon', 'sgd_sigmoid_amazon', 'dropout_sigmoid_amazon']

for item in EXPS:
    data = serial.load("results/{}.pkl".format(item))
    sort_indx = sorted(range(len(data['test_old'])), key=lambda k: data['test_old'][k])
    plt.plot(np.asarray(data['test_old'])[sort_indx],
             np.asarray(data['test_new'])[sort_indx],
             label = "random_search_{}".format(item))
#plt.yscale('log')
#plt.xscale('log')
plt.legend()
plt.show()


--------------------------------------------------------------------------------
/experiments/plot_all.py:
--------------------------------------------------------------------------------
__author__ = "Ian Goodfellow"

import gc
from matplotlib import pyplot as plt
import numpy as np
import os
import sys

from pylearn2.utils import serial

plt.hold(True)

i = 0

colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', '#555555']
markers = ['d', 'x', '+', 'o', '<', 'v', '^', 's']

for main_dir in sys.argv[1:]:
    print main_dir
    d = os.path.join(main_dir, 'exp')

    fs = os.listdir(d)

    first = True
    for f in fs:
        """
        try:
            model = serial.load(os.path.join(d, f, 'task_1_best.pkl'))
        except Exception:
            print f, ' not to task 1 yet'
            continue
        """
        try:
            finished_model = serial.load(os.path.join(d, f, 'task_1.pkl'))
        except Exception:
            print f, 'task 1 produced no post-validation output'
            assert False
        if not finished_model.monitor.training_succeeded:
            print f, 'task 1 had a problem'
            continue
        """
        monitor = model.monitor
        channels = monitor.channels
        def read_channel(s):
            return float(channels[s].val_record[-1])
        """

        old = finished_model.monitor.channels['test_old_y_misclass'].val_record
        new = finished_model.monitor.channels['test_y_misclass'].val_record

        filtered = [elem for elem in zip(old, new) if max(elem) < .1]

        if len(filtered) == 0:
            continue

        old = [elem[0] for elem in filtered]
        new = [elem[1] for elem in filtered]

        if first:
            plt.scatter(old, new, label=main_dir, color=colors[i], marker=markers[i])
        else:
            plt.scatter(old, new, color=colors[i], marker=markers[i])
        first = False

        gc.collect()



    i += 1

plt.legend()
plt.show()

--------------------------------------------------------------------------------
/experiments/plot_all_ppf.py:
--------------------------------------------------------------------------------
__author__ = "Ian Goodfellow"

import gc
from matplotlib import pyplot as plt
import numpy as np
import os
import sys

from pylearn2.utils import serial

from forgetting.ppf import cloud_to_ppf

plt.hold(True)
ax = plt.gca()
ax.set_xscale('log')
ax.set_yscale('log')

i = 0

colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', '#555555']
markers = ['d', 'x', '+', 'o', '<', 'v', '^', 's']

for main_dir in sys.argv[1:]:
    print main_dir
    d = os.path.join(main_dir, 'exp')

    fs = os.listdir(d)

    x = []
    y = []
    for f in fs:
        """
        try:
            model = serial.load(os.path.join(d, f, 'task_1_best.pkl'))
        except Exception:
            print f, ' not to task 1 yet'
            continue
        """
        try:
            finished_model = serial.load(os.path.join(d, f, 'task_1.pkl'))
        except Exception:
            print f, 'task 1 produced no post-validation output'
            assert False
        if not finished_model.monitor.training_succeeded:
            print f, 'task 1 had a problem'
            continue
        """
        monitor = model.monitor
        channels = monitor.channels
        def read_channel(s):
            return float(channels[s].val_record[-1])
        """

        x += finished_model.monitor.channels['test_old_y_misclass'].val_record
        y += finished_model.monitor.channels['test_y_misclass'].val_record

        gc.collect()
    old, new = cloud_to_ppf(x, y, False)
    plt.plot(old, new, label=main_dir, color=colors[i], marker=markers[i])


    i += 1

plt.legend()
plt.show()
--------------------------------------------------------------------------------
/experiments/plot_best.py:
--------------------------------------------------------------------------------
__author__ = "Ian Goodfellow"

import gc
from matplotlib import pyplot as plt
import numpy as np
import os
import sys

from pylearn2.utils import serial

plt.hold(True)

i = 0

colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', '#eeefff']
markers = ['d', 'x', '+', 'o', '<', 'v', '^', 's']

for main_dir in sys.argv[1:]:
    print main_dir
    d = os.path.join(main_dir, 'exp')

    fs = os.listdir(d)

    best = np.inf
    best_channels = None

    for f in fs:
        try:
            model = serial.load(os.path.join(d, f, 'task_1_best.pkl'))
        except Exception:
            print f, ' not to task 1 yet'
            continue
        try:
            finished_model = serial.load(os.path.join(d, f, 'task_1.pkl'))
        except Exception:
            print f, 'task 1 produced no post-validation output'
            assert False
        if not finished_model.monitor.training_succeeded:
            print f, 'task 1 had a problem'
            continue
        monitor = model.monitor
        channels = monitor.channels
        def read_channel(s):
            return float(channels[s].val_record[-1])
        v = read_channel('valid_both_y_misclass')
        if v < best:
            best = v
            best_channels = finished_model.monitor.channels
        gc.collect()

    old = best_channels['test_old_y_misclass'].val_record
    new = best_channels['test_y_misclass'].val_record

    filtered = [elem for elem in zip(old, new) if max(elem) < .1]

    old = [elem[0] for elem in filtered]
    new = [elem[1] for elem in filtered]

    plt.scatter(old, new, label=main_dir, color=colors[i], marker=markers[i])

    i += 1

plt.legend()
plt.show()

--------------------------------------------------------------------------------
/experiments/plot_best_l2.py:
--------------------------------------------------------------------------------
__author__ = "Ian Goodfellow"

import gc
from matplotlib import pyplot as plt
import numpy as np
import os
import sys

from pylearn2.utils import serial

plt.hold(True)

i = 0

colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', '#555555']
markers = ['d', 'x', '+', 'o', '<', 'v', '^', 's']

for main_dir in sys.argv[1:]:
    print main_dir
    d = os.path.join(main_dir, 'exp')

    fs = os.listdir(d)

    best = np.inf
    best_channels = None

    for f in fs:
        try:
            finished_model = serial.load(os.path.join(d, f, 'task_1.pkl'))
        except Exception:
            print f, 'task 1 produced no post-validation output'
            assert False
        if not finished_model.monitor.training_succeeded:
            print f, 'task 1 had a problem'
            continue
        monitor = finished_model.monitor
        channels = monitor.channels

        old = channels['test_old_y_misclass'].val_record
        new = channels['test_y_misclass'].val_record

        l2 = [np.sqrt(o ** 2. + n ** 2.) for o, n in zip(old, new)]
        v = min(l2)

        if v < best:
            best = v
            best_channels = finished_model.monitor.channels
        gc.collect()

    old = best_channels['test_old_y_misclass'].val_record
    new = best_channels['test_y_misclass'].val_record

    filtered = [elem for elem in zip(old, new) if max(elem) < .1]

    old = [elem[0] for elem in filtered]
    new = [elem[1] for elem in filtered]

    plt.scatter(old, new, label=main_dir, color=colors[i], marker=markers[i])

    i += 1

plt.legend()
plt.show()
--------------------------------------------------------------------------------
/experiments/plot_pff.py:
--------------------------------------------------------------------------------
__author__ = "Ian Goodfellow"

import gc
from matplotlib import pyplot as plt
import numpy as np
import os
import sys
import pickle



plt.hold(True)
ax = plt.gca()
ax.set_xscale('log')
ax.set_yscale('log')


colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', '#555555']
markers = ['d', 'x', '+', 'o', '<', 'v', '^', 's']

data = pickle.load(open(sys.argv[1], 'r'))

def dumb_sort(data):
    sort_keys = ['sgd_sigmoid', 'dropout_sigmoid', 'sgd_relu', 'dropout_relu', 'sgd_maxout', 'dropout_maxout', 'sgd_lwta', 'dropout_lwta']
    new = []
    old = []
    name = []
    for item in sort_keys:
        for i in xrange(len(data['name'])):
            if item in data['name'][i]:
                new.append(data['new'][i])
                old.append(data['old'][i])
                name.append(data['name'][i])
                continue
    return new, old, name

def convert_name(name):
    def capitalize(name):
        if name == 'dropout':
            return 'Dropout'
        if name == 'sgd':
            return 'SGD'
        if name == 'maxout':
            return 'Maxout'
        if name == 'relu':
            return 'ReLUs'
        if name == 'lwta':
            return 'LWTA'
        if name == 'sigmoid':
            return 'Sigmoid'

    name = name.split('_')
    return "{}, {}".format(capitalize(name[2]), capitalize(name[3]))

new_, old_, name_ = dumb_sort(data)
i = 0
for old, new, main_dir in zip(old_, new_, name_):
    plt.plot(old, new, label= convert_name(main_dir), color=colors[i], marker=markers[i])
    i+=1


# mnist
if sys.argv[2] == 'mnist':
    plt.legend(bbox_to_anchor=(0.8,1.))
    plt.xlabel('Test error, old task')
    plt.ylabel('Test error, new task')
    plt.title('Old task: MNIST, New task: MNIST permutation')

# mnist_amazon
if sys.argv[2] == 'mnist_amazon':
    plt.xlabel('Test error, old task')
    plt.ylabel('Test error, new task')
    plt.title('Old task: MNIST (2,9), New task: Amazon (DVD)')
    plt.legend(bbox_to_anchor=(0.87,1.))
    plt.xlim(0.5e-3,1.)

# amazon
if sys.argv[2] == 'amazon':
    plt.legend(bbox_to_anchor=(0.4,1.))
    plt.xlabel('Test error, old task')
    plt.ylabel('Test error, new task')
    plt.title('Old task: Amazon (Kitchen), New task: Amazon (DVD)')


plt.show()
--------------------------------------------------------------------------------
/experiments/plot_ppf_dropout.py:
--------------------------------------------------------------------------------
__author__ = "Ian Goodfellow"

import gc
from matplotlib import pyplot as plt
import numpy as np
import os
import sys
import pickle
import ipdb


plt.hold(True)
ax = plt.gca()
ax.set_xscale('log')
ax.set_yscale('log')


colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', '#555555']
markers = ['d', 'x', '+', 'o', '<', 'v', '^', 's']

data = pickle.load(open(sys.argv[1], 'r'))
#ipdb.set_trace()


def dumb_sort(data):
    sort_keys = ['sgd_sigmoid', 'dropout_sigmoid', 'sgd_relu', 'dropout_relu', 'sgd_maxout', 'dropout_maxout', 'sgd_lwta', 'dropout_lwta']
    sort_keys = [ 'relu_mnist_025', 'dropout_relu_mnist/', 'relu_mnist_075', 'sgd_relu',]
    new = []
    old = []
    name = []
    for item in sort_keys:
        for i in xrange(len(data['name'])):
            if item in data['name'][i]:
                new.append(data['new'][i])
                old.append(data['old'][i])
                name.append(data['name'][i])
                continue
    return new, old, name

def convert_name(name):
    if name == '/data/lisatmp/goodfeli/random_search_dropout_relu_mnist_025/':
        #return 'Dropout p=0.25'
        return 'Inclusion p=0.25'
    if name == '/data/lisatmp/goodfeli/random_search_dropout_relu_mnist_075/':
        #return 'Dropout p=0.75'
        return 'Inclusion p=0.75'
    if name == '/u/goodfeli/forgetting/experiments/random_search_dropout_relu_mnist/':
        #return 'Dropout p=0.5'
        return 'Inclusion p=0.5'
    if name == '/u/goodfeli/forgetting/experiments/random_search_sgd_relu_mnist/':
        #return 'Dropout p=1.0'
        return 'Inclusion p=1.0'



new_, old_, name_ = dumb_sort(data)
#new_, old_, name_ = data['new'], data['old'], data['name']
i = 0
for old, new, main_dir in zip(old_, new_, name_):
    plt.plot(old, new, label= convert_name(main_dir), color=colors[i], marker=markers[i])
    i+=1


# mnist
plt.legend(bbox_to_anchor=(0.8,1.), prop={'size':20})
#plt.legend()
plt.xlabel('Test error, old task')
plt.ylabel('Test error, new task')
plt.title('Old task: MNIST, New task: MNIST permutation, Activation: ReLUs')


plt.show()
--------------------------------------------------------------------------------
/experiments/random_search_dropout_lwta_amazon/configure.py:
--------------------------------------------------------------------------------
import numpy as np

from pylearn2.utils import serial

EXP_PATH = "/RQexec/mirzameh/results/forgetting/random_search_dropout_lwta_amazon/"

num_jobs = 25

rng = np.random.RandomState([2013, 11, 22])

task_0_template = open('task_0_template.yaml', 'r').read()
task_1_template = open('task_1_template.yaml', 'r').read()

for job_id in xrange(num_jobs):

    h0_col_norm = rng.uniform(1., 5.)
    h1_col_norm = rng.uniform(1., 5.)
    y_col_norm = rng.uniform(1., 5.)

    h0_dim = rng.randint(250, 5000)
    num_pieces_0 = rng.randint(2, 6)
    h0_dim -= h0_dim % num_pieces_0
    h1_dim = rng.randint(250, 5000)
    num_pieces_1 = rng.randint(2, 6)
    h1_dim -= h1_dim % num_pieces_1

    def random_init_string():
        irange = 10. ** rng.uniform(-2.3, -1.)
        return "irange: " + str(irange)

    h0_init = random_init_string()
    h1_init = random_init_string()

    if rng.randint(2):
        y_init = "sparse_init: 0"
    else:
        y_init = random_init_string()

    h0_bias = 0.
    h1_bias = 1.


    learning_rate = 10. ** rng.uniform(-2., -.5)

    if rng.randint(2):
        msat = 2
    else:
        msat = rng.randint(2, 1000)

    final_momentum = rng.uniform(.5, .9)

    lr_sat = rng.randint(200, 1000)

    decay = 10. ** rng.uniform(-3, -1)


    task_0_yaml_str = task_0_template % locals()

    serial.mkdir('{}exp/'.format(EXP_PATH) + str(job_id))
    train_file_full_stem = '{}exp/'.format(EXP_PATH)+str(job_id)+'/'
    f = open(train_file_full_stem + 'task_0.yaml', 'w')
    f.write(task_0_yaml_str)
    f.close()

    task_1_yaml_str = task_1_template % locals()

    serial.mkdir('{}exp/'.format(EXP_PATH) + str(job_id))
    f = open(train_file_full_stem + 'task_1.yaml', 'w')
    f.write(task_1_yaml_str)
    f.close()
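
The 10. ** rng.uniform(a, b) pattern used throughout these configure.py
scripts draws hyperparameters log-uniformly, so every order of magnitude in
the range is sampled equally often. For example (seed chosen arbitrarily):

    import numpy as np
    rng = np.random.RandomState(0)
    learning_rate = 10. ** rng.uniform(-2., -.5)  # log-uniform over [0.01, ~0.316]
    decay = 10. ** rng.uniform(-3, -1)            # log-uniform over [0.001, 0.1]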
--------------------------------------------------------------------------------
/experiments/random_search_dropout_lwta_amazon/launch.sh:
--------------------------------------------------------------------------------
jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $SCRATCH/results/forgetting/random_search_dropout_lwta_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_lwta_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}"

--------------------------------------------------------------------------------
/experiments/random_search_dropout_lwta_amazon/make_launch.py:
--------------------------------------------------------------------------------
base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_maxout_mnist/worker.sh $F/experiments/random_search_dropout_maxout_mnist/exp/"{{%(args)s}}\""""
args = ','.join([str(job_id) for job_id in xrange(25)])
f = open('launch.sh', 'w')
f.write(base % locals())
f.close()

--------------------------------------------------------------------------------
/experiments/random_search_dropout_lwta_amazon/task_1_template.yaml:
--------------------------------------------------------------------------------
# Author: Ian Goodfellow
!obj:pylearn2.train.Train {
    dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall {
        which_set: 'train',
        category: 'dvd',
        one_hot: 1,
        start: 0,
        stop: 1000
    },
    model: !obj:pylearn2.monitor.push_monitor {
        model: !pkl: "task_0_best.pkl",
        name: "monitor_first"
    },
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        batch_size: 100,
        learning_rate: %(learning_rate)f,
        init_momentum: .5,
        monitoring_dataset:
            {
                'train' : *train,
                'valid_both' : !obj:forgetting.concat {
                    datasets: [ !obj:forgetting.datasets.amazon.AmazonSmall {
                        which_set: 'train',
                        category: 'dvd',
                        one_hot: 1,
                        start: 1000,
                        stop: 1600
                    }, !obj:forgetting.datasets.amazon.AmazonSmall {
                        which_set: 'train',
                        category: 'kitchen',
                        one_hot: 1,
                        start: 1000,
                        stop: 1600
                    },
                    ]
                },
                'test' : !obj:forgetting.datasets.amazon.AmazonSmall {
                    which_set: 'test',
                    category: 'dvd',
                    one_hot: 1,
                },
                'test_old' : !obj:forgetting.datasets.amazon.AmazonSmall {
                    which_set: 'test',
                    category: 'kitchen',
                    one_hot: 1,
                },
            },
        cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [
            !obj:pylearn2.costs.mlp.dropout.Dropout {
                input_include_probs: { h0: .8 }
            }
            ]
        },
        termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased {
            channel_name: "valid_both_y_misclass",
            prop_decrease: 0.,
            N: 100
        }
    },
    extensions: [
        !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest {
             channel_name: 'valid_both_y_misclass',
             save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl"
        }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor {
            start: 1,
            saturate: %(msat)d,
            final_momentum: %(final_momentum)f
        },
        !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch {
            start: 1,
            saturate: %(lr_sat)d,
            decay_factor: %(decay)f
        }
    ],
    save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl",
    save_freq: 1
}

--------------------------------------------------------------------------------
/experiments/random_search_dropout_lwta_amazon/worker.sh:
--------------------------------------------------------------------------------
#!/bin/bash
cd $1
python ~/projects/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1
echo "starting task 1"
python ~/projects/pylearn2/pylearn2/scripts/train.py task_1.yaml

--------------------------------------------------------------------------------
/experiments/random_search_dropout_lwta_mnist/configure.py:
--------------------------------------------------------------------------------
import numpy as np

from pylearn2.utils import serial

num_jobs = 25

rng = np.random.RandomState([2013, 11, 22])

task_0_template = open('task_0_template.yaml', 'r').read()
task_1_template = open('task_1_template.yaml', 'r').read()

for job_id in xrange(num_jobs):

    h0_col_norm = rng.uniform(1., 5.)
    h1_col_norm = rng.uniform(1., 5.)
    y_col_norm = rng.uniform(1., 5.)

    h0_dim = rng.randint(250, 5000)
    num_pieces_0 = rng.randint(2, 6)
    h0_dim -= h0_dim % num_pieces_0
    h1_dim = rng.randint(250, 5000)
    num_pieces_1 = rng.randint(2, 6)
    h1_dim -= h1_dim % num_pieces_1

    def random_init_string():
        irange = 10. ** rng.uniform(-2.3, -1.)
        return "irange: " + str(irange)

    h0_init = random_init_string()
    h1_init = random_init_string()

    if rng.randint(2):
        y_init = "sparse_init: 0"
    else:
        y_init = random_init_string()

    h0_bias = 0.
    h1_bias = 1.


    learning_rate = 10. ** rng.uniform(-2., -.5)

    if rng.randint(2):
        msat = 2
    else:
        msat = rng.randint(2, 1000)

    final_momentum = rng.uniform(.5, .9)

    lr_sat = rng.randint(200, 1000)

    decay = 10. ** rng.uniform(-3, -1)


    task_0_yaml_str = task_0_template % locals()

    serial.mkdir('exp/' + str(job_id))
    train_file_full_stem = 'exp/'+str(job_id)+'/'
    f = open(train_file_full_stem + 'task_0.yaml', 'w')
    f.write(task_0_yaml_str)
    f.close()

    task_1_yaml_str = task_1_template % locals()

    serial.mkdir('exp/' + str(job_id))
    f = open(train_file_full_stem + 'task_1.yaml', 'w')
    f.write(task_1_yaml_str)
    f.close()
--------------------------------------------------------------------------------
/experiments/random_search_dropout_lwta_mnist/launch.sh:
--------------------------------------------------------------------------------
jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_lwta_mnist/worker.sh $F/experiments/random_search_dropout_lwta_mnist/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}"

--------------------------------------------------------------------------------
/experiments/random_search_dropout_lwta_mnist/make_launch.py:
--------------------------------------------------------------------------------
base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_lwta_mnist/worker.sh $F/experiments/random_search_dropout_lwta_mnist/exp/"{{%(args)s}}\""""
args = ','.join([str(job_id) for job_id in xrange(25)])
f = open('launch.sh', 'w')
f.write(base % locals())
f.close()

--------------------------------------------------------------------------------
/experiments/random_search_dropout_lwta_mnist/worker.sh:
--------------------------------------------------------------------------------
#!/bin/bash
cd $1
train.py task_0.yaml || exit -1
echo "starting task 1"
train.py task_1.yaml

--------------------------------------------------------------------------------
/experiments/random_search_dropout_lwta_mnist_amazon/configure.py:
--------------------------------------------------------------------------------
import numpy as np

from pylearn2.utils import serial

num_jobs = 25

rng = np.random.RandomState([2013, 11, 22])

#task_0_template = open('task_0_template.yaml', 'r').read()
task_1_template = open('task_1_template.yaml', 'r').read()

for job_id in xrange(num_jobs):

    h0_col_norm = rng.uniform(1., 5.)
    h1_col_norm = rng.uniform(1., 5.)
    y_col_norm = rng.uniform(1., 5.)

    h0_dim = rng.randint(250, 5000)
    num_pieces_0 = rng.randint(2, 6)
    h0_dim -= h0_dim % num_pieces_0
    h1_dim = rng.randint(250, 5000)
    num_pieces_1 = rng.randint(2, 6)
    h1_dim -= h1_dim % num_pieces_1

    def random_init_string():
        irange = 10. ** rng.uniform(-2.3, -1.)
        return "irange: " + str(irange)

    h0_init = random_init_string()
    h1_init = random_init_string()

    if rng.randint(2):
        y_init = "sparse_init: 0"
    else:
        y_init = random_init_string()

    h0_bias = 0.
    h1_bias = 1.


    learning_rate = 10. ** rng.uniform(-2., -.5)

    if rng.randint(2):
        msat = 2
    else:
        msat = rng.randint(2, 1000)

    final_momentum = rng.uniform(.5, .9)

    lr_sat = rng.randint(200, 1000)

    decay = 10. ** rng.uniform(-3, -1)


    #task_0_yaml_str = task_0_template % locals()

    #serial.mkdir('exp/' + str(job_id))
    train_file_full_stem = 'exp/'+str(job_id)+'/'
    train_file_full_stem = '{}exp/{}/'.format('/scratch/mmirza/results/forgetting/random_search_dropout_lwta_mnist_amazon/', job_id)
    #f = open(train_file_full_stem + 'task_0.yaml', 'w')
    #f.write(task_0_yaml_str)
    #f.close()

    task_1_yaml_str = task_1_template % locals()

    #serial.mkdir('exp/' + str(job_id))
    f = open(train_file_full_stem + 'task_1.yaml', 'w')
    f.write(task_1_yaml_str)
    f.close()
--------------------------------------------------------------------------------
/experiments/random_search_dropout_lwta_mnist_amazon/launch.sh:
--------------------------------------------------------------------------------
jobdispatch --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=4h --mem=6G --gpu $SCRATCH/results/forgetting/random_search_dropout_lwta_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_lwta_mnist_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}"

--------------------------------------------------------------------------------
/experiments/random_search_dropout_lwta_mnist_amazon/make_launch.py:
--------------------------------------------------------------------------------
base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_lwta_mnist/worker.sh $F/experiments/random_search_dropout_lwta_mnist/exp/"{{%(args)s}}\""""
args = ','.join([str(job_id) for job_id in xrange(25)])
f = open('launch.sh', 'w')
f.write(base % locals())
f.close()

--------------------------------------------------------------------------------
/experiments/random_search_dropout_lwta_mnist_amazon/worker.sh:
--------------------------------------------------------------------------------
#!/bin/bash
cd $1
python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1
echo "starting task 1"
python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_1.yaml
--------------------------------------------------------------------------------
/experiments/random_search_dropout_maxout_amazon/configure.py:
--------------------------------------------------------------------------------
import numpy as np

from pylearn2.utils import serial

num_jobs = 25

rng = np.random.RandomState([2013, 11, 22])

task_0_template = open('task_0_template.yaml', 'r').read()
task_1_template = open('task_1_template.yaml', 'r').read()

for job_id in xrange(num_jobs):

    h0_col_norm = rng.uniform(1., 5.)
    h1_col_norm = rng.uniform(1., 5.)
    y_col_norm = rng.uniform(1., 5.)

    h0_dim = rng.randint(250, 5000)
    num_pieces_0 = rng.randint(2, 6)
    num_units_0 = h0_dim // num_pieces_0
    h1_dim = rng.randint(250, 5000)
    num_pieces_1 = rng.randint(2, 6)
    num_units_1 = h1_dim // num_pieces_1

    def random_init_string():
        irange = 10. ** rng.uniform(-2.3, -1.)
        return "irange: " + str(irange)

    h0_init = random_init_string()
    h1_init = random_init_string()

    if rng.randint(2):
        y_init = "sparse_init: 0"
    else:
        y_init = random_init_string()

    h0_bias = 0.
    h1_bias = 1.


    learning_rate = 10. ** rng.uniform(-2., -.5)

    if rng.randint(2):
        msat = 2
    else:
        msat = rng.randint(2, 1000)

    final_momentum = rng.uniform(.5, .9)

    lr_sat = rng.randint(200, 1000)

    decay = 10. ** rng.uniform(-3, -1)


    task_0_yaml_str = task_0_template % locals()

    serial.mkdir('exp/' + str(job_id))
    train_file_full_stem = 'exp/'+str(job_id)+'/'
    f = open(train_file_full_stem + 'task_0.yaml', 'w')
    f.write(task_0_yaml_str)
    f.close()

    task_1_yaml_str = task_1_template % locals()

    serial.mkdir('exp/' + str(job_id))
    f = open(train_file_full_stem + 'task_1.yaml', 'w')
    f.write(task_1_yaml_str)
    f.close()

--------------------------------------------------------------------------------
/experiments/random_search_dropout_maxout_amazon/launch.sh:
--------------------------------------------------------------------------------
jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $HOME/projects/forgetting/experiments/random_search_dropout_maxout_amazon/worker.sh $HOME/projects/forgetting/experiments/random_search_dropout_maxout_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}"

--------------------------------------------------------------------------------
/experiments/random_search_dropout_maxout_amazon/make_launch.py:
--------------------------------------------------------------------------------
base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_maxout_mnist/worker.sh $F/experiments/random_search_dropout_maxout_mnist/exp/"{{%(args)s}}\""""
args = ','.join([str(job_id) for job_id in xrange(25)])
f = open('launch.sh', 'w')
f.write(base % locals())
f.close()
--------------------------------------------------------------------------------
/experiments/random_search_dropout_maxout_amazon/task_0_template.yaml:
--------------------------------------------------------------------------------
!obj:pylearn2.train.Train {
    dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall {
        which_set: 'train',
        category: 'kitchen',
        one_hot: 1,
        start: 0,
        stop: 1000
    },
    model: !obj:pylearn2.models.mlp.MLP {
        layers: [ !obj:pylearn2.models.maxout.Maxout {
                     max_col_norm: %(h0_col_norm)f,
                     layer_name: 'h0',
                     num_units: %(num_units_0)d,
                     num_pieces: %(num_pieces_0)d,
                     %(h0_init)s,
                     init_bias: %(h0_bias)f
                 }, !obj:pylearn2.models.maxout.Maxout {
                     max_col_norm: %(h1_col_norm)f,
                     layer_name: 'h1',
                     num_units: %(num_units_1)d,
                     num_pieces: %(num_pieces_1)d,
                     %(h1_init)s,
                     init_bias: %(h1_bias)f
                 }, !obj:pylearn2.models.mlp.Softmax {
                     max_col_norm: %(y_col_norm)f,
                     layer_name: 'y',
                     n_classes: 2,
                     %(y_init)s
                 }
                ],
        nvis: 5000,
    },
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        batch_size: 100,
        learning_rate: %(learning_rate)f,
        init_momentum: .5,
        monitoring_dataset:
            {
                'train' : *train,
                'valid' : !obj:forgetting.datasets.amazon.AmazonSmall {
                    which_set: 'train',
                    category: 'kitchen',
                    one_hot: 1,
                    start: 1000,
                    stop: 1600
                },
                'test' : !obj:forgetting.datasets.amazon.AmazonSmall {
                    which_set: 'test',
                    category: 'kitchen',
                    one_hot: 1,
                },
            },
        cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [
            !obj:pylearn2.costs.mlp.dropout.Dropout {
                input_include_probs: { h0: .8 }
            }
            ]
        },
        termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased {
            channel_name: "valid_y_misclass",
            prop_decrease: 0.,
            N: 100
        }
    },
    extensions: [
        !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest {
             channel_name: 'valid_y_misclass',
             save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl"
        }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor {
            start: 1,
            saturate: %(msat)d,
            final_momentum: %(final_momentum)f
        },
        !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch {
            start: 1,
            saturate: %(lr_sat)d,
            decay_factor: %(decay)f
        }
    ]
}

--------------------------------------------------------------------------------
/experiments/random_search_dropout_maxout_amazon/task_1_template.yaml:
--------------------------------------------------------------------------------
!obj:pylearn2.train.Train {
    dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall {
        which_set: 'train',
        category: 'dvd',
        one_hot: 1,
        start: 0,
        stop: 1000
    },
    model: !obj:pylearn2.monitor.push_monitor {
        model: !pkl: "task_0_best.pkl",
        name: "monitor_first"
    },
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        batch_size: 100,
        learning_rate: %(learning_rate)f,
        init_momentum: .5,
        monitoring_dataset:
            {
                'train' : *train,
                'valid_both' : !obj:forgetting.concat {
                    datasets: [ !obj:forgetting.datasets.amazon.AmazonSmall {
                        which_set: 'train',
                        category: 'dvd',
                        one_hot: 1,
                        start: 1000,
                        stop: 1600
                    }, !obj:forgetting.datasets.amazon.AmazonSmall {
                        which_set: 'train',
                        category: 'kitchen',
                        one_hot: 1,
                        start: 1000,
                        stop: 1600
                    },

                    ]
                },
                'test' : !obj:forgetting.datasets.amazon.AmazonSmall {
                    which_set: 'test',
                    category: 'dvd',
                    one_hot: 1,
                },
                'test_old' : !obj:forgetting.datasets.amazon.AmazonSmall {
                    which_set: 'test',
                    category: 'kitchen',
                    one_hot: 1,
                },
            },
        cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [
            !obj:pylearn2.costs.mlp.dropout.Dropout {
                input_include_probs: { h0: .8 }
            }
            ]
        },
        termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased {
            channel_name: "valid_both_y_misclass",
            prop_decrease: 0.,
            N: 100
        }
    },
    extensions: [
        !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest {
             channel_name: 'valid_both_y_misclass',
             save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl"
        }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor {
            start: 1,
            saturate: %(msat)d,
            final_momentum: %(final_momentum)f
        },
        !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch {
            start: 1,
            saturate: %(lr_sat)d,
            decay_factor: %(decay)f
        }
    ],
    save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl",
    save_freq: 1
}
--------------------------------------------------------------------------------
/experiments/random_search_dropout_maxout_amazon/task_2_template.yaml:
--------------------------------------------------------------------------------
!obj:pylearn2.train.Train {
    dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall {
        which_set: 'train',
        category: 'kitchen',
        one_hot: 1,
        start: 0,
        stop: 1000
    },
    model: !obj:pylearn2.models.mlp.MLP {
        layers: [ !obj:pylearn2.models.maxout.Maxout {
                     max_col_norm: %(h0_col_norm)f,
                     layer_name: 'h0',
                     num_units: %(num_units_0)d,
                     num_pieces: %(num_pieces_0)d,
                     %(h0_init)s,
                     init_bias: %(h0_bias)f
                 }, !obj:pylearn2.models.maxout.Maxout {
                     max_col_norm: %(h1_col_norm)f,
                     layer_name: 'h1',
                     num_units: %(num_units_1)d,
                     num_pieces: %(num_pieces_1)d,
                     %(h1_init)s,
                     init_bias: %(h1_bias)f
                 }, !obj:pylearn2.models.mlp.Softmax {
                     max_col_norm: %(y_col_norm)f,
                     layer_name: 'y',
                     n_classes: 2,
                     %(y_init)s
                 }
                ],
        nvis: 5000,
    },
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        batch_size: 100,
        learning_rate: %(learning_rate)f,
        init_momentum: .5,
        monitoring_dataset:
            {
                'train' : *train,
                'valid' : !obj:forgetting.datasets.amazon.AmazonSmall {
                    which_set: 'train',
                    category: 'kitchen',
                    one_hot: 1,
                    start: 1000,
                    stop: 1600
                },
                'test' : !obj:forgetting.datasets.amazon.AmazonSmall {
                    which_set: 'test',
                    category: 'kitchen',
                    one_hot: 1,
                },
            },
        cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [
            !obj:pylearn2.costs.mlp.dropout.Dropout {
                input_include_probs: { h0: .8 }
            }
            ]
        },
        termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased {
            channel_name: "valid_y_misclass",
            prop_decrease: 0.,
            N: 100
        }
    },
    extensions: [
        !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest {
             channel_name: 'valid_y_misclass',
             save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl"
        }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor {
            start: 1,
            saturate: %(msat)d,
            final_momentum: %(final_momentum)f
        },
        !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch {
            start: 1,
            saturate: %(lr_sat)d,
            decay_factor: %(decay)f
        }
    ]
}

--------------------------------------------------------------------------------
/experiments/random_search_dropout_maxout_amazon/worker.sh:
--------------------------------------------------------------------------------
#!/bin/bash
cd $1
python ~/projects/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1
echo "starting task 1"
python ~/projects/pylearn2/pylearn2/scripts/train.py task_1.yaml
23 | 24 | h0_num_chan = channel_sizes[rng.randint(len(channel_sizes))] 25 | h1_num_chan = channel_sizes[rng.randint(len(channel_sizes))] 26 | h2_num_chan = channel_sizes[rng.randint(len(channel_sizes))] 27 | 28 | piece_sizes = [1,2,3] 29 | h0_num_pieces = piece_sizes[rng.randint(len(piece_sizes))] 30 | h1_num_pieces = piece_sizes[rng.randint(len(piece_sizes))] 31 | h2_num_pieces = piece_sizes[rng.randint(len(piece_sizes))] 32 | 33 | h3_dim = rng.randint(250, 5000) 34 | h3_num_pieces = rng.randint(2, 6) 35 | h3_num_units = h3_dim //h3_num_pieces 36 | h4_dim = rng.randint(250, 5000) 37 | h4_num_pieces = rng.randint(2, 6) 38 | h4_num_units = h4_dim //h4_num_pieces 39 | 40 | def random_init_string(): 41 | irange = 10. ** rng.uniform(-2.3, -1.) 42 | return "irange: " + str(irange) 43 | 44 | h0_init = random_init_string() 45 | h1_init = random_init_string() 46 | h2_init = random_init_string() 47 | h3_init = random_init_string() 48 | h4_init = random_init_string() 49 | 50 | if rng.randint(2): 51 | y_init = "sparse_init: 0" 52 | else: 53 | y_init = random_init_string() 54 | 55 | h3_bias = 1. 56 | h4_bias = 1. 57 | 58 | 59 | learning_rate = 10. ** rng.uniform(-2., -.5) 60 | 61 | if rng.randint(2): 62 | msat = 2 63 | else: 64 | msat = rng.randint(2, 1000) 65 | 66 | final_momentum = rng.uniform(.5, .9) 67 | 68 | lr_sat = rng.randint(200, 1000) 69 | 70 | decay = 10. ** rng.uniform(-3, -1) 71 | 72 | 73 | #task_0_yaml_str = task_0_template % locals() 74 | 75 | ##serial.mkdir('exp/' + str(job_id)) 76 | train_file_full_stem = 'exp/'+str(job_id)+'/' 77 | #f = open(train_file_full_stem + 'task_0.yaml', 'w') 78 | #f.write(task_0_yaml_str) 79 | #f.close() 80 | 81 | #task_1_yaml_str = task_1_template % locals() 82 | 83 | ##serial.mkdir('exp/' + str(job_id)) 84 | #f = open(train_file_full_stem + 'task_1.yaml', 'w') 85 | #f.write(task_1_yaml_str) 86 | #f.close() 87 | 88 | task_2_yaml_str = task_2_template % locals() 89 | 90 | #serial.mkdir('exp/' + str(job_id)) 91 | f = open(train_file_full_stem + 'task_2.yaml', 'w') 92 | f.write(task_2_yaml_str) 93 | f.close() 94 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_maxout_cifar/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $HOME/projects/forgetting/experiments/random_search_dropout_maxout_cifar/worker.sh $HOME/projects/forgetting/experiments/random_search_dropout_maxout_cifar/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" 2 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_maxout_cifar/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_maxout_cifar/worker.sh $F/experiments/random_search_dropout_maxout_cifar/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_maxout_cifar/task_1_template.yaml: -------------------------------------------------------------------------------- 1 | !obj:pylearn2.train.Train { 
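# Stage two of the CIFAR-10 -> SVHN pair: task_0_best.pkl (the best CIFAR-10
# model from task 0) is reloaded below through push_monitor and fine-tuned on
# SVHN, while the valid_both and test_old channels keep monitoring the original
# CIFAR-10 data, which is how forgetting of the first task is measured.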
2 | dataset: !obj:pylearn2.datasets.svhn.SVHN_On_Memory { 3 | which_set: 'train', 4 | start: 0, 5 | stop: 50000, 6 | }, 7 | model: !obj:pylearn2.monitor.push_monitor { 8 | model: !pkl: "task_0_best.pkl", 9 | name: "monitor_first" 10 | }, 11 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 12 | learning_rate: %(learning_rate)f, 13 | init_momentum: .5, 14 | monitoring_dataset: 15 | { 16 | 'valid_both': !obj:forgetting.concat { 17 | datasets: [ 18 | !obj:pylearn2.datasets.svhn.SVHN_On_Memory { 19 | which_set: 'train', 20 | start: 50000, 21 | stop: 60000, 22 | }, 23 | !obj:pylearn2.datasets.zca_dataset.ZCA_Dataset { 24 | preprocessed_dataset: !pkl: "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl", 25 | preprocessor: !pkl: "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/preprocessor.pkl", 26 | start: 40000, 27 | stop: 50000, 28 | axes: ['c', 0, 1, 'b'] 29 | },] 30 | }, 31 | 'test' : !obj:pylearn2.datasets.svhn.SVHN_On_Memory { 32 | which_set: 'test', 33 | }, 34 | 'test_old' : !obj:pylearn2.datasets.zca_dataset.ZCA_Dataset { 35 | preprocessed_dataset: !pkl: "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/test.pkl", 36 | preprocessor: !pkl: "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/preprocessor.pkl", 37 | axes: ['c', 0, 1, 'b'] 38 | }, 39 | }, 40 | cost: !obj:pylearn2.costs.mlp.dropout.Dropout { 41 | input_include_probs: { 'h0' : .8 }, 42 | input_scales: { 'h0' : 1. } 43 | }, 44 | termination_criterion: !obj:pylearn2.termination_criteria.EpochCounter { 45 | max_epochs: 785 46 | }, 47 | }, 48 | extensions: [ 49 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 50 | channel_name: 'valid_both_y_misclass', 51 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 52 | }, 53 | !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 54 | start: 1, 55 | saturate: %(msat)i, 56 | final_momentum: %(final_momentum)f, 57 | }, 58 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 59 | start: 1, 60 | saturate: %(lr_sat)d, 61 | decay_factor: %(decay)f, 62 | }, 63 | ], 64 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl", 65 | save_freq: 1 66 | } 67 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_maxout_cifar/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | python ~/projects/pylearn2/pylearn2/scripts/train.py task_2.yaml || exit -1 4 | #echo "starting task 1" 5 | #train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_maxout_mnist/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | num_pieces_0 = rng.randint(2, 6) 20 | num_units_0 = h0_dim // num_pieces_0 21 | h1_dim = rng.randint(250, 5000) 22 | num_pieces_1 = rng.randint(2, 6) 23 | num_units_1 = h1_dim // num_pieces_1 24 | 25 | def random_init_string(): 26 | irange = 10. ** rng.uniform(-2.3, -1.) 
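# note: this returns a literal YAML fragment, which is spliced into the
# templates through the %(h0_init)s / %(h1_init)s placeholders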
27 | return "irange: " + str(irange) 28 | 29 | h0_init = random_init_string() 30 | h1_init = random_init_string() 31 | 32 | if rng.randint(2): 33 | y_init = "sparse_init: 0" 34 | else: 35 | y_init = random_init_string() 36 | 37 | h0_bias = 0. 38 | h1_bias = 1. 39 | 40 | 41 | learning_rate = 10. ** rng.uniform(-2., -.5) 42 | 43 | if rng.randint(2): 44 | msat = 2 45 | else: 46 | msat = rng.randint(2, 1000) 47 | 48 | final_momentum = rng.uniform(.5, .9) 49 | 50 | lr_sat = rng.randint(200, 1000) 51 | 52 | decay = 10. ** rng.uniform(-3, -1) 53 | 54 | 55 | task_0_yaml_str = task_0_template % locals() 56 | 57 | serial.mkdir('exp/' + str(job_id)) 58 | train_file_full_stem = 'exp/'+str(job_id)+'/' 59 | f = open(train_file_full_stem + 'task_0.yaml', 'w') 60 | f.write(task_0_yaml_str) 61 | f.close() 62 | 63 | task_1_yaml_str = task_1_template % locals() 64 | 65 | serial.mkdir('exp/' + str(job_id)) 66 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 67 | f.write(task_1_yaml_str) 68 | f.close() 69 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_maxout_mnist/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_maxout_mnist/worker.sh $F/experiments/random_search_dropout_maxout_mnist/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" -------------------------------------------------------------------------------- /experiments/random_search_dropout_maxout_mnist/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_maxout_mnist/worker.sh $F/experiments/random_search_dropout_maxout_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_maxout_mnist/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_maxout_mnist_amazon/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | num_pieces_0 = rng.randint(2, 6) 20 | num_units_0 = h0_dim // num_pieces_0 21 | h1_dim = rng.randint(250, 5000) 22 | num_pieces_1 = rng.randint(2, 6) 23 | num_units_1 = h1_dim // num_pieces_1 24 | 25 | def random_init_string(): 26 | irange = 10. ** rng.uniform(-2.3, -1.) 
27 | return "irange: " + str(irange) 28 | 29 | h0_init = random_init_string() 30 | h1_init = random_init_string() 31 | 32 | if rng.randint(2): 33 | y_init = "sparse_init: 0" 34 | else: 35 | y_init = random_init_string() 36 | 37 | h0_bias = 0. 38 | h1_bias = 1. 39 | 40 | 41 | learning_rate = 10. ** rng.uniform(-2., -.5) 42 | 43 | if rng.randint(2): 44 | msat = 2 45 | else: 46 | msat = rng.randint(2, 1000) 47 | 48 | final_momentum = rng.uniform(.5, .9) 49 | 50 | lr_sat = rng.randint(200, 1000) 51 | 52 | decay = 10. ** rng.uniform(-3, -1) 53 | 54 | 55 | #task_0_yaml_str = task_0_template % locals() 56 | 57 | #serial.mkdir('exp/' + str(job_id)) 58 | #train_file_full_stem = 'exp/'+str(job_id)+'/' 59 | train_file_full_stem = '{}exp/{}/'.format('/scratch/mmirza/results/forgetting/random_search_dropout_maxout_mnist_amazon/', job_id) 60 | #f = open(train_file_full_stem + 'task_0.yaml', 'w') 61 | #f.write(task_0_yaml_str) 62 | #f.close() 63 | 64 | task_1_yaml_str = task_1_template % locals() 65 | 66 | #serial.mkdir('exp/' + str(job_id)) 67 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 68 | f.write(task_1_yaml_str) 69 | f.close() 70 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_maxout_mnist_amazon/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=3h --mem=6G --gpu $SCRATCH/results/forgetting/random_search_dropout_maxout_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_maxout_mnist_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" 2 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_maxout_mnist_amazon/local_launch.sh: -------------------------------------------------------------------------------- 1 | for fold in 1 2 3 4 5 6 7 8 8 9 10 11 12 2 | do 3 | $SCRATCH/results/forgetting/random_search_dropout_maxout_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_maxout_mnist_amazon/exp/$fold 4 | done 5 | 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_maxout_mnist_amazon/local_launch2.sh: -------------------------------------------------------------------------------- 1 | for fold in 13 14 15 16 17 18 19 20 21 22 23 24 25 2 | do 3 | $SCRATCH/results/forgetting/random_search_dropout_maxout_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_maxout_mnist_amazon/exp/$fold 4 | done 5 | 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_maxout_mnist_amazon/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_maxout_mnist/worker.sh $F/experiments/random_search_dropout_maxout_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_maxout_mnist_amazon/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | 

python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_amazon/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | EXP_PATH = "/RQexec/mirzameh/results/forgetting/random_search_dropout_relu_amazon/" 5 | 6 | num_jobs = 25 7 | 8 | rng = np.random.RandomState([2013, 11, 22]) 9 | 10 | task_0_template = open('task_0_template.yaml', 'r').read() 11 | task_1_template = open('task_1_template.yaml', 'r').read() 12 | 13 | for job_id in xrange(num_jobs): 14 | 15 | h0_col_norm = rng.uniform(1., 5.) 16 | h1_col_norm = rng.uniform(1., 5.) 17 | y_col_norm = rng.uniform(1., 5.) 18 | 19 | h0_dim = rng.randint(250, 5000) 20 | h1_dim = rng.randint(250, 5000) 21 | 22 | def random_init_string(): 23 | if rng.randint(2): 24 | sparse_init = rng.randint(10, 30) 25 | return "sparse_init: " + str(sparse_init) 26 | irange = 10. ** rng.uniform(-2.3, -1.) 27 | return "irange: " + str(irange) 28 | 29 | h0_init = random_init_string() 30 | h1_init = random_init_string() 31 | 32 | if rng.randint(2): 33 | y_init = "sparse_init: 0" 34 | else: 35 | y_init = random_init_string() 36 | 37 | def rectifier_bias(): 38 | if rng.randint(2): 39 | return 0 40 | return rng.uniform(0, .3) 41 | 42 | h0_bias = rectifier_bias() 43 | h1_bias = rectifier_bias() 44 | 45 | 46 | learning_rate = 10. ** rng.uniform(-2., -.5) 47 | 48 | if rng.randint(2): 49 | msat = 2 50 | else: 51 | msat = rng.randint(2, 1000) 52 | 53 | final_momentum = rng.uniform(.5, .9) 54 | 55 | lr_sat = rng.randint(200, 1000) 56 | 57 | decay = 10. 
** rng.uniform(-3, -1) 58 | 59 | 60 | task_0_yaml_str = task_0_template % locals() 61 | 62 | serial.mkdir('{}exp/'.format(EXP_PATH) + str(job_id)) 63 | train_file_full_stem = '{}exp/'.format(EXP_PATH)+str(job_id)+'/' 64 | f = open(train_file_full_stem + 'task_0.yaml', 'w') 65 | f.write(task_0_yaml_str) 66 | f.close() 67 | 68 | task_1_yaml_str = task_1_template % locals() 69 | 70 | serial.mkdir('{}exp/'.format(EXP_PATH) + str(job_id)) 71 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 72 | f.write(task_1_yaml_str) 73 | f.close() 74 | 75 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_amazon/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $SCRATCH/results/forgetting/random_search_dropout_relu_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_relu_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" 2 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_amazon/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_relu_mnist/worker.sh $F/experiments/random_search_dropout_relu_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_amazon/task_0_template.yaml: -------------------------------------------------------------------------------- 1 | !obj:pylearn2.train.Train { 2 | dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall { 3 | which_set: 'train', 4 | category: 'kitchen', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 1000 8 | }, 9 | model: !obj:pylearn2.models.mlp.MLP { 10 | layers: [ !obj:pylearn2.models.mlp.RectifiedLinear { 11 | max_col_norm: %(h0_col_norm)f, 12 | layer_name: 'h0', 13 | dim: %(h0_dim)d, 14 | %(h0_init)s, 15 | init_bias: %(h0_bias)f 16 | }, !obj:pylearn2.models.mlp.RectifiedLinear { 17 | max_col_norm: %(h1_col_norm)f, 18 | layer_name: 'h1', 19 | dim: %(h1_dim)d, 20 | %(h1_init)s, 21 | init_bias: %(h1_bias)f 22 | }, !obj:pylearn2.models.mlp.Softmax { 23 | max_col_norm: %(y_col_norm)f, 24 | layer_name: 'y', 25 | n_classes: 2, 26 | %(y_init)s 27 | } 28 | ], 29 | nvis: 5000, 30 | }, 31 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 32 | batch_size: 100, 33 | learning_rate: %(learning_rate)f, 34 | init_momentum: .5, 35 | monitoring_dataset: 36 | { 37 | 'train' : *train, 38 | 'valid' : !obj:forgetting.datasets.amazon.AmazonSmall { 39 | which_set: 'train', 40 | category: 'kitchen', 41 | one_hot: 1, 42 | start: 1000, 43 | stop: 1600 44 | }, 45 | 'test' : !obj:forgetting.datasets.amazon.AmazonSmall { 46 | which_set: 'test', 47 | category: 'kitchen', 48 | one_hot: 1, 49 | }, 50 | }, 51 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 52 | !obj:pylearn2.costs.mlp.dropout.Dropout { 53 | input_include_probs: { h0: .8 } 54 | } 55 | ] 56 | }, 57 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 58 | channel_name: "valid_y_misclass", 59 | 
prop_decrease: 0., 60 | N: 100 61 | } 62 | }, 63 | extensions: [ 64 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 65 | channel_name: 'valid_y_misclass', 66 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 67 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 68 | start: 1, 69 | saturate: %(msat)d, 70 | final_momentum: %(final_momentum)f 71 | }, 72 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 73 | start: 1, 74 | saturate: %(lr_sat)d, 75 | decay_factor: %(decay)f 76 | } 77 | ] 78 | } 79 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_amazon/task_1_template.yaml: -------------------------------------------------------------------------------- 1 | !obj:pylearn2.train.Train { 2 | dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall { 3 | which_set: 'train', 4 | category: 'dvd', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 1000 8 | }, 9 | model: !obj:pylearn2.monitor.push_monitor { 10 | model: !pkl: "task_0_best.pkl", 11 | name: "monitor_first" 12 | }, 13 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 14 | batch_size: 100, 15 | learning_rate: %(learning_rate)f, 16 | init_momentum: .5, 17 | monitoring_dataset: 18 | { 19 | 'train' : *train, 20 | 'valid_both' : !obj:forgetting.concat { 21 | datasets: [ !obj:forgetting.datasets.amazon.AmazonSmall { 22 | which_set: 'train', 23 | category: 'dvd', 24 | one_hot: 1, 25 | start: 1000, 26 | stop: 1600 27 | }, !obj:forgetting.datasets.amazon.AmazonSmall { 28 | which_set: 'train', 29 | category: 'kitchen', 30 | one_hot: 1, 31 | start: 1000, 32 | stop: 1600 33 | }, 34 | ] 35 | }, 36 | 'test' : !obj:forgetting.datasets.amazon.AmazonSmall { 37 | which_set: 'test', 38 | category: 'dvd', 39 | one_hot: 1, 40 | }, 41 | 'test_old' : !obj:forgetting.datasets.amazon.AmazonSmall { 42 | which_set: 'test', 43 | category: 'kitchen', 44 | one_hot: 1, 45 | }, 46 | }, 47 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 48 | !obj:pylearn2.costs.mlp.dropout.Dropout { 49 | input_include_probs: { h0: .8 } 50 | } 51 | ] 52 | }, 53 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 54 | channel_name: "valid_both_y_misclass", 55 | prop_decrease: 0., 56 | N: 100 57 | } 58 | }, 59 | extensions: [ 60 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 61 | channel_name: 'valid_both_y_misclass', 62 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 63 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 64 | start: 1, 65 | saturate: %(msat)d, 66 | final_momentum: %(final_momentum)f 67 | }, 68 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 69 | start: 1, 70 | saturate: %(lr_sat)d, 71 | decay_factor: %(decay)f 72 | } 73 | ], 74 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl", 75 | save_freq: 1 76 | } 77 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_amazon/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | python ~/projects/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | python ~/projects/pylearn2/pylearn2/scripts/train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_mnist/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --torque 
--env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_relu_mnist/worker.sh $F/experiments/random_search_dropout_relu_mnist/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_mnist/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_relu_mnist/worker.sh $F/experiments/random_search_dropout_relu_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_mnist/task_0_template.yaml: -------------------------------------------------------------------------------- 1 | # Author: Ian Goodfellow 2 | # We train with the un-permuted version first. 3 | # This way after training with the permuted version, we can see if the 4 | # filters in the un-permuted space are still recognizable. 5 | !obj:pylearn2.train.Train { 6 | dataset: &train !obj:pylearn2.datasets.mnist.MNIST { 7 | which_set: 'train', 8 | one_hot: 1, 9 | start: 0, 10 | stop: 50000 11 | }, 12 | model: !obj:pylearn2.models.mlp.MLP { 13 | layers: [ !obj:pylearn2.models.mlp.RectifiedLinear { 14 | max_col_norm: %(h0_col_norm)f, 15 | layer_name: 'h0', 16 | dim: %(h0_dim)d, 17 | %(h0_init)s, 18 | init_bias: %(h0_bias)f 19 | }, !obj:pylearn2.models.mlp.RectifiedLinear { 20 | max_col_norm: %(h1_col_norm)f, 21 | layer_name: 'h1', 22 | dim: %(h1_dim)d, 23 | %(h1_init)s, 24 | init_bias: %(h1_bias)f 25 | }, !obj:pylearn2.models.mlp.Softmax { 26 | max_col_norm: %(y_col_norm)f, 27 | layer_name: 'y', 28 | n_classes: 10, 29 | %(y_init)s 30 | } 31 | ], 32 | nvis: 784, 33 | }, 34 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 35 | batch_size: 100, 36 | learning_rate: %(learning_rate)f, 37 | init_momentum: .5, 38 | monitoring_dataset: 39 | { 40 | 'train' : *train, 41 | 'valid' : !obj:pylearn2.datasets.mnist.MNIST { 42 | which_set: 'train', 43 | one_hot: 1, 44 | start: 50000, 45 | stop: 60000 46 | }, 47 | 'test' : !obj:pylearn2.datasets.mnist.MNIST { 48 | which_set: 'test', 49 | one_hot: 1, 50 | } 51 | }, 52 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 53 | !obj:pylearn2.costs.mlp.dropout.Dropout { 54 | input_include_probs: { h0: .8 } 55 | } 56 | ] 57 | }, 58 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 59 | channel_name: "valid_y_misclass", 60 | prop_decrease: 0., 61 | N: 100 62 | } 63 | }, 64 | extensions: [ 65 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 66 | channel_name: 'valid_y_misclass', 67 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 68 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 69 | start: 1, 70 | saturate: %(msat)d, 71 | final_momentum: %(final_momentum)f 72 | }, 73 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 74 | start: 1, 75 | saturate: %(lr_sat)d, 76 | decay_factor: %(decay)f 77 | } 78 | ] 79 | } 80 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_mnist/worker.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_mnist_025/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | h1_dim = rng.randint(250, 5000) 20 | 21 | def random_init_string(): 22 | if rng.randint(2): 23 | sparse_init = rng.randint(10, 30) 24 | return "sparse_init: " + str(sparse_init) 25 | irange = 10. ** rng.uniform(-2.3, -1.) 26 | return "irange: " + str(irange) 27 | 28 | h0_init = random_init_string() 29 | h1_init = random_init_string() 30 | 31 | if rng.randint(2): 32 | y_init = "sparse_init: 0" 33 | else: 34 | y_init = random_init_string() 35 | 36 | def rectifier_bias(): 37 | if rng.randint(2): 38 | return 0 39 | return rng.uniform(0, .3) 40 | 41 | h0_bias = rectifier_bias() 42 | h1_bias = rectifier_bias() 43 | 44 | 45 | learning_rate = 10. ** rng.uniform(-2., -.5) 46 | 47 | if rng.randint(2): 48 | msat = 2 49 | else: 50 | msat = rng.randint(2, 1000) 51 | 52 | final_momentum = rng.uniform(.5, .9) 53 | 54 | lr_sat = rng.randint(200, 1000) 55 | 56 | decay = 10. ** rng.uniform(-3, -1) 57 | 58 | 59 | task_0_yaml_str = task_0_template % locals() 60 | 61 | serial.mkdir('exp/' + str(job_id)) 62 | train_file_full_stem = 'exp/'+str(job_id)+'/' 63 | f = open(train_file_full_stem + 'task_0.yaml', 'w') 64 | f.write(task_0_yaml_str) 65 | f.close() 66 | 67 | task_1_yaml_str = task_1_template % locals() 68 | 69 | serial.mkdir('exp/' + str(job_id)) 70 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 71 | f.write(task_1_yaml_str) 72 | f.close() 73 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_mnist_025/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_relu_mnist_025/worker.sh $F/experiments/random_search_dropout_relu_mnist_025/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_mnist_075/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 
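# rng is the RandomState([2013, 11, 22]) seeded above, so the 25 sampled
# configurations are reproducible from run to run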
16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | h1_dim = rng.randint(250, 5000) 20 | 21 | def random_init_string(): 22 | if rng.randint(2): 23 | sparse_init = rng.randint(10, 30) 24 | return "sparse_init: " + str(sparse_init) 25 | irange = 10. ** rng.uniform(-2.3, -1.) 26 | return "irange: " + str(irange) 27 | 28 | h0_init = random_init_string() 29 | h1_init = random_init_string() 30 | 31 | if rng.randint(2): 32 | y_init = "sparse_init: 0" 33 | else: 34 | y_init = random_init_string() 35 | 36 | def rectifier_bias(): 37 | if rng.randint(2): 38 | return 0 39 | return rng.uniform(0, .3) 40 | 41 | h0_bias = rectifier_bias() 42 | h1_bias = rectifier_bias() 43 | 44 | 45 | learning_rate = 10. ** rng.uniform(-2., -.5) 46 | 47 | if rng.randint(2): 48 | msat = 2 49 | else: 50 | msat = rng.randint(2, 1000) 51 | 52 | final_momentum = rng.uniform(.5, .9) 53 | 54 | lr_sat = rng.randint(200, 1000) 55 | 56 | decay = 10. ** rng.uniform(-3, -1) 57 | 58 | 59 | task_0_yaml_str = task_0_template % locals() 60 | 61 | serial.mkdir('exp/' + str(job_id)) 62 | train_file_full_stem = 'exp/'+str(job_id)+'/' 63 | f = open(train_file_full_stem + 'task_0.yaml', 'w') 64 | f.write(task_0_yaml_str) 65 | f.close() 66 | 67 | task_1_yaml_str = task_1_template % locals() 68 | 69 | serial.mkdir('exp/' + str(job_id)) 70 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 71 | f.write(task_1_yaml_str) 72 | f.close() 73 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_mnist_075/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_relu_mnist_075/worker.sh $F/experiments/random_search_dropout_relu_mnist_075/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_mnist_amazon/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | num_pieces_0 = rng.randint(2, 6) 20 | num_units_0 = h0_dim // num_pieces_0 21 | h1_dim = rng.randint(250, 5000) 22 | num_pieces_1 = rng.randint(2, 6) 23 | num_units_1 = h1_dim // num_pieces_1 24 | 25 | def random_init_string(): 26 | irange = 10. ** rng.uniform(-2.3, -1.) 27 | return "irange: " + str(irange) 28 | 29 | h0_init = random_init_string() 30 | h1_init = random_init_string() 31 | 32 | if rng.randint(2): 33 | y_init = "sparse_init: 0" 34 | else: 35 | y_init = random_init_string() 36 | 37 | h0_bias = 0. 38 | h1_bias = 1. 39 | 40 | 41 | learning_rate = 10. 
** rng.uniform(-2., -.5) 42 | 43 | if rng.randint(2): 44 | msat = 2 45 | else: 46 | msat = rng.randint(2, 1000) 47 | 48 | final_momentum = rng.uniform(.5, .9) 49 | 50 | lr_sat = rng.randint(200, 1000) 51 | 52 | decay = 10. ** rng.uniform(-3, -1) 53 | 54 | 55 | #task_0_yaml_str = task_0_template % locals() 56 | 57 | #serial.mkdir('exp/' + str(job_id)) 58 | #train_file_full_stem = 'exp/'+str(job_id)+'/' 59 | train_file_full_stem = '{}exp/{}/'.format('/scratch/mmirza/results/forgetting/random_search_dropout_relu_mnist_amazon/', job_id) 60 | #f = open(train_file_full_stem + 'task_0.yaml', 'w') 61 | #f.write(task_0_yaml_str) 62 | #f.close() 63 | 64 | task_1_yaml_str = task_1_template % locals() 65 | 66 | serial.mkdir(train_file_full_stem) 67 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 68 | f.write(task_1_yaml_str) 69 | f.close() 70 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_mnist_amazon/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=3h --mem=6G --gpu $SCRATCH/results/forgetting/random_search_dropout_relu_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_relu_mnist_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" 2 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_mnist_amazon/local_launch.sh: -------------------------------------------------------------------------------- 1 | for fold in 0 1 2 3 4 5 6 7 8 9 10 11 12 2 | do 3 | $SCRATCH/results/forgetting/random_search_dropout_relu_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_relu_mnist_amazon/exp/$fold 4 | done 5 | 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_mnist_amazon/local_launch2.sh: -------------------------------------------------------------------------------- 1 | for fold in 13 14 15 16 17 18 19 20 21 22 23 24 2 | do 3 | $SCRATCH/results/forgetting/random_search_dropout_relu_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_relu_mnist_amazon/exp/$fold 4 | done 5 | 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_mnist_amazon/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_relu_mnist/worker.sh $F/experiments/random_search_dropout_relu_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_relu_mnist_amazon/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_1.yaml 6 | -------------------------------------------------------------------------------- 
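Every worker.sh in these experiment directories follows the same contract: argument 1 is an exp/<job_id> directory, task_0.yaml is trained to convergence on the old task, then task_1.yaml fine-tunes on the new one. A minimal Python stand-in for that contract is sketched below; yaml_parse.load and Train.main_loop are real pylearn2 entry points and the templates themselves reference ${PYLEARN2_TRAIN_FILE_FULL_STEM}, but the script as a whole is illustrative and assumes the generated yamls already sit in the job directory.

    # illustrative worker sketch, not part of the repo
    import os
    import sys
    from pylearn2.config import yaml_parse

    def run(task_yaml):
        # pylearn2's train.py exports the yaml's stem so that the
        # "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" save paths in the
        # templates resolve; mimic that behaviour here
        os.environ['PYLEARN2_TRAIN_FILE_FULL_STEM'] = task_yaml[:-len('.yaml')]
        train = yaml_parse.load(open(task_yaml).read())
        train.main_loop()

    if __name__ == '__main__':
        os.chdir(sys.argv[1])  # same argument as worker.sh: the exp/<job_id> directory
        run('task_0.yaml')     # old task; MonitorBasedSaveBest writes task_0_best.pkl
        run('task_1.yaml')     # new task; its yaml reloads task_0_best.pkl via !pkl: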
/experiments/random_search_dropout_sigmoid_amazon/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | EXP_PATH = "/RQexec/mirzameh/results/forgetting/random_search_dropout_sigmoid_amazon/" 7 | 8 | rng = np.random.RandomState([2013, 11, 22]) 9 | 10 | task_0_template = open('task_0_template.yaml', 'r').read() 11 | task_1_template = open('task_1_template.yaml', 'r').read() 12 | 13 | for job_id in xrange(num_jobs): 14 | 15 | h0_col_norm = rng.uniform(1., 5.) 16 | h1_col_norm = rng.uniform(1., 5.) 17 | y_col_norm = rng.uniform(1., 5.) 18 | 19 | h0_dim = rng.randint(250, 5000) 20 | h1_dim = rng.randint(250, 5000) 21 | 22 | def random_init_string(): 23 | if rng.randint(2): 24 | sparse_init = rng.randint(10, 30) 25 | return "sparse_init: " + str(sparse_init) 26 | irange = 10. ** rng.uniform(-2.3, -1.) 27 | return "irange: " + str(irange) 28 | 29 | h0_init = random_init_string() 30 | h1_init = random_init_string() 31 | 32 | if rng.randint(2): 33 | y_init = "sparse_init: 0" 34 | else: 35 | y_init = random_init_string() 36 | 37 | def sigmoid_bias(): 38 | return rng.uniform(0, .3) 39 | 40 | h0_bias = sigmoid_bias() 41 | h1_bias = sigmoid_bias() 42 | 43 | 44 | learning_rate = 10. ** rng.uniform(-2., -.5) 45 | 46 | if rng.randint(2): 47 | msat = 2 48 | else: 49 | msat = rng.randint(2, 1000) 50 | 51 | final_momentum = rng.uniform(.5, .9) 52 | 53 | lr_sat = rng.randint(200, 1000) 54 | 55 | decay = 10. ** rng.uniform(-3, -1) 56 | 57 | 58 | task_0_yaml_str = task_0_template % locals() 59 | 60 | serial.mkdir('{}exp/'.format(EXP_PATH) + str(job_id)) 61 | train_file_full_stem = '{}exp/'.format(EXP_PATH)+str(job_id)+'/' 62 | f = open(train_file_full_stem + 'task_0.yaml', 'w') 63 | f.write(task_0_yaml_str) 64 | f.close() 65 | 66 | task_1_yaml_str = task_1_template % locals() 67 | 68 | serial.mkdir('{}exp/'.format(EXP_PATH) + str(job_id)) 69 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 70 | f.write(task_1_yaml_str) 71 | f.close() 72 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_sigmoid_amazon/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $SCRATCH/results/forgetting/random_search_dropout_sigmoid_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_sigmoid_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" 2 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_sigmoid_amazon/task_0_template.yaml: -------------------------------------------------------------------------------- 1 | !obj:pylearn2.train.Train { 2 | dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall { 3 | which_set: 'train', 4 | category: 'kitchen', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 1000 8 | }, 9 | model: !obj:pylearn2.models.mlp.MLP { 10 | layers: [ !obj:pylearn2.models.mlp.Sigmoid { 11 | max_col_norm: %(h0_col_norm)f, 12 | layer_name: 'h0', 13 | dim: %(h0_dim)d, 14 | %(h0_init)s, 15 | init_bias: %(h0_bias)f 16 | }, !obj:pylearn2.models.mlp.Sigmoid { 17 | max_col_norm: %(h1_col_norm)f, 18 | layer_name: 'h1', 19 | dim: %(h1_dim)d, 20 | %(h1_init)s, 21 | init_bias: %(h1_bias)f 22 | }, !obj:pylearn2.models.mlp.Softmax { 23 | max_col_norm: %(y_col_norm)f, 
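# (each %(...)s / %(...)f placeholder in this template is filled in by
#  configure.py's "%" substitution; %(y_init)s below becomes either
#  "sparse_init: 0" or an "irange: ..." line)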
24 | layer_name: 'y', 25 | n_classes: 2, 26 | %(y_init)s 27 | } 28 | ], 29 | nvis: 5000, 30 | }, 31 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 32 | batch_size: 100, 33 | learning_rate: %(learning_rate)f, 34 | init_momentum: .5, 35 | monitoring_dataset: 36 | { 37 | 'train' : *train, 38 | 'valid' : !obj:forgetting.datasets.amazon.AmazonSmall { 39 | which_set: 'train', 40 | category: 'kitchen', 41 | one_hot: 1, 42 | start: 1000, 43 | stop: 1600 44 | }, 45 | 'test' : !obj:forgetting.datasets.amazon.AmazonSmall { 46 | which_set: 'test', 47 | category: 'kitchen', 48 | one_hot: 1, 49 | }, 50 | }, 51 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 52 | !obj:pylearn2.costs.mlp.dropout.Dropout { 53 | input_include_probs: { h0: .8 } 54 | } 55 | ] 56 | }, 57 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 58 | channel_name: "valid_y_misclass", 59 | prop_decrease: 0., 60 | N: 100 61 | } 62 | }, 63 | extensions: [ 64 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 65 | channel_name: 'valid_y_misclass', 66 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 67 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 68 | start: 1, 69 | saturate: %(msat)d, 70 | final_momentum: %(final_momentum)f 71 | }, 72 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 73 | start: 1, 74 | saturate: %(lr_sat)d, 75 | decay_factor: %(decay)f 76 | } 77 | ] 78 | } 79 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_sigmoid_amazon/task_1_template.yaml: -------------------------------------------------------------------------------- 1 | !obj:pylearn2.train.Train { 2 | dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall { 3 | which_set: 'train', 4 | category: 'dvd', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 1000 8 | }, 9 | model: !obj:pylearn2.monitor.push_monitor { 10 | model: !pkl: "task_0_best.pkl", 11 | name: "monitor_first" 12 | }, 13 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 14 | batch_size: 100, 15 | learning_rate: %(learning_rate)f, 16 | init_momentum: .5, 17 | monitoring_dataset: 18 | { 19 | 'train' : *train, 20 | 'valid_both' : !obj:forgetting.concat { 21 | datasets: [ !obj:forgetting.datasets.amazon.AmazonSmall { 22 | which_set: 'train', 23 | category: 'dvd', 24 | one_hot: 1, 25 | start: 1000, 26 | stop: 1600 27 | }, !obj:forgetting.datasets.amazon.AmazonSmall { 28 | which_set: 'train', 29 | category: 'kitchen', 30 | one_hot: 1, 31 | start: 1000, 32 | stop: 1600 33 | }, 34 | ] 35 | }, 36 | 'test' : !obj:forgetting.datasets.amazon.AmazonSmall { 37 | which_set: 'test', 38 | category: 'dvd', 39 | one_hot: 1, 40 | }, 41 | 'test_old' : !obj:forgetting.datasets.amazon.AmazonSmall { 42 | which_set: 'test', 43 | category: 'kitchen', 44 | one_hot: 1, 45 | }, 46 | }, 47 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 48 | !obj:pylearn2.costs.mlp.dropout.Dropout { 49 | input_include_probs: { h0: .8 } 50 | } 51 | ] 52 | }, 53 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 54 | channel_name: "valid_both_y_misclass", 55 | prop_decrease: 0., 56 | N: 100 57 | } 58 | }, 59 | extensions: [ 60 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 61 | channel_name: 'valid_both_y_misclass', 62 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 63 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 64 | start: 1, 65 | saturate: %(msat)d, 66 | final_momentum: %(final_momentum)f 67 | }, 68 | 
!obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 69 | start: 1, 70 | saturate: %(lr_sat)d, 71 | decay_factor: %(decay)f 72 | } 73 | ], 74 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl", 75 | save_freq: 1 76 | } 77 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_sigmoid_amazon/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | python ~/projects/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | python ~/projects/pylearn2/pylearn2/scripts/train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_sigmoid_mnist/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | h1_dim = rng.randint(250, 5000) 20 | 21 | def random_init_string(): 22 | if rng.randint(2): 23 | sparse_init = rng.randint(10, 30) 24 | return "sparse_init: " + str(sparse_init) 25 | irange = 10. ** rng.uniform(-2.3, -1.) 26 | return "irange: " + str(irange) 27 | 28 | h0_init = random_init_string() 29 | h1_init = random_init_string() 30 | 31 | if rng.randint(2): 32 | y_init = "sparse_init: 0" 33 | else: 34 | y_init = random_init_string() 35 | 36 | def sigmoid_bias(): 37 | return rng.uniform(0, .3) 38 | 39 | h0_bias = sigmoid_bias() 40 | h1_bias = sigmoid_bias() 41 | 42 | 43 | learning_rate = 10. ** rng.uniform(-2., -.5) 44 | 45 | if rng.randint(2): 46 | msat = 2 47 | else: 48 | msat = rng.randint(2, 1000) 49 | 50 | final_momentum = rng.uniform(.5, .9) 51 | 52 | lr_sat = rng.randint(200, 1000) 53 | 54 | decay = 10. ** rng.uniform(-3, -1) 55 | 56 | 57 | task_0_yaml_str = task_0_template % locals() 58 | 59 | serial.mkdir('exp/' + str(job_id)) 60 | train_file_full_stem = 'exp/'+str(job_id)+'/' 61 | f = open(train_file_full_stem + 'task_0.yaml', 'w') 62 | f.write(task_0_yaml_str) 63 | f.close() 64 | 65 | task_1_yaml_str = task_1_template % locals() 66 | 67 | serial.mkdir('exp/' + str(job_id)) 68 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 69 | f.write(task_1_yaml_str) 70 | f.close() 71 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_sigmoid_mnist/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_sigmoid_mnist/worker.sh $F/experiments/random_search_dropout_sigmoid_mnist/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" -------------------------------------------------------------------------------- /experiments/random_search_dropout_sigmoid_mnist/task_0_template.yaml: -------------------------------------------------------------------------------- 1 | # Author: Ian Goodfellow 2 | # We train with the un-permuted version first. 
3 | # This way after training with the permuted version, we can see if the 4 | # filters in the un-permuted space are still recognizable. 5 | !obj:pylearn2.train.Train { 6 | dataset: &train !obj:pylearn2.datasets.mnist.MNIST { 7 | which_set: 'train', 8 | one_hot: 1, 9 | start: 0, 10 | stop: 50000 11 | }, 12 | model: !obj:pylearn2.models.mlp.MLP { 13 | layers: [ !obj:pylearn2.models.mlp.Sigmoid { 14 | max_col_norm: %(h0_col_norm)f, 15 | layer_name: 'h0', 16 | dim: %(h0_dim)d, 17 | %(h0_init)s, 18 | init_bias: %(h0_bias)f 19 | }, !obj:pylearn2.models.mlp.Sigmoid { 20 | max_col_norm: %(h1_col_norm)f, 21 | layer_name: 'h1', 22 | dim: %(h1_dim)d, 23 | %(h1_init)s, 24 | init_bias: %(h1_bias)f 25 | }, !obj:pylearn2.models.mlp.Softmax { 26 | max_col_norm: %(y_col_norm)f, 27 | layer_name: 'y', 28 | n_classes: 10, 29 | %(y_init)s 30 | } 31 | ], 32 | nvis: 784, 33 | }, 34 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 35 | batch_size: 100, 36 | learning_rate: %(learning_rate)f, 37 | init_momentum: .5, 38 | monitoring_dataset: 39 | { 40 | 'train' : *train, 41 | 'valid' : !obj:pylearn2.datasets.mnist.MNIST { 42 | which_set: 'train', 43 | one_hot: 1, 44 | start: 50000, 45 | stop: 60000 46 | }, 47 | 'test' : !obj:pylearn2.datasets.mnist.MNIST { 48 | which_set: 'test', 49 | one_hot: 1, 50 | } 51 | }, 52 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 53 | !obj:pylearn2.costs.mlp.dropout.Dropout { 54 | input_include_probs: { h0: .8 } 55 | } 56 | ] 57 | }, 58 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 59 | channel_name: "valid_y_misclass", 60 | prop_decrease: 0., 61 | N: 100 62 | } 63 | }, 64 | extensions: [ 65 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 66 | channel_name: 'valid_y_misclass', 67 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 68 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 69 | start: 1, 70 | saturate: %(msat)d, 71 | final_momentum: %(final_momentum)f 72 | }, 73 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 74 | start: 1, 75 | saturate: %(lr_sat)d, 76 | decay_factor: %(decay)f 77 | } 78 | ] 79 | } 80 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_sigmoid_mnist/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_sigmoid_mnist_amazon/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | h1_dim = rng.randint(250, 5000) 20 | 21 | def random_init_string(): 22 | if rng.randint(2): 23 | sparse_init = rng.randint(10, 30) 24 | return "sparse_init: " + str(sparse_init) 25 | irange = 10. ** rng.uniform(-2.3, -1.) 
26 | return "irange: " + str(irange) 27 | 28 | h0_init = random_init_string() 29 | h1_init = random_init_string() 30 | 31 | if rng.randint(2): 32 | y_init = "sparse_init: 0" 33 | else: 34 | y_init = random_init_string() 35 | 36 | def sigmoid_bias(): 37 | return rng.uniform(0, .3) 38 | 39 | h0_bias = sigmoid_bias() 40 | h1_bias = sigmoid_bias() 41 | 42 | 43 | learning_rate = 10. ** rng.uniform(-2., -.5) 44 | 45 | if rng.randint(2): 46 | msat = 2 47 | else: 48 | msat = rng.randint(2, 1000) 49 | 50 | final_momentum = rng.uniform(.5, .9) 51 | 52 | lr_sat = rng.randint(200, 1000) 53 | 54 | decay = 10. ** rng.uniform(-3, -1) 55 | 56 | 57 | #task_0_yaml_str = task_0_template % locals() 58 | 59 | #serial.mkdir('exp/' + str(job_id)) 60 | #train_file_full_stem = 'exp/'+str(job_id)+'/' 61 | train_file_full_stem = '{}exp/{}/'.format('/scratch/mmirza/results/forgetting/random_search_dropout_sigmoid_mnist_amazon/', job_id) 62 | #f = open(train_file_full_stem + 'task_0.yaml', 'w') 63 | #f.write(task_0_yaml_str) 64 | #f.close() 65 | 66 | task_1_yaml_str = task_1_template % locals() 67 | 68 | serial.mkdir('exp/' + str(job_id)) 69 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 70 | f.write(task_1_yaml_str) 71 | f.close() 72 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_sigmoid_mnist_amazon/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=2h --mem=6G --gpu $SCRATCH/results/forgetting/random_search_dropout_sigmoid_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_sigmoid_mnist_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" 2 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_sigmoid_mnist_amazon/local_launch.sh: -------------------------------------------------------------------------------- 1 | for fold in 1 2 3 4 5 6 7 8 8 9 10 11 12 2 | do 3 | $SCRATCH/results/forgetting/random_search_dropout_maxout_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_maxout_mnist_amazon/exp/$fold 4 | done 5 | 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_sigmoid_mnist_amazon/local_launch2.sh: -------------------------------------------------------------------------------- 1 | for fold in 13 14 15 16 17 18 19 20 21 22 23 24 25 2 | do 3 | $SCRATCH/results/forgetting/random_search_dropout_maxout_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_maxout_mnist_amazon/exp/$fold 4 | done 5 | 6 | -------------------------------------------------------------------------------- /experiments/random_search_dropout_sigmoid_mnist_amazon/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_amazon/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | EXP_PATH = "/RQexec/mirzameh/results/forgetting/random_search_sgd_lwta_amazon/" 
5 | 6 | num_jobs = 25 7 | 8 | rng = np.random.RandomState([2013, 11, 22]) 9 | 10 | task_0_template = open('task_0_template.yaml', 'r').read() 11 | task_1_template = open('task_1_template.yaml', 'r').read() 12 | 13 | for job_id in xrange(num_jobs): 14 | 15 | h0_col_norm = rng.uniform(1., 5.) 16 | h1_col_norm = rng.uniform(1., 5.) 17 | y_col_norm = rng.uniform(1., 5.) 18 | 19 | h0_dim = rng.randint(250, 5000) 20 | num_pieces_0 = rng.randint(2, 6) 21 | h0_dim -= h0_dim % num_pieces_0 22 | h1_dim = rng.randint(250, 5000) 23 | num_pieces_1 = rng.randint(2, 6) 24 | h1_dim -= h1_dim % num_pieces_1 25 | 26 | def random_init_string(): 27 | irange = 10. ** rng.uniform(-2.3, -1.) 28 | return "irange: " + str(irange) 29 | 30 | h0_init = random_init_string() 31 | h1_init = random_init_string() 32 | 33 | if rng.randint(2): 34 | y_init = "sparse_init: 0" 35 | else: 36 | y_init = random_init_string() 37 | 38 | h0_bias = 0. 39 | h1_bias = 1. 40 | 41 | 42 | learning_rate = 10. ** rng.uniform(-2., -.5) 43 | 44 | if rng.randint(2): 45 | msat = 2 46 | else: 47 | msat = rng.randint(2, 1000) 48 | 49 | final_momentum = rng.uniform(.5, .9) 50 | 51 | lr_sat = rng.randint(200, 1000) 52 | 53 | decay = 10. ** rng.uniform(-3, -1) 54 | 55 | 56 | task_0_yaml_str = task_0_template % locals() 57 | 58 | serial.mkdir('{}exp/'.format(EXP_PATH) + str(job_id)) 59 | train_file_full_stem = '{}exp/'.format(EXP_PATH)+str(job_id)+'/' 60 | f = open(train_file_full_stem + 'task_0.yaml', 'w') 61 | f.write(task_0_yaml_str) 62 | f.close() 63 | 64 | task_1_yaml_str = task_1_template % locals() 65 | 66 | serial.mkdir('{}exp/'.format(EXP_PATH) + str(job_id)) 67 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 68 | f.write(task_1_yaml_str) 69 | f.close() 70 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_amazon/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $SCRATCH/results/forgetting/random_search_sgd_lwta_amazon/worker.sh $SCRATCH/results/forgetting/random_search_sgd_lwta_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" 2 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_amazon/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_maxout_mnist/worker.sh $F/experiments/random_search_dropout_maxout_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_amazon/task_0_template.yaml: -------------------------------------------------------------------------------- 1 | !obj:pylearn2.train.Train { 2 | dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall { 3 | which_set: 'train', 4 | category: 'kitchen', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 1000 8 | }, 9 | model: !obj:pylearn2.models.mlp.MLP { 10 | layers: [ !obj:forgetting.lwta.LWTA { 11 | max_col_norm: %(h0_col_norm)f, 12 | layer_name: 'h0', 13 | dim: %(h0_dim)d, 14 | block_size: %(num_pieces_0)d, 15 | 
%(h0_init)s, 16 | init_bias: %(h0_bias)f 17 | }, 18 | !obj:forgetting.lwta.LWTA { 19 | max_col_norm: %(h1_col_norm)f, 20 | layer_name: 'h1', 21 | dim: %(h1_dim)d, 22 | block_size: %(num_pieces_1)d, 23 | %(h1_init)s, 24 | init_bias: %(h1_bias)f 25 | }, !obj:pylearn2.models.mlp.Softmax { 26 | max_col_norm: %(y_col_norm)f, 27 | layer_name: 'y', 28 | n_classes: 2, 29 | %(y_init)s 30 | } 31 | ], 32 | nvis: 5000, 33 | }, 34 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 35 | batch_size: 100, 36 | learning_rate: %(learning_rate)f, 37 | init_momentum: .5, 38 | monitoring_dataset: 39 | { 40 | 'train' : *train, 41 | 'valid' : !obj:forgetting.datasets.amazon.AmazonSmall { 42 | which_set: 'train', 43 | category: 'kitchen', 44 | one_hot: 1, 45 | start: 1000, 46 | stop: 1600 47 | }, 48 | 'test' : !obj:forgetting.datasets.amazon.AmazonSmall { 49 | which_set: 'test', 50 | category: 'kitchen', 51 | one_hot: 1, 52 | }, 53 | }, 54 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 55 | !obj:pylearn2.costs.mlp.Default {}, 56 | ] 57 | }, 58 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 59 | channel_name: "valid_y_misclass", 60 | prop_decrease: 0., 61 | N: 100 62 | } 63 | }, 64 | extensions: [ 65 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 66 | channel_name: 'valid_y_misclass', 67 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 68 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 69 | start: 1, 70 | saturate: %(msat)d, 71 | final_momentum: %(final_momentum)f 72 | }, 73 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 74 | start: 1, 75 | saturate: %(lr_sat)d, 76 | decay_factor: %(decay)f 77 | } 78 | ] 79 | } 80 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_amazon/task_1_template.yaml: -------------------------------------------------------------------------------- 1 | # Author: Ian Goodfellow 2 | !obj:pylearn2.train.Train { 3 | dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall { 4 | which_set: 'train', 5 | category: 'dvd', 6 | one_hot: 1, 7 | start: 0, 8 | stop: 1000 9 | }, 10 | model: !obj:pylearn2.monitor.push_monitor { 11 | model: !pkl: "task_0_best.pkl", 12 | name: "monitor_first" 13 | }, 14 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 15 | batch_size: 100, 16 | learning_rate: %(learning_rate)f, 17 | init_momentum: .5, 18 | monitoring_dataset: 19 | { 20 | 'train' : *train, 21 | 'valid_both' : !obj:forgetting.concat { 22 | datasets: [ !obj:forgetting.datasets.amazon.AmazonSmall { 23 | which_set: 'train', 24 | category: 'dvd', 25 | one_hot: 1, 26 | start: 1000, 27 | stop: 1600 28 | }, !obj:forgetting.datasets.amazon.AmazonSmall { 29 | which_set: 'train', 30 | category: 'kitchen', 31 | one_hot: 1, 32 | start: 1000, 33 | stop: 1600 34 | }, 35 | ] 36 | }, 37 | 'test' : !obj:forgetting.datasets.amazon.AmazonSmall { 38 | which_set: 'test', 39 | category: 'dvd', 40 | one_hot: 1, 41 | }, 42 | 'test_old' : !obj:forgetting.datasets.amazon.AmazonSmall { 43 | which_set: 'test', 44 | category: 'kitchen', 45 | one_hot: 1, 46 | }, 47 | }, 48 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 49 | !obj:pylearn2.costs.mlp.Default {}, 50 | ] 51 | }, 52 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 53 | channel_name: "valid_both_y_misclass", 54 | prop_decrease: 0., 55 | N: 100 56 | } 57 | }, 58 | extensions: [ 59 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 60 | channel_name: 
'valid_both_y_misclass', 61 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 62 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 63 | start: 1, 64 | saturate: %(msat)d, 65 | final_momentum: %(final_momentum)f 66 | }, 67 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 68 | start: 1, 69 | saturate: %(lr_sat)d, 70 | decay_factor: %(decay)f 71 | } 72 | ], 73 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl", 74 | save_freq: 1 75 | } 76 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_amazon/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | python ~/projects/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | python ~/projects/pylearn2/pylearn2/scripts/train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_mnist/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | num_pieces_0 = rng.randint(2, 6) 20 | h0_dim -= h0_dim % num_pieces_0 21 | h1_dim = rng.randint(250, 5000) 22 | num_pieces_1 = rng.randint(2, 6) 23 | h1_dim -= h1_dim % num_pieces_1 24 | 25 | def random_init_string(): 26 | irange = 10. ** rng.uniform(-2.3, -1.) 27 | return "irange: " + str(irange) 28 | 29 | h0_init = random_init_string() 30 | h1_init = random_init_string() 31 | 32 | if rng.randint(2): 33 | y_init = "sparse_init: 0" 34 | else: 35 | y_init = random_init_string() 36 | 37 | h0_bias = 0. 38 | h1_bias = 1. 39 | 40 | 41 | learning_rate = 10. ** rng.uniform(-2., -.5) 42 | 43 | if rng.randint(2): 44 | msat = 2 45 | else: 46 | msat = rng.randint(2, 1000) 47 | 48 | final_momentum = rng.uniform(.5, .9) 49 | 50 | lr_sat = rng.randint(200, 1000) 51 | 52 | decay = 10. 
** rng.uniform(-3, -1) 53 | 54 | 55 | task_0_yaml_str = task_0_template % locals() 56 | 57 | serial.mkdir('exp/' + str(job_id)) 58 | train_file_full_stem = 'exp/'+str(job_id)+'/' 59 | f = open(train_file_full_stem + 'task_0.yaml', 'w') 60 | f.write(task_0_yaml_str) 61 | f.close() 62 | 63 | task_1_yaml_str = task_1_template % locals() 64 | 65 | serial.mkdir('exp/' + str(job_id)) 66 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 67 | f.write(task_1_yaml_str) 68 | f.close() 69 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_mnist/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_sgd_lwta_mnist/worker.sh $F/experiments/random_search_sgd_lwta_mnist/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_mnist/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_sgd_lwta_mnist/worker.sh $F/experiments/random_search_sgd_lwta_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_mnist/task_0_template.yaml: -------------------------------------------------------------------------------- 1 | # Author: Ian Goodfellow 2 | # We train with the un-permuted version first. 3 | # This way after training with the permuted version, we can see if the 4 | # filters in the un-permuted space are still recognizable. 
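# Concretely: task_1_template.yaml reloads the weights this run saves as
# task_0_best.pkl, retrains on MNIST whose pixels are permuted by
# forgetting.permute_and_flip, and keeps a 'test_old' channel on the
# original, un-permuted test set so the degree of forgetting stays visible.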
5 | !obj:pylearn2.train.Train { 6 | dataset: &train !obj:pylearn2.datasets.mnist.MNIST { 7 | which_set: 'train', 8 | one_hot: 1, 9 | start: 0, 10 | stop: 50000 11 | }, 12 | model: !obj:pylearn2.models.mlp.MLP { 13 | layers: [ !obj:forgetting.lwta.LWTA { 14 | max_col_norm: %(h0_col_norm)f, 15 | layer_name: 'h0', 16 | dim: %(h0_dim)d, 17 | block_size: %(num_pieces_0)d, 18 | %(h0_init)s, 19 | init_bias: %(h0_bias)f 20 | }, 21 | !obj:forgetting.lwta.LWTA { 22 | max_col_norm: %(h1_col_norm)f, 23 | layer_name: 'h1', 24 | dim: %(h1_dim)d, 25 | block_size: %(num_pieces_1)d, 26 | %(h1_init)s, 27 | init_bias: %(h1_bias)f 28 | }, !obj:pylearn2.models.mlp.Softmax { 29 | max_col_norm: %(y_col_norm)f, 30 | layer_name: 'y', 31 | n_classes: 10, 32 | %(y_init)s 33 | } 34 | ], 35 | nvis: 784, 36 | }, 37 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 38 | batch_size: 100, 39 | learning_rate: %(learning_rate)f, 40 | init_momentum: .5, 41 | monitoring_dataset: 42 | { 43 | 'train' : *train, 44 | 'valid' : !obj:pylearn2.datasets.mnist.MNIST { 45 | which_set: 'train', 46 | one_hot: 1, 47 | start: 50000, 48 | stop: 60000 49 | }, 50 | 'test' : !obj:pylearn2.datasets.mnist.MNIST { 51 | which_set: 'test', 52 | one_hot: 1, 53 | } 54 | }, 55 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 56 | !obj:pylearn2.costs.mlp.Default {}, 57 | ] 58 | }, 59 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 60 | channel_name: "valid_y_misclass", 61 | prop_decrease: 0., 62 | N: 100 63 | } 64 | }, 65 | extensions: [ 66 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 67 | channel_name: 'valid_y_misclass', 68 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 69 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 70 | start: 1, 71 | saturate: %(msat)d, 72 | final_momentum: %(final_momentum)f 73 | }, 74 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 75 | start: 1, 76 | saturate: %(lr_sat)d, 77 | decay_factor: %(decay)f 78 | } 79 | ] 80 | } 81 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_mnist/task_1_template.yaml: -------------------------------------------------------------------------------- 1 | # Author: Ian Goodfellow 2 | !obj:pylearn2.train.Train { 3 | dataset: &train !obj:pylearn2.datasets.mnist.MNIST { 4 | which_set: 'train', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 50000, 8 | preprocessor: !obj:forgetting.permute_and_flip { 9 | flip: 0 10 | } 11 | }, 12 | model: !obj:pylearn2.monitor.push_monitor { 13 | model: !pkl: "task_0_best.pkl", 14 | name: "monitor_first" 15 | }, 16 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 17 | batch_size: 100, 18 | learning_rate: %(learning_rate)f, 19 | init_momentum: .5, 20 | monitoring_dataset: 21 | { 22 | 'train' : *train, 23 | 'valid_both' : !obj:forgetting.concat { 24 | datasets: [ 25 | !obj:pylearn2.datasets.mnist.MNIST { 26 | which_set: 'train', 27 | one_hot: 1, 28 | start: 50000, 29 | stop: 60000, 30 | }, 31 | !obj:pylearn2.datasets.mnist.MNIST { 32 | which_set: 'train', 33 | one_hot: 1, 34 | start: 50000, 35 | stop: 60000, 36 | preprocessor: !obj:forgetting.permute_and_flip { 37 | flip: 0 38 | } 39 | } 40 | ] 41 | }, 42 | 'test' : !obj:pylearn2.datasets.mnist.MNIST { 43 | which_set: 'test', 44 | one_hot: 1, 45 | preprocessor: !obj:forgetting.permute_and_flip { 46 | flip: 0 47 | } 48 | }, 49 | 'test_old' : !obj:pylearn2.datasets.mnist.MNIST { 50 | which_set: 'test', 51 | one_hot: 1, 52 | } 53 | }, 54 | cost: 
!obj:pylearn2.costs.cost.SumOfCosts { costs: [ 55 | !obj:pylearn2.costs.mlp.Default {}, 56 | ] 57 | }, 58 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 59 | channel_name: "valid_both_y_misclass", 60 | prop_decrease: 0., 61 | N: 100 62 | } 63 | }, 64 | extensions: [ 65 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 66 | channel_name: 'valid_both_y_misclass', 67 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 68 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 69 | start: 1, 70 | saturate: %(msat)d, 71 | final_momentum: %(final_momentum)f 72 | }, 73 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 74 | start: 1, 75 | saturate: %(lr_sat)d, 76 | decay_factor: %(decay)f 77 | } 78 | ], 79 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl", 80 | save_freq: 1 81 | } 82 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_mnist/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_mnist_amazon/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | #task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | num_pieces_0 = rng.randint(2, 6) 20 | h0_dim -= h0_dim % num_pieces_0 21 | h1_dim = rng.randint(250, 5000) 22 | num_pieces_1 = rng.randint(2, 6) 23 | h1_dim -= h1_dim % num_pieces_1 24 | 25 | def random_init_string(): 26 | irange = 10. ** rng.uniform(-2.3, -1.) 27 | return "irange: " + str(irange) 28 | 29 | h0_init = random_init_string() 30 | h1_init = random_init_string() 31 | 32 | if rng.randint(2): 33 | y_init = "sparse_init: 0" 34 | else: 35 | y_init = random_init_string() 36 | 37 | h0_bias = 0. 38 | h1_bias = 1. 39 | 40 | 41 | learning_rate = 10. ** rng.uniform(-2., -.5) 42 | 43 | if rng.randint(2): 44 | msat = 2 45 | else: 46 | msat = rng.randint(2, 1000) 47 | 48 | final_momentum = rng.uniform(.5, .9) 49 | 50 | lr_sat = rng.randint(200, 1000) 51 | 52 | decay = 10. 
** rng.uniform(-3, -1) 53 | 54 | 55 | #task_0_yaml_str = task_0_template % locals() 56 | 57 | #serial.mkdir('exp/' + str(job_id)) 58 | train_file_full_stem = '/scratch/mmirza/results/forgetting/random_search_sgd_lwta_mnist_amazon/exp/'+str(job_id)+'/' 59 | #f = open(train_file_full_stem + 'task_0.yaml', 'w') 60 | #f.write(task_0_yaml_str) 61 | #f.close() 62 | 63 | task_1_yaml_str = task_1_template % locals() 64 | 65 | #serial.mkdir('exp/' + str(job_id)) 66 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 67 | f.write(task_1_yaml_str) 68 | f.close() 69 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_mnist_amazon/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=3h --mem=6G --gpu $SCRATCH/results/forgetting/random_search_sgd_lwta_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_sgd_lwta_mnist_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" 2 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_mnist_amazon/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_lwta_mnist/worker.sh $F/experiments/random_search_dropout_lwta_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_lwta_mnist_amazon/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_amazon/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | EXP_PATH = "/RQexec/mirzameh/results/forgetting/random_search_sgd_maxout_amazon/" 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | num_pieces_0 = rng.randint(2, 6) 20 | num_units_0 = h0_dim // num_pieces_0 21 | h1_dim = rng.randint(250, 5000) 22 | num_pieces_1 = rng.randint(2, 6) 23 | num_units_1 = h1_dim // num_pieces_1 24 | 25 | def random_init_string(): 26 | irange = 10. ** rng.uniform(-2.3, -1.) 27 | return "irange: " + str(irange) 28 | 29 | h0_init = random_init_string() 30 | h1_init = random_init_string() 31 | 32 | if rng.randint(2): 33 | y_init = "sparse_init: 0" 34 | else: 35 | y_init = random_init_string() 36 | 37 | h0_bias = 0. 38 | h1_bias = 1. 39 | 40 | 41 | learning_rate = 10. 
** rng.uniform(-2., -.5) 42 | 43 | if rng.randint(2): 44 | msat = 2 45 | else: 46 | msat = rng.randint(2, 1000) 47 | 48 | final_momentum = rng.uniform(.5, .9) 49 | 50 | lr_sat = rng.randint(200, 1000) 51 | 52 | decay = 10. ** rng.uniform(-3, -1) 53 | 54 | 55 | task_0_yaml_str = task_0_template % locals() 56 | 57 | serial.mkdir('{}exp/'.format(EXP_PATH) + str(job_id)) 58 | train_file_full_stem = '{}exp/'.format(EXP_PATH)+str(job_id)+'/' 59 | f = open(train_file_full_stem + 'task_0.yaml', 'w') 60 | f.write(task_0_yaml_str) 61 | f.close() 62 | 63 | task_1_yaml_str = task_1_template % locals() 64 | 65 | serial.mkdir('{}exp/'.format(EXP_PATH) + str(job_id)) 66 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 67 | f.write(task_1_yaml_str) 68 | f.close() 69 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_amazon/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $SCRATCH/results/forgetting/random_search_sgd_maxout_amazon/worker.sh $SCRATCH/results/forgetting/random_search_sgd_maxout_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" 2 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_amazon/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_maxout_mnist/worker.sh $F/experiments/random_search_dropout_maxout_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_amazon/task_0_template.yaml: -------------------------------------------------------------------------------- 1 | !obj:pylearn2.train.Train { 2 | dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall { 3 | which_set: 'train', 4 | category: 'kitchen', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 1000 8 | }, 9 | model: !obj:pylearn2.models.mlp.MLP { 10 | layers: [ !obj:pylearn2.models.maxout.Maxout { 11 | max_col_norm: %(h0_col_norm)f, 12 | layer_name: 'h0', 13 | num_units: %(num_units_0)d, 14 | num_pieces: %(num_pieces_0)d, 15 | %(h0_init)s, 16 | init_bias: %(h0_bias)f 17 | }, !obj:pylearn2.models.maxout.Maxout { 18 | max_col_norm: %(h1_col_norm)f, 19 | layer_name: 'h1', 20 | num_units: %(num_units_1)d, 21 | num_pieces: %(num_pieces_1)d, 22 | %(h1_init)s, 23 | init_bias: %(h1_bias)f 24 | }, !obj:pylearn2.models.mlp.Softmax { 25 | max_col_norm: %(y_col_norm)f, 26 | layer_name: 'y', 27 | n_classes: 2, 28 | %(y_init)s 29 | } 30 | ], 31 | nvis: 5000, 32 | }, 33 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 34 | batch_size: 100, 35 | learning_rate: %(learning_rate)f, 36 | init_momentum: .5, 37 | monitoring_dataset: 38 | { 39 | 'train' : *train, 40 | 'valid' : !obj:forgetting.datasets.amazon.AmazonSmall { 41 | which_set: 'train', 42 | category: 'kitchen', 43 | one_hot: 1, 44 | start: 1000, 45 | stop: 1600 46 | }, 47 | 'test' : !obj:forgetting.datasets.amazon.AmazonSmall { 48 | which_set: 'test', 49 | category: 'kitchen', 50 | one_hot: 1, 51 | }, 52 | }, 53 | 
cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 54 | !obj:pylearn2.costs.mlp.Default {}, 55 | ] 56 | }, 57 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 58 | channel_name: "valid_y_misclass", 59 | prop_decrease: 0., 60 | N: 100 61 | } 62 | }, 63 | extensions: [ 64 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 65 | channel_name: 'valid_y_misclass', 66 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 67 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 68 | start: 1, 69 | saturate: %(msat)d, 70 | final_momentum: %(final_momentum)f 71 | }, 72 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 73 | start: 1, 74 | saturate: %(lr_sat)d, 75 | decay_factor: %(decay)f 76 | } 77 | ] 78 | } 79 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_amazon/task_1_template.yaml: -------------------------------------------------------------------------------- 1 | !obj:pylearn2.train.Train { 2 | dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall { 3 | which_set: 'train', 4 | category: 'dvd', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 1000 8 | }, 9 | model: !obj:pylearn2.monitor.push_monitor { 10 | model: !pkl: "task_0_best.pkl", 11 | name: "monitor_first" 12 | }, 13 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 14 | batch_size: 100, 15 | learning_rate: %(learning_rate)f, 16 | init_momentum: .5, 17 | monitoring_dataset: 18 | { 19 | 'train' : *train, 20 | 'valid_both' : !obj:forgetting.concat { 21 | datasets: [ !obj:forgetting.datasets.amazon.AmazonSmall { 22 | which_set: 'train', 23 | category: 'dvd', 24 | one_hot: 1, 25 | start: 1000, 26 | stop: 1600 27 | }, !obj:forgetting.datasets.amazon.AmazonSmall { 28 | which_set: 'train', 29 | category: 'kitchen', 30 | one_hot: 1, 31 | start: 1000, 32 | stop: 1600 33 | }, 34 | 35 | ] 36 | }, 37 | 'test' : !obj:forgetting.datasets.amazon.AmazonSmall { 38 | which_set: 'test', 39 | category: 'dvd', 40 | one_hot: 1, 41 | }, 42 | 'test_old' : !obj:forgetting.datasets.amazon.AmazonSmall { 43 | which_set: 'test', 44 | category: 'kitchen', 45 | one_hot: 1, 46 | }, 47 | }, 48 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 49 | !obj:pylearn2.costs.mlp.Default {}, 50 | ] 51 | }, 52 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 53 | channel_name: "valid_both_y_misclass", 54 | prop_decrease: 0., 55 | N: 100 56 | } 57 | }, 58 | extensions: [ 59 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 60 | channel_name: 'valid_both_y_misclass', 61 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 62 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 63 | start: 1, 64 | saturate: %(msat)d, 65 | final_momentum: %(final_momentum)f 66 | }, 67 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 68 | start: 1, 69 | saturate: %(lr_sat)d, 70 | decay_factor: %(decay)f 71 | } 72 | ], 73 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl", 74 | save_freq: 1 75 | } 76 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_amazon/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | python ~/projects/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | python ~/projects/pylearn2/pylearn2/scripts/train.py task_1.yaml 6 | 
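All of the configure.py scripts in this section share one mechanism: every hyperparameter draw is bound to a local name, and Python's "%" formatting against locals() substitutes those names into the %(name)d / %(name)f / %(name)s placeholders of the YAML templates. The sketch below distills that idiom; it is a minimal illustration rather than a file from this repository (the template string, loop bound, and printing are made up), and it targets Python 2 to match the repository's use of xrange.

import numpy as np

# Hypothetical one-line stand-in for a *_template.yaml file.
template = "h0: {dim: %(h0_dim)d, block_size: %(num_pieces_0)d}, learning_rate: %(learning_rate)f"

rng = np.random.RandomState([2013, 11, 22])  # fixed seed: reruns yield identical draws

for job_id in xrange(2):  # the real scripts loop over num_jobs = 25
    # The LWTA variants force the layer width to be a multiple of the
    # block size, since every block holds exactly num_pieces units.
    h0_dim = rng.randint(250, 5000)
    num_pieces_0 = rng.randint(2, 6)
    h0_dim -= h0_dim % num_pieces_0  # e.g. 4999 with 4 pieces becomes 4996

    learning_rate = 10. ** rng.uniform(-2., -.5)  # log-uniform between 1e-2 and 10**-0.5

    # At module level, locals() is the module namespace, so every name
    # bound above is visible to the template substitution.
    print template % locals()

The maxout variants pass num_units_0 = h0_dim // num_pieces_0 to their templates instead, because pylearn2's Maxout layer is parameterized by units and pieces rather than by a flat dim.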
-------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_mnist/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | num_pieces_0 = rng.randint(2, 6) 20 | num_units_0 = h0_dim // num_pieces_0 21 | h1_dim = rng.randint(250, 5000) 22 | num_pieces_1 = rng.randint(2, 6) 23 | num_units_1 = h1_dim // num_pieces_1 24 | 25 | def random_init_string(): 26 | irange = 10. ** rng.uniform(-2.3, -1.) 27 | return "irange: " + str(irange) 28 | 29 | h0_init = random_init_string() 30 | h1_init = random_init_string() 31 | 32 | if rng.randint(2): 33 | y_init = "sparse_init: 0" 34 | else: 35 | y_init = random_init_string() 36 | 37 | h0_bias = 0. 38 | h1_bias = 1. 39 | 40 | 41 | learning_rate = 10. ** rng.uniform(-2., -.5) 42 | 43 | if rng.randint(2): 44 | msat = 2 45 | else: 46 | msat = rng.randint(2, 1000) 47 | 48 | final_momentum = rng.uniform(.5, .9) 49 | 50 | lr_sat = rng.randint(200, 1000) 51 | 52 | decay = 10. ** rng.uniform(-3, -1) 53 | 54 | 55 | task_0_yaml_str = task_0_template % locals() 56 | 57 | serial.mkdir('exp/' + str(job_id)) 58 | train_file_full_stem = 'exp/'+str(job_id)+'/' 59 | f = open(train_file_full_stem + 'task_0.yaml', 'w') 60 | f.write(task_0_yaml_str) 61 | f.close() 62 | 63 | task_1_yaml_str = task_1_template % locals() 64 | 65 | serial.mkdir('exp/' + str(job_id)) 66 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 67 | f.write(task_1_yaml_str) 68 | f.close() 69 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_mnist/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_sgd_maxout_mnist/worker.sh $F/experiments/random_search_sgd_maxout_mnist/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_mnist/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_sgd_maxout_mnist/worker.sh $F/experiments/random_search_sgd_maxout_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_mnist/task_0_template.yaml: -------------------------------------------------------------------------------- 1 | # Author: Ian Goodfellow 2 | # We train with the un-permuted version first. 
3 | # This way after training with the permuted version, we can see if the 4 | # filters in the un-permuted space are still recognizable. 5 | !obj:pylearn2.train.Train { 6 | dataset: &train !obj:pylearn2.datasets.mnist.MNIST { 7 | which_set: 'train', 8 | one_hot: 1, 9 | start: 0, 10 | stop: 50000 11 | }, 12 | model: !obj:pylearn2.models.mlp.MLP { 13 | layers: [ !obj:pylearn2.models.maxout.Maxout { 14 | max_col_norm: %(h0_col_norm)f, 15 | layer_name: 'h0', 16 | num_units: %(num_units_0)d, 17 | num_pieces: %(num_pieces_0)d, 18 | %(h0_init)s, 19 | init_bias: %(h0_bias)f 20 | }, !obj:pylearn2.models.maxout.Maxout { 21 | max_col_norm: %(h1_col_norm)f, 22 | layer_name: 'h1', 23 | num_units: %(num_units_1)d, 24 | num_pieces: %(num_pieces_1)d, 25 | %(h1_init)s, 26 | init_bias: %(h1_bias)f 27 | }, !obj:pylearn2.models.mlp.Softmax { 28 | max_col_norm: %(y_col_norm)f, 29 | layer_name: 'y', 30 | n_classes: 10, 31 | %(y_init)s 32 | } 33 | ], 34 | nvis: 784, 35 | }, 36 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 37 | batch_size: 100, 38 | learning_rate: %(learning_rate)f, 39 | init_momentum: .5, 40 | monitoring_dataset: 41 | { 42 | 'train' : *train, 43 | 'valid' : !obj:pylearn2.datasets.mnist.MNIST { 44 | which_set: 'train', 45 | one_hot: 1, 46 | start: 50000, 47 | stop: 60000 48 | }, 49 | 'test' : !obj:pylearn2.datasets.mnist.MNIST { 50 | which_set: 'test', 51 | one_hot: 1, 52 | } 53 | }, 54 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 55 | !obj:pylearn2.costs.mlp.Default {}, 56 | ] 57 | }, 58 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 59 | channel_name: "valid_y_misclass", 60 | prop_decrease: 0., 61 | N: 100 62 | } 63 | }, 64 | extensions: [ 65 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 66 | channel_name: 'valid_y_misclass', 67 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 68 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 69 | start: 1, 70 | saturate: %(msat)d, 71 | final_momentum: %(final_momentum)f 72 | }, 73 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 74 | start: 1, 75 | saturate: %(lr_sat)d, 76 | decay_factor: %(decay)f 77 | } 78 | ] 79 | } 80 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_mnist/task_1_template.yaml: -------------------------------------------------------------------------------- 1 | # Author: Ian Goodfellow 2 | !obj:pylearn2.train.Train { 3 | dataset: &train !obj:pylearn2.datasets.mnist.MNIST { 4 | which_set: 'train', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 50000, 8 | preprocessor: !obj:forgetting.permute_and_flip { 9 | flip: 0 10 | } 11 | }, 12 | model: !obj:pylearn2.monitor.push_monitor { 13 | model: !pkl: "task_0_best.pkl", 14 | name: "monitor_first" 15 | }, 16 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 17 | batch_size: 100, 18 | learning_rate: %(learning_rate)f, 19 | init_momentum: .5, 20 | monitoring_dataset: 21 | { 22 | 'train' : *train, 23 | 'valid_both' : !obj:forgetting.concat { 24 | datasets: [ 25 | !obj:pylearn2.datasets.mnist.MNIST { 26 | which_set: 'train', 27 | one_hot: 1, 28 | start: 50000, 29 | stop: 60000, 30 | }, 31 | !obj:pylearn2.datasets.mnist.MNIST { 32 | which_set: 'train', 33 | one_hot: 1, 34 | start: 50000, 35 | stop: 60000, 36 | preprocessor: !obj:forgetting.permute_and_flip { 37 | flip: 0 38 | } 39 | } 40 | ] 41 | }, 42 | 'test' : !obj:pylearn2.datasets.mnist.MNIST { 43 | which_set: 'test', 44 | one_hot: 1, 45 | preprocessor: 
!obj:forgetting.permute_and_flip { 46 | flip: 0 47 | } 48 | }, 49 | 'test_old' : !obj:pylearn2.datasets.mnist.MNIST { 50 | which_set: 'test', 51 | one_hot: 1, 52 | } 53 | }, 54 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 55 | !obj:pylearn2.costs.mlp.Default {}, 56 | ] 57 | }, 58 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 59 | channel_name: "valid_both_y_misclass", 60 | prop_decrease: 0., 61 | N: 100 62 | } 63 | }, 64 | extensions: [ 65 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 66 | channel_name: 'valid_both_y_misclass', 67 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 68 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 69 | start: 1, 70 | saturate: %(msat)d, 71 | final_momentum: %(final_momentum)f 72 | }, 73 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 74 | start: 1, 75 | saturate: %(lr_sat)d, 76 | decay_factor: %(decay)f 77 | } 78 | ], 79 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl", 80 | save_freq: 1 81 | } 82 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_mnist/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_mnist_amazon/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | num_pieces_0 = rng.randint(2, 6) 20 | num_units_0 = h0_dim // num_pieces_0 21 | h1_dim = rng.randint(250, 5000) 22 | num_pieces_1 = rng.randint(2, 6) 23 | num_units_1 = h1_dim // num_pieces_1 24 | 25 | def random_init_string(): 26 | irange = 10. ** rng.uniform(-2.3, -1.) 27 | return "irange: " + str(irange) 28 | 29 | h0_init = random_init_string() 30 | h1_init = random_init_string() 31 | 32 | if rng.randint(2): 33 | y_init = "sparse_init: 0" 34 | else: 35 | y_init = random_init_string() 36 | 37 | h0_bias = 0. 38 | h1_bias = 1. 39 | 40 | 41 | learning_rate = 10. ** rng.uniform(-2., -.5) 42 | 43 | if rng.randint(2): 44 | msat = 2 45 | else: 46 | msat = rng.randint(2, 1000) 47 | 48 | final_momentum = rng.uniform(.5, .9) 49 | 50 | lr_sat = rng.randint(200, 1000) 51 | 52 | decay = 10. 
** rng.uniform(-3, -1) 53 | 54 | 55 | #task_0_yaml_str = task_0_template % locals() 56 | 57 | #serial.mkdir('exp/' + str(job_id)) 58 | #train_file_full_stem = 'exp/'+str(job_id)+'/' 59 | train_file_full_stem = '{}exp/{}/'.format('/scratch/mmirza/results/forgetting/random_search_sgd_maxout_mnist_amazon/', job_id) 60 | #f = open(train_file_full_stem + 'task_0.yaml', 'w') 61 | #f.write(task_0_yaml_str) 62 | #f.close() 63 | 64 | task_1_yaml_str = task_1_template % locals() 65 | 66 | #serial.mkdir('exp/' + str(job_id)) 67 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 68 | f.write(task_1_yaml_str) 69 | f.close() 70 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_mnist_amazon/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=3h --mem=6G --gpu $SCRATCH/results/forgetting/random_search_sgd_maxout_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_sgd_maxout_mnist_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" 2 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_mnist_amazon/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_maxout_mnist/worker.sh $F/experiments/random_search_dropout_maxout_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_maxout_mnist_amazon/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_amazon/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | EXP_PATH = "/RQexec/mirzameh/results/forgetting/random_search_sgd_relu_amazon/" 5 | 6 | num_jobs = 25 7 | 8 | rng = np.random.RandomState([2013, 11, 22]) 9 | 10 | task_0_template = open('task_0_template.yaml', 'r').read() 11 | task_1_template = open('task_1_template.yaml', 'r').read() 12 | 13 | for job_id in xrange(num_jobs): 14 | 15 | h0_col_norm = rng.uniform(1., 5.) 16 | h1_col_norm = rng.uniform(1., 5.) 17 | y_col_norm = rng.uniform(1., 5.) 18 | 19 | h0_dim = rng.randint(250, 5000) 20 | h1_dim = rng.randint(250, 5000) 21 | 22 | def random_init_string(): 23 | if rng.randint(2): 24 | sparse_init = rng.randint(10, 30) 25 | return "sparse_init: " + str(sparse_init) 26 | irange = 10. ** rng.uniform(-2.3, -1.) 
27 | return "irange: " + str(irange) 28 | 29 | h0_init = random_init_string() 30 | h1_init = random_init_string() 31 | 32 | if rng.randint(2): 33 | y_init = "sparse_init: 0" 34 | else: 35 | y_init = random_init_string() 36 | 37 | def rectifier_bias(): 38 | if rng.randint(2): 39 | return 0 40 | return rng.uniform(0, .3) 41 | 42 | h0_bias = rectifier_bias() 43 | h1_bias = rectifier_bias() 44 | 45 | 46 | learning_rate = 10. ** rng.uniform(-2., -.5) 47 | 48 | if rng.randint(2): 49 | msat = 2 50 | else: 51 | msat = rng.randint(2, 1000) 52 | 53 | final_momentum = rng.uniform(.5, .9) 54 | 55 | lr_sat = rng.randint(200, 1000) 56 | 57 | decay = 10. ** rng.uniform(-3, -1) 58 | 59 | 60 | task_0_yaml_str = task_0_template % locals() 61 | 62 | serial.mkdir('{}exp/'.format(EXP_PATH) + str(job_id)) 63 | train_file_full_stem = '{}exp/'.format(EXP_PATH)+str(job_id)+'/' 64 | f = open(train_file_full_stem + 'task_0.yaml', 'w') 65 | f.write(task_0_yaml_str) 66 | f.close() 67 | 68 | task_1_yaml_str = task_1_template % locals() 69 | 70 | serial.mkdir('{}exp/'.format(EXP_PATH) + str(job_id)) 71 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 72 | f.write(task_1_yaml_str) 73 | f.close() 74 | 75 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_amazon/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $SCRATCH/results/forgetting/random_search_sgd_relu_amazon/worker.sh $SCRATCH/results/forgetting/random_search_sgd_relu_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" 2 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_amazon/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_relu_mnist/worker.sh $F/experiments/random_search_dropout_relu_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_amazon/task_0_template.yaml: -------------------------------------------------------------------------------- 1 | !obj:pylearn2.train.Train { 2 | dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall { 3 | which_set: 'train', 4 | category: 'kitchen', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 1000 8 | }, 9 | model: !obj:pylearn2.models.mlp.MLP { 10 | layers: [ !obj:pylearn2.models.mlp.RectifiedLinear { 11 | max_col_norm: %(h0_col_norm)f, 12 | layer_name: 'h0', 13 | dim: %(h0_dim)d, 14 | %(h0_init)s, 15 | init_bias: %(h0_bias)f 16 | }, !obj:pylearn2.models.mlp.RectifiedLinear { 17 | max_col_norm: %(h1_col_norm)f, 18 | layer_name: 'h1', 19 | dim: %(h1_dim)d, 20 | %(h1_init)s, 21 | init_bias: %(h1_bias)f 22 | }, !obj:pylearn2.models.mlp.Softmax { 23 | max_col_norm: %(y_col_norm)f, 24 | layer_name: 'y', 25 | n_classes: 2, 26 | %(y_init)s 27 | } 28 | ], 29 | nvis: 5000, 30 | }, 31 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 32 | batch_size: 100, 33 | learning_rate: %(learning_rate)f, 34 | init_momentum: .5, 35 | monitoring_dataset: 36 | { 37 | 
'train' : *train, 38 | 'valid' : !obj:forgetting.datasets.amazon.AmazonSmall { 39 | which_set: 'train', 40 | category: 'kitchen', 41 | one_hot: 1, 42 | start: 1000, 43 | stop: 1600 44 | }, 45 | 'test' : !obj:forgetting.datasets.amazon.AmazonSmall { 46 | which_set: 'test', 47 | category: 'kitchen', 48 | one_hot: 1, 49 | }, 50 | }, 51 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 52 | !obj:pylearn2.costs.mlp.Default {}, 53 | ] 54 | }, 55 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 56 | channel_name: "valid_y_misclass", 57 | prop_decrease: 0., 58 | N: 100 59 | } 60 | }, 61 | extensions: [ 62 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 63 | channel_name: 'valid_y_misclass', 64 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 65 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 66 | start: 1, 67 | saturate: %(msat)d, 68 | final_momentum: %(final_momentum)f 69 | }, 70 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 71 | start: 1, 72 | saturate: %(lr_sat)d, 73 | decay_factor: %(decay)f 74 | } 75 | ] 76 | } 77 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_amazon/task_1_template.yaml: -------------------------------------------------------------------------------- 1 | !obj:pylearn2.train.Train { 2 | dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall { 3 | which_set: 'train', 4 | category: 'dvd', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 1000 8 | }, 9 | model: !obj:pylearn2.monitor.push_monitor { 10 | model: !pkl: "task_0_best.pkl", 11 | name: "monitor_first" 12 | }, 13 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 14 | batch_size: 100, 15 | learning_rate: %(learning_rate)f, 16 | init_momentum: .5, 17 | monitoring_dataset: 18 | { 19 | 'train' : *train, 20 | 'valid_both' : !obj:forgetting.concat { 21 | datasets: [ !obj:forgetting.datasets.amazon.AmazonSmall { 22 | which_set: 'train', 23 | category: 'dvd', 24 | one_hot: 1, 25 | start: 1000, 26 | stop: 1600 27 | }, !obj:forgetting.datasets.amazon.AmazonSmall { 28 | which_set: 'train', 29 | category: 'kitchen', 30 | one_hot: 1, 31 | start: 1000, 32 | stop: 1600 33 | }, 34 | ] 35 | }, 36 | 'test' : !obj:forgetting.datasets.amazon.AmazonSmall { 37 | which_set: 'test', 38 | category: 'dvd', 39 | one_hot: 1, 40 | }, 41 | 'test_old' : !obj:forgetting.datasets.amazon.AmazonSmall { 42 | which_set: 'test', 43 | category: 'kitchen', 44 | one_hot: 1, 45 | }, 46 | }, 47 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 48 | !obj:pylearn2.costs.mlp.Default {}, 49 | ] 50 | }, 51 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 52 | channel_name: "valid_both_y_misclass", 53 | prop_decrease: 0., 54 | N: 100 55 | } 56 | }, 57 | extensions: [ 58 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 59 | channel_name: 'valid_both_y_misclass', 60 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 61 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 62 | start: 1, 63 | saturate: %(msat)d, 64 | final_momentum: %(final_momentum)f 65 | }, 66 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 67 | start: 1, 68 | saturate: %(lr_sat)d, 69 | decay_factor: %(decay)f 70 | } 71 | ], 72 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl", 73 | save_freq: 1 74 | } 75 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_amazon/worker.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | python ~/projects/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | python ~/projects/pylearn2/pylearn2/scripts/train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_mnist/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | h1_dim = rng.randint(250, 5000) 20 | 21 | def random_init_string(): 22 | if rng.randint(2): 23 | sparse_init = rng.randint(10, 30) 24 | return "sparse_init: " + str(sparse_init) 25 | irange = 10. ** rng.uniform(-2.3, -1.) 26 | return "irange: " + str(irange) 27 | 28 | h0_init = random_init_string() 29 | h1_init = random_init_string() 30 | 31 | if rng.randint(2): 32 | y_init = "sparse_init: 0" 33 | else: 34 | y_init = random_init_string() 35 | 36 | def rectifier_bias(): 37 | if rng.randint(2): 38 | return 0 39 | return rng.uniform(0, .3) 40 | 41 | h0_bias = rectifier_bias() 42 | h1_bias = rectifier_bias() 43 | 44 | 45 | learning_rate = 10. ** rng.uniform(-2., -.5) 46 | 47 | if rng.randint(2): 48 | msat = 2 49 | else: 50 | msat = rng.randint(2, 1000) 51 | 52 | final_momentum = rng.uniform(.5, .9) 53 | 54 | lr_sat = rng.randint(200, 1000) 55 | 56 | decay = 10. 
** rng.uniform(-3, -1) 57 | 58 | 59 | task_0_yaml_str = task_0_template % locals() 60 | 61 | serial.mkdir('exp/' + str(job_id)) 62 | train_file_full_stem = 'exp/'+str(job_id)+'/' 63 | f = open(train_file_full_stem + 'task_0.yaml', 'w') 64 | f.write(task_0_yaml_str) 65 | f.close() 66 | 67 | task_1_yaml_str = task_1_template % locals() 68 | 69 | serial.mkdir('exp/' + str(job_id)) 70 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 71 | f.write(task_1_yaml_str) 72 | f.close() 73 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_mnist/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_sgd_relu_mnist/worker.sh $F/experiments/random_search_sgd_relu_mnist/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_mnist/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_sgd_relu_mnist/worker.sh $F/experiments/random_search_sgd_relu_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_mnist/task_0_template.yaml: -------------------------------------------------------------------------------- 1 | # Author: Ian Goodfellow 2 | # We train with the un-permuted version first. 3 | # This way after training with the permuted version, we can see if the 4 | # filters in the un-permuted space are still recognizable. 
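# Sizing note: configure.py draws a raw width h0_dim and a piece count,
# then passes num_units = h0_dim // num_pieces here, so each Maxout
# layer's total number of linear pieces is at most the drawn width.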
5 | !obj:pylearn2.train.Train { 6 | dataset: &train !obj:pylearn2.datasets.mnist.MNIST { 7 | which_set: 'train', 8 | one_hot: 1, 9 | start: 0, 10 | stop: 50000 11 | }, 12 | model: !obj:pylearn2.models.mlp.MLP { 13 | layers: [ !obj:pylearn2.models.mlp.RectifiedLinear { 14 | max_col_norm: %(h0_col_norm)f, 15 | layer_name: 'h0', 16 | dim: %(h0_dim)d, 17 | %(h0_init)s, 18 | init_bias: %(h0_bias)f 19 | }, !obj:pylearn2.models.mlp.RectifiedLinear { 20 | max_col_norm: %(h1_col_norm)f, 21 | layer_name: 'h1', 22 | dim: %(h1_dim)d, 23 | %(h1_init)s, 24 | init_bias: %(h1_bias)f 25 | }, !obj:pylearn2.models.mlp.Softmax { 26 | max_col_norm: %(y_col_norm)f, 27 | layer_name: 'y', 28 | n_classes: 10, 29 | %(y_init)s 30 | } 31 | ], 32 | nvis: 784, 33 | }, 34 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 35 | batch_size: 100, 36 | learning_rate: %(learning_rate)f, 37 | init_momentum: .5, 38 | monitoring_dataset: 39 | { 40 | 'train' : *train, 41 | 'valid' : !obj:pylearn2.datasets.mnist.MNIST { 42 | which_set: 'train', 43 | one_hot: 1, 44 | start: 50000, 45 | stop: 60000 46 | }, 47 | 'test' : !obj:pylearn2.datasets.mnist.MNIST { 48 | which_set: 'test', 49 | one_hot: 1, 50 | } 51 | }, 52 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 53 | !obj:pylearn2.costs.cost.MethodCost { 54 | method: 'cost_from_X', 55 | supervised: 1 56 | } 57 | ] 58 | }, 59 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 60 | channel_name: "valid_y_misclass", 61 | prop_decrease: 0., 62 | N: 100 63 | } 64 | }, 65 | extensions: [ 66 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 67 | channel_name: 'valid_y_misclass', 68 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 69 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 70 | start: 1, 71 | saturate: %(msat)d, 72 | final_momentum: %(final_momentum)f 73 | }, 74 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 75 | start: 1, 76 | saturate: %(lr_sat)d, 77 | decay_factor: %(decay)f 78 | } 79 | ] 80 | } 81 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_mnist/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_mnist_amazon/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | num_pieces_0 = rng.randint(2, 6) 20 | num_units_0 = h0_dim // num_pieces_0 21 | h1_dim = rng.randint(250, 5000) 22 | num_pieces_1 = rng.randint(2, 6) 23 | num_units_1 = h1_dim // num_pieces_1 24 | 25 | def random_init_string(): 26 | irange = 10. ** rng.uniform(-2.3, -1.) 
27 | return "irange: " + str(irange) 28 | 29 | h0_init = random_init_string() 30 | h1_init = random_init_string() 31 | 32 | if rng.randint(2): 33 | y_init = "sparse_init: 0" 34 | else: 35 | y_init = random_init_string() 36 | 37 | h0_bias = 0. 38 | h1_bias = 1. 39 | 40 | 41 | learning_rate = 10. ** rng.uniform(-2., -.5) 42 | 43 | if rng.randint(2): 44 | msat = 2 45 | else: 46 | msat = rng.randint(2, 1000) 47 | 48 | final_momentum = rng.uniform(.5, .9) 49 | 50 | lr_sat = rng.randint(200, 1000) 51 | 52 | decay = 10. ** rng.uniform(-3, -1) 53 | 54 | 55 | #task_0_yaml_str = task_0_template % locals() 56 | 57 | #serial.mkdir('exp/' + str(job_id)) 58 | #train_file_full_stem = 'exp/'+str(job_id)+'/' 59 | train_file_full_stem = '{}exp/{}/'.format('/scratch/mmirza/results/forgetting/random_search_sgd_relu_mnist_amazon/', job_id) 60 | #f = open(train_file_full_stem + 'task_0.yaml', 'w') 61 | #f.write(task_0_yaml_str) 62 | #f.close() 63 | 64 | task_1_yaml_str = task_1_template % locals() 65 | 66 | serial.mkdir('exp/' + str(job_id)) 67 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 68 | f.write(task_1_yaml_str) 69 | f.close() 70 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_mnist_amazon/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=3h --mem=6G --gpu $SCRATCH/results/forgetting/random_search_sgd_relu_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_sgd_relu_mnist_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" 2 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_mnist_amazon/local_launch.sh: -------------------------------------------------------------------------------- 1 | for fold in 1 2 3 4 5 6 7 8 8 9 10 11 12 2 | do 3 | $SCRATCH/results/forgetting/random_search_dropout_maxout_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_maxout_mnist_amazon/exp/$fold 4 | done 5 | 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_mnist_amazon/local_launch2.sh: -------------------------------------------------------------------------------- 1 | for fold in 13 14 15 16 17 18 19 20 21 22 23 24 25 2 | do 3 | $SCRATCH/results/forgetting/random_search_dropout_maxout_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_dropout_maxout_mnist_amazon/exp/$fold 4 | done 5 | 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_mnist_amazon/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_dropout_relu_mnist/worker.sh $F/experiments/random_search_dropout_relu_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_relu_mnist_amazon/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | python /work/mmirza/pylearn2/pylearn2/scripts/train.py 
task_0.yaml || exit -1 4 | echo "starting task 1" 5 | python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_amazon/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | EXP_PATH = "/RQexec/mirzameh/results/forgetting/random_search_sgd_sigmoid_amazon/" 7 | 8 | rng = np.random.RandomState([2013, 11, 22]) 9 | 10 | task_0_template = open('task_0_template.yaml', 'r').read() 11 | task_1_template = open('task_1_template.yaml', 'r').read() 12 | 13 | for job_id in xrange(num_jobs): 14 | 15 | h0_col_norm = rng.uniform(1., 5.) 16 | h1_col_norm = rng.uniform(1., 5.) 17 | y_col_norm = rng.uniform(1., 5.) 18 | 19 | h0_dim = rng.randint(250, 5000) 20 | h1_dim = rng.randint(250, 5000) 21 | 22 | def random_init_string(): 23 | if rng.randint(2): 24 | sparse_init = rng.randint(10, 30) 25 | return "sparse_init: " + str(sparse_init) 26 | irange = 10. ** rng.uniform(-2.3, -1.) 27 | return "irange: " + str(irange) 28 | 29 | h0_init = random_init_string() 30 | h1_init = random_init_string() 31 | 32 | if rng.randint(2): 33 | y_init = "sparse_init: 0" 34 | else: 35 | y_init = random_init_string() 36 | 37 | def sigmoid_bias(): 38 | return rng.uniform(0, .3) 39 | 40 | h0_bias = sigmoid_bias() 41 | h1_bias = sigmoid_bias() 42 | 43 | 44 | learning_rate = 10. ** rng.uniform(-2., -.5) 45 | 46 | if rng.randint(2): 47 | msat = 2 48 | else: 49 | msat = rng.randint(2, 1000) 50 | 51 | final_momentum = rng.uniform(.5, .9) 52 | 53 | lr_sat = rng.randint(200, 1000) 54 | 55 | decay = 10. ** rng.uniform(-3, -1) 56 | 57 | 58 | task_0_yaml_str = task_0_template % locals() 59 | 60 | serial.mkdir('{}exp/'.format(EXP_PATH) + str(job_id)) 61 | train_file_full_stem = '{}exp/'.format(EXP_PATH)+str(job_id)+'/' 62 | f = open(train_file_full_stem + 'task_0.yaml', 'w') 63 | f.write(task_0_yaml_str) 64 | f.close() 65 | 66 | task_1_yaml_str = task_1_template % locals() 67 | 68 | serial.mkdir('{}exp/'.format(EXP_PATH) + str(job_id)) 69 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 70 | f.write(task_1_yaml_str) 71 | f.close() 72 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_amazon/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $SCRATCH/results/forgetting/random_search_sgd_sigmoid_amazon/worker.sh $SCRATCH/results/forgetting/random_search_sgd_sigmoid_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" 2 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_amazon/task_0_template.yaml: -------------------------------------------------------------------------------- 1 | !obj:pylearn2.train.Train { 2 | dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall { 3 | which_set: 'train', 4 | category: 'kitchen', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 1000 8 | }, 9 | model: !obj:pylearn2.models.mlp.MLP { 10 | layers: [ !obj:pylearn2.models.mlp.Sigmoid { 11 | max_col_norm: %(h0_col_norm)f, 12 | layer_name: 'h0', 13 | dim: %(h0_dim)d, 14 | %(h0_init)s, 15 | init_bias: %(h0_bias)f 16 | }, !obj:pylearn2.models.mlp.Sigmoid { 17 | max_col_norm: 
%(h1_col_norm)f, 18 | layer_name: 'h1', 19 | dim: %(h1_dim)d, 20 | %(h1_init)s, 21 | init_bias: %(h1_bias)f 22 | }, !obj:pylearn2.models.mlp.Softmax { 23 | max_col_norm: %(y_col_norm)f, 24 | layer_name: 'y', 25 | n_classes: 2, 26 | %(y_init)s 27 | } 28 | ], 29 | nvis: 5000, 30 | }, 31 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 32 | batch_size: 100, 33 | learning_rate: %(learning_rate)f, 34 | init_momentum: .5, 35 | monitoring_dataset: 36 | { 37 | 'train' : *train, 38 | 'valid' : !obj:forgetting.datasets.amazon.AmazonSmall { 39 | which_set: 'train', 40 | category: 'kitchen', 41 | one_hot: 1, 42 | start: 1000, 43 | stop: 1600 44 | }, 45 | 'test' : !obj:forgetting.datasets.amazon.AmazonSmall { 46 | which_set: 'test', 47 | category: 'kitchen', 48 | one_hot: 1, 49 | }, 50 | }, 51 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 52 | !obj:pylearn2.costs.mlp.Default {}, 53 | ] 54 | }, 55 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 56 | channel_name: "valid_y_misclass", 57 | prop_decrease: 0., 58 | N: 100 59 | } 60 | }, 61 | extensions: [ 62 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 63 | channel_name: 'valid_y_misclass', 64 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 65 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 66 | start: 1, 67 | saturate: %(msat)d, 68 | final_momentum: %(final_momentum)f 69 | }, 70 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 71 | start: 1, 72 | saturate: %(lr_sat)d, 73 | decay_factor: %(decay)f 74 | } 75 | ] 76 | } 77 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_amazon/task_1_template.yaml: -------------------------------------------------------------------------------- 1 | !obj:pylearn2.train.Train { 2 | dataset: &train !obj:forgetting.datasets.amazon.AmazonSmall { 3 | which_set: 'train', 4 | category: 'dvd', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 1000 8 | }, 9 | model: !obj:pylearn2.monitor.push_monitor { 10 | model: !pkl: "task_0_best.pkl", 11 | name: "monitor_first" 12 | }, 13 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 14 | batch_size: 100, 15 | learning_rate: %(learning_rate)f, 16 | init_momentum: .5, 17 | monitoring_dataset: 18 | { 19 | 'train' : *train, 20 | 'valid_both' : !obj:forgetting.concat { 21 | datasets: [ !obj:forgetting.datasets.amazon.AmazonSmall { 22 | which_set: 'train', 23 | category: 'dvd', 24 | one_hot: 1, 25 | start: 1000, 26 | stop: 1600 27 | }, !obj:forgetting.datasets.amazon.AmazonSmall { 28 | which_set: 'train', 29 | category: 'kitchen', 30 | one_hot: 1, 31 | start: 1000, 32 | stop: 1600 33 | }, 34 | ] 35 | }, 36 | 'test' : !obj:forgetting.datasets.amazon.AmazonSmall { 37 | which_set: 'test', 38 | category: 'dvd', 39 | one_hot: 1, 40 | }, 41 | 'test_old' : !obj:forgetting.datasets.amazon.AmazonSmall { 42 | which_set: 'test', 43 | category: 'kitchen', 44 | one_hot: 1, 45 | }, 46 | }, 47 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 48 | !obj:pylearn2.costs.mlp.Default {}, 49 | ] 50 | }, 51 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 52 | channel_name: "valid_both_y_misclass", 53 | prop_decrease: 0., 54 | N: 100 55 | } 56 | }, 57 | extensions: [ 58 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 59 | channel_name: 'valid_both_y_misclass', 60 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 61 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 62 | start: 1, 
63 | saturate: %(msat)d, 64 | final_momentum: %(final_momentum)f 65 | }, 66 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 67 | start: 1, 68 | saturate: %(lr_sat)d, 69 | decay_factor: %(decay)f 70 | } 71 | ], 72 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl", 73 | save_freq: 1 74 | } 75 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_amazon/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | python ~/projects/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | python ~/projects/pylearn2/pylearn2/scripts/train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_mnist/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | h1_dim = rng.randint(250, 5000) 20 | 21 | def random_init_string(): 22 | if rng.randint(2): 23 | sparse_init = rng.randint(10, 30) 24 | return "sparse_init: " + str(sparse_init) 25 | irange = 10. ** rng.uniform(-2.3, -1.) 26 | return "irange: " + str(irange) 27 | 28 | h0_init = random_init_string() 29 | h1_init = random_init_string() 30 | 31 | if rng.randint(2): 32 | y_init = "sparse_init: 0" 33 | else: 34 | y_init = random_init_string() 35 | 36 | def sigmoid_bias(): 37 | return rng.uniform(0, .3) 38 | 39 | h0_bias = sigmoid_bias() 40 | h1_bias = sigmoid_bias() 41 | 42 | 43 | learning_rate = 10. ** rng.uniform(-2., -.5) 44 | 45 | if rng.randint(2): 46 | msat = 2 47 | else: 48 | msat = rng.randint(2, 1000) 49 | 50 | final_momentum = rng.uniform(.5, .9) 51 | 52 | lr_sat = rng.randint(200, 1000) 53 | 54 | decay = 10. 
** rng.uniform(-3, -1) 55 | 56 | 57 | task_0_yaml_str = task_0_template % locals() 58 | 59 | serial.mkdir('exp/' + str(job_id)) 60 | train_file_full_stem = 'exp/'+str(job_id)+'/' 61 | f = open(train_file_full_stem + 'task_0.yaml', 'w') 62 | f.write(task_0_yaml_str) 63 | f.close() 64 | 65 | task_1_yaml_str = task_1_template % locals() 66 | 67 | serial.mkdir('exp/' + str(job_id)) 68 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 69 | f.write(task_1_yaml_str) 70 | f.close() 71 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_mnist/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_sgd_sigmoid_mnist/worker.sh $F/experiments/random_search_sgd_sigmoid_mnist/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_mnist/make_launch.py: -------------------------------------------------------------------------------- 1 | base = """jobdispatch --torque --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=24:00:00 --whitespace --gpu $F/experiments/random_search_sgd_sigmoid_mnist/worker.sh $F/experiments/random_search_sgd_sigmoid_mnist/exp/"{{%(args)s}}\"""" 2 | args = ','.join([str(job_id) for job_id in xrange(25)]) 3 | f = open('launch.sh', 'w') 4 | f.write(base % locals()) 5 | f.close() 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_mnist/task_0_template.yaml: -------------------------------------------------------------------------------- 1 | # Author: Ian Goodfellow 2 | # We train with the un-permuted version first. 3 | # This way after training with the permuted version, we can see if the 4 | # filters in the un-permuted space are still recognizable. 
5 | !obj:pylearn2.train.Train { 6 | dataset: &train !obj:pylearn2.datasets.mnist.MNIST { 7 | which_set: 'train', 8 | one_hot: 1, 9 | start: 0, 10 | stop: 50000 11 | }, 12 | model: !obj:pylearn2.models.mlp.MLP { 13 | layers: [ !obj:pylearn2.models.mlp.Sigmoid { 14 | max_col_norm: %(h0_col_norm)f, 15 | layer_name: 'h0', 16 | dim: %(h0_dim)d, 17 | %(h0_init)s, 18 | init_bias: %(h0_bias)f 19 | }, !obj:pylearn2.models.mlp.Sigmoid { 20 | max_col_norm: %(h1_col_norm)f, 21 | layer_name: 'h1', 22 | dim: %(h1_dim)d, 23 | %(h1_init)s, 24 | init_bias: %(h1_bias)f 25 | }, !obj:pylearn2.models.mlp.Softmax { 26 | max_col_norm: %(y_col_norm)f, 27 | layer_name: 'y', 28 | n_classes: 10, 29 | %(y_init)s 30 | } 31 | ], 32 | nvis: 784, 33 | }, 34 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 35 | batch_size: 100, 36 | learning_rate: %(learning_rate)f, 37 | init_momentum: .5, 38 | monitoring_dataset: 39 | { 40 | 'train' : *train, 41 | 'valid' : !obj:pylearn2.datasets.mnist.MNIST { 42 | which_set: 'train', 43 | one_hot: 1, 44 | start: 50000, 45 | stop: 60000 46 | }, 47 | 'test' : !obj:pylearn2.datasets.mnist.MNIST { 48 | which_set: 'test', 49 | one_hot: 1, 50 | } 51 | }, 52 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 53 | !obj:pylearn2.costs.mlp.Default { 54 | } 55 | ] 56 | }, 57 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 58 | channel_name: "valid_y_misclass", 59 | prop_decrease: 0., 60 | N: 100 61 | } 62 | }, 63 | extensions: [ 64 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 65 | channel_name: 'valid_y_misclass', 66 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 67 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 68 | start: 1, 69 | saturate: %(msat)d, 70 | final_momentum: %(final_momentum)f 71 | }, 72 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 73 | start: 1, 74 | saturate: %(lr_sat)d, 75 | decay_factor: %(decay)f 76 | } 77 | ] 78 | } 79 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_mnist/task_1_template.yaml: -------------------------------------------------------------------------------- 1 | # Author: Ian Goodfellow 2 | !obj:pylearn2.train.Train { 3 | dataset: &train !obj:pylearn2.datasets.mnist.MNIST { 4 | which_set: 'train', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 50000, 8 | preprocessor: !obj:forgetting.permute_and_flip { 9 | flip: 0 10 | } 11 | }, 12 | model: !obj:pylearn2.monitor.push_monitor { 13 | model: !pkl: "task_0_best.pkl", 14 | name: "monitor_first" 15 | }, 16 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 17 | batch_size: 100, 18 | learning_rate: %(learning_rate)f, 19 | init_momentum: .5, 20 | monitoring_dataset: 21 | { 22 | 'train' : *train, 23 | 'valid_both' : !obj:forgetting.concat { 24 | datasets: [ 25 | !obj:pylearn2.datasets.mnist.MNIST { 26 | which_set: 'train', 27 | one_hot: 1, 28 | start: 50000, 29 | stop: 60000, 30 | }, 31 | !obj:pylearn2.datasets.mnist.MNIST { 32 | which_set: 'train', 33 | one_hot: 1, 34 | start: 50000, 35 | stop: 60000, 36 | preprocessor: !obj:forgetting.permute_and_flip { 37 | flip: 0 38 | } 39 | } 40 | ] 41 | }, 42 | 'test' : !obj:pylearn2.datasets.mnist.MNIST { 43 | which_set: 'test', 44 | one_hot: 1, 45 | preprocessor: !obj:forgetting.permute_and_flip { 46 | flip: 0 47 | } 48 | }, 49 | 'test_old' : !obj:pylearn2.datasets.mnist.MNIST { 50 | which_set: 'test', 51 | one_hot: 1, 52 | } 53 | }, 54 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 55 | 
!obj:pylearn2.costs.mlp.Default {} 56 | ] 57 | }, 58 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 59 | channel_name: "valid_both_y_misclass", 60 | prop_decrease: 0., 61 | N: 100 62 | } 63 | }, 64 | extensions: [ 65 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 66 | channel_name: 'valid_both_y_misclass', 67 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 68 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 69 | start: 1, 70 | saturate: %(msat)d, 71 | final_momentum: %(final_momentum)f 72 | }, 73 | !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { 74 | start: 1, 75 | saturate: %(lr_sat)d, 76 | decay_factor: %(decay)f 77 | } 78 | ], 79 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl", 80 | save_freq: 1 81 | } 82 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_mnist/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_mnist_amazon/configure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pylearn2.utils import serial 4 | 5 | num_jobs = 25 6 | 7 | rng = np.random.RandomState([2013, 11, 22]) 8 | 9 | task_0_template = open('task_0_template.yaml', 'r').read() 10 | task_1_template = open('task_1_template.yaml', 'r').read() 11 | 12 | for job_id in xrange(num_jobs): 13 | 14 | h0_col_norm = rng.uniform(1., 5.) 15 | h1_col_norm = rng.uniform(1., 5.) 16 | y_col_norm = rng.uniform(1., 5.) 17 | 18 | h0_dim = rng.randint(250, 5000) 19 | h1_dim = rng.randint(250, 5000) 20 | 21 | def random_init_string(): 22 | if rng.randint(2): 23 | sparse_init = rng.randint(10, 30) 24 | return "sparse_init: " + str(sparse_init) 25 | irange = 10. ** rng.uniform(-2.3, -1.) 26 | return "irange: " + str(irange) 27 | 28 | h0_init = random_init_string() 29 | h1_init = random_init_string() 30 | 31 | if rng.randint(2): 32 | y_init = "sparse_init: 0" 33 | else: 34 | y_init = random_init_string() 35 | 36 | def sigmoid_bias(): 37 | return rng.uniform(0, .3) 38 | 39 | h0_bias = sigmoid_bias() 40 | h1_bias = sigmoid_bias() 41 | 42 | 43 | learning_rate = 10. ** rng.uniform(-2., -.5) 44 | 45 | if rng.randint(2): 46 | msat = 2 47 | else: 48 | msat = rng.randint(2, 1000) 49 | 50 | final_momentum = rng.uniform(.5, .9) 51 | 52 | lr_sat = rng.randint(200, 1000) 53 | 54 | decay = 10. 
** rng.uniform(-3, -1) 55 | 56 | 57 | #task_0_yaml_str = task_0_template % locals() 58 | 59 | #serial.mkdir('exp/' + str(job_id)) 60 | #train_file_full_stem = 'exp/'+str(job_id)+'/' 61 | train_file_full_stem = '{}exp/{}/'.format('/scratch/mmirza/results/forgetting/random_search_sgd_sigmoid_mnist_amazon/', job_id) 62 | #f = open(train_file_full_stem + 'task_0.yaml', 'w') 63 | #f.write(task_0_yaml_str) 64 | #f.close() 65 | 66 | task_1_yaml_str = task_1_template % locals() 67 | 68 | serial.mkdir(train_file_full_stem) 69 | f = open(train_file_full_stem + 'task_1.yaml', 'w') 70 | f.write(task_1_yaml_str) 71 | f.close() 72 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_mnist_amazon/launch.sh: -------------------------------------------------------------------------------- 1 | jobdispatch --env=THEANO_FLAGS=device=gpu,floatX=float32,force_device=True --duree=2h --mem=6G --gpu $SCRATCH/results/forgetting/random_search_sgd_sigmoid_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_sgd_sigmoid_mnist_amazon/exp/"{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}}" 2 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_mnist_amazon/local_launch.sh: -------------------------------------------------------------------------------- 1 | for fold in 0 1 2 3 4 5 6 7 8 9 10 11 12 2 | do 3 | $SCRATCH/results/forgetting/random_search_sgd_sigmoid_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_sgd_sigmoid_mnist_amazon/exp/$fold 4 | done 5 | 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_mnist_amazon/local_launch2.sh: -------------------------------------------------------------------------------- 1 | for fold in 13 14 15 16 17 18 19 20 21 22 23 24 2 | do 3 | $SCRATCH/results/forgetting/random_search_sgd_sigmoid_mnist_amazon/worker.sh $SCRATCH/results/forgetting/random_search_sgd_sigmoid_mnist_amazon/exp/$fold 4 | done 5 | 6 | -------------------------------------------------------------------------------- /experiments/random_search_sgd_sigmoid_mnist_amazon/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd $1 3 | python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_0.yaml || exit -1 4 | echo "starting task 1" 5 | python /work/mmirza/pylearn2/pylearn2/scripts/train.py task_1.yaml 6 | -------------------------------------------------------------------------------- /lwta.py: -------------------------------------------------------------------------------- 1 | __author__ = "Xia Da, Ian Goodfellow" 2 | from theano.sandbox.rng_mrg import MRG_RandomStreams 3 | from theano import tensor as T 4 | from pylearn2.models.mlp import Linear 5 | 6 | def lwta(p, block_size): 7 | """ 8 | The hard local winner take all non-linearity from "Compete to Compute" 9 | by Rupesh Srivastava et al. 10 | Our implementation differs slightly from theirs--we break ties randomly, 11 | they break them by earliest index. This difference is just due to ease 12 | of implementation in theano.
13 | """ 14 | batch_size = p.shape[0] 15 | num_filters = p.shape[1] 16 | num_blocks = num_filters // block_size 17 | w = p.reshape((batch_size, num_blocks, block_size)) 18 | block_max = w.max(axis=2).dimshuffle(0, 1, 'x') * T.ones_like(w) 19 | max_mask = T.cast(w >= block_max, 'float32') 20 | theano_rng = MRG_RandomStreams(20131206 % (2 ** 16)) 21 | denom = max_mask.sum(axis=2).dimshuffle(0, 1, 'x') 22 | probs = max_mask / denom 23 | probs = probs.reshape((batch_size * num_blocks, block_size)) 24 | max_mask = theano_rng.multinomial(pvals=probs, dtype='float32') 25 | max_mask = max_mask.reshape((batch_size, num_blocks, block_size)) 26 | w = w * max_mask 27 | w = w.reshape((p.shape[0], p.shape[1])) 28 | return w 29 | 30 | class LWTA(Linear): 31 | """ 32 | An MLP Layer using the LWTA non-linearity. 33 | """ 34 | def __init__(self, block_size, **kwargs): 35 | super(LWTA, self).__init__(**kwargs) 36 | self.block_size = block_size 37 | 38 | def fprop(self, state_below): 39 | p = super(LWTA, self).fprop(state_below) 40 | w = lwta(p, self.block_size) 41 | w.name = self.layer_name + '_out' 42 | return w 43 | -------------------------------------------------------------------------------- /ppf.py: -------------------------------------------------------------------------------- 1 | __author__ = "Ian Goodfellow" 2 | 3 | def cloud_to_ppf(x, y, higher_better=True): 4 | """ 5 | x: A list of x coordinates 6 | y: A list of y coordinates 7 | 8 | Makes a production possibility frontier, i.e. a curve showing 9 | how much y is possible for each value of x. 10 | 11 | If higher_better, we assume bigger values of y are better, so we 12 | make a curve that lies above all points in the cloud. 13 | 14 | Returns ppf_x, ppf_y, which are lists of x and y points defining the ppf 15 | curve. 16 | """ 17 | 18 | print 'running cloud to ppf' 19 | 20 | combined = zip(x, y) 21 | s = sorted(combined, key = lambda e : e[0]) 22 | x = [e[0] for e in s] 23 | y = [e[1] for e in s] 24 | 25 | i = 0 26 | while i < len(x): 27 | lx = x[i] 28 | ly = y[i] 29 | j = len(x) - 1 30 | while j > i: 31 | rx = x[j] 32 | #assert rx > lx 33 | ry = y[j] 34 | slope = (ry - ly) / (rx - lx) 35 | intercept = ly - slope * lx 36 | k = i + 1 37 | 38 | while k < j: 39 | mx = x[k] 40 | my = y[k] 41 | pred = slope * mx + intercept 42 | if (higher_better and my <= pred) or (my >= pred and not 43 | higher_better): 44 | del x[k] 45 | del y[k] 46 | j -= 1 47 | else: 48 | k += 1 49 | j -= 1 50 | i += 1 51 | 52 | 53 | 54 | print 'done running cloud to pff' 55 | 56 | return x, y 57 | 58 | 59 | -------------------------------------------------------------------------------- /scratch/README: -------------------------------------------------------------------------------- 1 | These are relatively throwaway experiments done to make sure the basic setup 2 | is working before moving on to the carefully controlled experiments. 3 | -------------------------------------------------------------------------------- /scratch/mnist_pi_60k.yaml: -------------------------------------------------------------------------------- 1 | !obj:pylearn2.train.Train { 2 | dataset: &train !obj:pylearn2.datasets.mnist.MNIST { 3 | which_set: 'train', 4 | one_hot: 1, 5 | start: 0, 6 | stop: 60000 7 | }, 8 | model: !obj:pylearn2.models.mlp.MLP { 9 | layers: [ !obj:forgetting.lwta.LWTA { 10 | layer_name: 'h0', 11 | dim: 1000, 12 | block_size: 2, 13 | irange: .03, 14 | max_col_norm: 10. 
15 | }, !obj:forgetting.lwta.LWTA { 16 | layer_name: 'h1', 17 | dim: 1000, 18 | block_size: 2, 19 | irange: .03, 20 | max_col_norm: 10. 21 | }, !obj:forgetting.lwta.LWTA { 22 | layer_name: 'h2', 23 | dim: 1000, 24 | block_size: 2, 25 | irange: .03, 26 | max_col_norm: 10. 27 | }, !obj:pylearn2.models.mlp.Softmax { 28 | layer_name: 'y', 29 | n_classes: 10, 30 | irange: .03, 31 | max_col_norm: 10. 32 | } 33 | ], 34 | nvis: 784 35 | }, 36 | algorithm: !obj:pylearn2.training_algorithms.sgd_lwta.SGD { 37 | batch_size: 100, 38 | learning_rate: 1., 39 | init_momentum: .9, 40 | dropout_momentum: True, 41 | monitoring_dataset: 42 | { 43 | 'train' : *train, 44 | 'valid' : !obj:pylearn2.datasets.mnist.MNIST { 45 | which_set: 'train', 46 | one_hot: 1, 47 | start: 50000, 48 | stop: 60000 49 | }, 50 | 'test' : !obj:pylearn2.datasets.mnist.MNIST { 51 | which_set: 'test', 52 | one_hot: 1, 53 | } 54 | }, 55 | termination_criterion: !obj:pylearn2.termination_criteria.ChannelTarget { 56 | channel_name: "train_objective", 57 | target: 0.00004671578 58 | }, 59 | update_callbacks: !obj:pylearn2.training_algorithms.sgd_lwta.ExponentialDecayOverEpoch { 60 | decay_factor: .95, 61 | min_lr: .01, 62 | minibatches_per_epoch: 500 63 | } 64 | }, 65 | extensions: [ !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 66 | channel_name: 'valid_y_misclass', 67 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 68 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 69 | start: 1, 70 | saturate: 100, 71 | final_momentum: .95 72 | } 73 | ], 74 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl", 75 | save_freq: 1 76 | } 77 | -------------------------------------------------------------------------------- /scratch/sgd_relu_mnist_task_0.yaml: -------------------------------------------------------------------------------- 1 | # Author: Ian Goodfellow 2 | # We train with the un-permuted version first. 3 | # This way after training with the permuted version, we can see if the 4 | # filters in the un-permuted space are still recognizable. 5 | !obj:pylearn2.train.Train { 6 | dataset: &train !obj:pylearn2.datasets.mnist.MNIST { 7 | which_set: 'train', 8 | one_hot: 1, 9 | start: 0, 10 | stop: 50000 11 | }, 12 | model: !obj:pylearn2.models.mlp.MLP { 13 | layers: [ !obj:pylearn2.models.mlp.RectifiedLinear { 14 | max_col_norm: 1.9, 15 | layer_name: 'h0', 16 | dim: 500, 17 | sparse_init: 15, 18 | }, !obj:pylearn2.models.mlp.RectifiedLinear { 19 | max_col_norm: 1.9, 20 | layer_name: 'h1', 21 | dim: 1000, 22 | sparse_init: 15, 23 | }, !obj:pylearn2.models.mlp.Softmax { 24 | max_col_norm: 1.9, 25 | layer_name: 'y', 26 | n_classes: 10, 27 | irange: 0. 
28 | } 29 | ], 30 | nvis: 784, 31 | }, 32 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 33 | batch_size: 100, 34 | learning_rate: .01, 35 | init_momentum: .5, 36 | monitoring_dataset: 37 | { 38 | 'train' : *train, 39 | 'valid' : !obj:pylearn2.datasets.mnist.MNIST { 40 | which_set: 'train', 41 | one_hot: 1, 42 | start: 50000, 43 | stop: 60000 44 | }, 45 | 'test' : !obj:pylearn2.datasets.mnist.MNIST { 46 | which_set: 'test', 47 | one_hot: 1, 48 | } 49 | }, 50 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 51 | !obj:pylearn2.costs.cost.MethodCost { 52 | method: 'cost_from_X', 53 | supervised: 1 54 | } 55 | ] 56 | }, 57 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 58 | channel_name: "valid_y_misclass", 59 | prop_decrease: 0., 60 | N: 10 61 | } 62 | }, 63 | extensions: [ 64 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 65 | channel_name: 'valid_y_misclass', 66 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 67 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 68 | start: 1, 69 | saturate: 10, 70 | final_momentum: .99 71 | } 72 | ] 73 | } 74 | -------------------------------------------------------------------------------- /scratch/sgd_relu_mnist_task_1.yaml: -------------------------------------------------------------------------------- 1 | # Author: Ian Goodfellow 2 | !obj:pylearn2.train.Train { 3 | dataset: &train !obj:pylearn2.datasets.mnist.MNIST { 4 | which_set: 'train', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 50000, 8 | preprocessor: !obj:forgetting.permute_and_flip { 9 | flip: 0 10 | } 11 | }, 12 | model: !obj:pylearn2.monitor.push_monitor { 13 | model: !pkl: "sgd_relu_mnist_task_0_best.pkl", 14 | name: "monitor_first" 15 | }, 16 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 17 | batch_size: 100, 18 | learning_rate: .01, 19 | init_momentum: .5, 20 | monitoring_dataset: 21 | { 22 | 'train' : *train, 23 | 'old_valid' : !obj:pylearn2.datasets.mnist.MNIST { 24 | which_set: 'train', 25 | one_hot: 1, 26 | start: 50000, 27 | stop: 60000, 28 | }, 29 | 'valid' : !obj:pylearn2.datasets.mnist.MNIST { 30 | which_set: 'train', 31 | one_hot: 1, 32 | start: 50000, 33 | stop: 60000, 34 | preprocessor: !obj:forgetting.permute_and_flip { 35 | flip: 0 36 | } 37 | }, 38 | 'test' : !obj:pylearn2.datasets.mnist.MNIST { 39 | which_set: 'test', 40 | one_hot: 1, 41 | preprocessor: !obj:forgetting.permute_and_flip { 42 | flip: 0 43 | } 44 | } 45 | }, 46 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 47 | !obj:pylearn2.costs.cost.MethodCost { 48 | method: 'cost_from_X', 49 | supervised: 1 50 | } 51 | ] 52 | }, 53 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 54 | channel_name: "valid_y_misclass", 55 | prop_decrease: 0., 56 | N: 10 57 | } 58 | }, 59 | extensions: [ 60 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 61 | channel_name: 'valid_y_misclass', 62 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 63 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 64 | start: 1, 65 | saturate: 10, 66 | final_momentum: .99 67 | } 68 | ] 69 | } 70 | -------------------------------------------------------------------------------- /scratch/sgd_relu_mnist_task_1a.yaml: -------------------------------------------------------------------------------- 1 | # Author: Ian Goodfellow 2 | !obj:pylearn2.train.Train { 3 | dataset: &train !obj:pylearn2.datasets.mnist.MNIST { 4 | which_set: 'train', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 50000, 8 | 
preprocessor: !obj:forgetting.permute_and_flip { 9 | flip: 1 10 | } 11 | }, 12 | model: !obj:pylearn2.monitor.push_monitor { 13 | model: !pkl: "sgd_relu_mnist_task_0_best.pkl", 14 | name: "monitor_first" 15 | }, 16 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 17 | batch_size: 100, 18 | learning_rate: .01, 19 | init_momentum: .5, 20 | monitoring_dataset: 21 | { 22 | 'train' : *train, 23 | 'old_valid' : !obj:pylearn2.datasets.mnist.MNIST { 24 | which_set: 'train', 25 | one_hot: 1, 26 | start: 50000, 27 | stop: 60000, 28 | }, 29 | 'valid' : !obj:pylearn2.datasets.mnist.MNIST { 30 | which_set: 'train', 31 | one_hot: 1, 32 | start: 50000, 33 | stop: 60000, 34 | preprocessor: !obj:forgetting.permute_and_flip { 35 | flip: 1 36 | } 37 | }, 38 | 'test' : !obj:pylearn2.datasets.mnist.MNIST { 39 | which_set: 'test', 40 | one_hot: 1, 41 | preprocessor: !obj:forgetting.permute_and_flip { 42 | flip: 1 43 | } 44 | } 45 | }, 46 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 47 | !obj:pylearn2.costs.cost.MethodCost { 48 | method: 'cost_from_X', 49 | supervised: 1 50 | } 51 | ] 52 | }, 53 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 54 | channel_name: "valid_y_misclass", 55 | prop_decrease: 0., 56 | N: 100 57 | } 58 | }, 59 | extensions: [ 60 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 61 | channel_name: 'valid_y_misclass', 62 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 63 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 64 | start: 1, 65 | saturate: 10, 66 | final_momentum: .99 67 | } 68 | ] 69 | } 70 | -------------------------------------------------------------------------------- /scratch/sgd_relu_mnist_task_1b.yaml: -------------------------------------------------------------------------------- 1 | # Author: Ian Goodfellow 2 | !obj:pylearn2.train.Train { 3 | dataset: &train !obj:pylearn2.datasets.mnist.MNIST { 4 | which_set: 'train', 5 | one_hot: 1, 6 | start: 0, 7 | stop: 50000, 8 | preprocessor: !obj:forgetting.permute_and_flip { 9 | flip: 0 10 | } 11 | }, 12 | model: !obj:pylearn2.monitor.push_monitor { 13 | model: !pkl: "sgd_relu_mnist_task_0_best.pkl", 14 | name: "monitor_first" 15 | }, 16 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 17 | batch_size: 100, 18 | learning_rate: .01, 19 | init_momentum: .5, 20 | monitoring_dataset: 21 | { 22 | 'train' : *train, 23 | 'valid_both' : !obj:forgetting.concat { 24 | datasets: [ 25 | !obj:pylearn2.datasets.mnist.MNIST { 26 | which_set: 'train', 27 | one_hot: 1, 28 | start: 50000, 29 | stop: 60000, 30 | }, 31 | !obj:pylearn2.datasets.mnist.MNIST { 32 | which_set: 'train', 33 | one_hot: 1, 34 | start: 50000, 35 | stop: 60000, 36 | preprocessor: !obj:forgetting.permute_and_flip { 37 | flip: 0 38 | } 39 | } 40 | ] 41 | }, 42 | 'test' : !obj:pylearn2.datasets.mnist.MNIST { 43 | which_set: 'test', 44 | one_hot: 1, 45 | preprocessor: !obj:forgetting.permute_and_flip { 46 | flip: 0 47 | } 48 | }, 49 | 'test_old' : !obj:pylearn2.datasets.mnist.MNIST { 50 | which_set: 'test', 51 | one_hot: 1, 52 | } 53 | }, 54 | cost: !obj:pylearn2.costs.cost.SumOfCosts { costs: [ 55 | !obj:pylearn2.costs.cost.MethodCost { 56 | method: 'cost_from_X', 57 | supervised: 1 58 | } 59 | ] 60 | }, 61 | termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { 62 | channel_name: "valid_both_y_misclass", 63 | prop_decrease: 0., 64 | N: 100 65 | } 66 | }, 67 | extensions: [ 68 | !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { 69 | channel_name: 
'valid_both_y_misclass', 70 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl" 71 | }, !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor { 72 | start: 1, 73 | saturate: 10, 74 | final_momentum: .99 75 | } 76 | ], 77 | save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl", 78 | save_freq: 1 79 | } 80 | -------------------------------------------------------------------------------- /test_lwta.py: -------------------------------------------------------------------------------- 1 | __author__ = "Ian Goodfellow" 2 | 3 | import numpy as np 4 | 5 | from pylearn2.utils import sharedX 6 | 7 | from forgetting.lwta import lwta 8 | 9 | def test_lwta(): 10 | example_input = np.zeros((2, 6)) 11 | 12 | # begin block 13 | example_input[0, 0] = -2.5 14 | example_input[0, 1] = 1.3 # max 15 | example_input[0, 2] = 0.9 16 | # begin block 17 | example_input[0, 3] = -0.1 # tied for max 18 | example_input[0, 4] = -0.2 19 | example_input[0, 5] = -0.1 # tied for max 20 | # begin block 21 | example_input[1, 0] = 5.0 # max 22 | example_input[1, 1] = 4.0 23 | example_input[1, 2] = 3.0 24 | # begin block 25 | example_input[1, 3] = 0.0 26 | example_input[1, 4] = 1.0 27 | example_input[1, 5] = 2.0 # max 28 | 29 | output = lwta(sharedX(example_input), block_size=3).eval() 30 | 31 | num_zeros = (output == 0).sum() 32 | assert num_zeros == 8 33 | 34 | assert np.allclose(output[0, 1], 1.3), output[0, 1] 35 | assert np.allclose(output[0, 3], -0.1) or np.allclose(output[0, 5], -0.1) 36 | assert np.allclose(output[1, 0], 5.0) 37 | assert np.allclose(output[1, 5], 2.0) 38 | 39 | --------------------------------------------------------------------------------
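A quick sanity check like test_lwta.py exists for lwta, but nothing similar ships for cloud_to_ppf in ppf.py, so a minimal, hypothetical smoke test is sketched below. The (x, y) values are invented for illustration; it assumes only that the package is importable as forgetting (as in test_lwta.py) and that Python 2 is used, matching the rest of the codebase. The coordinates are deliberately floats: with integer inputs, the slope computation inside cloud_to_ppf would hit Python 2 floor division.

# Hypothetical demo of forgetting.ppf.cloud_to_ppf -- not part of the
# original repository.
from forgetting.ppf import cloud_to_ppf

# A small cloud of points; (2., 1.) lies on the chord from (0., 0.) to
# (4., 2.), so with higher_better=True it should be discarded, while the
# curve keeps every point it cannot bound from above.
x = [0., 1., 2., 3., 4.]
y = [0., 2., 1., 3., 2.]

# cloud_to_ppf rebinds x and y internally after sorting, so the caller's
# lists are left untouched.
ppf_x, ppf_y = cloud_to_ppf(x, y, higher_better=True)

print ppf_x  # [0.0, 1.0, 3.0, 4.0]
print ppf_y  # [0.0, 2.0, 3.0, 2.0]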