├── README.md
├── dl
│   ├── __init__.py
│   ├── deepjets.py
│   └── deepae.py
├── .gitignore
├── LICENSE
├── utils
│   └── sampling.py
├── viz
│   ├── visualize.py
│   └── performance.py
└── pretraining-experiment.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# deep-jets
Deep Learning for Jet-Images
--------------------------------------------------------------------------------
/dl/__init__.py:
--------------------------------------------------------------------------------
from deepae import pretrain_deep_ae, unroll_deep_ae

__all__ = ['pretrain_deep_ae', 'unroll_deep_ae']
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015 ml-slac

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/dl/deepjets.py:
--------------------------------------------------------------------------------
'''
deepjets.py

An interface to Keras that allows physics users
to manipulate and understand deep networks.
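A sketch of the intended usage (the class below is still a stub, so this
mirrors its API rather than tested behavior):

    >>> net = DeepNet(pretraining=True)
    >>> net.add(Dense(625, 256, activation='sigmoid'))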
'''

import random
import os

import numpy as np

from keras.layers import containers
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, AutoEncoder, DenoisingAutoEncoder
from keras.optimizers import SGD, RMSprop, Adagrad, Adam

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class DeepNet(object):
    '''
    A wrapper class around Keras models -- allows activation inspection.
    '''
    def __init__(self, pretraining=True, model=Sequential()):
        super(DeepNet, self).__init__()
        self.model = model
        self.pretraining = pretraining

        self._is_pretrained = False

        self._is_compiled = []

        self.submodels = {}

        self.autoencoder = []

        self._model_cache = []

        self.weights = None

        self.callbacks = {}

    def add(self, layer, **kwargs):
        '''
        Add a Keras layer to a DeepNet. Each layer is wrapped in its own
        Sequential() and compiled so it can be trained greedily.
        '''
        # -- fall back to sensible defaults when not specified.
        loss = kwargs.get('loss', 'mse')
        optimizer = kwargs.get('optimizer', Adam())
        logger.info('Adding {} layer'.format(layer.__class__))
        if type(layer) == AutoEncoder and not self.pretraining:
            raise TypeError(
                'pretraining = False set while trying to add an AutoEncoder'
            )
        self._model_cache.append(Sequential())
        self._model_cache[-1].add(layer)
        self._model_cache[-1].compile(loss=loss, optimizer=optimizer)

    # def pretrain(self, X, **kwargs):
    #     pass
--------------------------------------------------------------------------------
/utils/sampling.py:
--------------------------------------------------------------------------------
'''
Functionality for weighted sampling for training
and testing -- e.g., matching pT distributions.
'''

import numpy as np


class MultinomialSampler(object):
    '''
    Fast (O(log n)) sampling from a discrete probability
    distribution, with O(n) set-up time.
    '''

    def __init__(self, p, verbose=False):
        '''
        Initialize the sampler.

        Args:
            p (list or np.array): length-n vector of non-negative numbers
                that represent the weights over an n-outcome distribution.
            verbose (bool): self explanatory
        '''
        p = np.asarray(p, dtype=float)
        p = p / p.sum()
        self._cdf = np.cumsum(p)

    def sample(self, k=1):
        '''
        Return k random index draws from the distribution.
        '''
        rs = np.random.random(k)
        # binary search to get indices
        return np.searchsorted(self._cdf, rs)

    def __call__(self, **kwargs):
        return self.sample(**kwargs)

    def reconstruct_p(self):
        '''
        Return the original probability vector.
        Helpful for debugging.
        '''
        n = len(self._cdf)
        p = np.zeros(n)
        p[0] = self._cdf[0]
        p[1:] = (self._cdf[1:] - self._cdf[:-1])
        return p


def multinomial_sample(p, k=1):
    '''
    Wrapper to generate k samples
    using the MultinomialSampler class.
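
    Example (a minimal sketch; any non-negative weight vector works):

        >>> p = [0.2, 0.5, 0.3]
        >>> idx = multinomial_sample(p, k=4)  # 4 indices drawn from {0, 1, 2}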
    '''
    return MultinomialSampler(p).sample(k)


class WeightedDataset(object):
    '''
    A dataset wrapper that supports sampling (X, y) pairs according
    to a per-example weight vector.
    '''
    def __init__(self, X, y=None, weights=None, copy=True):
        self._ix_buf = None
        if copy:
            self._X = X.copy()
            if y is not None:
                self._y = y.copy()
            else:
                self._y = None
        else:
            self._X = X
            self._y = y

        if weights is None:
            weights = np.ones(X.shape[0])

        if not isinstance(weights, (list, np.ndarray)):
            raise TypeError('weights must be a numpy array or a list')

        if len(weights) != X.shape[0]:
            raise ValueError(
                'weights must have the same length as the first axis of X'
            )

        if isinstance(weights, list):
            self._weights = np.array(weights)
        else:
            self._weights = weights

        # -- clip negative weights and normalize to a probability vector.
        self._weights[self._weights < 0] = 0.0
        self._weights = self._weights / np.sum(self._weights)

    def sample_idx(self, n):
        self._ix_buf = multinomial_sample(self._weights, n)
        return self._ix_buf

    def sample(self, n):
        self._ix_buf = multinomial_sample(self._weights, n)
        if self._y is None:
            return self._X[self._ix_buf]
        else:
            return self._X[self._ix_buf], self._y[self._ix_buf]
--------------------------------------------------------------------------------
/viz/visualize.py:
--------------------------------------------------------------------------------
'''
visualize.py
author: Luke de Oliveira (lukedeo@stanford.edu)

Utilities and functions to inspect neural net filters.
'''
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.colors import LogNorm
import numpy as np

import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def custom_div_cmap(numcolors=21, name='custom_div_cmap',
                    mincol='blue', midcol='white', maxcol='red'):
    '''
    Create a custom diverging colormap with three colors.

    Default is blue to white to red with 21 colors.

    Colors can be specified in any way understandable
    by matplotlib.colors.ColorConverter.to_rgb()
    '''
    cmap = LinearSegmentedColormap.from_list(
        name=name,
        colors=[mincol, midcol, maxcol],
        N=numcolors
    )
    return cmap


def filter_grid(filters, labels=None, nfilters='all', shape=None,
                normalize=True, cmap=None, symmetric=True):
    '''
    A tool for visualizing filters on a grid.

    Args:
        filters (iterable): each element should be an
            image with len(image.shape) == 2

        nfilters (str or int): out of the total filters,
            how many to plot? If a str, must be 'all'

        shape (tuple): what shape of grid do we want?

        normalize (bool): do we normalize all filters to have
            magnitude 1?
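
        labels (iterable of str, optional): per-filter titles.

        cmap: a matplotlib colormap; defaults to the 50-color
            blue/white/red map from custom_div_cmap.

        symmetric (bool): if True, center the color scale at zero.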

    Returns:
        plt.figure
    '''

    NUMERICAL_NOISE_THRESH = 1e-3

    if nfilters == 'all':
        side_length = int(np.round(np.sqrt(len(filters))))
    else:
        side_length = int(np.round(np.sqrt(nfilters)))

    if cmap is None:
        cma = custom_div_cmap(50)
    else:
        cma = cmap
    fig = plt.figure(figsize=(15, 15), dpi=140)

    if shape is None:
        grid_layout = gridspec.GridSpec(side_length, side_length)
        nplots = side_length ** 2
    else:
        grid_layout = gridspec.GridSpec(shape[0], shape[1])
        nplots = shape[0] * shape[1]

    plt.title('plots')
    grid_layout.update(wspace=0.0, hspace=0.0)  # set the spacing between axes.

    for i, filt in enumerate(filters):
        ax = plt.subplot(grid_layout[i])
        if normalize:
            filt /= np.sum(filt ** 2)

        # -- trim off absurd values.
        # abs_max = np.percentile(np.abs(filt), 98)
        abs_max = np.max(np.abs(filt))

        # -- trim out numerical zero noise
        filt[np.abs(filt) < NUMERICAL_NOISE_THRESH] = 0.0
        if symmetric:
            image = ax.imshow(filt, interpolation='nearest',
                              cmap=cma, vmin=-abs_max, vmax=abs_max)
        else:
            image = plt.imshow(filt, interpolation='nearest', cmap=cma)
        if i % 10 == 0:
            logger.info('{} of {} completed.'.format(i, nplots))
        plt.axis('off')
        if labels is not None:
            plt.title(labels[i])
    plt.subplots_adjust(hspace=0, wspace=0)

    return fig


class FilterInspectionLayer(object):

    def __init__(self, L):
        super(FilterInspectionLayer, self).__init__()
        self.W, self.b = L.get_weights()
        if len(self.W.shape) == 3:
            _, self.inputs, self.outputs = self.W.shape
        else:
            self.inputs, self.outputs = self.W.shape
--------------------------------------------------------------------------------
/viz/performance.py:
--------------------------------------------------------------------------------
'''
performance.py
author: Luke de Oliveira (lukedeo@stanford.edu)


Usage:

>>> weights = np.ones(n_samples)
>>> # -- going to match bkg to signal
>>> weights[signal == False] = get_weights(sig_pt, bkg_pt)
>>> discs = {}
>>> add_curve(r'$\tau_{32}$', 'red',
...           calculate_roc(signal, tau_32, weights=weights), discs)
>>> fg = ROC_plotter(discs)
>>> fg.savefig('myroc.pdf')

'''

import numpy as np
import matplotlib.pyplot as plt


def get_weights(target, actual, bins=10, cap=10, match=True):
    '''
    Re-weights an `actual` distribution to a `target`.

    Args:
        target (array/list): observations drawn from the target distribution
        actual (array/list): observations drawn from the distribution to
            match to the target.

        bins (numeric or list/array of numerics): bins to use to do weighting

        cap (numeric): maximum weight value.

        match (bool): whether to make the sum of weights in actual equal to
            the number of samples in target

    Returns:
        numpy.array: returns array of shape len(actual).
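
    Example (a sketch -- `sig_pt` and `bkg_pt` stand in for any two 1-D
    arrays; here the background pT spectrum is reweighted to the signal):

        >>> w = get_weights(sig_pt, bkg_pt, bins=20)
        >>> assert len(w) == len(bkg_pt)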

    '''
    target_counts, target_bins = np.histogram(target, bins=bins)
    counts, _ = np.histogram(actual, bins=target_bins)
    counts = (1.0 * counts)
    counts = np.array([max(a, 0.0001) for a in counts])
    multiplier = target_counts / counts

    weights = np.array(
        [min(multiplier[target_bins.searchsorted(point) - 1], cap)
         for point in actual]
    )

    if match:
        weights *= (len(target) / np.sum(weights))

    return weights


def calculate_roc(labels, discriminant, weights=None, bins=2000):
    '''
    Makes a weighted ROC curve.

    Args:
        labels (numpy.array): an array of 1/0 representing signal/background
        discriminant (numpy.array): an array that represents the discriminant
        weights: sample weights for each point;
            assert weights.shape == discriminant.shape
        bins: binning to use -- can be an int or a list/array of bins.

    Returns:
        tuple: (signal_efficiency, background_rejection) where each are arrays
    '''
    sig_ind = labels == 1
    bkg_ind = labels == 0
    if weights is None:
        bkg_total = np.sum(labels == 0)
        sig_total = np.sum(labels == 1)
    else:
        bkg_total = np.sum(weights[bkg_ind])
        sig_total = np.sum(weights[sig_ind])

    discriminant_bins = np.linspace(np.min(discriminant), np.max(discriminant), bins)

    if weights is None:
        sig, _ = np.histogram(discriminant[sig_ind], discriminant_bins)
        bkd, _ = np.histogram(discriminant[bkg_ind], discriminant_bins)
    else:
        sig, _ = np.histogram(discriminant[sig_ind], discriminant_bins,
                              weights=weights[sig_ind])
        bkd, _ = np.histogram(discriminant[bkg_ind], discriminant_bins,
                              weights=weights[bkg_ind])

    # -- efficiency above a sliding cut; rejection = 1 / bkg efficiency.
    sig_eff = np.add.accumulate(sig[::-1]) / float(sig_total)
    bkg_rej = 1 / (np.add.accumulate(bkd[::-1]) / float(bkg_total))

    return sig_eff, bkg_rej


def ROC_plotter(curves, min_eff=0, max_eff=1, linewidth=1.4,
                pp=False, signal=r"$Z\rightarrow t\bar{t}$", background="QCD",
                title="Jet Image Tagging Comparison", logscale=True):

    fig = plt.figure(figsize=(11.69, 8.27), dpi=100)
    ax = fig.add_subplot(111)
    plt.xlim(min_eff, max_eff)
    plt.grid(b=True, which='minor')
    plt.grid(b=True, which='major')
    max_ = 0
    for tagger, data in curves.iteritems():
        sel = (data['efficiency'] >= min_eff) & (data['efficiency'] <= max_eff)
        if np.max(data['rejection'][sel]) > max_:
            max_ = np.max(data['rejection'][sel])
        plt.plot(data['efficiency'][sel], data['rejection'][sel], '-',
                 label=r'' + tagger, color=data['color'], linewidth=linewidth)

    ax = plt.subplot(1, 1, 1)
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                 ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(20)
    if logscale:
        plt.ylim(1, 10 ** 3)
        ax.set_yscale('log')
    ax.set_xlabel(r'$\epsilon_{\mathrm{signal}}$')
    ax.set_ylabel(r"$1 / \epsilon_{\mathrm{bkg}}$")

    plt.legend()
    plt.title(r'' + title)
    if pp:
        pp.savefig(fig)
    else:
        plt.show()
    return fig


def add_curve(name, color, curve_pair, dictref):
    dictref.update(
        {
            name: {
                'efficiency': curve_pair[0],
                'rejection': curve_pair[1],
                'color': color
            }
        }
    )
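

# -- A minimal end-to-end sketch on synthetic data (illustrative only;
#    nothing below is part of the analysis itself).
if __name__ == '__main__':
    labels = np.concatenate([np.ones(5000), np.zeros(5000)])
    disc = np.random.normal(loc=labels, scale=1.0)  # toy discriminant
    discs = {}
    add_curve(r'toy discriminant', 'blue', calculate_roc(labels, disc), discs)
    fig = ROC_plotter(discs, title='Toy ROC')
    fig.savefig('toy-roc.pdf')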
--------------------------------------------------------------------------------
/pretraining-experiment.py:
--------------------------------------------------------------------------------
import numpy as np

from matplotlib import cm

from keras.layers import containers
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.core import Dense, Dropout, AutoEncoder, MaxoutDense, Activation
from keras.layers.embeddings import Embedding
from keras.layers.noise import GaussianNoise
from keras.optimizers import SGD, RMSprop, Adagrad, Adam
from keras import regularizers
from keras.constraints import NonNeg  # NB: spelled `nonneg` in some Keras versions

from dl import pretrain_deep_ae, unroll_deep_ae
from utils import WeightedDataset
from viz.visualize import filter_grid, custom_div_cmap
from viz.performance import get_weights, calculate_roc, add_curve, ROC_plotter


# -- Experiment mode: re-execute the project modules in-place
#    (for interactive sessions; IPython users may prefer %run).
EXPERIMENT_MODE = False

if EXPERIMENT_MODE:
    for f in ['./viz/visualize.py', './dl/deepae.py', './utils/sampling.py']:
        execfile(f)

# -- swap with your own.
data = np.load('data-wprime-qcd.npy')
data = data[data['jet_pt'] > 150]

# -- load and process data
X_ = np.array([x.ravel() for x in data['image']]).astype('float32')
y_ = data['signal'].astype('float32')

df = WeightedDataset(X_, y_)

buf = df.sample(300000)

n_train = 260000

X, y = buf[0][:n_train], buf[1][:n_train]
X_val, y_val = buf[0][n_train:], buf[1][n_train:]

tau21 = data['tau_21'][df._ix_buf]
mass = data['jet_mass'][df._ix_buf]
pt = data['jet_pt'][df._ix_buf]

train_sample = df._ix_buf[:n_train]
test_sample = df._ix_buf[n_train:]

# -- build pretrained nets.

PRETRAINING = False

if PRETRAINING:
    params = {
        'structure': [625, 256, 64, 28],
        'activations': 3 * [('sigmoid', 'sigmoid')],
        'noise': 3 * [Dropout(0.6)],
        'optimizer': Adam(),
        'loss': 3 * ['binary_crossentropy']
    }

    ae, config = pretrain_deep_ae(params, X, nb_epoch=30, batch_size=512)

    model = unroll_deep_ae(ae, config)

    model.compile(loss='binary_crossentropy', optimizer=Adam())

    im = Sequential()
    im.add(Embedding(1, 625, W_constraint=NonNeg()))
    im.add(containers.Sequential(model.layers[0].encoder.layers[:-1]))

    w = model.layers[0].encoder.layers[-1].get_weights()

    im.add(Dense(64, 1, weights=[w[0][:, 0].reshape(64, 1),
                                 np.array(w[1][0]).reshape((1, ))]))

    im.add(Activation('sigmoid'))

    im.compile(loss='mse', optimizer=Adam())

    weights = model.layers[0].encoder.layers[0].get_weights()

    # model.fit(X, X, batch_size=512)

    clf = Sequential()

    clf.add(model.layers[0].encoder)
    clf.add(Dropout(0.1))
    clf.add(Dense(64, 1))
    clf.add(Activation('sigmoid'))
    clf.compile(loss='binary_crossentropy', optimizer=Adam(), class_mode='binary')

    clf.fit(X, y, validation_data=(X_val, y_val), batch_size=100,
            nb_epoch=10, show_accuracy=True)

if not PRETRAINING:
    # -- test a big maxout net
    mo = Sequential()
    mo.add(MaxoutDense(625, 200, 5))
    mo.add(Dropout(0.3))
    mo.add(Dense(200, 64))
    mo.add(Activation('relu'))
    mo.add(Dropout(0.3))
    mo.add(Dense(64, 10))
    mo.add(Activation('relu'))
    mo.add(Dropout(0.3))
    mo.add(Dense(10, 1))
    mo.add(Activation('sigmoid'))
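
    # -- architecture recap (old-style Keras (input, output) dims):
    #    625 -> MaxoutDense(200, 5 linear pieces) -> 64 -> 10 -> 1 (sigmoid),
    #    with 30% dropout between the fully-connected blocks.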
    mo.compile(loss='binary_crossentropy', optimizer=Adam(), class_mode='binary')
    mo.fit(X, y, validation_data=(X_val, y_val), batch_size=100,
           nb_epoch=10, show_accuracy=True)

# -- performance

tau21 = data['tau_21']  # [df._ix_buf]
mass = data['jet_mass']  # [df._ix_buf]
pt = data['jet_pt']  # [df._ix_buf]
signal = data['signal'] == True
background = data['signal'] == False

weights = np.ones(data.shape[0])
# -- going to match bkg to signal
weights[background] = get_weights(pt[signal], pt[background])

discs = {}
add_curve(r'$\tau_{21}$', 'red',
          calculate_roc(signal, tau21, weights=weights), discs)
fg = ROC_plotter(discs)
fg.savefig('myroc.pdf')

# -- nice viz

hidden_act = Sequential()

hidden_act.add(
    containers.Sequential(mo.layers[:-2])
)
hidden_act.compile('adam', 'mse')


R = hidden_act.predict(X_, verbose=True)

means = [data['jet_mass'][np.argsort(R[:, i])[::-1][:100]].mean()
         for i in xrange(20)]

filter_grid(
    np.array([data['image'][np.argsort(R[:, i])[::-1][:100]].mean(axis=0)
              for i in xrange(20)])[np.argsort(means)],
    shape=(4, 5),
    cmap=custom_div_cmap(mincol='white', midcol='yellow', maxcol='red'),
    symmetric=False
)


top_n = 10

title_map = {
    'jet_pt': r'Jet $\overline{p_T} = %.2f \mathrm{GeV}$',
    'jet_mass': r'Jet $\overline{ m } = %.2f \mathrm{GeV}$',
    'tau_21': r'Jet $\overline{\tau_{21}} = %.2f$'
}

n_hidden = 20

top_n_images = np.array(
    [
        data['image'][
            np.argsort(R[:, i])[::-1][:top_n]
        ].mean(axis=0)
        for i in xrange(n_hidden)
    ]
)

for feature_name, stylized in title_map.iteritems():
    feature = [data[feature_name][
                   np.argsort(R[:, i])[::-1][:top_n]
               ].mean()
               for i in xrange(n_hidden)]
    plot = filter_grid(filters=top_n_images[np.argsort(feature)],
                       labels=[r'' + stylized % v for v in np.sort(feature)],
                       shape=(4, 5),
                       cmap=cm.hot,
                       symmetric=False)
    plot.savefig(feature_name + 'node_acts.pdf')
--------------------------------------------------------------------------------
/dl/deepae.py:
--------------------------------------------------------------------------------
import random
import os

import numpy as np

from keras.layers import containers
from keras.models import Sequential
from keras.layers.core import (Dense, Dropout, AutoEncoder, MaxoutDense,
                               ActivityRegularization)
from keras.layers.noise import GaussianNoise
from keras.optimizers import SGD, RMSprop, Adagrad, Adam
from keras import regularizers


import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# params = {
#     'structure': [625, 512, 128, 64],
#     'activations': 3 * [('sigmoid', 'relu')],
#     'noise': [GaussianNoise(0.01), None, None],
#     'optimizer': Adam(),
#     'loss': ['mse', 'mse', 'mse']
# }


def pretrain_deep_ae(params, X, tie_weights=True, batch_size=100,
                     nb_epoch=5, validation_data=None):
    '''
    A function for building and greedily pretraining (interactively)
    a deep autoencoder.

    Args:
        params (dict): a dictionary with the following fields:
            * `structure`: a list of ints that describes the
              structure of the net, e.g., [10, 13, 2]
            * `activations`: a list of tuples of strings of
              length len(structure) - 1 that describe the
              encoding and decoding activation functions.
              For example, [('sigmoid', 'relu'), ('sigmoid', 'relu')]
            * `noise` (optional): a list of keras layers or None that
              describe the noise you want to add,
              e.g., [GaussianNoise(0.01), None]
            * `optimizer` (optional): one of the keras optimizers
            * `loss` (optional): a list of the loss functions to use.

        X (numpy.ndarray): the data to perform the unsupervised pretraining on.

        tie_weights (bool): tied or untied autoencoders.

        batch_size and nb_epoch: should be self explanatory...

    Usage:

        >>> params = {
        ...     'structure': [625, 512, 128, 64],
        ...     'activations': 3 * [('sigmoid', 'relu')],
        ...     'noise': [GaussianNoise(0.01), None, None],
        ...     'optimizer': Adam(),
        ...     'loss': ['mse', 'mse', 'mse']
        ... }
        >>> ae, p = pretrain_deep_ae(params, X)

    Returns:
        a tuple (list, params), where list is a list of keras.Sequential().
    '''
    # -- check for logic errors.
    if type(params) is not dict:
        raise TypeError('params must be of class `dict`.')

    for k in ['structure', 'activations']:
        if k not in params.keys():
            raise KeyError('key: `{}` must be in params dict'.format(k))

    if len(params['structure']) != (len(params['activations']) + 1):
        raise ValueError(
            'length of activations must be one less than length of structure.'
        )

    if 'noise' not in params.keys():
        logger.info('noise specifications not specified -- default to None')
        params['noise'] = len(params['activations']) * [None]

    if 'optimizer' not in params.keys():
        logger.info('optimization specifications not specified -- using Adam()')
        params['optimizer'] = Adam()

    if 'loss' not in params.keys():
        logger.info('loss specifications not specified -- using MSE')
        params['loss'] = len(params['activations']) * ['mse']

    structure = params['structure']
    autoencoder = []

    # -- loop through the parameters
    for (inputs, hidden), (enc_act, dec_act), noise, loss in zip(
            zip(
                structure,      # -- number of inputs
                structure[1:]   # -- number of outputs
            ),
            params['activations'],
            params['noise'],
            params['loss']):

        logger.info('Building {} x {} structure.'.format(inputs, hidden))
        autoencoder.append(Sequential())
        if noise is not None:
            # -- noise should be a keras layer, so it can be in a Sequential()
            logger.info('using noise of type {}'.format(type(noise)))
            encoder = containers.Sequential(
                [
                    noise,
                    Dense(inputs, hidden, activation=enc_act),
                    ActivityRegularization(l1=0.001)
                ]
            )
        else:
            # -- just a regular (non-denoising) ae.
            encoder = containers.Sequential(
                [
                    Dense(inputs, hidden, activation=enc_act),
                    ActivityRegularization(l1=0.001)
                ]
            )

        # -- each element of the list is a Sequential(), so we add.
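        # -- output_reconstruction=False makes predict() return the code
        #    (hidden) layer rather than the reconstruction, which is exactly
        #    what the next greedy unit trains on below.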
        autoencoder[-1].add(
            AutoEncoder(
                encoder=encoder,
                decoder=Dense(hidden, inputs, activation=dec_act),
                output_reconstruction=False,
                tie_weights=tie_weights
            )
        )
        logger.info('Compiling...')
        # -- each layer has its own loss, but there is a global optimizer.
        logger.info('Loss: {}, Optimizer: {}'.format(loss, type(params['optimizer'])))
        autoencoder[-1].compile(loss=loss, optimizer=params['optimizer'])
        logger.info('Training...')

        # -- we allow people to end the training of each unit early.
        try:
            autoencoder[-1].fit(X, X, batch_size=batch_size, nb_epoch=nb_epoch,
                                validation_data=validation_data)
        except KeyboardInterrupt:
            logger.info('Training ended early...')

        # -- embed in the new code space.
        X = autoencoder[-1].predict(X)

    return autoencoder, params


def unroll_deep_ae(autoencoder, params, tie_weights=True):
    '''
    Takes an autoencoder list generated by `pretrain_deep_ae` and
    unrolls it to make a deep autoencoder. NOTE: this doesn't
    compile anything! This is simply a wrapper around the
    unrolling process to make it easier.

    Args:
        autoencoder (list): a list of keras layers.
        params (dict): the param dict returned by `pretrain_deep_ae`
        tie_weights (bool): whether or not to make the weights tied.

    Usage:

        >>> params = {
        ...     'structure': [625, 512, 128, 64],
        ...     'activations': 3 * [('sigmoid', 'relu')],
        ...     'noise': [GaussianNoise(0.01), None, None],
        ...     'optimizer': Adam(),
        ...     'loss': ['mse', 'mse', 'mse']
        ... }
        >>> model = unroll_deep_ae(*pretrain_deep_ae(params, X))

    Returns:
        keras.Sequential: a keras sequential model with one layer,
        which is the unrolled autoencoder.
    '''
    encoder = []
    decoder = []

    structure = params['structure']

    for (layer_nb, (inputs, hidden)), (enc_act, dec_act) in zip(
            enumerate(
                zip(
                    structure,
                    structure[1:]
                )
            ),
            params['activations']):

        logger.info('Unpacking structure from level {}.'.format(layer_nb))
        encoder.append(Dense(inputs, hidden, activation=enc_act))
        encoder[-1].set_weights(autoencoder[layer_nb].get_weights()[:2])
        decoder.insert(0, Dense(hidden, inputs, activation=dec_act))
        decoder[0].set_weights(autoencoder[layer_nb].get_weights()[2:])

    encoder_sequence = containers.Sequential(encoder)
    decoder_sequence = containers.Sequential(decoder)

    stacked_autoencoder = Sequential()

    stacked_autoencoder.add(AutoEncoder(encoder=encoder_sequence,
                                        decoder=decoder_sequence,
                                        output_reconstruction=False,
                                        tie_weights=tie_weights))
    return stacked_autoencoder
--------------------------------------------------------------------------------