├── .gitignore
├── .hgignore
├── .jenkins
│   └── jenkins_buildbot_dlt.sh
├── .travis.yml
├── LICENSE.txt
├── README.rst
├── code
│   ├── DBN.py
│   ├── SdA.py
│   ├── cA.py
│   ├── cnn_1D_segm
│   │   ├── data_loader
│   │   │   ├── __init__.py
│   │   │   ├── cortical_layers.py
│   │   │   └── parallel_loader_1D.py
│   │   ├── fcn1D.py
│   │   └── train_fcn1D.py
│   ├── conlleval.pl
│   ├── convolutional_mlp.py
│   ├── dA.py
│   ├── fcn_2D_segm
│   │   ├── __init__.py
│   │   ├── fcn8.py
│   │   └── train_fcn8.py
│   ├── guidelines_segm_tutos_with_conda.sh
│   ├── hmc
│   │   ├── __init__.py
│   │   ├── hmc.py
│   │   └── test_hmc.py
│   ├── imdb.py
│   ├── imdb_preprocess.py
│   ├── logistic_cg.py
│   ├── logistic_sgd.py
│   ├── lstm.py
│   ├── mlp.py
│   ├── rbm.py
│   ├── rnnrbm.py
│   ├── rnnslu.py
│   ├── test.py
│   ├── unet
│   │   ├── Unet_lasagne_recipes.py
│   │   └── train_unet.py
│   └── utils.py
├── data
│   ├── download.sh
│   └── training_colorpatches_16x16_demo.mat
├── doc
│   ├── .templates
│   │   └── layout.html
│   ├── DBN.txt
│   ├── LICENSE.txt
│   ├── Makefile
│   ├── SdA.txt
│   ├── cnn_1D_segm.txt
│   ├── conf.py
│   ├── contents.txt
│   ├── dA.txt
│   ├── fcn_2D_segm.txt
│   ├── gettingstarted.txt
│   ├── hmc.txt
│   ├── images
│   │   ├── 3wolfmoon.jpg
│   │   ├── 3wolfmoon_output.png
│   │   ├── DBN3.png
│   │   ├── big_brain.png
│   │   ├── big_brain_section.png
│   │   ├── bm.png
│   │   ├── cat_segmentation.png
│   │   ├── cnn_explained.png
│   │   ├── conv_1D_nn.png
│   │   ├── cortical_layers_net.png
│   │   ├── cortical_ray_result.png
│   │   ├── cortical_valid1.png
│   │   ├── cortical_valid2.png
│   │   ├── cortical_valid3_v1.png
│   │   ├── cortical_valid4.png
│   │   ├── fcn.png
│   │   ├── fcn32_16_8.png
│   │   ├── fcn_schema.png
│   │   ├── filters_at_epoch_14.png
│   │   ├── filters_corruption_0.png
│   │   ├── filters_corruption_30.png
│   │   ├── jaccard.png
│   │   ├── labels.png
│   │   ├── lstm.png
│   │   ├── lstm_memorycell.png
│   │   ├── markov_chain.png
│   │   ├── mlp.png
│   │   ├── mnist_0.png
│   │   ├── mnist_1.png
│   │   ├── mnist_2.png
│   │   ├── mnist_3.png
│   │   ├── mnist_4.png
│   │   ├── mnist_5.png
│   │   ├── mylenet.png
│   │   ├── polyps_results.png
│   │   ├── raw_smooth.png
│   │   ├── ray.png
│   │   ├── rbm.png
│   │   ├── rnnrbm.png
│   │   ├── rnnrbm.svg
│   │   ├── sample1.png
│   │   ├── sample2.png
│   │   ├── samples.png
│   │   ├── sparse_1D_nn.png
│   │   └── unet.jpg
│   ├── index.txt
│   ├── lenet.txt
│   ├── logreg.txt
│   ├── lstm.txt
│   ├── mlp.txt
│   ├── rbm.txt
│   ├── references.txt
│   ├── rnnrbm.txt
│   ├── rnnslu.txt
│   ├── scripts
│   │   └── docgen.py
│   ├── unet.txt
│   └── utilities.txt
├── issues_closed
│   └── 2_RBM_cost_fn.txt
├── issues_open
│   ├── 1_SdA_performance.txt
│   ├── 3_RBM_scan_GPU.txt
│   ├── 4_RBM_scan.txt
│   ├── 5_results.txt
│   └── 6_benchmarking_pybrain.txt
└── misc
    └── do_nightly_build
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | code/*.pyc
3 | code/*_plots
4 | code/tmp*
5 | code/midi
6 | code/rnnslu
7 | data/atis.*
8 | data/mnist.pkl.gz
9 | data/mnist_py3k.pkl.gz
10 | data/Nottingham.zip
11 | data/Nottingham
12 | data/midi.zip
13 | html
14 | *.pyc
15 | *~
16 | *.swp
17 | # This directory may be created by scripts from segmentation tutorials.
18 | save_models
19 |
--------------------------------------------------------------------------------
/.hgignore:
--------------------------------------------------------------------------------
1 | syntax: glob
2 | *.pyc
3 | *.png
4 | *~
5 |
--------------------------------------------------------------------------------
/.jenkins/jenkins_buildbot_dlt.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # CUDA
4 | export PATH=/usr/local/cuda/bin:$PATH
5 | export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
6 | export LIBRARY_PATH=/usr/local/cuda/lib64:$LIBRARY_PATH
7 |
8 | # MKL
9 | export MKL_THREADING_LAYER=GNU
10 |
11 | # Set OpenMP threads for stability of speedtests
12 | export OMP_NUM_THREADS=1
13 |
14 | BUILDBOT_DIR=$WORKSPACE/nightly_build
15 |
16 | mkdir -p ${BUILDBOT_DIR}
17 |
18 | date
19 | COMPILEDIR=$HOME/.theano/lisa_theano_buildbot_deeplearning
20 | NOSETESTS=${BUILDBOT_DIR}/Theano/bin/theano-nose
21 | XUNIT="--with-xunit --xunit-file="
22 | # name test suites
23 | SUITE="--xunit-testsuite-name="
24 |
25 | FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR}
26 | export PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/Pylearn:$PYTHONPATH
27 |
28 | # Install libgpuarray and pygpu
29 | cd ${BUILDBOT_DIR}
30 |
31 | # Make fresh clone (with no history since we don't need it)
32 | rm -rf libgpuarray
33 | git clone "https://github.com/Theano/libgpuarray.git"
34 |
35 | (cd libgpuarray && echo "libgpuarray commit" && git rev-parse HEAD)
36 |
37 | # Clean up previous installs (to make sure no old files are left)
38 | rm -rf local
39 | mkdir local
40 |
41 | # Build libgpuarray and run C tests
42 | mkdir libgpuarray/build
43 | (cd libgpuarray/build && cmake .. -DCMAKE_BUILD_TYPE=${GPUARRAY_CONFIG} -DCMAKE_INSTALL_PREFIX=${BUILDBOT_DIR}/local && make)
44 |
45 | # Finally install
46 | (cd libgpuarray/build && make install)
47 | export LD_LIBRARY_PATH=${BUILDBOT_DIR}/local/lib:${LD_LIBRARY_PATH}
48 | export LIBRARY_PATH=${BUILDBOT_DIR}/local/lib:${LIBRARY_PATH}
49 | export CPATH=${BUILDBOT_DIR}/local/include:${CPATH}
50 |
51 | # Build the pygpu modules
52 | (cd libgpuarray && python setup.py build_ext --inplace -I${BUILDBOT_DIR}/local/include -L${BUILDBOT_DIR}/local/lib)
53 |
54 | mkdir ${BUILDBOT_DIR}/local/lib/python
55 | export PYTHONPATH=${PYTHONPATH}:${BUILDBOT_DIR}/local/lib/python
56 | # Then install
57 | (cd libgpuarray && python setup.py install --home=${BUILDBOT_DIR}/local)
58 |
59 | # Install Theano
60 | cd ${BUILDBOT_DIR}
61 | if [ ! -d ${BUILDBOT_DIR}/Theano ]; then
62 | git clone git://github.com/Theano/Theano.git
63 | fi
64 | # update repo
65 | cd ${BUILDBOT_DIR}/Theano; git pull
66 |
67 | cd ${WORKSPACE}/data
68 | ./download.sh
69 |
70 | cd ${BUILDBOT_DIR}/Theano
71 | echo "git version for Theano:" `git rev-parse HEAD`
72 | cd ${WORKSPACE}/code
73 | echo "git version:" `git rev-parse HEAD`
74 |
75 | echo "==== Executing nosetests speed with mode=FAST_RUN"
76 | NAME=dlt_speed
77 | FILE=${BUILDBOT_DIR}/${NAME}_tests.xml
78 | THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} ${SUITE}${NAME} test.py:speed
79 |
80 | echo "==== Executing nosetests with mode=FAST_RUN,floatX=float32"
81 | NAME=dlt_float32
82 | FILE=${BUILDBOT_DIR}/${NAME}_tests.xml
83 | THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} ${SUITE}${NAME}
84 |
85 | echo "==== Executing nosetests with mode=FAST_RUN,floatX=float32,device=cuda"
86 | NAME=dlt_float32_cuda
87 | FILE=${BUILDBOT_DIR}/${NAME}_tests.xml
88 | PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/DeepLearningTutorials/code:${PYTHONPATH} THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32,device=cuda nosetests test.py ${XUNIT}${FILE} ${SUITE}${NAME}
89 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # After changing this file, check it on:
2 | # http://lint.travis-ci.org/
3 | sudo: false
4 |
5 | language: python
6 | #python:
7 | # - "2.6"
8 | # - "3.3"
9 | # command to install dependencies
10 | before_install:
11 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
12 | - chmod +x miniconda.sh
13 | - ./miniconda.sh -b
14 | - export PATH=/home/travis/miniconda/bin:/home/travis/miniconda2/bin:$PATH
15 | - conda update --yes conda
16 |
17 | install:
18 | - conda create --yes -q -n pyenv mkl python=2.7 numpy=1.10 scipy=0.16.1 pip nose yaml pyflakes pillow pyparsing=1.5
19 | - source activate pyenv
20 | - pip install git+git://github.com/Theano/Theano.git
21 |
22 | env:
23 | - PART="test.py:test_logistic_sgd test.py:test_logistic_cg test.py:test_mlp test.py:test_convolutional_mlp test.py:test_dA"
24 | - PART="test.py:test_SdA test.py:test_lstm"
25 | - PART="test.py:test_dbn"
26 | - PART="test.py:test_rbm test.py:test_rnnrbm test.py:test_rnnslu"
27 | - PART="-e test.py"
28 |
29 | #i7-2600K CPU @ 3.40GHz
30 | #166.572s #8 test.test_rbm OK
31 | #155.114s #7 test.test_dbn OK
32 | #152.365s #9 test.test_rnnrbm OK
33 | #127.286s #6 test.test_SdA OK
34 | #39.252s #5 test.test_dA OK
35 | #27.56s #4 test.test_convolutional_mlp OK
36 | #15.454s #3 test.test_mlp OK
37 | #12.732s #1 test.test_logistic_sgd OK
38 | #12.638s #2 test.test_logistic_cg OK
39 |
40 | #i7-920
41 | #296.475s #7 code.test.test_dbn OK
42 | #257.272s #6 code.test.test_SdA OK
43 | #234.776s #9 code.test.test_rnnrbm OK
44 | #233.896s #8 code.test.test_rbm OK
45 | #65.737s #5 code.test.test_dA OK
46 | #37.658s #4 code.test.test_convolutional_mlp OK
47 | #24.172s #3 code.test.test_mlp OK
48 | #20.401s #1 code.test.test_logistic_sgd OK
49 | #17.546s #2 code.test.test_logistic_cg OK
50 |
51 | # On Core2 duo E8500 with MRG
52 | #308.004s #7 code.test.test_dbn OK
53 | #277.268s #6 code.test.test_SdA OK
54 | #126.102s #8 code.test.test_rbm OK
55 | #123.652s #9 code.test.test_rnnrbm OK
56 | #77.101s #5 code.test.test_dA OK
57 | #39.75s #4 code.test.test_convolutional_mlp OK
58 | #30.406s #3 code.test.test_mlp OK
59 | #21.132s #2 code.test.test_logistic_cg OK
60 | #17.945s #1 code.test.test_logistic_sgd OK
61 |
62 | # Unknown computer with older version of Theano
63 | #569.882s #9 code.test.test_rbm OK
64 | #298.992s #8 code.test.test_dbn OK
65 | #268.901s #7 code.test.test_SdA OK
66 | #67.292s #6 code.test.test_dA OK
67 | #27.485s #4 code.test.test_mlp OK
68 | #26.204s #5 code.test.test_convolutional_mlp OK
69 | #14.676s #3 code.test.test_logistic_cg OK
70 | #10.66s #2 code.test.test_logistic_sgd OK
71 | #5.795s #1 code.hmc.test_hmc.test_hmc OK
72 |
73 | script:
74 | - cd data
75 | - ./download.sh
76 | - ls
77 | - cd ../code
78 | - pwd
79 | - ls
80 | - export THEANO_FLAGS=warn.ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise
81 | - export MKL_THREADING_LAYER=GNU
82 | - python --version
83 | - nosetests -v $PART
84 |
85 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | .. _license:
2 |
3 | LICENSE
4 | =======
5 |
6 | Copyright (c) 2010--2015, Deep Learning Tutorials Development Team
7 | All rights reserved.
8 |
9 | Redistribution and use in source and binary forms, with or without
10 | modification, are permitted provided that the following conditions are met:
11 |
12 | * Redistributions of source code must retain the above copyright
13 | notice, this list of conditions and the following disclaimer.
14 | * Redistributions in binary form must reproduce the above copyright
15 | notice, this list of conditions and the following disclaimer in the
16 | documentation and/or other materials provided with the distribution.
17 | * Neither the name of Theano nor the names of its contributors may be
18 | used to endorse or promote products derived from this software without
19 | specific prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
22 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
25 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | Deep Learning Tutorials
2 | =======================
3 |
4 | Deep Learning is a new area of Machine Learning research, which has been
5 | introduced with the objective of moving Machine Learning closer to one of its
6 | original goals: Artificial Intelligence. Deep Learning is about learning
7 | multiple levels of representation and abstraction that help to make sense of
8 | data such as images, sound, and text. The tutorials presented here will
9 | introduce you to some of the most important deep learning algorithms and will
10 | also show you how to run them using Theano. Theano is a Python library that
11 | makes writing deep learning models easy, and gives the option of training them
12 | on a GPU.
13 |
14 | The easiest way to follow the tutorials is to `browse them online
15 | <http://deeplearning.net/tutorial/>`_.
16 |
17 | `Main development <https://github.com/lisa-lab/DeepLearningTutorials>`_
18 | of this project.
19 |
20 | .. image:: https://secure.travis-ci.org/lisa-lab/DeepLearningTutorials.png
21 | :target: http://travis-ci.org/lisa-lab/DeepLearningTutorials
22 |
23 | Project Layout
24 | --------------
25 |
26 | Subdirectories:
27 |
28 | - code - Python files corresponding to each tutorial
29 | - data - data and scripts to download data that is used by the tutorials
30 | - doc - restructured text used by Sphinx to build the tutorial website
31 | - html - built automatically by doc/Makefile, contains tutorial website
32 | - issues_closed - issue tracking
33 | - issues_open - issue tracking
34 | - misc - administrative scripts
35 |
36 |
37 | Build instructions
38 | ------------------
39 |
40 | To build the html version of the tutorials, run ``python doc/scripts/docgen.py``.
41 |
--------------------------------------------------------------------------------
/code/cnn_1D_segm/data_loader/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/code/cnn_1D_segm/data_loader/__init__.py
--------------------------------------------------------------------------------
/code/cnn_1D_segm/data_loader/cortical_layers.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 |
4 | import numpy as np
5 | from PIL import Image
6 | import re
7 | import warnings
8 |
9 | from dataset_loaders.parallel_loader import ThreadedDataset
10 | from parallel_loader_1D import ThreadedDataset_1D
11 |
12 | floatX = 'float32'
13 |
14 | class Cortical6LayersDataset(ThreadedDataset_1D):
15 | '''The Cortical Layers Dataset.
16 | Parameters
17 | ----------
18 | which_set: string
19 | A string in ['train', 'val', 'valid', 'test'], corresponding to
20 | the set to be returned.
21 | split: float
22 | A float indicating the dataset split between training and validation.
23 | For example, if split=0.85, 85% of the images will be used for training,
24 | whereas 15% will be used for validation.
25 | '''
26 | name = 'cortical_layers'
27 |
28 | non_void_nclasses = 7
29 | GTclasses = [0, 1, 2, 3, 4, 5, 6]
30 | _cmap = {
31 | 0: (128, 128, 128), # padding
32 | 1: (128, 0, 0), # layer 1
33 | 2: (128, 64, 0),      # layer 2
34 | 3: (128, 64, 128), # layer 3
35 | 4: (0, 0, 128), # layer 4
36 | 5: (0, 0, 64), # layer 5
37 | 6: (64, 64, 128), # layer 6
38 | }
39 | _mask_labels = {0: 'padding', 1: 'layers1', 2: 'layer2', 3: 'layer3',
40 | 4: 'layer4', 5: 'layer5', 6: 'layer6'}
41 | _void_labels = []
42 |
43 |
44 | _filenames = None
45 |
46 | @property
47 | def filenames(self):
48 |
49 | if self._filenames is None:
50 | # Load filenames
51 | nfiles = sum(1 for line in open(self.mask_path))
52 | filenames = range(nfiles)
53 | np.random.seed(1609)
54 | np.random.shuffle(filenames)
55 |
56 | if self.which_set == 'train':
57 | filenames = filenames[:int(nfiles*self.split)]
58 | elif self.which_set == 'val':
59 | filenames = filenames[-(nfiles - int(nfiles*self.split)):]
60 |
61 | # Save the filenames list
62 | self._filenames = filenames
63 |
64 | return self._filenames
65 |
66 | def __init__(self,
67 | which_set="train",
68 | split=0.85,
69 | shuffle_at_each_epoch = True,
70 | smooth_or_raw = 'both',
71 | *args, **kwargs):
72 |
73 | self.task = 'segmentation'
74 |
75 | self.n_layers = 6
76 | n_layers_path = str(self.n_layers)+"layers_segmentation"
77 |
78 | self.which_set = "val" if which_set == "valid" else which_set
79 | if self.which_set not in ("train", "val", 'test'):
80 | raise ValueError("Unknown argument to which_set %s" %
81 | self.which_set)
82 |
83 | self.split = split
84 |
85 | self.image_path_raw = os.path.join(self.path,n_layers_path,"training_raw.txt")
86 | self.image_path_smooth = os.path.join(self.path,n_layers_path, "training_geo.txt")
87 | self.mask_path = os.path.join(self.path,n_layers_path, "training_cls.txt")
88 | self.regions_path = os.path.join(self.path, n_layers_path, "training_regions.txt")
89 |
90 | self.smooth_raw_both = smooth_or_raw
91 |
92 | if smooth_or_raw == 'both':
93 | self.data_shape = (200,2)
94 | else :
95 | self.data_shape = (200,1)
96 |
97 | super(Cortical6LayersDataset, self).__init__(*args, **kwargs)
98 |
99 | def get_names(self):
100 | """Return a dict of names, per prefix/subset."""
101 |
102 | return {'default': self.filenames}
103 |
104 |
105 |
106 | def test_6layers():
107 | train_iter = Cortical6LayersDataset(
108 | which_set='train',
109 | smooth_or_raw = 'both',
110 | batch_size=500,
111 | data_augm_kwargs={},
112 | return_one_hot=False,
113 | return_01c=False,
114 | return_list=True,
115 | use_threads=False)
116 |
117 | valid_iter = Cortical6LayersDataset(
118 | which_set='valid',
119 | smooth_or_raw = 'smooth',
120 | batch_size=500,
121 | data_augm_kwargs={},
122 | return_one_hot=False,
123 | return_01c=False,
124 | return_list=True,
125 | use_threads=False)
126 |
127 | valid_iter2 = Cortical6LayersDataset(
128 | which_set='valid',
129 | smooth_or_raw = 'raw',
130 | batch_size=500,
131 | data_augm_kwargs={},
132 | return_one_hot=False,
133 | return_01c=False,
134 | return_list=True,
135 | use_threads=False)
136 |
137 |
138 |
139 | train_nsamples = train_iter.nsamples
140 | train_nbatches = train_iter.nbatches
141 | valid_nbatches = valid_iter.nbatches
142 | valid_nbatches2 = valid_iter2.nbatches
143 |
144 |
145 |
146 | # Simulate training
147 | max_epochs = 1
148 | print "Simulate training for", str(max_epochs), "epochs"
149 | start_training = time.time()
150 | for epoch in range(max_epochs):
151 | print "Epoch #", str(epoch)
152 |
153 | start_epoch = time.time()
154 |
155 | print "Iterate on the training set", train_nbatches, "minibatches"
156 | for mb in range(train_nbatches):
157 | start_batch = time.time()
158 | batch = train_iter.next()
159 | if mb%5 ==0:
160 | print("Minibatch train {}: {} sec".format(mb, (time.time() -
161 | start_batch)))
162 |
163 | print "Iterate on the validation set", valid_nbatches, "minibatches"
164 | for mb in range(valid_nbatches):
165 | start_batch = time.time()
166 | batch = valid_iter.next()
167 | if mb%5 ==0:
168 | print("Minibatch valid {}: {} sec".format(mb, (time.time() -
169 | start_batch)))
170 |
171 | print "Iterate on the validation set (second time)", valid_nbatches2, "minibatches"
172 | for mb in range(valid_nbatches2):
173 | start_batch = time.time()
174 | batch = valid_iter2.next()
175 | if mb%5==0:
176 | print("Minibatch valid {}: {} sec".format(mb, (time.time() -
177 | start_batch)))
178 |
179 | print("Epoch time: %s" % str(time.time() - start_epoch))
180 | print("Training time: %s" % str(time.time() - start_training))
181 |
182 | if __name__ == '__main__':
183 | print "Loading the dataset 1 batch at a time"
184 | test_6layers()
185 | print "Success!"
186 |
--------------------------------------------------------------------------------
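
Note: the `filenames` property above derives the train/validation split deterministically. A minimal NumPy-only sketch of that logic (the file count here is illustrative, not from the dataset):

    import numpy as np

    nfiles, split = 1000, 0.85            # e.g. 1000 rays listed in training_cls.txt
    filenames = list(range(nfiles))       # list() so shuffle also works on Python 3
    np.random.seed(1609)                  # same fixed seed as the class above
    np.random.shuffle(filenames)

    train = filenames[:int(nfiles * split)]            # first 85% after shuffling
    val = filenames[-(nfiles - int(nfiles * split)):]  # remaining 15%
    assert len(train) + len(val) == nfiles
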
/code/cnn_1D_segm/fcn1D.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import theano.tensor as T
3 | import lasagne
4 | from lasagne.layers import InputLayer, DropoutLayer, ReshapeLayer, \
5 | NonlinearityLayer, DimshuffleLayer, ConcatLayer
6 | from lasagne.layers import batch_norm, BatchNormLayer
7 | from lasagne.layers import Pool1DLayer as PoolLayer
8 | from lasagne.layers import Conv1DLayer as ConvLayer
9 | from lasagne.layers import Upscale1DLayer as UpscaleLayer
10 | from lasagne.layers import PadLayer
11 | from lasagne.layers import ElemwiseSumLayer, ElemwiseMergeLayer
12 | from lasagne.nonlinearities import softmax, linear, rectify
13 |
14 |
15 | def conv_bn_relu(net, incoming_layer, depth, num_filters, filter_size, pad = 'same'):
16 | net['conv'+str(depth)] = ConvLayer(net[incoming_layer],
17 | num_filters = num_filters, filter_size = filter_size,
18 | pad = pad, nonlinearity=None)
19 | net['bn'+str(depth)] = BatchNormLayer(net['conv'+str(depth)])
20 | net['relu'+str(depth)] = NonlinearityLayer( net['bn'+str(depth)], nonlinearity = rectify)
21 | incoming_layer = 'relu'+str(depth)
22 |
23 | return incoming_layer
24 |
25 | # start-snippet-bn_relu_conv
26 | def bn_relu_conv(net, incoming_layer, depth, num_filters, filter_size, pad = 'same'):
27 |
28 | net['bn'+str(depth)] = BatchNormLayer(net[incoming_layer])
29 | net['relu'+str(depth)] = NonlinearityLayer( net['bn'+str(depth)], nonlinearity = rectify)
30 | net['conv'+str(depth)] = ConvLayer(net['relu'+str(depth)],
31 | num_filters = num_filters, filter_size = filter_size,
32 | pad = pad, nonlinearity=None)
33 | incoming_layer = 'conv'+str(depth)
34 |
35 | return incoming_layer
36 | # end-snippet-bn_relu_conv
37 |
38 | # start-snippet-convolutions
39 | def build_model(input_var,
40 | n_classes = 6,
41 | nb_in_channels = 2,
42 | filter_size=25,
43 | n_filters = 64,
44 | depth = 8,
45 | last_filter_size = 1,
46 | block = 'bn_relu_conv',
47 | out_nonlin = softmax):
48 | '''
49 | Parameters:
50 | -----------
51 | input_var : theano 3D tensor, shape (n_samples, n_in_channels, ray_length)
52 | filter_size : odd int (to fit with 'same' padding)
53 | n_filters : int, number of filters for each convLayer
54 | n_classes : int, number of classes to segment
55 | depth : int, number of stacked convolutions before concatenation
56 | last_filter_size : int, last convolution filter size to obtain n_classes feature maps
57 | out_nonlin : default=softmax, nonlinearity function
58 | '''
59 |
60 |
61 | net = {}
62 |
63 | net['input'] = InputLayer((None, nb_in_channels, 200), input_var)
64 | incoming_layer = 'input'
65 |
66 | #Convolution layers
67 | for d in range(depth):
68 | if block == 'bn_relu_conv':
69 | incoming_layer = bn_relu_conv(net, incoming_layer, depth = d,
70 | num_filters= n_filters, filter_size=filter_size)
71 | # end-snippet-convolutions
72 | elif block == 'conv_bn_relu':
73 | incoming_layer = conv_bn_relu(net, incoming_layer, depth = d,
74 | num_filters= n_filters, filter_size=filter_size)
75 | # start-snippet-output
76 | #Output layer
77 | net['final_conv'] = ConvLayer(net[incoming_layer],
78 | num_filters = n_classes,
79 | filter_size = last_filter_size,
80 | pad='same')
81 | incoming_layer = 'final_conv'
82 |
83 | #DimshuffleLayer and ReshapeLayer to fit the softmax implementation
84 | #(it needs a 1D or 2D tensor, not a 3D tensor)
85 | net['final_dimshuffle'] = DimshuffleLayer(net[incoming_layer], (0,2,1))
86 | incoming_layer = 'final_dimshuffle'
87 |
88 | layerSize = lasagne.layers.get_output(net[incoming_layer]).shape
89 | net['final_reshape'] = ReshapeLayer(net[incoming_layer],
90 | (T.prod(layerSize[0:2]),layerSize[2]))
91 | # (200*batch_size,n_classes))
92 | incoming_layer = 'final_reshape'
93 |
94 |
95 | #This is the layer that computes the prediction
96 | net['last_layer'] = NonlinearityLayer(net[incoming_layer],
97 | nonlinearity = out_nonlin)
98 | incoming_layer = 'last_layer'
99 |
100 | #Layers needed to visualize the prediction of the network
101 | net['probs_reshape'] = ReshapeLayer(net[incoming_layer],
102 | (layerSize[0], layerSize[1], n_classes))
103 | incoming_layer = 'probs_reshape'
104 |
105 | net['probs_dimshuffle'] = DimshuffleLayer(net[incoming_layer], (0,2,1))
106 |
107 |
108 | return [net[l] for l in ['last_layer']], net
109 | # end-snippet-output
110 |
--------------------------------------------------------------------------------
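
A minimal usage sketch for `build_model` above, assuming Theano and Lasagne are installed and `fcn1D.py` is on the path; the 7 classes (padding plus 6 cortical layers) and the batch of random rays are illustrative:

    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne
    from fcn1D import build_model

    input_var = T.tensor3('input')    # (n_samples, n_in_channels, ray_length)
    out_layers, net = build_model(input_var, n_classes=7, nb_in_channels=2)

    prediction = lasagne.layers.get_output(out_layers[0])
    predict_fn = theano.function([input_var], prediction)

    rays = np.random.rand(4, 2, 200).astype('float32')  # 4 rays, smooth+raw channels
    probs = predict_fn(rays)   # (4 * 200, n_classes) rows of softmax probabilities
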
/code/fcn_2D_segm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/code/fcn_2D_segm/__init__.py
--------------------------------------------------------------------------------
/code/fcn_2D_segm/fcn8.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.io as sio
3 | import theano.tensor as T
4 | import lasagne
5 | from lasagne.layers import InputLayer, DropoutLayer, ReshapeLayer,\
6 | DimshuffleLayer
7 | from lasagne.layers import Pool2DLayer as PoolLayer
8 | from lasagne.layers import Conv2DLayer as ConvLayer
9 | from lasagne.layers import ElemwiseSumLayer, ElemwiseMergeLayer
10 | from lasagne.layers import Deconv2DLayer as DeconvLayer
11 | from lasagne.nonlinearities import softmax, linear
12 |
13 |
14 |
15 | def freezeParameters(net, single=True):
16 | """
17 | Freeze parameters of a layer or a network so that they are not trainable
18 | anymore
19 |
20 | Parameters
21 | ----------
22 | net: a network layer
23 | single: whether to freeze a single layer or all of the layers below as well
24 | """
25 | all_layers = lasagne.layers.get_all_layers(net)
26 |
27 | if single:
28 | all_layers = [all_layers[-1]]
29 |
30 | for layer in all_layers:
31 | layer_params = layer.get_params()
32 | for p in layer_params:
33 | try:
34 | layer.params[p].remove('trainable')
35 | except KeyError:
36 | pass
37 |
38 |
39 | # start-snippet-1
40 | def buildFCN8(nb_in_channels, input_var,
41 | path_weights='/Tmp/romerosa/itinf/models/' +
42 | 'camvid/new_fcn8_model_best.npz',
43 | n_classes=21, load_weights=True,
44 | void_labels=[], trainable=False,
45 | layer=['probs_dimshuffle'], pascal=False,
46 | temperature=1.0, dropout=0.5):
47 | '''
48 | Build fcn8 model
49 | '''
50 |
51 | net = {}
52 |
53 | # Contracting path
54 | net['input'] = InputLayer((None, nb_in_channels, None, None),input_var)
55 |
56 | # pool 1
57 | net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=100, flip_filters=False)
58 | net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad='same', flip_filters=False)
59 | net['pool1'] = PoolLayer(net['conv1_2'], 2)
60 |
61 | # pool 2
62 | net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad='same', flip_filters=False)
63 | net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad='same', flip_filters=False)
64 | net['pool2'] = PoolLayer(net['conv2_2'], 2)
65 |
66 | # pool 3
67 | net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad='same', flip_filters=False)
68 | net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad='same', flip_filters=False)
69 | net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad='same', flip_filters=False)
70 | net['pool3'] = PoolLayer(net['conv3_3'], 2)
71 |
72 | # pool 4
73 | net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad='same', flip_filters=False)
74 | net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad='same', flip_filters=False)
75 | net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad='same', flip_filters=False)
76 | net['pool4'] = PoolLayer(net['conv4_3'], 2)
77 |
78 | # pool 5
79 | net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad='same', flip_filters=False)
80 | net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad='same', flip_filters=False)
81 | net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad='same', flip_filters=False)
82 | net['pool5'] = PoolLayer(net['conv5_3'], 2)
83 |
84 | # fc6
85 | net['fc6'] = ConvLayer(net['pool5'], 4096, 7, pad='valid', flip_filters=False)
86 | net['fc6_dropout'] = DropoutLayer(net['fc6'], p=dropout)
87 |
88 | # fc7
89 | net['fc7'] = ConvLayer(net['fc6_dropout'], 4096, 1, pad='valid', flip_filters=False)
90 | net['fc7_dropout'] = DropoutLayer(net['fc7'], p=dropout)
91 |
92 | net['score_fr'] = ConvLayer(net['fc7_dropout'], n_classes, 1, pad='valid', flip_filters=False)
93 |
94 | # Upsampling path
95 |
96 | # Unpool
97 | net['score2'] = DeconvLayer(net['score_fr'], n_classes, 4,
98 | stride=2, crop='valid', nonlinearity=linear)
99 | net['score_pool4'] = ConvLayer(net['pool4'], n_classes, 1,pad='same')
100 | net['score_fused'] = ElemwiseSumLayer((net['score2'],net['score_pool4']),
101 | cropping=[None, None, 'center','center'])
102 |
103 | # Unpool
104 | net['score4'] = DeconvLayer(net['score_fused'], n_classes, 4,
105 | stride=2, crop='valid', nonlinearity=linear)
106 | net['score_pool3'] = ConvLayer(net['pool3'], n_classes, 1,pad='valid')
107 | net['score_final'] = ElemwiseSumLayer((net['score4'],net['score_pool3']),
108 | cropping=[None, None, 'center','center'])
109 | # Unpool
110 | net['upsample'] = DeconvLayer(net['score_final'], n_classes, 16,
111 | stride=8, crop='valid', nonlinearity=linear)
112 | upsample_shape = lasagne.layers.get_output_shape(net['upsample'])[1]
113 | net['input_tmp'] = InputLayer((None, upsample_shape, None, None), input_var)
114 |
115 | net['score'] = ElemwiseMergeLayer((net['input_tmp'], net['upsample']),
116 | merge_function=lambda input, deconv:
117 | deconv,
118 | cropping=[None, None, 'center',
119 | 'center'])
120 |
121 | # Final dimshuffle, reshape and softmax
122 | net['final_dimshuffle'] = \
123 | lasagne.layers.DimshuffleLayer(net['score'], (0, 2, 3, 1))
124 | laySize = lasagne.layers.get_output(net['final_dimshuffle']).shape
125 | net['final_reshape'] = \
126 | lasagne.layers.ReshapeLayer(net['final_dimshuffle'],
127 | (T.prod(laySize[0:3]),
128 | laySize[3]))
129 | net['probs'] = lasagne.layers.NonlinearityLayer(net['final_reshape'],
130 | nonlinearity=softmax)
131 | # end-snippet-1
132 |
133 |
134 | # Do not train
135 | if not trainable:
136 | freezeParameters(net['probs'])
137 |
138 | # Go back to 4D
139 | net['probs_reshape'] = ReshapeLayer(net['probs'], (laySize[0], laySize[1],
140 | laySize[2], n_classes))
141 |
142 | net['probs_dimshuffle'] = DimshuffleLayer(net['probs_reshape'],
143 | (0, 3, 1, 2))
144 |
145 | # Apply temperature
146 | if load_weights:
147 | soft_value = net['upsample'].W.get_value() / temperature
148 | net['upsample'].W.set_value(soft_value)
149 | soft_value = net['upsample'].b.get_value() / temperature
150 | net['upsample'].b.set_value(soft_value)
151 |
152 | return [net[el] for el in layer]
153 |
--------------------------------------------------------------------------------
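
A minimal sketch of instantiating the FCN-8 graph above without pretrained weights (so the `path_weights` checkpoint and the temperature scaling are skipped); the 2-class setting is illustrative:

    import theano.tensor as T
    import lasagne
    from fcn8 import buildFCN8

    input_var = T.tensor4('input')      # (batch, channels, rows, cols)
    layers = buildFCN8(3, input_var,    # 3-channel RGB input
                       n_classes=2,     # e.g. polyp vs. background
                       load_weights=False,
                       trainable=True,
                       layer=['probs'])
    probs = lasagne.layers.get_output(layers[0])  # (batch*rows*cols, n_classes)
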
/code/guidelines_segm_tutos_with_conda.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | ### Base installation.
3 |
4 | # Create and enter main directory.
5 | mkdir main_directory
6 | cd main_directory
7 | # Create and activate conda environment.
8 | conda create --yes -n tuto python=2
9 | source activate tuto
10 | # Install theano.
11 | conda install --yes -c mila-udem theano
12 | # Install Lasagne.
13 | git clone https://github.com/Lasagne/Lasagne.git
14 | cd Lasagne/
15 | pip install -e .
16 | cd ..
17 | # Install dataset_loaders.
18 | conda install --yes matplotlib numpy Pillow scipy scikit-image seaborn h5py
19 | git clone https://github.com/fvisin/dataset_loaders.git
20 | cd dataset_loaders/
21 | pip install -e .
22 | cd ..
23 | # Create config.ini.
24 | cd dataset_loaders/dataset_loaders
25 | touch config.ini
26 | cd ../../
27 | # Get tutorials code.
28 | git clone https://github.com/lisa-lab/DeepLearningTutorials.git
29 |
30 | # NB: Don't forget to correctly set config.ini with section [general]
31 | # and other relevant sections for segmentation tutorials before
32 | # running following lines.
33 | # Field `datasets_local_path` in [general] section should indicate a working
34 | # directory for dataset_loaders module. You can use a directory within
35 | # the main directory, for example main_directory/datasets_local_dir.
36 | # If specified folder does not exist, it will be created.
37 |
38 | # NB: Following lines should be executed in the main directory created above.
39 | # If any problem occurs, consider deleting the folder save_models (created by tutorial scripts)
40 | # and the working directory you specified for dataset_loaders:
41 | # rm -rf save_models datasets_local_dir
42 |
43 | ### Tutorial FCN 2D.
44 | ## Get polyps_split7.zip from https://drive.google.com/file/d/0B_60jvsCt1hhZWNfcW4wbHE5N3M/view
45 | ## Directory for [polyps912] section in config.ini should be full path to main_directory/polyps_split7
46 | unzip polyps_split7.zip
47 | THEANO_FLAGS=device=cuda,floatX=float32 python DeepLearningTutorials/code/fcn_2D_segm/train_fcn8.py --num_epochs 60
48 |
49 | ### Tutorial UNET.
50 | ## Get test-volume.tif, train-labels.tif, train-volume.tif from ISBI challenge: http://brainiac2.mit.edu/isbi_challenge/home
51 | ## Directory for [isbi_em_stacks] section in config.ini should be full path to main_directory/isbi
52 | pip install simpleitk
53 | mkdir isbi
54 | mv test-volume.tif train-labels.tif train-volume.tif isbi
55 | THEANO_FLAGS=device=cuda,floatX=float32 python DeepLearningTutorials/code/unet/train_unet.py --num_epochs 60
56 |
57 | ### Tutorial FCN 1D.
58 | ## Get TrainingData190417.tar.gz from https://drive.google.com/file/d/0B3tbeSUS2FsVOVlIamlDdkNBQUE/edit
59 | ## Directory for [cortical_layers] section in config.ini should be full path to main_directory/cortical_layers
60 | mkdir cortical_layers
61 | cd cortical_layers/
62 | tar -xvf ../TrainingData190417.tar.gz
63 | mv TrainingData 6layers_segmentation
64 | cd ..
65 | THEANO_FLAGS=device=cuda,floatX=float32 python DeepLearningTutorials/code/cnn_1D_segm/train_fcn1D.py --num_epochs 60
66 |
--------------------------------------------------------------------------------
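
For reference, a possible config.ini matching the sections mentioned in the script above. `datasets_local_path` comes from the script's own comments; the per-dataset key `shared_path` follows the dataset_loaders convention and should be verified against your installed copy:

    [general]
    datasets_local_path = /full/path/to/main_directory/datasets_local_dir

    [polyps912]
    shared_path = /full/path/to/main_directory/polyps_split7

    [isbi_em_stacks]
    shared_path = /full/path/to/main_directory/isbi

    [cortical_layers]
    shared_path = /full/path/to/main_directory/cortical_layers
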
/code/hmc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/code/hmc/__init__.py
--------------------------------------------------------------------------------
/code/hmc/test_hmc.py:
--------------------------------------------------------------------------------
1 |
2 | from __future__ import print_function
3 |
4 | import numpy
5 | import theano
6 |
7 | try:
8 | from hmc import HMC_sampler
9 | except ImportError as e:
10 | # python 3 compatibility
11 | # http://stackoverflow.com/questions/3073259/python-nose-import-error
12 | from hmc.hmc import HMC_sampler
13 |
14 |
15 | def sampler_on_nd_gaussian(sampler_cls, burnin, n_samples, dim=10):
16 | batchsize = 3
17 |
18 | rng = numpy.random.RandomState(123)
19 |
20 | # Define a covariance and mu for a gaussian
21 | mu = numpy.array(rng.rand(dim) * 10, dtype=theano.config.floatX)
22 | cov = numpy.array(rng.rand(dim, dim), dtype=theano.config.floatX)
23 | cov = (cov + cov.T) / 2.
24 | cov[numpy.arange(dim), numpy.arange(dim)] = 1.0
25 | cov_inv = numpy.linalg.inv(cov)
26 |
27 | # Define energy function for a multi-variate Gaussian
28 | def gaussian_energy(x):
29 | return 0.5 * (theano.tensor.dot((x - mu), cov_inv) *
30 | (x - mu)).sum(axis=1)
31 |
32 | # Declared shared random variable for positions
33 | position = rng.randn(batchsize, dim).astype(theano.config.floatX)
34 | position = theano.shared(position)
35 |
36 | # Create HMC sampler
37 | sampler = sampler_cls(position, gaussian_energy,
38 | initial_stepsize=1e-3, stepsize_max=0.5)
39 |
40 | # Start with a burn-in process
41 | garbage = [sampler.draw() for r in range(burnin)]  # burn-in
42 | # Draw `n_samples`: result is a 3D tensor of dim [n_samples, batchsize,
43 | # dim]
44 | _samples = numpy.asarray([sampler.draw() for r in range(n_samples)])
45 | # Flatten to [n_samples * batchsize, dim]
46 | samples = _samples.T.reshape(dim, -1).T
47 |
48 | print('****** TARGET VALUES ******')
49 | print('target mean:', mu)
50 | print('target cov:\n', cov)
51 |
52 | print('****** EMPIRICAL MEAN/COV USING HMC ******')
53 | print('empirical mean: ', samples.mean(axis=0))
54 | print('empirical_cov:\n', numpy.cov(samples.T))
55 |
56 | print('****** HMC INTERNALS ******')
57 | print('final stepsize', sampler.stepsize.get_value())
58 | print('final acceptance_rate', sampler.avg_acceptance_rate.get_value())
59 |
60 | return sampler
61 |
62 |
63 | def test_hmc():
64 | sampler = sampler_on_nd_gaussian(HMC_sampler.new_from_shared_positions,
65 | burnin=1000, n_samples=1000, dim=5)
66 | assert abs(sampler.avg_acceptance_rate.get_value() -
67 | sampler.target_acceptance_rate) < .1
68 | assert sampler.stepsize.get_value() >= sampler.stepsize_min
69 | assert sampler.stepsize.get_value() <= sampler.stepsize_max
70 |
--------------------------------------------------------------------------------
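
A quick NumPy check (a standalone sketch) of the `_samples.T.reshape(dim, -1).T` flattening used above: it turns the [n_samples, batchsize, dim] draws into [n_samples * batchsize, dim] rows without mixing coordinates of different draws:

    import numpy

    n_samples, batchsize, dim = 4, 3, 5
    _samples = numpy.arange(n_samples * batchsize * dim).reshape(
        n_samples, batchsize, dim)
    flat = _samples.T.reshape(dim, -1).T

    assert flat.shape == (n_samples * batchsize, dim)
    # row (b * n_samples + n) is exactly draw n of chain b
    for n in range(n_samples):
        for b in range(batchsize):
            assert (flat[b * n_samples + n] == _samples[n, b]).all()
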
/code/imdb.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from six.moves import xrange
3 | import six.moves.cPickle as pickle
4 |
5 | import gzip
6 | import os
7 |
8 | import numpy
9 | import theano
10 |
11 |
12 | def prepare_data(seqs, labels, maxlen=None):
13 | """Create the matrices from the datasets.
14 |
15 | This pads each sequence to the same length: the length of the
16 | longest sequence or maxlen.
17 |
18 | If maxlen is set, we will cut all sequences to this maximum
19 | length.
20 |
21 | This swaps the axes!
22 | """
23 | # x: a list of sentences
24 | lengths = [len(s) for s in seqs]
25 |
26 | if maxlen is not None:
27 | new_seqs = []
28 | new_labels = []
29 | new_lengths = []
30 | for l, s, y in zip(lengths, seqs, labels):
31 | if l < maxlen:
32 | new_seqs.append(s)
33 | new_labels.append(y)
34 | new_lengths.append(l)
35 | lengths = new_lengths
36 | labels = new_labels
37 | seqs = new_seqs
38 |
39 | if len(lengths) < 1:
40 | return None, None, None
41 |
42 | n_samples = len(seqs)
43 | maxlen = numpy.max(lengths)
44 |
45 | x = numpy.zeros((maxlen, n_samples)).astype('int64')
46 | x_mask = numpy.zeros((maxlen, n_samples)).astype(theano.config.floatX)
47 | for idx, s in enumerate(seqs):
48 | x[:lengths[idx], idx] = s
49 | x_mask[:lengths[idx], idx] = 1.
50 |
51 | return x, x_mask, labels
52 |
53 |
54 | def get_dataset_file(dataset, default_dataset, origin):
55 | '''Look for the dataset as if it were a full path; if not found, try a
56 | local file; failing that, look in the data directory.
57 |
58 | Download the dataset if it is not present.
59 |
60 | '''
61 | data_dir, data_file = os.path.split(dataset)
62 | if data_dir == "" and not os.path.isfile(dataset):
63 | # Check if dataset is in the data directory.
64 | new_path = os.path.join(
65 | os.path.split(__file__)[0],
66 | "..",
67 | "data",
68 | dataset
69 | )
70 | if os.path.isfile(new_path) or data_file == default_dataset:
71 | dataset = new_path
72 |
73 | if (not os.path.isfile(dataset)) and data_file == default_dataset:
74 | from six.moves import urllib
75 | print('Downloading data from %s' % origin)
76 | urllib.request.urlretrieve(origin, dataset)
77 |
78 |
79 | return dataset
80 |
81 |
82 | def load_data(path="imdb.pkl", n_words=100000, valid_portion=0.1, maxlen=None,
83 | sort_by_len=True):
84 | '''Loads the dataset
85 |
86 | :type path: String
87 | :param path: The path to the dataset (here IMDB)
88 | :type n_words: int
89 | :param n_words: The number of words to keep in the vocabulary.
90 | All extra words are set to unknown (1).
91 | :type valid_portion: float
92 | :param valid_portion: The proportion of the full train set used for
93 | the validation set.
94 | :type maxlen: None or positive int
95 | :param maxlen: the max sequence length we use in the train/valid set.
96 | :type sort_by_len: bool
97 | :param sort_by_len: Sort by the sequence length for the train,
98 | valid and test set. This allows faster execution as it causes
99 | less padding per minibatch. Another mechanism must be used to
100 | shuffle the train set at each epoch.
101 |
102 | '''
103 |
104 | #############
105 | # LOAD DATA #
106 | #############
107 |
108 | # Load the dataset
109 | path = get_dataset_file(
110 | path, "imdb.pkl",
111 | "http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")
112 |
113 | if path.endswith(".gz"):
114 | f = gzip.open(path, 'rb')
115 | else:
116 | f = open(path, 'rb')
117 |
118 | train_set = pickle.load(f)
119 | test_set = pickle.load(f)
120 | f.close()
121 | if maxlen:
122 | new_train_set_x = []
123 | new_train_set_y = []
124 | for x, y in zip(train_set[0], train_set[1]):
125 | if len(x) < maxlen:
126 | new_train_set_x.append(x)
127 | new_train_set_y.append(y)
128 | train_set = (new_train_set_x, new_train_set_y)
129 | del new_train_set_x, new_train_set_y
130 |
131 | # split training set into validation set
132 | train_set_x, train_set_y = train_set
133 | n_samples = len(train_set_x)
134 | sidx = numpy.random.permutation(n_samples)
135 | n_train = int(numpy.round(n_samples * (1. - valid_portion)))
136 | valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
137 | valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
138 | train_set_x = [train_set_x[s] for s in sidx[:n_train]]
139 | train_set_y = [train_set_y[s] for s in sidx[:n_train]]
140 |
141 | train_set = (train_set_x, train_set_y)
142 | valid_set = (valid_set_x, valid_set_y)
143 |
144 | def remove_unk(x):
145 | return [[1 if w >= n_words else w for w in sen] for sen in x]
146 |
147 | test_set_x, test_set_y = test_set
148 | valid_set_x, valid_set_y = valid_set
149 | train_set_x, train_set_y = train_set
150 |
151 | train_set_x = remove_unk(train_set_x)
152 | valid_set_x = remove_unk(valid_set_x)
153 | test_set_x = remove_unk(test_set_x)
154 |
155 | def len_argsort(seq):
156 | return sorted(range(len(seq)), key=lambda x: len(seq[x]))
157 |
158 | if sort_by_len:
159 | sorted_index = len_argsort(test_set_x)
160 | test_set_x = [test_set_x[i] for i in sorted_index]
161 | test_set_y = [test_set_y[i] for i in sorted_index]
162 |
163 | sorted_index = len_argsort(valid_set_x)
164 | valid_set_x = [valid_set_x[i] for i in sorted_index]
165 | valid_set_y = [valid_set_y[i] for i in sorted_index]
166 |
167 | sorted_index = len_argsort(train_set_x)
168 | train_set_x = [train_set_x[i] for i in sorted_index]
169 | train_set_y = [train_set_y[i] for i in sorted_index]
170 |
171 | train = (train_set_x, train_set_y)
172 | valid = (valid_set_x, valid_set_y)
173 | test = (test_set_x, test_set_y)
174 |
175 | return train, valid, test
176 |
--------------------------------------------------------------------------------
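
A small worked example of `prepare_data` above: two sequences of word indices are zero-padded to the longest length, axes end up as (time, samples), and the mask marks the real tokens (the inputs here are made up):

    from imdb import prepare_data

    x, x_mask, labels = prepare_data([[3, 5, 2], [7, 1]], [1, 0])
    # x (maxlen x n_samples):         x_mask:
    #   [[3, 7],                        [[1., 1.],
    #    [5, 1],                         [1., 1.],
    #    [2, 0]]   <- 0 is padding       [1., 0.]]
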
/code/imdb_preprocess.py:
--------------------------------------------------------------------------------
1 | """
2 | This script is what created the pickled dataset.
3 |
4 | 1) You need to download this file and put it in the same directory as this file.
5 | https://github.com/moses-smt/mosesdecoder/raw/master/scripts/tokenizer/tokenizer.perl . Give it execution permission.
6 |
7 | 2) Get the dataset from http://ai.stanford.edu/~amaas/data/sentiment/ and extract it in the current directory.
8 |
9 | 3) Then run this script.
10 | """
11 | from __future__ import print_function
12 | dataset_path='/Tmp/bastienf/aclImdb/'
13 |
14 | import numpy
15 | import cPickle as pkl
16 |
17 | from collections import OrderedDict
18 |
19 | import glob
20 | import os
21 |
22 | from subprocess import Popen, PIPE
23 |
24 | # tokenizer.perl is from Moses: https://github.com/moses-smt/mosesdecoder/tree/master/scripts/tokenizer
25 | tokenizer_cmd = ['./tokenizer.perl', '-l', 'en', '-q', '-']
26 |
27 |
28 | def tokenize(sentences):
29 |
30 | print('Tokenizing..', end=' ')
31 | text = "\n".join(sentences)
32 | tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE)
33 | tok_text, _ = tokenizer.communicate(text)
34 | toks = tok_text.split('\n')[:-1]
35 | print('Done')
36 |
37 | return toks
38 |
39 |
40 | def build_dict(path):
41 | sentences = []
42 | currdir = os.getcwd()
43 | os.chdir('%s/pos/' % path)
44 | for ff in glob.glob("*.txt"):
45 | with open(ff, 'r') as f:
46 | sentences.append(f.readline().strip())
47 | os.chdir('%s/neg/' % path)
48 | for ff in glob.glob("*.txt"):
49 | with open(ff, 'r') as f:
50 | sentences.append(f.readline().strip())
51 | os.chdir(currdir)
52 |
53 | sentences = tokenize(sentences)
54 |
55 | print('Building dictionary..', end=' ')
56 | wordcount = dict()
57 | for ss in sentences:
58 | words = ss.strip().lower().split()
59 | for w in words:
60 | if w not in wordcount:
61 | wordcount[w] = 1
62 | else:
63 | wordcount[w] += 1
64 |
65 | counts = wordcount.values()
66 | keys = wordcount.keys()
67 |
68 | sorted_idx = numpy.argsort(counts)[::-1]
69 |
70 | worddict = dict()
71 |
72 | for idx, ss in enumerate(sorted_idx):
73 | worddict[keys[ss]] = idx+2 # leave 0 and 1 (UNK)
74 |
75 | print(numpy.sum(counts), ' total words ', len(keys), ' unique words')
76 |
77 | return worddict
78 |
79 |
80 | def grab_data(path, dictionary):
81 | sentences = []
82 | currdir = os.getcwd()
83 | os.chdir(path)
84 | for ff in glob.glob("*.txt"):
85 | with open(ff, 'r') as f:
86 | sentences.append(f.readline().strip())
87 | os.chdir(currdir)
88 | sentences = tokenize(sentences)
89 |
90 | seqs = [None] * len(sentences)
91 | for idx, ss in enumerate(sentences):
92 | words = ss.strip().lower().split()
93 | seqs[idx] = [dictionary[w] if w in dictionary else 1 for w in words]
94 |
95 | return seqs
96 |
97 |
98 | def main():
99 | # Get the dataset from http://ai.stanford.edu/~amaas/data/sentiment/
100 | path = dataset_path
101 | dictionary = build_dict(os.path.join(path, 'train'))
102 |
103 | train_x_pos = grab_data(path+'train/pos', dictionary)
104 | train_x_neg = grab_data(path+'train/neg', dictionary)
105 | train_x = train_x_pos + train_x_neg
106 | train_y = [1] * len(train_x_pos) + [0] * len(train_x_neg)
107 |
108 | test_x_pos = grab_data(path+'test/pos', dictionary)
109 | test_x_neg = grab_data(path+'test/neg', dictionary)
110 | test_x = test_x_pos + test_x_neg
111 | test_y = [1] * len(test_x_pos) + [0] * len(test_x_neg)
112 |
113 | f = open('imdb.pkl', 'wb')
114 | pkl.dump((train_x, train_y), f, -1)
115 | pkl.dump((test_x, test_y), f, -1)
116 | f.close()
117 |
118 | f = open('imdb.dict.pkl', 'wb')
119 | pkl.dump(dictionary, f, -1)
120 | f.close()
121 |
122 | if __name__ == '__main__':
123 | main()
124 |
--------------------------------------------------------------------------------
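
A minimal sketch of reading back what `main()` writes: imdb.pkl holds two consecutive pickles (train set, then test set), which is the layout that `load_data` in imdb.py expects:

    import six.moves.cPickle as pickle

    with open('imdb.pkl', 'rb') as f:
        train_x, train_y = pickle.load(f)  # first dump: training sentences/labels
        test_x, test_y = pickle.load(f)    # second dump: test sentences/labels
    print(len(train_x), 'train examples;', len(test_x), 'test examples')
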
/code/logistic_cg.py:
--------------------------------------------------------------------------------
1 | """
2 | This tutorial introduces logistic regression using Theano and conjugate
3 | gradient descent.
4 |
5 | Logistic regression is a probabilistic, linear classifier. It is parametrized
6 | by a weight matrix :math:`W` and a bias vector :math:`b`. Classification is
7 | done by projecting data points onto a set of hyperplanes, the distance to
8 | which is used to determine a class membership probability.
9 |
10 | Mathematically, this can be written as:
11 |
12 | .. math::
13 | P(Y=i|x, W,b) &= softmax_i(W x + b) \\
14 | &= \frac {e^{W_i x + b_i}} {\sum_j e^{W_j x + b_j}}
15 |
16 |
17 | The model's prediction is then made by taking the argmax of
18 | the vector whose i-th element is P(Y=i|x).
19 |
20 | .. math::
21 |
22 | y_{pred} = argmax_i P(Y=i|x,W,b)
23 |
24 |
25 | This tutorial presents a conjugate gradient optimization method that is
26 | suitable for smaller datasets.
27 |
28 |
29 | References:
30 |
31 | - textbooks: "Pattern Recognition and Machine Learning" -
32 | Christopher M. Bishop, section 4.3.2
33 |
34 |
35 | """
36 | from __future__ import print_function, division
37 | __docformat__ = 'restructedtext en'
38 |
39 |
40 | import os
41 | import sys
42 | import timeit
43 |
44 | import numpy
45 |
46 | import theano
47 | import theano.tensor as T
48 |
49 | from logistic_sgd import load_data
50 |
51 |
52 | class LogisticRegression(object):
53 | """Multi-class Logistic Regression Class
54 |
55 | The logistic regression is fully described by a weight matrix :math:`W`
56 | and bias vector :math:`b`. Classification is done by projecting data
57 | points onto a set of hyperplanes, the distance to which is used to
58 | determine a class membership probability.
59 | """
60 |
61 | def __init__(self, input, n_in, n_out):
62 | """ Initialize the parameters of the logistic regression
63 |
64 | :type input: theano.tensor.TensorType
65 | :param input: symbolic variable that describes the input of the
66 | architecture ( one minibatch)
67 |
68 | :type n_in: int
69 | :param n_in: number of input units, the dimension of the space in
70 | which the datapoint lies
71 |
72 | :type n_out: int
73 | :param n_out: number of output units, the dimension of the space in
74 | which the target lies
75 |
76 | """
77 |
78 | # initialize theta = (W,b) with 0s; W gets the shape (n_in, n_out),
79 | # while b is a vector of n_out elements, making theta a vector of
80 | # n_in*n_out + n_out elements
81 | self.theta = theano.shared(
82 | value=numpy.zeros(
83 | n_in * n_out + n_out,
84 | dtype=theano.config.floatX
85 | ),
86 | name='theta',
87 | borrow=True
88 | )
89 | # W is represented by the first n_in*n_out elements of theta
90 | self.W = self.theta[0:n_in * n_out].reshape((n_in, n_out))
91 | # b is the rest (last n_out elements)
92 | self.b = self.theta[n_in * n_out:n_in * n_out + n_out]
93 |
94 | # compute vector of class-membership probabilities in symbolic form
95 | self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
96 |
97 | # compute prediction as class whose probability is maximal in
98 | # symbolic form
99 | self.y_pred = T.argmax(self.p_y_given_x, axis=1)
100 |
101 | # keep track of model input
102 | self.input = input
103 |
104 | def negative_log_likelihood(self, y):
105 | """Return the negative log-likelihood of the prediction of this model
106 | under a given target distribution.
107 |
108 | .. math::
109 |
110 | \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
111 | \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
112 | \log(P(Y=y^{(i)}|x^{(i)}, W, b)) \\
113 | \ell (\theta=\{W,b\}, \mathcal{D}) = - \mathcal{L} (\theta=\{W,b\}, \mathcal{D})
114 |
115 | :type y: theano.tensor.TensorType
116 | :param y: corresponds to a vector that gives for each example the
117 | correct label
118 | """
119 | return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
120 |
121 | def errors(self, y):
122 | """Return a float representing the number of errors in the minibatch
123 | over the total number of examples of the minibatch
124 |
125 | :type y: theano.tensor.TensorType
126 | :param y: corresponds to a vector that gives for each example
127 | the correct label
128 | """
129 |
130 | # check if y has same dimension of y_pred
131 | if y.ndim != self.y_pred.ndim:
132 | raise TypeError(
133 | 'y should have the same shape as self.y_pred',
134 | ('y', y.type, 'y_pred', self.y_pred.type)
135 | )
136 | # check if y is of the correct datatype
137 | if y.dtype.startswith('int'):
138 | # the T.neq operator returns a vector of 0s and 1s, where 1
139 | # represents a mistake in prediction
140 | return T.mean(T.neq(self.y_pred, y))
141 | else:
142 | raise NotImplementedError()
143 |
144 |
145 | def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'):
146 | """Demonstrate conjugate gradient optimization of a log-linear model
147 |
148 | This is demonstrated on MNIST.
149 |
150 | :type n_epochs: int
151 | :param n_epochs: number of epochs to run the optimizer
152 |
153 | :type mnist_pkl_gz: string
154 | :param mnist_pkl_gz: the path of the mnist training file from
155 | http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
156 |
157 | """
158 | #############
159 | # LOAD DATA #
160 | #############
161 | datasets = load_data(mnist_pkl_gz)
162 |
163 | train_set_x, train_set_y = datasets[0]
164 | valid_set_x, valid_set_y = datasets[1]
165 | test_set_x, test_set_y = datasets[2]
166 |
167 | batch_size = 600 # size of the minibatch
168 |
169 | n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
170 | n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
171 | n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
172 |
173 | n_in = 28 * 28 # number of input units
174 | n_out = 10 # number of output units
175 |
176 | ######################
177 | # BUILD ACTUAL MODEL #
178 | ######################
179 | print('... building the model')
180 |
181 | # allocate symbolic variables for the data
182 | minibatch_offset = T.lscalar() # offset to the start of a [mini]batch
183 | x = T.matrix() # the data is presented as rasterized images
184 | y = T.ivector() # the labels are presented as 1D vector of
185 | # [int] labels
186 |
187 | # construct the logistic regression class
188 | classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)
189 |
190 | # the cost we minimize during training is the negative log likelihood of
191 | # the model in symbolic format
192 | cost = classifier.negative_log_likelihood(y).mean()
193 |
194 | # compile a theano function that computes the mistakes that are made by
195 | # the model on a minibatch
196 | test_model = theano.function(
197 | [minibatch_offset],
198 | classifier.errors(y),
199 | givens={
200 | x: test_set_x[minibatch_offset:minibatch_offset + batch_size],
201 | y: test_set_y[minibatch_offset:minibatch_offset + batch_size]
202 | },
203 | name="test"
204 | )
205 |
206 | validate_model = theano.function(
207 | [minibatch_offset],
208 | classifier.errors(y),
209 | givens={
210 | x: valid_set_x[minibatch_offset: minibatch_offset + batch_size],
211 | y: valid_set_y[minibatch_offset: minibatch_offset + batch_size]
212 | },
213 | name="validate"
214 | )
215 |
216 | # compile a theano function that returns the cost of a minibatch
217 | batch_cost = theano.function(
218 | [minibatch_offset],
219 | cost,
220 | givens={
221 | x: train_set_x[minibatch_offset: minibatch_offset + batch_size],
222 | y: train_set_y[minibatch_offset: minibatch_offset + batch_size]
223 | },
224 | name="batch_cost"
225 | )
226 |
227 | # compile a theano function that returns the gradient of the minibatch
228 | # with respect to theta
229 | batch_grad = theano.function(
230 | [minibatch_offset],
231 | T.grad(cost, classifier.theta),
232 | givens={
233 | x: train_set_x[minibatch_offset: minibatch_offset + batch_size],
234 | y: train_set_y[minibatch_offset: minibatch_offset + batch_size]
235 | },
236 | name="batch_grad"
237 | )
238 |
239 | # creates a function that computes the average cost on the training set
240 | def train_fn(theta_value):
241 | classifier.theta.set_value(theta_value, borrow=True)
242 | train_losses = [batch_cost(i * batch_size)
243 | for i in range(n_train_batches)]
244 | return numpy.mean(train_losses)
245 |
246 | # creates a function that computes the average gradient of cost with
247 | # respect to theta
248 | def train_fn_grad(theta_value):
249 | classifier.theta.set_value(theta_value, borrow=True)
250 | grad = batch_grad(0)
251 | for i in range(1, n_train_batches):
252 | grad += batch_grad(i * batch_size)
253 | return grad / n_train_batches
254 |
255 | validation_scores = [numpy.inf, 0]
256 |
257 | # creates the validation function
258 | def callback(theta_value):
259 | classifier.theta.set_value(theta_value, borrow=True)
260 | #compute the validation loss
261 | validation_losses = [validate_model(i * batch_size)
262 | for i in range(n_valid_batches)]
263 | this_validation_loss = numpy.mean(validation_losses)
264 | print(('validation error %f %%' % (this_validation_loss * 100.,)))
265 |
266 | # check if it is better than the best validation score obtained so far
267 | if this_validation_loss < validation_scores[0]:
268 | # if so, replace the old one, and compute the score on the
269 | # testing dataset
270 | validation_scores[0] = this_validation_loss
271 | test_losses = [test_model(i * batch_size)
272 | for i in range(n_test_batches)]
273 | validation_scores[1] = numpy.mean(test_losses)
274 |
275 | ###############
276 | # TRAIN MODEL #
277 | ###############
278 |
279 | # using scipy conjugate gradient optimizer
280 | import scipy.optimize
281 | print ("Optimizing using scipy.optimize.fmin_cg...")
282 | start_time = timeit.default_timer()
283 | best_w_b = scipy.optimize.fmin_cg(
284 | f=train_fn,
285 | x0=numpy.zeros((n_in + 1) * n_out, dtype=x.dtype),
286 | fprime=train_fn_grad,
287 | callback=callback,
288 | disp=0,
289 | maxiter=n_epochs
290 | )
291 | end_time = timeit.default_timer()
292 | print(('Optimization complete with best validation score of %f %%, with '
293 | 'test performance %f %%'
294 | ) % (validation_scores[0] * 100., validation_scores[1] * 100.)
295 | )
296 |
297 | print('The code for file ' + os.path.split(__file__)[1] +
298 | ' ran for %.1fs' % (end_time - start_time), file=sys.stderr)
299 |
300 |
301 | if __name__ == '__main__':
302 | cg_optimization_mnist()
303 |
--------------------------------------------------------------------------------
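
A standalone sketch of the scipy.optimize.fmin_cg protocol used above: `f` maps a flat parameter vector to a scalar loss and `fprime` returns its gradient, which is exactly how `train_fn` / `train_fn_grad` repack `theta` for the model (the quadratic objective here is illustrative):

    import numpy
    import scipy.optimize

    target = numpy.array([1.0, -2.0, 3.0])

    def f(theta):
        return 0.5 * ((theta - target) ** 2).sum()

    def fprime(theta):
        return theta - target

    best = scipy.optimize.fmin_cg(f=f, x0=numpy.zeros(3), fprime=fprime, disp=0)
    # best ends up close to `target`; in the tutorial, theta packs W
    # (n_in * n_out entries) followed by b (n_out entries).
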
/code/rnnrbm.py:
--------------------------------------------------------------------------------
1 | # Author: Nicolas Boulanger-Lewandowski
2 | # University of Montreal (2012)
3 | # RNN-RBM deep learning tutorial
4 | # More information at http://deeplearning.net/tutorial/rnnrbm.html
5 |
6 | from __future__ import print_function
7 |
8 | import glob
9 | import os
10 | import sys
11 |
12 | import numpy
13 | try:
14 | import pylab
15 | except ImportError:
16 | print ("pylab isn't available. If you use its functionality, it will crash.")
17 | print("It can be installed with 'pip install -q Pillow'")
18 |
19 | from midi.utils import midiread, midiwrite
20 | import theano
21 | import theano.tensor as T
22 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
23 |
24 | # Don't use a Python long, as that doesn't work on 32-bit computers.
25 | numpy.random.seed(0xbeef)
26 | rng = RandomStreams(seed=numpy.random.randint(1 << 30))
27 | theano.config.warn.subtensor_merge_bug = False
28 |
29 |
30 | def build_rbm(v, W, bv, bh, k):
31 | '''Construct a k-step Gibbs chain starting at v for an RBM.
32 |
33 | v : Theano vector or matrix
34 | If a matrix, multiple chains will be run in parallel (batch).
35 | W : Theano matrix
36 | Weight matrix of the RBM.
37 | bv : Theano vector
38 | Visible bias vector of the RBM.
39 | bh : Theano vector
40 | Hidden bias vector of the RBM.
41 | k : scalar or Theano scalar
42 | Length of the Gibbs chain.
43 |
44 | Return a (v_sample, cost, monitor, updates) tuple:
45 |
46 | v_sample : Theano vector or matrix with the same shape as `v`
47 | Corresponds to the generated sample(s).
48 | cost : Theano scalar
49 | Expression whose gradient with respect to W, bv, bh is the CD-k
50 | approximation to the log-likelihood of `v` (training example) under the
51 | RBM. The cost is averaged in the batch case.
52 | monitor: Theano scalar
53 | Pseudo log-likelihood (also averaged in the batch case).
54 | updates: dictionary of Theano variable -> Theano variable
55 | The `updates` object returned by scan.'''
56 |
57 | def gibbs_step(v):
58 | mean_h = T.nnet.sigmoid(T.dot(v, W) + bh)
59 | h = rng.binomial(size=mean_h.shape, n=1, p=mean_h,
60 | dtype=theano.config.floatX)
61 | mean_v = T.nnet.sigmoid(T.dot(h, W.T) + bv)
62 | v = rng.binomial(size=mean_v.shape, n=1, p=mean_v,
63 | dtype=theano.config.floatX)
64 | return mean_v, v
65 |
66 | chain, updates = theano.scan(lambda v: gibbs_step(v)[1], outputs_info=[v],
67 | n_steps=k)
68 | v_sample = chain[-1]
69 |
70 | mean_v = gibbs_step(v_sample)[0]
71 | monitor = T.xlogx.xlogy0(v, mean_v) + T.xlogx.xlogy0(1 - v, 1 - mean_v)
72 | monitor = monitor.sum() / v.shape[0]
73 |
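# CD-k cost: difference of free energies between the data `v` and the
# chain's final sample; its gradient w.r.t. W, bv, bh (with v_sample held
# constant) gives the CD-k approximation described in the docstring above.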
74 | def free_energy(v):
75 | return -(v * bv).sum() - T.log(1 + T.exp(T.dot(v, W) + bh)).sum()
76 | cost = (free_energy(v) - free_energy(v_sample)) / v.shape[0]
77 |
78 | return v_sample, cost, monitor, updates
79 |
80 |
81 | def shared_normal(num_rows, num_cols, scale=1):
82 | '''Initialize a matrix shared variable with normally distributed
83 | elements.'''
84 | return theano.shared(numpy.random.normal(
85 | scale=scale, size=(num_rows, num_cols)).astype(theano.config.floatX))
86 |
87 |
88 | def shared_zeros(*shape):
89 | '''Initialize a vector shared variable with zero elements.'''
90 | return theano.shared(numpy.zeros(shape, dtype=theano.config.floatX))
91 |
92 |
93 | def build_rnnrbm(n_visible, n_hidden, n_hidden_recurrent):
94 | '''Construct a symbolic RNN-RBM and initialize parameters.
95 |
96 | n_visible : integer
97 | Number of visible units.
98 | n_hidden : integer
99 | Number of hidden units of the conditional RBMs.
100 | n_hidden_recurrent : integer
101 | Number of hidden units of the RNN.
102 |
103 | Return a (v, v_sample, cost, monitor, params, updates_train, v_t,
104 | updates_generate) tuple:
105 |
106 | v : Theano matrix
107 | Symbolic variable holding an input sequence (used during training)
108 | v_sample : Theano matrix
109 | Symbolic variable holding the negative particles for CD log-likelihood
110 | gradient estimation (used during training)
111 | cost : Theano scalar
112 | Expression whose gradient (considering v_sample constant) corresponds
113 | to the LL gradient of the RNN-RBM (used during training)
114 | monitor : Theano scalar
115 | Frame-level pseudo-likelihood (useful for monitoring during training)
116 | params : tuple of Theano shared variables
117 | The parameters of the model to be optimized during training.
118 | updates_train : dictionary of Theano variable -> Theano variable
119 | Update object that should be passed to theano.function when compiling
120 | the training function.
121 | v_t : Theano matrix
122 | Symbolic variable holding a generated sequence (used during sampling)
123 | updates_generate : dictionary of Theano variable -> Theano variable
124 | Update object that should be passed to theano.function when compiling
125 | the generation function.'''
126 |
127 | W = shared_normal(n_visible, n_hidden, 0.01)
128 | bv = shared_zeros(n_visible)
129 | bh = shared_zeros(n_hidden)
130 | Wuh = shared_normal(n_hidden_recurrent, n_hidden, 0.0001)
131 | Wuv = shared_normal(n_hidden_recurrent, n_visible, 0.0001)
132 | Wvu = shared_normal(n_visible, n_hidden_recurrent, 0.0001)
133 | Wuu = shared_normal(n_hidden_recurrent, n_hidden_recurrent, 0.0001)
134 | bu = shared_zeros(n_hidden_recurrent)
135 |
136 | params = W, bv, bh, Wuh, Wuv, Wvu, Wuu, bu # learned parameters as shared
137 | # variables
138 |
139 | v = T.matrix() # a training sequence
140 | u0 = T.zeros((n_hidden_recurrent,)) # initial value for the RNN hidden
141 | # units
142 |
143 | # If `v_t` is given, the deterministic recurrence computes the variable
144 | # biases bv_t, bh_t at each time step. If `v_t` is None, the same
145 | # recurrence is run, but with a separate Gibbs chain at each time step, to
146 | # sample (generate) from the RNN-RBM. The resulting sample v_t is returned
147 | # in order to be passed down to the sequence history.
148 | def recurrence(v_t, u_tm1):
149 | bv_t = bv + T.dot(u_tm1, Wuv)
150 | bh_t = bh + T.dot(u_tm1, Wuh)
151 | generate = v_t is None
152 | if generate:
153 | v_t, _, _, updates = build_rbm(T.zeros((n_visible,)), W, bv_t,
154 | bh_t, k=25)
155 | u_t = T.tanh(bu + T.dot(v_t, Wvu) + T.dot(u_tm1, Wuu))
156 | return ([v_t, u_t], updates) if generate else [u_t, bv_t, bh_t]
157 |
158 | # For training, the deterministic recurrence is used to compute all the
159 | # {bv_t, bh_t, 1 <= t <= T} given v. Conditional RBMs can then be trained
160 | # in batches using those parameters.
161 | (u_t, bv_t, bh_t), updates_train = theano.scan(
162 | lambda v_t, u_tm1, *_: recurrence(v_t, u_tm1),
163 | sequences=v, outputs_info=[u0, None, None], non_sequences=params)
164 | v_sample, cost, monitor, updates_rbm = build_rbm(v, W, bv_t[:], bh_t[:],
165 | k=15)
166 | updates_train.update(updates_rbm)
167 |
168 | # symbolic loop for sequence generation
169 | (v_t, u_t), updates_generate = theano.scan(
170 | lambda u_tm1, *_: recurrence(None, u_tm1),
171 | outputs_info=[None, u0], non_sequences=params, n_steps=200)
172 |
173 | return (v, v_sample, cost, monitor, params, updates_train, v_t,
174 | updates_generate)
175 |
176 |
177 | class RnnRbm:
178 | '''Simple class to train an RNN-RBM from MIDI files and to generate sample
179 | sequences.'''
180 |
181 | def __init__(
182 | self,
183 | n_hidden=150,
184 | n_hidden_recurrent=100,
185 | lr=0.001,
186 | r=(21, 109),
187 | dt=0.3
188 | ):
189 | '''Constructs and compiles Theano functions for training and sequence
190 | generation.
191 |
192 | n_hidden : integer
193 | Number of hidden units of the conditional RBMs.
194 | n_hidden_recurrent : integer
195 | Number of hidden units of the RNN.
196 | lr : float
197 | Learning rate
198 | r : (integer, integer) tuple
199 | Specifies the pitch range of the piano-roll in MIDI note numbers,
200 | including r[0] but not r[1], such that r[1]-r[0] is the number of
201 | visible units of the RBM at a given time step. The default (21,
202 | 109) corresponds to the full range of piano (88 notes).
203 | dt : float
204 | Sampling period when converting the MIDI files into piano-rolls, or
205 | equivalently the time difference between consecutive time steps.'''
206 |
207 | self.r = r
208 | self.dt = dt
209 | (v, v_sample, cost, monitor, params, updates_train, v_t,
210 | updates_generate) = build_rnnrbm(
211 | r[1] - r[0],
212 | n_hidden,
213 | n_hidden_recurrent
214 | )
215 |
216 | gradient = T.grad(cost, params, consider_constant=[v_sample])
217 | updates_train.update(
218 | ((p, p - lr * g) for p, g in zip(params, gradient))
219 | )
220 | self.train_function = theano.function(
221 | [v],
222 | monitor,
223 | updates=updates_train
224 | )
225 | self.generate_function = theano.function(
226 | [],
227 | v_t,
228 | updates=updates_generate
229 | )
230 |
231 | def train(self, files, batch_size=100, num_epochs=200):
232 | '''Train the RNN-RBM via stochastic gradient descent (SGD) using MIDI
233 | files converted to piano-rolls.
234 |
235 | files : list of strings
236 | List of MIDI files that will be loaded as piano-rolls for training.
237 | batch_size : integer
238 | Training sequences will be split into subsequences of at most this
239 | size before applying the SGD updates.
240 | num_epochs : integer
241 | Number of epochs (passes over the training set) performed. The user
242 | can safely interrupt training with Ctrl+C at any time.'''
243 |
244 | assert len(files) > 0, 'Training set is empty!' \
245 | ' (did you download the data files?)'
246 | dataset = [midiread(f, self.r,
247 | self.dt).piano_roll.astype(theano.config.floatX)
248 | for f in files]
249 |
250 | try:
251 | for epoch in range(num_epochs):
252 | numpy.random.shuffle(dataset)
253 | costs = []
254 |
255 | for s, sequence in enumerate(dataset):
256 | for i in range(0, len(sequence), batch_size):
257 | cost = self.train_function(sequence[i:i + batch_size])
258 | costs.append(cost)
259 |
260 | print('Epoch %i/%i' % (epoch + 1, num_epochs))
261 | print(numpy.mean(costs))
262 | sys.stdout.flush()
263 |
264 | except KeyboardInterrupt:
265 | print('Interrupted by user.')
266 |
267 | def generate(self, filename, show=True):
268 | '''Generate a sample sequence, plot the resulting piano-roll and save
269 | it as a MIDI file.
270 |
271 | filename : string
272 | A MIDI file will be created at this location.
273 | show : boolean
274 | If True, a piano-roll of the generated sequence will be shown.'''
275 |
276 | piano_roll = self.generate_function()
277 | midiwrite(filename, piano_roll, self.r, self.dt)
278 | if show:
279 | extent = (0, self.dt * len(piano_roll)) + self.r
280 | pylab.figure()
281 | pylab.imshow(piano_roll.T, origin='lower', aspect='auto',
282 | interpolation='nearest', cmap=pylab.cm.gray_r,
283 | extent=extent)
284 | pylab.xlabel('time (s)')
285 | pylab.ylabel('MIDI note number')
286 | pylab.title('generated piano-roll')
287 |
288 |
289 | def test_rnnrbm(batch_size=100, num_epochs=200):
290 | model = RnnRbm()
291 | cwd = os.path.dirname(os.path.abspath(__file__))
292 | pattern = os.path.join(os.path.split(cwd)[0],
293 | 'data', 'Nottingham', 'train', '*.mid')
294 | model.train(glob.glob(pattern),
295 | batch_size=batch_size, num_epochs=num_epochs)
296 | return model
297 |
298 | if __name__ == '__main__':
299 | model = test_rnnrbm()
300 | model.generate('sample1.mid')
301 | model.generate('sample2.mid')
302 | pylab.show()
303 |
--------------------------------------------------------------------------------
/code/test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, print_function, division
2 | import sys
3 |
4 | import numpy
5 |
6 | import convolutional_mlp
7 | import dA
8 | import DBN
9 | import logistic_cg
10 | import logistic_sgd
11 | import mlp
12 | import rbm
13 | import rnnrbm
14 | import SdA
15 | import rnnslu
16 | import lstm
17 |
18 |
19 | def test_logistic_sgd():
20 | logistic_sgd.sgd_optimization_mnist(n_epochs=10)
21 |
22 |
23 | def test_logistic_cg():
24 | try:
25 | import scipy
26 | logistic_cg.cg_optimization_mnist(n_epochs=10)
27 | except ImportError:
28 | from nose.plugins.skip import SkipTest
29 | raise SkipTest(
30 | 'SciPy not available. Needed for the logistic_cg example.')
31 |
32 |
33 | def test_mlp():
34 | mlp.test_mlp(n_epochs=1)
35 |
36 |
37 | def test_convolutional_mlp():
38 | convolutional_mlp.evaluate_lenet5(n_epochs=1, nkerns=[5, 5])
39 |
40 |
41 | def test_dA():
42 | dA.test_dA(training_epochs=1, output_folder='tmp_dA_plots')
43 |
44 |
45 | def test_SdA():
46 | SdA.test_SdA(pretraining_epochs=1, training_epochs=1, batch_size=300)
47 |
48 |
49 | def test_dbn():
50 | DBN.test_DBN(pretraining_epochs=1, training_epochs=1, batch_size=300)
51 |
52 |
53 | def test_rbm():
54 | rbm.test_rbm(training_epochs=1, batch_size=300, n_chains=1, n_samples=1,
55 | n_hidden=20, output_folder='tmp_rbm_plots')
56 |
57 |
58 | def test_rnnrbm():
59 | rnnrbm.test_rnnrbm(num_epochs=1)
60 |
61 |
62 | def test_rnnslu():
63 | s = {'fold': 3,
64 | # 5 folds 0,1,2,3,4
65 | 'data': 'atis',
66 | 'lr': 0.0970806646812754,
67 | 'verbose': 1,
68 | 'decay': True,
69 | # decay on the learning rate if improvement stops
70 | 'win': 7,
71 | # number of words in the context window
72 | 'nhidden': 200,
73 | # number of hidden units
74 | 'seed': 345,
75 | 'emb_dimension': 50,
76 | # dimension of word embedding
77 | 'nepochs': 1, # CHANGED
78 | 'savemodel': False}
79 | rnnslu.main(s)
80 |
81 |
82 | def test_lstm():
83 | lstm.train_lstm(max_epochs=1, test_size=1000, saveto='')
84 |
85 |
86 | def speed():
87 | """
88 | This function modifies the Theano configuration and does not restore it!
89 | """
90 |
91 | algo = ['logistic_sgd', 'logistic_cg', 'mlp', 'convolutional_mlp',
92 | 'dA', 'SdA', 'DBN', 'rbm', 'rnnrbm', 'rnnslu', 'lstm']
93 | to_exec = [True] * len(algo)
94 | # to_exec = [False] * len(algo)
95 | # to_exec[-1] = True
96 | do_float64 = True
97 | do_float32 = True
98 | do_gpu = True
99 |
100 | algo_executed = [s for idx, s in enumerate(algo) if to_exec[idx]]
101 |
102 | def time_test(m, l, idx, f, **kwargs):
103 | if not to_exec[idx]:
104 | return
105 | print(algo[idx])
106 | ts = m.call_time
107 | try:
108 | f(**kwargs)
109 | except Exception as e:
110 | print('test', algo[idx], 'FAILED', e, file=sys.stderr)
111 | l.append(numpy.nan)
112 | return
113 | te = m.call_time
114 | l.append(te - ts)
115 |
116 | def do_tests():
117 | m = theano.compile.mode.get_default_mode()
118 | l = []
119 | time_test(m, l, 0, logistic_sgd.sgd_optimization_mnist, n_epochs=30)
120 | time_test(m, l, 1, logistic_cg.cg_optimization_mnist, n_epochs=30)
121 | time_test(m, l, 2, mlp.test_mlp, n_epochs=5)
122 | time_test(m, l, 3, convolutional_mlp.evaluate_lenet5, n_epochs=5,
123 | nkerns=[5, 5])
124 | time_test(m, l, 4, dA.test_dA, training_epochs=2,
125 | output_folder='tmp_dA_plots')
126 | time_test(m, l, 5, SdA.test_SdA, pretraining_epochs=1,
127 | training_epochs=2, batch_size=300)
128 | time_test(m, l, 6, DBN.test_DBN, pretraining_epochs=1,
129 | training_epochs=2, batch_size=300)
130 | time_test(m, l, 7, rbm.test_rbm, training_epochs=1, batch_size=300,
131 | n_chains=1, n_samples=1, output_folder='tmp_rbm_plots')
132 | time_test(m, l, 8, rnnrbm.test_rnnrbm, num_epochs=1)
133 | s = {'fold': 3,
134 | # 5 folds 0,1,2,3,4
135 | 'data': 'atis',
136 | 'lr': 0.0970806646812754,
137 | 'verbose': 1,
138 | 'decay': True,
139 | # decay on the learning rate if improvement stops
140 | 'win': 7,
141 | # number of words in the context window
142 | 'nhidden': 200,
143 | # number of hidden units
144 | 'seed': 345,
145 | 'emb_dimension': 50,
146 | # dimension of word embedding
147 | 'nepochs': 1,
148 | # 60 is recommended
149 | 'savemodel': False}
150 | time_test(m, l, 9, rnnslu.main, param=s)
151 | time_test(m, l, 10, lstm.train_lstm, max_epochs=1, test_size=1000,
152 | saveto='')
153 | return numpy.asarray(l)
154 |
155 | # Initialize test count and results dictionary
156 | test_total = 0
157 | times_dic = {}
158 |
159 | #test in float64 in FAST_RUN mode on the cpu
160 | import theano
161 | if do_float64:
162 | theano.config.floatX = 'float64'
163 | theano.config.mode = 'FAST_RUN'
164 | float64_times = do_tests()
165 | times_dic['float64'] = float64_times
166 | test_total += numpy.size(float64_times)
167 | print(algo_executed, file=sys.stderr)
168 | print('float64 times', float64_times, file=sys.stderr)
169 |
170 | #test in float32 in FAST_RUN mode on the cpu
171 | theano.config.floatX = 'float32'
172 | if do_float32:
173 | float32_times = do_tests()
174 | times_dic['float32'] = float32_times
175 | test_total += numpy.size(float32_times)
176 | print(algo_executed, file=sys.stderr)
177 | print('float32 times', float32_times, file=sys.stderr)
178 |
179 | if do_float64:
180 | print('float64/float32', (
181 | float64_times / float32_times), file=sys.stderr)
182 | print(file=sys.stderr)
183 | print(('Duplicate the timing to have everything '
184 | 'in one place'), file=sys.stderr)
185 | print(algo_executed, file=sys.stderr)
186 | print('float64 times', float64_times, file=sys.stderr)
187 | print('float32 times', float32_times, file=sys.stderr)
188 |
189 | print('float64/float32', (
190 | float64_times / float32_times), file=sys.stderr)
191 |
192 | #test in float32 in FAST_RUN mode on the gpu
193 | import theano.gpuarray
194 | if do_gpu:
195 | theano.gpuarray.use('cuda')
196 | gpu_times = do_tests()
197 | times_dic['gpu'] = gpu_times
198 | test_total += numpy.size(gpu_times)
199 | print(algo_executed, file=sys.stderr)
200 | print('gpu times', gpu_times, file=sys.stderr)
201 |
202 | if do_float64:
203 | print('float64/gpu', float64_times / gpu_times, file=sys.stderr)
204 |
205 | if (do_float64 + do_float32 + do_gpu) > 1:
206 | print(file=sys.stderr)
207 | print(('Duplicate the timing to have everything '
208 | 'in one place'), file=sys.stderr)
209 | print(algo_executed, file=sys.stderr)
210 | if do_float64:
211 | print('float64 times', float64_times, file=sys.stderr)
212 | if do_float32:
213 | print('float32 times', float32_times, file=sys.stderr)
214 | if do_gpu:
215 | print('gpu times', gpu_times, file=sys.stderr)
216 |
217 | print()
218 | if do_float64 and do_float32:
219 | print('float64/float32', (
220 | float64_times / float32_times), file=sys.stderr)
221 | if do_float64 and do_gpu:
222 | print('float64/gpu', float64_times / gpu_times, file=sys.stderr)
223 | if do_float32 and do_gpu:
224 | print('float32/gpu', float32_times / gpu_times, file=sys.stderr)
225 |
226 | # Generate JUnit performance report
227 | for label, times in times_dic.items():
228 | with open('speedtests_{label}.xml'.format(label=label), 'w') as f:
229 | f.write('<testsuites>\n')
230 | f.write('  <testsuite name="speedtests_{label}" tests="{ntests}">\n'
231 |     .format(label=label, ntests=test_total / len(times_dic)))
232 | for algo, time in zip(algo_executed, times):
233 |     f.write('    <testcase classname="speedtests_{label}" name="{algo}" time="{time}">'
234 |         .format(label=label, algo=algo, time=time))
235 |     f.write('</testcase>\n')
236 | f.write('  </testsuite>\n</testsuites>\n')
237 |
238 | if do_gpu:
239 | assert not numpy.isnan(gpu_times).any()
240 |
--------------------------------------------------------------------------------
/code/unet/Unet_lasagne_recipes.py:
--------------------------------------------------------------------------------
1 | # start-snippet-1
2 | __author__ = 'Fabian Isensee'
3 | from collections import OrderedDict
4 | from lasagne.layers import (InputLayer, ConcatLayer, Pool2DLayer, ReshapeLayer, DimshuffleLayer, NonlinearityLayer,
5 | DropoutLayer, Deconv2DLayer, batch_norm)
6 | try:
7 | from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer
8 | except ImportError:
9 | from lasagne.layers import Conv2DLayer as ConvLayer
10 | import lasagne
11 | from lasagne.init import HeNormal
12 | # end-snippet-1
13 |
14 | # start-snippet-downsampling
15 | def build_UNet(n_input_channels=1, BATCH_SIZE=None, num_output_classes=2, pad='same', nonlinearity=lasagne.nonlinearities.elu, input_dim=(None, None), base_n_filters=64, do_dropout=False):
16 | net = OrderedDict()
17 | net['input'] = InputLayer((BATCH_SIZE, n_input_channels, input_dim[0], input_dim[1]))
18 |
19 | net['contr_1_1'] = batch_norm(ConvLayer(net['input'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
20 | net['contr_1_2'] = batch_norm(ConvLayer(net['contr_1_1'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
21 | net['pool1'] = Pool2DLayer(net['contr_1_2'], 2)
22 |
23 | net['contr_2_1'] = batch_norm(ConvLayer(net['pool1'], base_n_filters*2, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
24 | net['contr_2_2'] = batch_norm(ConvLayer(net['contr_2_1'], base_n_filters*2, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
25 | net['pool2'] = Pool2DLayer(net['contr_2_2'], 2)
26 |
27 | net['contr_3_1'] = batch_norm(ConvLayer(net['pool2'], base_n_filters*4, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
28 | net['contr_3_2'] = batch_norm(ConvLayer(net['contr_3_1'], base_n_filters*4, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
29 | net['pool3'] = Pool2DLayer(net['contr_3_2'], 2)
30 |
31 | net['contr_4_1'] = batch_norm(ConvLayer(net['pool3'], base_n_filters*8, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
32 | net['contr_4_2'] = batch_norm(ConvLayer(net['contr_4_1'], base_n_filters*8, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
33 | l = net['pool4'] = Pool2DLayer(net['contr_4_2'], 2)
34 | # end-snippet-downsampling
35 |
36 | # start-snippet-bottleneck
37 | # the paper does not really describe where and how dropout is added. Feel free to try more options
38 | if do_dropout:
39 | l = DropoutLayer(l, p=0.4)
40 |
41 | net['encode_1'] = batch_norm(ConvLayer(l, base_n_filters*16, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
42 | net['encode_2'] = batch_norm(ConvLayer(net['encode_1'], base_n_filters*16, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
43 | # end-snippet-bottleneck
44 |
45 | # start-snippet-upsampling
46 | net['upscale1'] = batch_norm(Deconv2DLayer(net['encode_2'], base_n_filters*16, 2, 2, crop="valid", nonlinearity=nonlinearity, W=HeNormal(gain="relu")))
47 | net['concat1'] = ConcatLayer([net['upscale1'], net['contr_4_2']], cropping=(None, None, "center", "center"))
48 | net['expand_1_1'] = batch_norm(ConvLayer(net['concat1'], base_n_filters*8, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
49 | net['expand_1_2'] = batch_norm(ConvLayer(net['expand_1_1'], base_n_filters*8, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
50 |
51 | net['upscale2'] = batch_norm(Deconv2DLayer(net['expand_1_2'], base_n_filters*8, 2, 2, crop="valid", nonlinearity=nonlinearity, W=HeNormal(gain="relu")))
52 | net['concat2'] = ConcatLayer([net['upscale2'], net['contr_3_2']], cropping=(None, None, "center", "center"))
53 | net['expand_2_1'] = batch_norm(ConvLayer(net['concat2'], base_n_filters*4, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
54 | net['expand_2_2'] = batch_norm(ConvLayer(net['expand_2_1'], base_n_filters*4, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
55 |
56 | net['upscale3'] = batch_norm(Deconv2DLayer(net['expand_2_2'], base_n_filters*4, 2, 2, crop="valid", nonlinearity=nonlinearity, W=HeNormal(gain="relu")))
57 | net['concat3'] = ConcatLayer([net['upscale3'], net['contr_2_2']], cropping=(None, None, "center", "center"))
58 | net['expand_3_1'] = batch_norm(ConvLayer(net['concat3'], base_n_filters*2, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
59 | net['expand_3_2'] = batch_norm(ConvLayer(net['expand_3_1'], base_n_filters*2, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
60 |
61 | net['upscale4'] = batch_norm(Deconv2DLayer(net['expand_3_2'], base_n_filters*2, 2, 2, crop="valid", nonlinearity=nonlinearity, W=HeNormal(gain="relu")))
62 | net['concat4'] = ConcatLayer([net['upscale4'], net['contr_1_2']], cropping=(None, None, "center", "center"))
63 | net['expand_4_1'] = batch_norm(ConvLayer(net['concat4'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
64 | net['expand_4_2'] = batch_norm(ConvLayer(net['expand_4_1'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
65 | # end-snippet-upsampling
66 |
67 | # start-snippet-output
68 | net['output_segmentation'] = ConvLayer(net['expand_4_2'], num_output_classes, 1, nonlinearity=None)
69 | net['dimshuffle'] = DimshuffleLayer(net['output_segmentation'], (1, 0, 2, 3))
70 | net['reshapeSeg'] = ReshapeLayer(net['dimshuffle'], (num_output_classes, -1))
71 | net['dimshuffle2'] = DimshuffleLayer(net['reshapeSeg'], (1, 0))
72 | net['output_flattened'] = NonlinearityLayer(net['dimshuffle2'], nonlinearity=lasagne.nonlinearities.softmax)
73 |
74 | return net
75 | # end-snippet-output
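# A hypothetical usage sketch (argument values are illustrative only):
#   net = build_UNet(n_input_channels=3, num_output_classes=2)
#   output_layer = net['output_flattened']
# output_flattened yields one row per pixel, with one softmax probability
# per class, ready for a categorical cross-entropy loss.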
76 |
--------------------------------------------------------------------------------
/code/utils.py:
--------------------------------------------------------------------------------
1 | """ This file contains different utility functions that are not connected
2 | in anyway to the networks presented in the tutorials, but rather help in
3 | processing the outputs into a more understandable way.
4 |
5 | For example ``tile_raster_images`` helps in generating a easy to grasp
6 | image from a set of samples or weights.
7 | """
8 |
9 | import numpy
10 |
11 |
12 | def scale_to_unit_interval(ndar, eps=1e-8):
13 | """ Scales all values in the ndarray ndar to be between 0 and 1 """
14 | ndar = ndar.copy()
15 | ndar -= ndar.min()
16 | ndar *= 1.0 / (ndar.max() + eps)
17 | return ndar
18 |
19 |
20 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
21 | scale_rows_to_unit_interval=True,
22 | output_pixel_vals=True):
23 | """
24 | Transform an array with one flattened image per row into an array in
25 | which images are reshaped and laid out like tiles on a floor.
26 |
27 | This function is useful for visualizing datasets whose rows are images,
28 | and also for visualizing the columns of matrices that transform those rows
29 | (such as the weights of the first layer of a neural net).
30 |
31 | :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can
32 | be 2-D ndarrays or None;
33 | :param X: a 2-D array in which every row is a flattened image.
34 |
35 | :type img_shape: tuple; (height, width)
36 | :param img_shape: the original shape of each image
37 |
38 | :type tile_shape: tuple; (rows, cols)
39 | :param tile_shape: the number of images to tile (rows, cols)
40 |
41 | :param output_pixel_vals: if output should be pixel values (i.e. uint8
42 | values) or floats
43 |
44 | :param scale_rows_to_unit_interval: whether the values should be scaled
45 | to [0,1] before being plotted
46 |
47 |
48 | :returns: array suitable for viewing as an image.
49 | (See:`Image.fromarray`.)
50 | :rtype: a 2-d array with same dtype as X.
51 |
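Example (a hypothetical call, assuming ``X`` is a (100, 784) array of
flattened 28x28 images):

>>> tiles = tile_raster_images(X, img_shape=(28, 28),
...                            tile_shape=(10, 10), tile_spacing=(1, 1))
>>> # `tiles` is then a uint8 array viewable with PIL: Image.fromarray(tiles)
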
52 | """
53 |
54 | assert len(img_shape) == 2
55 | assert len(tile_shape) == 2
56 | assert len(tile_spacing) == 2
57 |
58 | # The expression below can be re-written in a more C-like style as
59 | # follows:
60 | #
61 | # out_shape = [0,0]
62 | # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] -
63 | # tile_spacing[0]
64 | # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] -
65 | # tile_spacing[1]
66 | out_shape = [
67 | (ishp + tsp) * tshp - tsp
68 | for ishp, tshp, tsp in zip(img_shape, tile_shape, tile_spacing)
69 | ]
70 |
71 | if isinstance(X, tuple):
72 | assert len(X) == 4
73 | # Create an output numpy ndarray to store the image
74 | if output_pixel_vals:
75 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4),
76 | dtype='uint8')
77 | else:
78 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4),
79 | dtype=X.dtype)
80 |
81 | #colors default to 0, alpha defaults to 1 (opaque)
82 | if output_pixel_vals:
83 | channel_defaults = [0, 0, 0, 255]
84 | else:
85 | channel_defaults = [0., 0., 0., 1.]
86 |
87 | for i in range(4):
88 | if X[i] is None:
89 | # if channel is None, fill it with zeros of the correct
90 | # dtype
91 | dt = out_array.dtype
92 | if output_pixel_vals:
93 | dt = 'uint8'
94 | out_array[:, :, i] = numpy.zeros(
95 | out_shape,
96 | dtype=dt
97 | ) + channel_defaults[i]
98 | else:
99 | # use a recursive call to compute the channel and store it
100 | # in the output
101 | out_array[:, :, i] = tile_raster_images(
102 | X[i], img_shape, tile_shape, tile_spacing,
103 | scale_rows_to_unit_interval, output_pixel_vals)
104 | return out_array
105 |
106 | else:
107 | # if we are dealing with only one channel
108 | H, W = img_shape
109 | Hs, Ws = tile_spacing
110 |
111 | # generate a matrix to store the output
112 | dt = X.dtype
113 | if output_pixel_vals:
114 | dt = 'uint8'
115 | out_array = numpy.zeros(out_shape, dtype=dt)
116 |
117 | for tile_row in range(tile_shape[0]):
118 | for tile_col in range(tile_shape[1]):
119 | if tile_row * tile_shape[1] + tile_col < X.shape[0]:
120 | this_x = X[tile_row * tile_shape[1] + tile_col]
121 | if scale_rows_to_unit_interval:
122 | # if we should scale values to be between 0 and 1
123 | # do this by calling the `scale_to_unit_interval`
124 | # function
125 | this_img = scale_to_unit_interval(
126 | this_x.reshape(img_shape))
127 | else:
128 | this_img = this_x.reshape(img_shape)
129 | # add the slice to the corresponding position in the
130 | # output array
131 | c = 1
132 | if output_pixel_vals:
133 | c = 255
134 | out_array[
135 | tile_row * (H + Hs): tile_row * (H + Hs) + H,
136 | tile_col * (W + Ws): tile_col * (W + Ws) + W
137 | ] = this_img * c
138 | return out_array
139 |
--------------------------------------------------------------------------------
/data/download.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | which wget >/dev/null 2>&1
4 | WGET=$?
5 | which curl >/dev/null 2>&1
6 | CURL=$?
7 | if [ "$WGET" -eq 0 ]; then
8 | DL_CMD="wget --no-verbose -c"
9 | elif [ "$CURL" -eq 0 ]; then
10 | DL_CMD="curl -C - -O"
11 | else
12 | echo "You need wget or curl installed to download"
13 | exit 1
14 | fi
15 |
16 | $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
17 | $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist_py3k.pkl.gz
18 | $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl.gz && gunzip -f imdb.pkl.gz
19 | $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.dict.pkl.gz && gunzip -f imdb.dict.pkl.gz
20 | $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/Nottingham.zip && unzip -u Nottingham.zip
21 | $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/midi.zip && unzip -u midi.zip -d ../code && echo "extracted Modified Python MIDI package (GPL)"
22 | $DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold0.pkl.gz
23 | $DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold1.pkl.gz
24 | $DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold2.pkl.gz
25 | $DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold3.pkl.gz
26 | $DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold4.pkl.gz
27 |
--------------------------------------------------------------------------------
/data/training_colorpatches_16x16_demo.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/data/training_colorpatches_16x16_demo.mat
--------------------------------------------------------------------------------
/doc/.templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "!layout.html" %}
2 |
3 | {%- block extrahead %}
4 | {{ super() }}
5 |
10 | {% endblock %}
11 |
12 | {% block footer %}
13 | {{ super() }}
14 |
23 | {% endblock %}
24 |
25 |
--------------------------------------------------------------------------------
/doc/DBN.txt:
--------------------------------------------------------------------------------
1 | .. _DBN:
2 |
3 | Deep Belief Networks
4 | ====================
5 |
6 | .. note::
7 | This section assumes the reader has already read through :doc:`logreg`,
8 | :doc:`mlp` and :doc:`rbm`. Additionally, it uses the following Theano
9 | functions and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic
10 | ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the
11 | code on GPU also read `GPU`_.
12 |
13 | .. _T.tanh: http://deeplearning.net/software/theano/tutorial/examples.html?highlight=tanh
14 |
15 | .. _shared variables: http://deeplearning.net/software/theano/tutorial/examples.html#using-shared-variables
16 |
17 | .. _basic arithmetic ops: http://deeplearning.net/software/theano/tutorial/adding.html#adding-two-scalars
18 |
19 | .. _T.grad: http://deeplearning.net/software/theano/tutorial/examples.html#computing-gradients
20 |
21 | .. _floatX: http://deeplearning.net/software/theano/library/config.html#config.floatX
22 |
23 | .. _GPU: http://deeplearning.net/software/theano/tutorial/using_gpu.html
24 |
25 | .. _Random numbers: http://deeplearning.net/software/theano/tutorial/examples.html#using-random-numbers
26 |
27 |
28 | .. note::
29 | The code for this section is available for download `here`_.
30 |
31 | .. _here: http://deeplearning.net/tutorial/code/DBN.py
32 |
33 |
34 | Deep Belief Networks
35 | ++++++++++++++++++++
36 |
37 | [Hinton06]_ showed that RBMs can be stacked and trained in a greedy manner
38 | to form so-called Deep Belief Networks (DBN). DBNs are graphical models which
39 | learn to extract a deep hierarchical representation of the training data.
40 | They model the joint distribution between observed vector :math:`x` and
41 | the :math:`\ell` hidden layers :math:`h^k` as follows:
42 |
43 | .. math::
44 | :label: dbn
45 |
46 | P(x, h^1, \ldots, h^{\ell}) = \left(\prod_{k=0}^{\ell-2} P(h^k|h^{k+1})\right) P(h^{\ell-1},h^{\ell})
47 |
48 | where :math:`x=h^0`, :math:`P(h^{k-1} | h^k)` is a conditional distribution
49 | for the visible units conditioned on the hidden units of the RBM at level
50 | :math:`k`, and :math:`P(h^{\ell-1}, h^{\ell})` is the visible-hidden joint
51 | distribution in the top-level RBM. This is illustrated in the figure below.
52 |
53 |
54 | .. figure:: images/DBN3.png
55 | :align: center
56 |
57 | The principle of greedy layer-wise unsupervised training can be applied to
58 | DBNs with RBMs as the building blocks for each layer [Hinton06]_, [Bengio07]_.
59 | The process is as follows:
60 |
61 | 1. Train the first layer as an RBM that models the raw input :math:`x =
62 | h^{(0)}` as its visible layer.
63 |
64 | 2. Use that first layer to obtain a representation of the input that will
65 | be used as data for the second layer. Two common solutions exist. This
66 | representation can be chosen as being the mean activations
67 | :math:`p(h^{(1)}=1|h^{(0)})` or samples of :math:`p(h^{(1)}|h^{(0)})`.
68 |
69 | 3. Train the second layer as an RBM, taking the transformed data (samples or
70 | mean activations) as training examples (for the visible layer of that RBM).
71 |
72 | 4. Iterate (2 and 3) for the desired number of layers, each time propagating
73 | upward either samples or mean values.
74 |
75 | 5. Fine-tune all the parameters of this deep architecture with respect to a
76 | proxy for the DBN log-likelihood, or with respect to a supervised training
77 | criterion (after adding extra learning machinery to convert the learned
78 | representation into supervised predictions, e.g. a linear classifier).
79 |
80 |
81 | In this tutorial, we focus on fine-tuning via supervised gradient descent.
82 | Specifically, we use a logistic regression classifier to classify the input
83 | :math:`x` based on the output of the last hidden layer :math:`h^{(l)}` of the
84 | DBN. Fine-tuning is then performed via supervised gradient descent of the
85 | negative log-likelihood cost function. Since the supervised gradient is only
86 | non-null for the weights and hidden layer biases of each layer (i.e. null for
87 | the visible biases of each RBM), this procedure is equivalent to initializing
88 | the parameters of a deep MLP with the weights and hidden layer biases obtained
89 | with the unsupervised training strategy.
90 |
91 | Justifying Greedy-Layer Wise Pre-Training
92 | +++++++++++++++++++++++++++++++++++++++++
93 |
94 | Why does such an algorithm work? Taking as an example a 2-layer DBN with hidden
95 | layers :math:`h^{(1)}` and :math:`h^{(2)}` (with respective weight parameters
96 | :math:`W^{(1)}` and :math:`W^{(2)}`), [Hinton06]_ established
97 | (see also [Bengio09]_ for a detailed derivation) that :math:`\log
98 | p(x)` can be rewritten as,
99 |
100 | .. math::
101 | :label: dbn_bound
102 |
103 | \log p(x) = &KL(Q(h^{(1)}|x)||p(h^{(1)}|x)) + H_{Q(h^{(1)}|x)} + \\
104 | &\sum_h Q(h^{(1)}|x)(\log p(h^{(1)}) + \log p(x|h^{(1)})).
105 |
106 | :math:`KL(Q(h^{(1)}|x) || p(h^{(1)}|x))` represents the KL divergence between
107 | the posterior :math:`Q(h^{(1)}|x)` of the first RBM if it were standalone, and the
108 | probability :math:`p(h^{(1)}|x)` for the same layer but defined by the entire DBN
109 | (i.e. taking into account the prior :math:`p(h^{(1)},h^{(2)})` defined by the
110 | top-level RBM). :math:`H_{Q(h^{(1)}|x)}` is the entropy of the distribution
111 | :math:`Q(h^{(1)}|x)`.
112 |
113 | It can be shown that if we initialize both hidden layers such that
114 | :math:`W^{(2)}={W^{(1)}}^T`, then :math:`Q(h^{(1)}|x)=p(h^{(1)}|x)` and the KL
115 | divergence term is null. If we learn the first level RBM and then keep its
116 | parameters :math:`W^{(1)}` fixed, optimizing Eq. :eq:`dbn_bound` with respect
117 | to :math:`W^{(2)}` can thus only increase the likelihood :math:`p(x)`.
118 |
119 | Also, notice that if we isolate the terms which depend only on :math:`W^{(2)}`, we
120 | get:
121 |
122 | .. math::
123 | \sum_h Q(h^{(1)}|x) \log p(h^{(1)})
124 |
125 | Optimizing this with respect to :math:`W^{(2)}` amounts to training a second-stage
126 | RBM, using the output of :math:`Q(h^{(1)}|x)` as the training distribution,
127 | when :math:`x` is sampled from the training distribution for the first RBM.
128 |
129 | Implementation
130 | ++++++++++++++
131 |
132 | To implement DBNs in Theano, we will use the class defined in the :doc:`rbm`
133 | tutorial. One can also observe that the code for the DBN is very similar to the one
134 | for SdA, because both involve the principle of unsupervised layer-wise
135 | pre-training followed by supervised fine-tuning as a deep MLP.
136 | The main difference is that we use the RBM class instead of the dA
137 | class.
138 |
139 | We start off by defining the DBN class which will store the layers of the
140 | MLP, along with their associated RBMs. Since we take the viewpoint of using
141 | the RBMs to initialize an MLP, the code will reflect this by separating as
142 | much as possible the RBMs used to initialize the network and the MLP used for
143 | classification.
144 |
145 | .. literalinclude:: ../code/DBN.py
146 | :start-after: start-snippet-1
147 | :end-before: end-snippet-1
148 |
149 | ``self.sigmoid_layers`` will store the feed-forward graphs which together form
150 | the MLP, while ``self.rbm_layers`` will store the RBMs used to pretrain each
151 | layer of the MLP.
152 |
153 | Next, we construct ``n_layers`` sigmoid layers (we use the
154 | ``HiddenLayer`` class introduced in :ref:`mlp`, with the only modification
155 | that we replace the ``tanh`` non-linearity with the logistic function
156 | :math:`s(x) = \frac{1}{1+e^{-x}}`) and ``n_layers`` RBMs, where ``n_layers``
157 | is the depth of our model. We link the sigmoid layers such that they form an
158 | MLP, and construct each RBM such that it shares its weight matrix and
159 | hidden bias with the corresponding sigmoid layer.
160 |
161 | .. literalinclude:: ../code/DBN.py
162 | :start-after: # MLP.
163 | :end-before: # We now need to add a logistic layer on top of the MLP
164 |
165 | All that is left is to stack one last logistic regression layer in order to
166 | form an MLP. We will use the ``LogisticRegression`` class introduced in
167 | :ref:`logreg`.
168 |
169 | .. literalinclude:: ../code/DBN.py
170 | :start-after: # We now need to add a logistic layer on top of the MLP
171 | :end-before: def pretraining_functions
172 |
173 | The class also provides a method which generates training functions for each
174 | of the RBMs. They are returned as a list, where element :math:`i` is a
175 | function which implements one step of training for the ``RBM`` at layer
176 | :math:`i`.
177 |
178 | .. literalinclude:: ../code/DBN.py
179 | :start-after: self.errors = self.logLayer.errors(self.y)
180 | :end-before: learning_rate = T.scalar('lr')
181 |
182 | In order to be able to change the learning rate during training, we associate
183 | with it a Theano variable that has a default value.
184 |
185 | .. literalinclude:: ../code/DBN.py
186 | :start-after: index = T.lscalar('index')
187 | :end-before: def build_finetune_functions
188 |
189 | Now any function ``pretrain_fns[i]`` takes as arguments ``index`` and
190 | optionally ``lr`` -- the learning rate. Note that the names of the parameters
191 | are the names given to the Theano variables (e.g. ``lr``) when they are
192 | constructed and not the name of the python variables (e.g. ``learning_rate``). Keep
193 | this in mind when working with Theano. Optionally, if you provide ``k`` (the
194 | number of Gibbs steps to perform in CD or PCD) this will also become an
195 | argument of your function.
196 |
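As an illustration, calling one of these functions for a full epoch of layer
``i`` could look like this (``pretrain_fns`` and ``n_train_batches`` are the
names constructed above; the learning rate value is arbitrary):

.. code-block:: python

    for batch_index in range(n_train_batches):
        cost = pretrain_fns[i](index=batch_index, lr=0.01)
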
197 | In the same fashion, the DBN class includes a method for building the
198 | functions required for finetuning (a ``train_model``, a ``validate_model``
199 | and a ``test_model`` function).
200 |
201 | .. literalinclude:: ../code/DBN.py
202 | :pyobject: DBN.build_finetune_functions
203 |
204 | Note that the returned ``valid_score`` and ``test_score`` are not Theano
205 | functions, but rather Python functions. These loop over the entire
206 | validation set and the entire test set to produce a list of the losses
207 | obtained over these sets.
208 |
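A minimal sketch of what such a wrapper looks like (the inner function and
variable names are illustrative):

.. code-block:: python

    def valid_score():
        # one loss value per validation minibatch
        return [valid_model(i) for i in range(n_valid_batches)]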
209 |
210 | Putting it all together
211 | +++++++++++++++++++++++
212 |
213 | The few lines of code below construct the deep belief network:
214 |
215 | .. literalinclude:: ../code/DBN.py
216 | :start-after: # numpy random generator
217 | :end-before: start-snippet-2
218 |
219 | There are two stages in training this network: (1) a layer-wise pre-training and
220 | (2) a fine-tuning stage.
221 |
222 | For the pre-training stage, we loop over all the layers of the network. For
223 | each layer, we use the compiled Theano function which determines the
224 | input to the ``i``-th level RBM and performs one step of CD-k within this RBM.
225 | This function is applied to the training set for a fixed number of epochs
226 | given by ``pretraining_epochs``.
227 |
228 | .. literalinclude:: ../code/DBN.py
229 | :start-after: start-snippet-2
230 | :end-before: end-snippet-2
231 |
232 | The fine-tuning loop is very similar to the one in the :ref:`mlp` tutorial,
233 | the only difference being that we now use the functions given by
234 | ``build_finetune_functions``.
235 |
236 | Running the Code
237 | ++++++++++++++++
238 |
239 | The user can run the code by calling:
240 |
241 | .. code-block:: bash
242 |
243 | python code/DBN.py
244 |
245 | With the default parameters, the code runs for 100 pre-training epochs with
246 | mini-batches of size 10. This corresponds to performing 500,000 unsupervised
247 | parameter updates. We use an unsupervised learning rate of 0.01, with a
248 | supervised learning rate of 0.1. The DBN itself consists of three
249 | hidden layers with 1000 units per layer. With early-stopping, this configuration
250 | achieved a minimal validation error of 1.27 percent, with a corresponding test
251 | error of 1.34 percent, after 46 supervised epochs.
252 |
253 | On an Intel(R) Xeon(R) CPU X5560 running at 2.80GHz, using a multi-threaded MKL
254 | library (running on 4 cores), pretraining took 615 minutes with an average of
255 | 2.05 mins/(layer * epoch). Fine-tuning took only 101 minutes or approximately
256 | 2.20 mins/epoch.
257 |
258 | Hyper-parameters were selected by optimizing on the validation error. We tested
259 | unsupervised learning rates in :math:`\{10^{-1}, ..., 10^{-5}\}` and supervised
260 | learning rates in :math:`\{10^{-1}, ..., 10^{-4}\}`. We did not use any form of
261 | regularization besides early-stopping, nor did we optimize over the number of
262 | pretraining updates.
263 |
264 |
265 | Tips and Tricks
266 | +++++++++++++++
267 |
268 | One way to improve the running time of your code (given that you have
269 | sufficient memory available) is to compute the representation of the entire
270 | dataset at layer ``i`` in a single pass, once the weights of the
271 | :math:`i-1`-th layers have been fixed. Namely, start by training your first
272 | layer RBM. Once it is trained, you can compute the hidden units values for
273 | every example in the dataset and store this as a new dataset which is used to
274 | train the 2nd layer RBM. Once you have trained the RBM for layer 2, you compute,
275 | in a similar fashion, the dataset for layer 3 and so on. This avoids calculating
276 | the intermediate (hidden layer) representations ``pretraining_epochs`` times,
277 | at the expense of increased memory usage.
278 |
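A minimal sketch of this trick, assuming the first-layer RBM exposes its mean
hidden activation through ``propup`` as in the :doc:`rbm` tutorial (variable
names are illustrative):

.. code-block:: python

    # compile once: mean hidden activation of the trained layer-1 RBM
    propup_1 = theano.function([x], dbn.rbm_layers[0].propup(x)[1])
    # precompute the layer-1 representation of the whole training set;
    # it can then be reused for every epoch of layer-2 pre-training
    layer1_data = propup_1(train_set_x.get_value(borrow=True))
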
--------------------------------------------------------------------------------
/doc/LICENSE.txt:
--------------------------------------------------------------------------------
1 | .. _license:
2 |
3 | LICENSE
4 | =======
5 |
6 | Copyright (c) 2008--2013, Theano Development Team
7 | All rights reserved.
8 |
9 | Redistribution and use in source and binary forms, with or without
10 | modification, are permitted provided that the following conditions are met:
11 |
12 | * Redistributions of source code must retain the above copyright
13 | notice, this list of conditions and the following disclaimer.
14 | * Redistributions in binary form must reproduce the above copyright
15 | notice, this list of conditions and the following disclaimer in the
16 | documentation and/or other materials provided with the distribution.
17 | * Neither the name of Theano nor the names of its contributors may be
18 | used to endorse or promote products derived from this software without
19 | specific prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
22 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
25 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | python scripts/docgen.py
3 |
--------------------------------------------------------------------------------
/doc/SdA.txt:
--------------------------------------------------------------------------------
1 | .. _SdA:
2 |
3 | Stacked Denoising Autoencoders (SdA)
4 | ====================================
5 |
6 | .. note::
7 | This section assumes you have already read through :doc:`logreg`
8 | and :doc:`mlp`. Additionally it uses the following Theano functions
9 | and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_.
10 |
11 | .. _T.tanh: http://deeplearning.net/software/theano/tutorial/examples.html?highlight=tanh
12 |
13 | .. _shared variables: http://deeplearning.net/software/theano/tutorial/examples.html#using-shared-variables
14 |
15 | .. _basic arithmetic ops: http://deeplearning.net/software/theano/tutorial/adding.html#adding-two-scalars
16 |
17 | .. _T.grad: http://deeplearning.net/software/theano/tutorial/examples.html#computing-gradients
18 |
19 | .. _floatX: http://deeplearning.net/software/theano/library/config.html#config.floatX
20 |
21 | .. _GPU: http://deeplearning.net/software/theano/tutorial/using_gpu.html
22 |
23 | .. _Random numbers: http://deeplearning.net/software/theano/tutorial/examples.html#using-random-numbers
24 |
25 |
26 | .. note::
27 | The code for this section is available for download `here`_.
28 |
29 | .. _here: http://deeplearning.net/tutorial/code/SdA.py
30 |
31 |
32 | The Stacked Denoising Autoencoder (SdA) is an extension of the stacked
33 | autoencoder [Bengio07]_ and it was introduced in [Vincent08]_.
34 |
35 | This tutorial builds on the previous tutorial :ref:`dA`.
36 | Especially if you do not have experience with autoencoders, we recommend reading it
37 | before going any further.
38 |
39 | .. _stacked_autoencoders:
40 |
41 | Stacked Autoencoders
42 | ++++++++++++++++++++
43 |
44 | Denoising autoencoders can be stacked to form a deep network by
45 | feeding the latent representation (output code)
46 | of the denoising autoencoder found on the layer
47 | below as input to the current layer. The **unsupervised pre-training** of such an
48 | architecture is done one layer at a time. Each layer is trained as
49 | a denoising autoencoder by minimizing the error in reconstructing its input
50 | (which is the output code of the previous layer).
51 | Once the first :math:`k` layers
52 | are trained, we can train the :math:`k+1`-th layer because we can now
53 | compute the code or latent representation from the layer below.
54 |
55 | Once all layers are pre-trained, the network goes through a second stage
56 | of training called **fine-tuning**. Here we consider **supervised fine-tuning**
57 | where we want to minimize prediction error on a supervised task.
58 | For this, we first add a logistic regression
59 | layer on top of the network (more precisely on the output code of the
60 | output layer). We then
61 | train the entire network as we would train a multilayer
62 | perceptron. At this point, we only consider the encoding parts of
63 | each auto-encoder.
64 | This stage is supervised, since now we use the target class during
65 | training. (See the :ref:`mlp` for details on the multilayer perceptron.)
66 |
67 | This can be easily implemented in Theano, using the class defined
68 | previously for a denoising autoencoder. We can see the stacked denoising
69 | autoencoder as having two facades: a list of
70 | autoencoders, and an MLP. During pre-training we use the first facade, i.e., we treat our model
71 | as a list of autoencoders, and train each autoencoder separately. In the
72 | second stage of training, we use the second facade. These two facades are linked because:
73 |
74 | * the autoencoders and the sigmoid layers of the MLP share parameters, and
75 |
76 | * the latent representations computed by intermediate layers of the MLP are fed as input to the autoencoders.
77 |
78 | .. literalinclude:: ../code/SdA.py
79 | :start-after: start-snippet-1
80 | :end-before: end-snippet-1
81 |
82 | ``self.sigmoid_layers`` will store the sigmoid layers of the MLP facade, while
83 | ``self.dA_layers`` will store the denoising autoencoder associated with the layers of the MLP.
84 |
85 | Next, we construct ``n_layers`` sigmoid layers and ``n_layers`` denoising
86 | autoencoders, where ``n_layers`` is the depth of our model. We use the
87 | ``HiddenLayer`` class introduced in :ref:`mlp`, with one
88 | modification: we replace the ``tanh`` non-linearity with the
89 | logistic function :math:`s(x) = \frac{1}{1+e^{-x}}`.
90 | We link the sigmoid layers to form an MLP, and construct
91 | the denoising autoencoders such that each shares the weight matrix and the
92 | bias of its encoding part with its corresponding sigmoid layer.
93 |
94 | .. literalinclude:: ../code/SdA.py
95 | :start-after: start-snippet-2
96 | :end-before: end-snippet-2
97 |
98 | All we need now is to add a logistic layer on top of the sigmoid
99 | layers such that we have an MLP. We will
100 | use the ``LogisticRegression`` class introduced in :ref:`logreg`.
101 |
102 | .. literalinclude:: ../code/SdA.py
103 | :start-after: end-snippet-2
104 | :end-before: def pretraining_functions
105 |
106 | The ``SdA`` class also provides a method that generates training functions for
107 | the denoising autoencoders in its layers.
108 | They are returned as a list, where element :math:`i` is a function that
109 | implements one step of training the ``dA`` corresponding to layer
110 | :math:`i`.
111 |
112 | .. literalinclude:: ../code/SdA.py
113 | :start-after: self.errors = self.logLayer.errors(self.y)
114 | :end-before: corruption_level = T.scalar('corruption')
115 |
116 | To be able to change the corruption level or the learning rate
117 | during training, we associate Theano variables with them.
118 |
119 | .. literalinclude:: ../code/SdA.py
120 | :start-after: index = T.lscalar('index')
121 | :end-before: def build_finetune_functions
122 |
123 | Now any function ``pretrain_fns[i]`` takes as arguments ``index`` and
124 | optionally ``corruption``---the corruption level or ``lr``---the
125 | learning rate. Note that the names of the parameters are the names given
126 | to the Theano variables when they are constructed, not the names of the
127 | Python variables (``learning_rate`` or ``corruption_level``). Keep this
128 | in mind when working with Theano.
129 |
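As an illustration, one pre-training step for layer ``i`` could be invoked as
follows (``pretrain_fns`` as constructed above; the corruption level and
learning rate values are arbitrary):

.. code-block:: python

    cost = pretrain_fns[i](index=batch_index, corruption=0.2, lr=0.001)
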
130 | In the same fashion we build a method for constructing the functions required
131 | during finetuning (``train_fn``, ``valid_score`` and
132 | ``test_score``).
133 |
134 | .. literalinclude:: ../code/SdA.py
135 | :pyobject: SdA.build_finetune_functions
136 |
137 | Note that ``valid_score`` and ``test_score`` are not Theano
138 | functions, but rather Python functions that loop over the entire
139 | validation set and the entire test set, respectively, producing a list of the losses
140 | over these sets.
141 |
142 | Putting it all together
143 | +++++++++++++++++++++++
144 |
145 | The few lines of code below construct the stacked denoising
146 | autoencoder:
147 |
148 | .. literalinclude:: ../code/SdA.py
149 | :start-after: start-snippet-3
150 | :end-before: end-snippet-3
151 |
152 | There are two stages of training for this network: layer-wise pre-training
153 | followed by fine-tuning.
154 |
155 | For the pre-training stage, we will loop over all the layers of the
156 | network. For each layer we will use the compiled Theano function that
157 | implements an SGD step towards optimizing the weights for reducing
158 | the reconstruction cost of that layer. This function will be applied
159 | to the training set for a fixed number of epochs given by
160 | ``pretraining_epochs``.
161 |
162 | .. literalinclude:: ../code/SdA.py
163 | :start-after: start-snippet-4
164 | :end-before: end-snippet-4
165 |
166 | The fine-tuning loop is very similar to the one in the :ref:`mlp`. The
167 | only difference is that it uses the functions given by
168 | ``build_finetune_functions``.
169 |
170 | Running the Code
171 | ++++++++++++++++
172 |
173 | The user can run the code by calling:
174 |
175 | .. code-block:: bash
176 |
177 | python code/SdA.py
178 |
179 | By default the code runs 15 pre-training epochs for each layer, with a batch
180 | size of 1. The corruption levels are 0.1 for the first layer, 0.2 for the second,
181 | and 0.3 for the third. The pretraining learning rate is 0.001 and
182 | the finetuning learning rate is 0.1. Pre-training takes 585.01 minutes, with
183 | an average of 13 minutes per epoch. Fine-tuning is completed after 36 epochs
184 | in 444.2 minutes, with an average of 12.34 minutes per epoch. The final
185 | validation score is 1.39% with a testing score of 1.3%.
186 | These results were obtained on a machine with an Intel
187 | Xeon E5430 @ 2.66GHz CPU, with a single-threaded GotoBLAS.
188 |
189 |
190 | Tips and Tricks
191 | +++++++++++++++
192 |
193 | One way to improve the running time of your code (assuming you have
194 | sufficient memory available) is to compute how the network, up to layer
195 | :math:`k-1`, transforms your data. Namely, you start by training your first
196 | layer dA. Once it is trained, you can compute the hidden units values for
197 | every datapoint in your dataset and store this as a new dataset that you will
198 | use to train the dA corresponding to layer 2. Once you have trained the dA for
199 | layer 2, you compute, in a similar fashion, the dataset for layer 3 and so on.
200 | You can see that, at this point, the dAs are trained individually and
201 | simply provide (one to the other) a non-linear transformation of the input.
202 | Once all dAs are trained, you can start fine-tuning the model.
203 |
--------------------------------------------------------------------------------
/doc/cnn_1D_segm.txt:
--------------------------------------------------------------------------------
1 | .. _cnn_1D_segm:
2 |
3 | Network for 1D segmentation
4 | ***************************
5 |
6 | .. note::
7 | This section assumes the reader has already read through :doc:`lenet` for
8 | convolutional networks motivation and :doc:`fcn_2D_segm` for segmentation
9 | standard network.
10 |
11 |
12 | Summary
13 | +++++++
14 |
15 | The fundamental notions behind segmentation have been explained in :doc:`fcn_2D_segm`.
16 | A particularity here is that these notions are applied to 1D
17 | segmentation. However, almost every Lasagne layer used for 2D segmentation has
18 | a 1D counterpart, so the implementation would look very similar if the same
19 | model were used.
20 |
21 |
22 |
23 |
24 | Data
25 | ++++
26 |
27 | The `BigBrain `__ dataset is a 3D ultra-high resolution model of the brain reconstructed from 2D sections.
28 | We are interested in the outer part of the brain, the cortex.
29 | More precisely, we are interested in segmenting the 6 different layers of the cortex in 3D.
30 | Creating an expertly labelled training dataset for each 2D section (shown in Figure 1) is infeasible. Instead of giving a 2D image of one brain section as input, we give as input 1D vectors carrying information from across the cortex, extracted from smaller portions of manually labelled cortex,
31 | as shown in Figure 2. The final dataset is not available yet; a preliminary version
32 | is available `here `_.
33 |
34 | .. figure:: images/big_brain_section.png
35 | :align: center
36 | :scale: 100%
37 |
38 | **Figure 1** : Big Brain section
39 |
40 | .. figure:: images/ray.png
41 | :align: center
42 | :scale: 50%
43 |
44 | **Figure 2** : Ray extraction from segmented cortex
45 |
46 | We will call *rays* the vectors of size 200 extending from outside the brain
47 | through the cortex. As the images were stained for cell bodies, the intensity of each pixel of these rays represents the cell densities
48 | and sizes contained in the cortical layer to which the pixel belongs. Since the 6 cortical layers
49 | have different properties (cell density and size), the intensity profile can be used to
50 | detect boundaries of the cortical layers.
51 |
52 | Each ray has 2 input channels, one representing the smoothed intensity and the other
53 | the raw version, as shown in Figure 3. The next figure, Figure 4, shows the
54 | ground truth segmentation map, where each color represents
55 | a different label. The purple color indicates that these pixels are
56 | outside the cortex, while the 6 other colors represent the 6 cortical layers.
57 | For example, the first layer of the cortex lies between pixels ~35-55. The cortex
58 | for this sample starts at pixel ~35 and ends at pixel ~170.
59 |
60 |
61 | .. figure:: images/raw_smooth.png
62 | :align: center
63 | :scale: 100%
64 |
65 | **Figure 3** : Raw and smooth intensity profiles (input channels)
66 |
67 |
68 | .. figure:: images/labels.png
69 | :align: center
70 | :scale: 100%
71 |
72 | **Figure 4** : Cortical layers labels for this ray
73 |
74 |
75 |
76 | Model
77 | +++++
78 |
79 | We first experimented with more complex models, but we found that
80 | the simpler model presented here had enough capacity to learn where the layer boundaries are.
81 | This model (depicted in Figure 5) is composed of 8 identical blocks, followed by a
82 | last convolution and a softmax nonlinearity.
83 |
84 | Each block is composed of:
85 |
86 | * Batch Normalization layer
87 | * Rectify nonlinearity layer
88 | * Convolution layer, with kernel size 25, with enough padding such that the convolution does not change the feature resolution, and 64 features maps
89 |
90 | The last convolution has kernel size 1 and *number of classes* feature maps.
91 | The softmax is then
92 | used to determine, for each pixel, which of these classes is most likely.
93 | Note that any input size could be used here, since the model is built
94 | exclusively from locally connected layers.
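
As an illustration of this per-pixel decision, here is a plain numpy sketch
(hypothetical shapes; this is not the tutorial's code):

.. code-block:: python

    import numpy as np

    # scores from the last kernel-size-1 convolution for one ray:
    # 7 classes x 200 pixels
    scores = np.random.randn(7, 200)

    # softmax over the class axis, then argmax per pixel
    probs = np.exp(scores - scores.max(axis=0))
    probs /= probs.sum(axis=0)
    labels = probs.argmax(axis=0)   # one class label per pixel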
95 |
96 | .. figure:: images/cortical_layers_net.png
97 | :align: center
98 | :scale: 100%
99 |
100 | **Figure 5** : Model
101 |
102 | Note that we didn't use any pooling, because it was not needed. However, if
103 | pooling layers were used, an upsampling path would have been necessary to recover the full
104 | spatial size of the input ray. Also, since each pixel of the output prediction has
105 | a receptive field that includes all of the input pixels, the network is able to extract
106 | enough contextual information.
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 | Results
115 | +++++++
116 |
117 | The model outputs a vector of the same size as the input (here, 200).
118 | There are 7 class labels, including the 6 cortical layers and the 'not in the brain yet'
119 | label. You can see in Figure 6 below the output of the model for one ray. The top
120 | of the plot represents the ground truth segmentation, while the bottom represents
121 | the predicted segmentation. As you can see, only a small number of pixels
122 | are incorrectly segmented.
123 |
124 | .. figure:: images/cortical_ray_result.png
125 | :align: center
126 | :scale: 100%
127 |
128 | **Figure 6** : Ground truth (top) vs prediction (bottom) for 1 ray
129 |
130 | However, since the purpose was to do 3D segmentation by using 1D segmentation
131 | of the rays, we needed to map the rays back onto the brain section. After interpolation
132 | between those rays and smoothing, we get the results shown in Figure 7. The colored
133 | lines come from 3D meshes based on the model's predictions, intersected with a 2D section, and the grayscale stripes correspond to the
134 | ground truth. As you can see, the model achieves very good results on the small manually labelled
135 | sample, and these extend well to previously unsegmented cortex.
136 |
137 |
138 |
139 | .. figure:: images/cortical_valid1.png
140 | :align: center
141 | :scale: 40%
142 |
143 | **Figure 7** : Results put on the brain section
144 |
145 |
146 | Code
147 | ++++
148 |
149 | .. warning::
150 |
151 | * Current code works with Python 2 only.
152 | * If you use Theano with GPU backend (e.g. with Theano flag ``device=cuda``),
153 | you will need at least 12GB free in your video RAM.
154 |
155 | The FCN implementation can be found in the following files:
156 |
157 | * `fcn1D.py <../code/cnn_1D_segm/fcn1D.py>`_ : Defines the model.
158 | * `train_fcn1D.py <../code/cnn_1D_segm/train_fcn1D.py>`_ : Training loop (main script to run).
159 |
160 | Change the ``dataset_loaders/config.ini`` file and add the right path for the dataset:
161 |
162 | .. code-block:: cfg
163 |
164 | [cortical_layers]
165 | shared_path = /path/to/DeepLearningTutorials/data/cortical_layers/
166 |
167 | The folder indicated in the ``[cortical_layers]`` section should contain a sub-folder named ``6layers_segmentation``
168 | (you can obtain it by simply renaming the folder extracted from ``TrainingData190417.tar.gz``), which should
169 | itself contain the files:
170 |
171 | * ``training_cls_indices.txt``
172 | * ``training_cls.txt``
173 | * ``training_geo.txt``
174 | * ``training_raw.txt``
175 | * ``training_regions.txt``
176 |
177 |
178 | First define a *bn+relu+conv* block that returns the name of the last layer of
179 | the block. Since the implementation uses a dictionary variable *net* that keeps
180 | each layer's name as key and the actual layer object as value, the name of the
181 | last layer is sufficient.
182 |
183 | .. literalinclude:: ../code/cnn_1D_segm/fcn1D.py
184 | :start-after: start-snippet-bn_relu_conv
185 | :end-before: end-snippet-bn_relu_conv
186 |
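For readers without the source file at hand, a hedged sketch of what such a
block can look like in Lasagne (function and key names are illustrative, not
necessarily the tutorial's exact ones):

.. code-block:: python

    from lasagne.layers import BatchNormLayer, NonlinearityLayer, Conv1DLayer
    from lasagne.nonlinearities import rectify

    def bn_relu_conv(net, incoming_layer, depth, num_filters, filter_size):
        # append three layers to the ``net`` dictionary and return the
        # key of the last one
        net['bn' + str(depth)] = BatchNormLayer(net[incoming_layer])
        net['relu' + str(depth)] = NonlinearityLayer(net['bn' + str(depth)],
                                                     nonlinearity=rectify)
        net['conv' + str(depth)] = Conv1DLayer(net['relu' + str(depth)],
                                               num_filters=num_filters,
                                               filter_size=filter_size,
                                               pad='same',
                                               nonlinearity=None)
        return 'conv' + str(depth)
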
187 | The model is composed of 8 of these blocks, as seen below. Note that the
188 | model implementation is very tweakable, since the depth (number of blocks), the
189 | type of block, the filter size and the number of filters can all be changed by the user.
190 | However, the hyperparameters used here were:
191 |
192 | * filter_size = 25
193 | * n_filters = 64
194 | * depth = 8
195 | * block = bn_relu_conv
196 |
197 | .. literalinclude:: ../code/cnn_1D_segm/fcn1D.py
198 | :start-after: start-snippet-convolutions
199 | :end-before: end-snippet-convolutions
200 |
201 | Finally, the last convolution and the softmax are applied by:
202 |
203 | .. literalinclude:: ../code/cnn_1D_segm/fcn1D.py
204 | :start-after: start-snippet-output
205 | :end-before: end-snippet-output
206 |
207 | Running ``train_fcn1D.py`` on a Titan X took around 4 hours, ending with the following output:
208 |
209 | .. code-block:: text
210 |
211 | THEANO_FLAGS=device=cuda0,floatX=float32,dnn.conv.algo_fwd=time_once,dnn.conv.algo_bwd_data=time_once,dnn.conv.algo_bwd_filter=time_once,gpuarray.preallocate=1 python train_fcn1D.py
212 | [...]
213 | EPOCH 412: Avg cost train 0.065615, acc train 0.993349, cost val 0.041758, acc val 0.984398, jacc val per class ['0: 0.981183', '1: 0.953546', '2: 0.945765', '3: 0.980471', '4: 0.914617', '5: 0.968710', '6: 0.971049'], jacc val 0.959335 took 31.422823 s
214 | saving last model
215 |
216 |
217 | References
218 | ++++++++++
219 |
220 | If you use this tutorial, please cite the following papers:
221 |
222 | * References for BigBrain:
223 |
224 | * `[pdf] `__ Lewis, L.B. et al.: BigBrain: Initial Tissue Classification and Surface Extraction, HBM 2014.
225 | * `[website] `__ Amunts, K. et al.: "BigBrain: An Ultrahigh-Resolution 3D Human Brain Model", Science (2013) 340 no. 6139 1472-1475, June 2013.
226 | * `[pdf] `__ Bludau, S. et al.: Two new Cytoarchitectonic Areas of the Human Frontal Pole, OHBM 2012.
227 | * `[pdf] `__ Lepage, C. et al.: Automatic Repair of Acquisition Defects in Reconstruction of Histology Sections of a Human Brain, HBM 2010.
228 |
229 | * `[GitHub Repo] `__ Francesco Visin, Adriana Romero - Dataset loaders: a python library to load and preprocess datasets. 2017
230 |
231 | Papers related to Theano/Lasagne:
232 |
233 | * `[pdf] `_ Theano Development Team. Theano: A Python framework for fast computation of mathematical expressions. May 2016.
234 | * `[website] `__ Sander Dieleman, Jan Schluter, Colin Raffel, Eben Olson, Søren Kaae Sønderby, Daniel Nouri, Daniel Maturana, Martin Thoma, Eric Battenberg, Jack Kelly, Jeffrey De Fauw, Michael Heilman, diogo149, Brian McFee, Hendrik Weideman, takacsg84, peterderivaz, Jon, instagibbs, Dr. Kashif Rasul, CongLiu, Britefury, and Jonas Degrave, “Lasagne: First release.” (2015).
235 |
236 |
237 | Acknowledgements
238 | ================
239 |
240 | This work was done in collaboration with Konrad Wagstyl, PhD student, University of Cambridge.
241 | We would like to thank Professor Alan Evans' `[MCIN lab] `_ and Professor Katrin Amunts' `[INM-1 lab] `_.
242 |
243 | Thank you!
244 |
--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # theano documentation build configuration file, created by
4 | # sphinx-quickstart on Tue Oct 7 16:34:06 2008.
5 | #
6 | # This file is execfile()d with the current directory set to its containing dir.
7 | #
8 | # The contents of this file are pickled, so don't put values in the namespace
9 | # that aren't pickleable (module imports are okay, they're removed automatically).
10 | #
11 | # All configuration values have a default value; values that are commented out
12 | # serve to show the default value.
13 | import sys, os
14 |
15 | # If your extensions are in another directory, add it here. If the directory
16 | # is relative to the documentation root, use os.path.abspath to make it
17 | # absolute, like shown here.
18 | #sys.path.append(os.path.abspath('some/directory'))
19 |
20 | # General configuration
21 | # ---------------------
22 |
23 | # Add any Sphinx extension module names here, as strings. They can be extensions
24 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
25 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo']
26 |
27 | try:
28 | from sphinx.ext import imgmath
29 | extensions.append('sphinx.ext.imgmath')
30 | except ImportError:
31 | try:
32 | from sphinx.ext import pngmath
33 | extensions.append('sphinx.ext.pngmath')
34 | except ImportError:
35 | pass
36 |
37 | # Add any paths that contain templates here, relative to this directory.
38 | templates_path = ['.templates']
39 |
40 | # The suffix of source filenames.
41 | source_suffix = '.txt'
42 |
43 | # The master toctree document.
44 | master_doc = 'contents'
45 |
46 | # General substitutions.
47 | project = 'DeepLearning'
48 | copyright = '2008--2010, LISA lab'
49 |
50 | # The default replacements for |version| and |release|, also used in various
51 | # other places throughout the built documents.
52 | #
53 | # The short X.Y version.
54 | version = '0.1'
55 | # The full version, including alpha/beta/rc tags.
56 | release = '0.1'
57 |
58 | # There are two options for replacing |today|: either, you set today to some
59 | # non-false value, then it is used:
60 | #today = ''
61 | # Else, today_fmt is used as the format for a strftime call.
62 | today_fmt = '%B %d, %Y'
63 |
64 | # List of documents that shouldn't be included in the build.
65 | #unused_docs = []
66 |
67 | # List of directories, relative to source directories, that shouldn't be searched
68 | # for source files.
69 | exclude_dirs = ['scripts']
70 |
71 | # The reST default role (used for this markup: `text`) to use for all documents.
72 | #default_role = None
73 |
74 | # If true, '()' will be appended to :func: etc. cross-reference text.
75 | #add_function_parentheses = True
76 |
77 | # If true, the current module name will be prepended to all description
78 | # unit titles (such as .. function::).
79 | #add_module_names = True
80 |
81 | # If true, sectionauthor and moduleauthor directives will be shown in the
82 | # output. They are ignored by default.
83 | #show_authors = False
84 |
85 | # The name of the Pygments (syntax highlighting) style to use.
86 | pygments_style = 'sphinx'
87 |
88 |
89 | # Options for HTML output
90 | # -----------------------
91 |
92 | # The style sheet to use for HTML and HTML Help pages. A file of that name
93 | # must exist either in Sphinx' static/ path, or in one of the custom paths
94 | # given in html_static_path.
95 | #html_style = 'default.css'
96 | html_theme = 'sphinxdoc'
97 |
98 | # The name for this set of Sphinx documents. If None, it defaults to
99 | # " v documentation".
100 | #html_title = None
101 |
102 | # A shorter title for the navigation bar. Default is the same as html_title.
103 | #html_short_title = None
104 |
105 | # The name of an image file (within the static path) to place at the top of
106 | # the sidebar.
107 | #html_logo = None
108 |
109 | # The name of an image file (within the static path) to use as favicon of the
110 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
111 | # pixels large.
112 | #html_favicon = None
113 |
114 | # Add any paths that contain custom static files (such as style sheets) here,
115 | # relative to this directory. They are copied after the builtin static files,
116 | # so a file named "default.css" will overwrite the builtin "default.css".
117 | #html_static_path = ['.static', 'images']
118 | html_static_path = ['images']
119 |
120 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
121 | # using the given strftime format.
122 | html_last_updated_fmt = '%b %d, %Y'
123 |
124 | # If true, SmartyPants will be used to convert quotes and dashes to
125 | # typographically correct entities.
126 | html_use_smartypants = True
127 |
128 | # Custom sidebar templates, maps document names to template names.
129 | #html_sidebars = {}
130 |
131 | # Additional templates that should be rendered to pages, maps page names to
132 | # template names.
133 | #html_additional_pages = {}
134 |
135 | # If false, no module index is generated.
136 | html_use_modindex = True
137 |
138 | # If false, no index is generated.
139 | html_use_index = True
140 |
141 | # If true, the index is split into individual pages for each letter.
142 | #html_split_index = False
143 |
144 | # If true, the reST sources are included in the HTML build as _sources/.
145 | #html_copy_source = True
146 |
147 | # If true, an OpenSearch description file will be output, and all pages will
148 | # contain a tag referring to it. The value of this option must be the
149 | # base URL from which the finished HTML is served.
150 | #html_use_opensearch = ''
151 |
152 | # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
153 | #html_file_suffix = ''
154 |
155 | # Output file base name for HTML help builder.
156 | htmlhelp_basename = 'deeplearningdoc'
157 |
158 |
159 | # Options for LaTeX output
160 | # ------------------------
161 | latex_elements = {
162 | # The paper size ('letter' or 'a4').
163 | #latex_paper_size = 'letter',
164 |
165 | # The font size ('10pt', '11pt' or '12pt').
166 | 'pointsize': '11pt',
167 |
168 | # Additional stuff for the LaTeX preamble.
169 | #latex_preamble = '',
170 | }
171 |
172 | # Grouping the document tree into LaTeX files. List of tuples
173 | # (source start file, target name, title, author, document class [howto/manual]).
174 | latex_documents = [
175 | ('contents', 'deeplearning.tex', 'Deep Learning Tutorial',
176 | 'LISA lab, University of Montreal', 'manual'),
177 | ]
178 |
179 | # The name of an image file (relative to this directory) to place at the top of
180 | # the title page.
181 | latex_logo = None
182 |
183 | # For "manual" documents, if this is true, then toplevel headings are parts,
184 | # not chapters.
185 | #latex_use_parts = False
186 |
187 | # Additional stuff for the LaTeX preamble.
188 | #latex_preamble = ''
189 |
190 | # Documents to append as an appendix to all manuals.
191 | #latex_appendices = []
192 |
193 | # If false, no module index is generated.
194 | #latex_use_modindex = True
195 |
196 | default_role = 'math'
197 | pngmath_dvipng_args = ['-gamma', '1.5', '-D', '110']
198 | pngmath_latex_preamble = '\\usepackage{amsmath}\n'+\
199 | '\\usepackage{amsfonts}\n'+\
200 | '\\usepackage{amssymb}\n'+\
201 | '\\def\\E{\\mathbf{E}}\n'+\
202 | '\\def\\F{\\mathbf{F}}\n'+\
203 | '\\def\\x{\\mathbf{x}}\n'+\
204 | '\\def\\h{\\mathbf{h}}\n'+\
205 | '\\def\\v{\\mathbf{v}}\n'+\
206 | '\\def\\nv{\\mathbf{v^{{\\bf -}}}}\n'+\
207 | '\\def\\nh{\\mathbf{h^{{\\bf -}}}}\n'+\
208 | '\\def\\s{\\mathbf{s}}\n'+\
209 | '\\def\\b{\\mathbf{b}}\n'+\
210 | '\\def\\c{\\mathbf{c}}\n'+\
211 | '\\def\\W{\\mathbf{W}}\n'+\
212 | '\\def\\C{\\mathbf{C}}\n'+\
213 | '\\def\\P{\\mathbf{P}}\n'+\
214 | '\\def\\T{{\\bf \\mathcal T}}\n'+\
215 | '\\def\\B{{\\bf \\mathcal B}}\n'
216 |
--------------------------------------------------------------------------------
/doc/contents.txt:
--------------------------------------------------------------------------------
1 |
2 | .. _contents:
3 |
4 | ========
5 | Contents
6 | ========
7 |
8 | .. toctree::
9 | :maxdepth: 2
10 |
11 | LICENSE
12 | index
13 | gettingstarted
14 | logreg
15 | mlp
16 | lenet
17 | dA
18 | SdA
19 | rbm
20 | DBN
21 | hmc
22 | rnnslu
23 | lstm
24 | rnnrbm
25 | utilities
26 | references
27 | fcn_2D_segm
28 | cnn_1D_segm
29 | unet
30 |
--------------------------------------------------------------------------------
/doc/fcn_2D_segm.txt:
--------------------------------------------------------------------------------
1 | .. _fcn_2D_segm:
2 |
3 | Fully Convolutional Networks (FCN) for 2D segmentation
4 | ******************************************************
5 |
6 | .. note::
7 | This section assumes the reader has already read through :doc:`lenet` for
8 | convolutional networks motivation.
9 |
10 | Summary
11 | +++++++
12 |
13 | The segmentation task differs from the classification task because it requires predicting
14 | a class for each pixel of the input image, instead of a single class for the whole input.
15 | Classification needs to understand *what* is in the input (namely, the context). However,
16 | in order to predict a class for each pixel, segmentation needs to recover
17 | not only *what* is in the input, but also *where*.
18 |
19 | .. figure:: images/cat_segmentation.png
20 | :align: center
21 | :scale: 35%
22 |
23 | **Figure 1** : Segmentation network (from FCN paper)
24 |
25 | **Fully Convolutional Networks** (FCNs) owe their name to their architecture, which is
26 | built only from locally connected layers, such as convolution, pooling and upsampling.
27 | Note that no dense layer is used in this kind of architecture. This reduces the number
28 | of parameters and computation time. Also, the network can work regardless of the original
29 | image size, without requiring any fixed number of units at any stage, given that all
30 | connections are local. To obtain a segmentation map (output), segmentation
31 | networks usually have 2 parts:
32 |
33 | * Downsampling path : capture semantic/contextual information
34 | * Upsampling path : recover spatial information
35 |
36 | The **downsampling path** is used to extract and interpret the context (*what*), while the
37 | **upsampling path** is used to enable precise localization (*where*). Furthermore, to fully
38 | recover the fine-grained spatial information lost in the pooling or downsampling layers, we
39 | often use skip connections.
40 |
41 | A skip connection is a connection that bypasses at least one layer. Here, it
42 | is often used to transfer local information by concatenating or summing feature
43 | maps from the downsampling path with feature maps from the upsampling path. Merging features
44 | from various resolution levels helps combine contextual information with spatial information.
45 |
46 |
47 | Data
48 | ++++
49 |
50 | The polyps dataset can be found `here `__.
51 | There is a total of 912 images taken from 36 patients.
52 |
53 | * Training set : 20 patients and 547 frames
54 | * Validation set : 8 patients and 183 frames
55 | * Test set : 8 patients and 182 frames
56 |
57 | Each pixel is labelled with one of 2 classes: polyp or background.
58 | The sizes of the images vary. We use data augmentation for training, as specified
59 | in the default arguments in the code given below. Note that
60 | data augmentation is necessary for training with a batch size greater than 1,
61 | since random cropping gives all images in a batch the same size. Without random cropping,
62 | the batch size for the training set must be set to 1, as for the validation and test
63 | sets (where there is no data augmentation).
64 |
65 |
66 | In each of the training, validation and test directory, the input images are in the
67 | ``/images`` directory and the polyps masks (segmentation maps) are in ``/masks2``. The
68 | segmentation maps in the ``/masks2`` directory indicate the presence or absence
69 | of polyps for each pixel. The other subdirectories (``/masks3`` and ``/masks4``) are,
70 | respectively, for a segmentation task with 3 and 4 classes, but will not be
71 | presented here.
72 |
73 |
74 | Model
75 | +++++
76 |
77 | There are variants of the FCN architecture, which mainly differ in the spatial precision of
78 | their output. For example, the figures below show the FCN-32, FCN-16 and FCN-8 variants. In the
79 | figures, convolutional layers are represented as vertical lines between pooling layers, which
80 | explicitly show the relative size of the feature maps.
81 |
82 | .. figure:: images/fcn.png
83 | :align: center
84 | :scale: 50%
85 |
86 | **Figure 2** : FCN architecture (from FCN paper)
87 |
88 | **Difference between the 3 FCN variants**
89 |
90 | As shown below, these 3 different architectures differ in the stride of the last convolution,
91 | and the skip connections used to obtain the output segmentation maps. We will use the term
92 | *downsampling path* to refer to the network up to *conv7* layer and we will use the term
93 | *upsampling path* to refer to the network composed of all layers after *conv7*. It is worth
94 | noting that the 3 FCN architectures share the same downsampling path, but differ in their
95 | respective upsampling paths.
96 |
97 |
98 | 1. **FCN-32** : Directly produces the segmentation map from *conv7*, by using a
99 | transposed convolution layer with stride 32.
100 |
101 | 2. **FCN-16** : Sums the 2x upsampled prediction from *conv7*
102 | (using a transposed convolution with stride 2) with *pool4* and then
103 | produces the segmentation map, by using a transposed convolution layer with stride 16
104 | on top of that.
105 |
106 | 3. **FCN-8** : Sums the 2x upsampled *conv7* (with a stride 2 transposed convolution)
107 |    with *pool4*, upsamples them with a stride 2 transposed convolution and sums them
108 |    with *pool3*, and applies a transposed convolution layer with stride 8 on the resulting
109 |    feature maps to obtain the segmentation map (see the sketch after this list).
110 |
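A minimal Lasagne sketch of the FCN-8 upsampling path described in point 3
(stand-in input layers replace the real downsampling path; all names and
shapes here are illustrative, the tutorial's actual definition being in
``fcn8.py``):

.. code-block:: python

    from lasagne.layers import (InputLayer, TransposedConv2DLayer,
                                ElemwiseSumLayer)

    n_classes = 2
    # stand-ins for the 1x1-convolved class scores of pool3, pool4 and conv7
    score_pool3 = InputLayer((None, n_classes, 28, 28))
    score_pool4 = InputLayer((None, n_classes, 14, 14))
    score_conv7 = InputLayer((None, n_classes, 7, 7))

    up2 = TransposedConv2DLayer(score_conv7, n_classes,
                                filter_size=2, stride=2)  # 2x upsample conv7
    fuse4 = ElemwiseSumLayer([up2, score_pool4])          # skip from pool4
    up4 = TransposedConv2DLayer(fuse4, n_classes,
                                filter_size=2, stride=2)  # 2x upsample again
    fuse3 = ElemwiseSumLayer([up4, score_pool3])          # skip from pool3
    segmap = TransposedConv2DLayer(fuse3, n_classes,
                                   filter_size=8, stride=8)  # full resolution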
111 |
112 | .. figure:: images/fcn_schema.png
113 | :align: center
114 | :scale: 65%
115 |
116 | **Figure 3** : FCN architecture (from FCN paper)
117 |
118 | As explained above, the upsampling paths of the FCN variants are different, since they
119 | use different skip connection layers and strides for the last convolution, yielding
120 | different segmentations, as shown in Figure 4. Combining layers that have different
121 | precision helps retrieve fine-grained spatial information, as well as coarse
122 | contextual information.
123 |
124 | .. figure:: images/fcn32_16_8.png
125 | :align: center
126 | :scale: 30%
127 |
128 | **Figure 4** : FCN results (from FCN paper)
129 |
130 | Note that the FCN-8 architecture was used on the polyps dataset below,
131 | since it produces more precise segmentation maps.
132 |
133 |
134 | Metrics
135 | =======
136 |
137 | **Per pixel accuracy**
138 |
139 | This metric is self-explanatory, since it outputs the class prediction accuracy
140 | per pixel.
141 |
142 | .. math::
143 |    :label: accuracy
144 |
145 | acc(P, GT) = \frac{|\text{pixels correctly predicted}|}{|\text{total nb of pixels}|}
146 |
147 |
148 | **Jaccard (Intersection over Union)**
149 |
150 | This evaluation metric is often used for image segmentation, since it directly measures
151 | the overlap between predicted and ground truth regions. The Jaccard index is a
152 | per-class evaluation metric, which computes the number of pixels in the intersection between the
153 | predicted and ground truth segmentation maps for a given class, divided by the
154 | number of pixels in the union between those two segmentation maps,
155 | also for that given class.
156 |
157 | .. math::
158 | :label: jaccard_equation
159 |
160 | jacc(P(class), GT(class)) = \frac{|P(class)\cap GT(class)|}{|P(class)\cup GT(class)|}
161 |
162 | where `P` is the predicted segmentation map and `GT` is the ground
163 | truth segmentation map. `P(class)` is then the binary mask indicating if each
164 | pixel is predicted as *class* or not. In general, the closer to 1, the better.
165 |
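A direct numpy transcription of this formula (a minimal sketch, not the
tutorial's evaluation code):

.. code-block:: python

    import numpy as np

    def jaccard(pred, gt, n_classes):
        """Per-class Jaccard index between two integer label maps."""
        scores = []
        for c in range(n_classes):
            p, g = (pred == c), (gt == c)
            union = np.logical_or(p, g).sum()
            inter = np.logical_and(p, g).sum()
            scores.append(inter / float(union) if union else float('nan'))
        return scores
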
166 | .. figure:: images/jaccard.png
167 | :align: center
168 | :scale: 40%
169 |
170 | **Figure 5** : Jaccard visualisation (from this `website `__)
171 |
172 | Code
173 | ++++
174 |
175 | .. warning::
176 |
177 | * Current code works with Python 2 only.
178 | * If you use Theano with GPU backend (e.g. with Theano flag ``device=cuda``),
179 | you will need at least 12GB free in your video RAM.
180 |
181 | The FCN-8 implementation can be found in the following files:
182 |
183 | * `fcn8.py <../code/fcn_2D_segm/fcn8.py>`_ : Defines the model.
184 | * `train_fcn8.py <../code/fcn_2D_segm/train_fcn8.py>`_ : Training loop (main script to use).
185 |
186 |
187 | The user must install `Lasagne `_,
188 | and clone the GitHub repo `Dataset Loaders `_.
189 |
190 | .. code-block:: bash
191 |
192 | ## Installation of dataset_loaders.
193 |
194 | # dataset_loaders depends on Python modules matplotlib, numpy, scipy, Pillow, scikit-image, seaborn, and h5py.
195 | # They can all be installed via conda.
196 | conda install matplotlib numpy Pillow scipy scikit-image seaborn h5py
197 |
198 | git clone https://github.com/fvisin/dataset_loaders.git
199 |
200 | cd dataset_loaders/
201 |
202 | pip install -e .
203 |
204 |
205 | Change the ``dataset_loaders/config.ini`` file and add the right path for the dataset:
206 |
207 | .. code-block:: bash
208 |
209 | ## Into `dataset_loaders` git folder.
210 |
211 | # If ``config.ini`` does not yet exist, create it:
212 | cd dataset_loaders
213 | touch config.ini
214 |
215 | # ``config.ini`` must have at least the section ``[general]`` which indicates a work directory.
216 |
217 | .. code-block:: cfg
218 |
219 | [general]
220 | datasets_local_path = /the/local/path/where/the/datasets/will/be/copied
221 |
222 | [polyps912]
223 | shared_path = /path/to/DeepLearningTutorials/data/polyps_split7/
224 |
225 | The folder indicated in the ``[polyps912]`` section should be the unzipped dataset archive ``polyps_split7.zip``, with sub-folders:
226 |
227 | * ``test``
228 | * ``train``
229 | * ``valid``
230 |
231 | We used Lasagne layers, as you can see in the code below.
232 |
233 | .. literalinclude:: ../code/fcn_2D_segm/fcn8.py
234 | :start-after: start-snippet-1
235 | :end-before: end-snippet-1
236 |
237 | Running ``train_fcn8.py`` on a Titan X took around 3.5 hours, ending with the following output:
238 |
239 | .. code-block:: text
240 |
241 | $ THEANO_FLAGS=device=cuda0,floatX=float32,dnn.conv.algo_fwd=time_on_shape_change,dnn.conv.algo_bwd_filter=time_on_shape_change,dnn.conv.algo_bwd_data=time_on_shape_change python train_fcn8.py
242 | [...]
243 | EPOCH 221: Avg epoch training cost train 0.031036, cost val 0.313757, acc val 0.954686, jacc val class 0 0.952469, jacc val class 1 0.335233, jacc val 0.643851 took 56.401966 s
244 | FINAL MODEL: err test 0.473100, acc test 0.924871, jacc test class 0 0.941239, jacc test class 1 0.426777, jacc test 0.684008
245 |
246 | There is some variability in the training process. Another run of the same command gave the following after 6.5 hours:
247 |
248 | .. code-block:: text
249 |
250 | EPOCH 344: Avg epoch training cost train 0.089571, cost val 0.272069, acc val 0.923673, jacc val class 0 0.926739, jacc val class 1 0.204083, jacc val 0.565411 took 56.540339 s
251 | FINAL MODEL: err test 0.541459, acc test 0.846444, jacc test class 0 0.875290, jacc test class 1 0.186454, jacc test 0.530872
252 |
253 |
254 | References
255 | ++++++++++
256 |
257 | If you use this tutorial, please cite the following papers.
258 |
259 | * `[pdf] `__ Long, J., Shelhamer, E., Darrell, T. Fully Convolutional Networks for Semantic Segmentation. 2014.
260 | * `[pdf] `__ David Vázquez, Jorge Bernal, F. Javier Sánchez, Gloria Fernández-Esparrach, Antonio M. López, Adriana Romero, Michal Drozdzal, Aaron Courville. A Benchmark for Endoluminal Scene Segmentation of Colonoscopy Images. (2016).
261 | * `[GitHub Repo] `__ Francesco Visin, Adriana Romero - Dataset loaders: a python library to load and preprocess datasets. 2017.
262 |
263 | Papers related to Theano/Lasagne:
264 |
265 | * `[pdf] `__ Theano Development Team. Theano: A Python framework for fast computation of mathematical expressions. May 2016.
266 | * `[website] `__ Sander Dieleman, Jan Schluter, Colin Raffel, Eben Olson, Søren Kaae Sønderby, Daniel Nouri, Daniel Maturana, Martin Thoma, Eric Battenberg, Jack Kelly, Jeffrey De Fauw, Michael Heilman, diogo149, Brian McFee, Hendrik Weideman, takacsg84, peterderivaz, Jon, instagibbs, Dr. Kashif Rasul, CongLiu, Britefury, and Jonas Degrave, “Lasagne: First release.” (2015).
267 |
268 |
269 | Thank you!
270 |
271 |
272 |
--------------------------------------------------------------------------------
/doc/images/3wolfmoon.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/3wolfmoon.jpg
--------------------------------------------------------------------------------
/doc/images/3wolfmoon_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/3wolfmoon_output.png
--------------------------------------------------------------------------------
/doc/images/DBN3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/DBN3.png
--------------------------------------------------------------------------------
/doc/images/big_brain.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/big_brain.png
--------------------------------------------------------------------------------
/doc/images/big_brain_section.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/big_brain_section.png
--------------------------------------------------------------------------------
/doc/images/bm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/bm.png
--------------------------------------------------------------------------------
/doc/images/cat_segmentation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/cat_segmentation.png
--------------------------------------------------------------------------------
/doc/images/cnn_explained.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/cnn_explained.png
--------------------------------------------------------------------------------
/doc/images/conv_1D_nn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/conv_1D_nn.png
--------------------------------------------------------------------------------
/doc/images/cortical_layers_net.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/cortical_layers_net.png
--------------------------------------------------------------------------------
/doc/images/cortical_ray_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/cortical_ray_result.png
--------------------------------------------------------------------------------
/doc/images/cortical_valid1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/cortical_valid1.png
--------------------------------------------------------------------------------
/doc/images/cortical_valid2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/cortical_valid2.png
--------------------------------------------------------------------------------
/doc/images/cortical_valid3_v1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/cortical_valid3_v1.png
--------------------------------------------------------------------------------
/doc/images/cortical_valid4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/cortical_valid4.png
--------------------------------------------------------------------------------
/doc/images/fcn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/fcn.png
--------------------------------------------------------------------------------
/doc/images/fcn32_16_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/fcn32_16_8.png
--------------------------------------------------------------------------------
/doc/images/fcn_schema.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/fcn_schema.png
--------------------------------------------------------------------------------
/doc/images/filters_at_epoch_14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/filters_at_epoch_14.png
--------------------------------------------------------------------------------
/doc/images/filters_corruption_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/filters_corruption_0.png
--------------------------------------------------------------------------------
/doc/images/filters_corruption_30.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/filters_corruption_30.png
--------------------------------------------------------------------------------
/doc/images/jaccard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/jaccard.png
--------------------------------------------------------------------------------
/doc/images/labels.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/labels.png
--------------------------------------------------------------------------------
/doc/images/lstm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/lstm.png
--------------------------------------------------------------------------------
/doc/images/lstm_memorycell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/lstm_memorycell.png
--------------------------------------------------------------------------------
/doc/images/markov_chain.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/markov_chain.png
--------------------------------------------------------------------------------
/doc/images/mlp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/mlp.png
--------------------------------------------------------------------------------
/doc/images/mnist_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/mnist_0.png
--------------------------------------------------------------------------------
/doc/images/mnist_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/mnist_1.png
--------------------------------------------------------------------------------
/doc/images/mnist_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/mnist_2.png
--------------------------------------------------------------------------------
/doc/images/mnist_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/mnist_3.png
--------------------------------------------------------------------------------
/doc/images/mnist_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/mnist_4.png
--------------------------------------------------------------------------------
/doc/images/mnist_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/mnist_5.png
--------------------------------------------------------------------------------
/doc/images/mylenet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/mylenet.png
--------------------------------------------------------------------------------
/doc/images/polyps_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/polyps_results.png
--------------------------------------------------------------------------------
/doc/images/raw_smooth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/raw_smooth.png
--------------------------------------------------------------------------------
/doc/images/ray.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/ray.png
--------------------------------------------------------------------------------
/doc/images/rbm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/rbm.png
--------------------------------------------------------------------------------
/doc/images/rnnrbm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/rnnrbm.png
--------------------------------------------------------------------------------
/doc/images/sample1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/sample1.png
--------------------------------------------------------------------------------
/doc/images/sample2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/sample2.png
--------------------------------------------------------------------------------
/doc/images/samples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/samples.png
--------------------------------------------------------------------------------
/doc/images/sparse_1D_nn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/sparse_1D_nn.png
--------------------------------------------------------------------------------
/doc/images/unet.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/DeepLearningTutorials/11c465105026cf87573937fc2d35ab7543678698/doc/images/unet.jpg
--------------------------------------------------------------------------------
/doc/index.txt:
--------------------------------------------------------------------------------
1 | =======================
2 | Deep Learning Tutorials
3 | =======================
4 |
5 | Deep Learning is a new area of Machine Learning research, which
6 | has been introduced with the objective of moving Machine Learning
7 | closer to one of its original goals: Artificial Intelligence.
8 | See these course notes for a `brief introduction to Machine Learning for AI `_
9 | and an `introduction to Deep Learning algorithms `_.
10 |
11 | Deep Learning is about learning multiple levels of representation
12 | and abstraction that help to
13 | make sense of data such as images, sound, and text.
14 | For more about deep learning algorithms, see for example:
15 |
16 | - The monograph or review paper `Learning Deep Architectures for AI `_ (Foundations & Trends in Machine Learning, 2009).
17 | - The ICML 2009 Workshop on Learning Feature Hierarchies `webpage `_ has a `list of references `_.
18 | - The LISA `public wiki `_ has a `reading list `_ and a `bibliography `_.
19 | - Geoff Hinton has `readings `_ from 2009's `NIPS tutorial `_.
20 |
21 | The tutorials presented here will introduce you to some of the most important deep learning
22 | algorithms and will also show you how to run them using Theano_. Theano is a Python library that makes writing deep learning models easy, and gives the option of
23 | training them on a GPU.
24 |
25 | The algorithm tutorials have some prerequisites. You should know some python,
26 | and be familiar with numpy. Since this tutorial is about using Theano, you
27 | should read over the `Theano basic tutorial`_ first. Once you've done that,
28 | read through our :ref:`gettingstarted` chapter -- it introduces the notation, and downloadable datasets used in the algorithm tutorials, and the way we do optimization by stochastic gradient descent.
29 |
30 | The code is available on the `Deep Learning Tutorial repositories `_.
31 |
32 | The purely supervised learning algorithms are meant to be read in order:
33 |
34 | #. :ref:`Logistic Regression ` - using Theano for something simple
35 | #. :ref:`Multilayer perceptron ` - introduction to layers
36 | #. :ref:`Deep Convolutional Network ` - a simplified version of LeNet5
37 |
38 | The unsupervised and semi-supervised learning algorithms can be read in any
39 | order (the auto-encoders can be read independently of the RBM/DBN thread):
40 |
41 | * :ref:`Auto Encoders, Denoising Autoencoders ` - description of autoencoders
42 | * :ref:`Stacked Denoising Auto-Encoders ` - easy steps into unsupervised pre-training for deep nets
43 | * :ref:`Restricted Boltzmann Machines ` - single layer generative RBM model
44 | * :ref:`Deep Belief Networks ` - unsupervised generative pre-training of stacked RBMs followed by supervised fine-tuning
45 |
46 | Building towards including the mcRBM model, we have a new tutorial on sampling
47 | from energy models:
48 |
49 | * :ref:`HMC Sampling ` - hybrid (aka Hamiltonian) Monte-Carlo sampling with scan()
50 |
51 | Building towards including the Contractive auto-encoders tutorial, we have the code for now:
52 | * `Contractive auto-encoders`_ code - There is some basic doc in the code.
53 |
54 | Recurrent neural networks with word embeddings and context window:
55 | * :ref:`Semantic Parsing of Speech using Recurrent Net `
56 |
57 | LSTM network for sentiment analysis:
58 | * :ref:`LSTM network `
59 |
60 | Energy-based recurrent neural network (RNN-RBM):
61 | * :ref:`Modeling and generating sequences of polyphonic music `
62 |
63 | Segmentation for medical imagery (meant to be read in order):
64 | * :ref:`Fully Convolutional Networks (FCN) for 2D segmentation `
65 | * :ref:`U-Net `
66 | * :ref:`1D segmentation `
67 |
68 |
69 | .. _Theano: http://deeplearning.net/software/theano
70 |
71 | .. _Theano basic tutorial: http://deeplearning.net/software/theano/tutorial
72 |
73 | .. _Contractive auto-encoders: https://github.com/lisa-lab/DeepLearningTutorials/blob/master/code/cA.py
74 |
75 |
76 |
77 |
78 | Note that the tutorials here are all compatible with Python 2 and 3,
79 | with the exception of :ref:`rnnrbm`, which is only available for Python 2, as are
80 | the medical imagery segmentation tutorials.
81 |
82 | If you work with ``conda``, `these command-line guidelines <../code/guidelines_segm_tutos_with_conda.sh>`__
83 | may also help you run segmentation tutorials.
84 |
85 |
86 |
--------------------------------------------------------------------------------
/doc/logreg.txt:
--------------------------------------------------------------------------------
1 | .. index:: Logistic Regression
2 |
3 | .. _logreg :
4 |
5 |
6 | Classifying MNIST digits using Logistic Regression
7 | ==================================================
8 |
9 | .. note::
10 |    This section assumes familiarity with the following Theano
11 | concepts: `shared variables`_ , `basic arithmetic ops`_ , `T.grad`_ ,
12 | `floatX`_. If you intend to run the code on GPU also read `GPU`_.
13 |
14 | .. note::
15 | The code for this section is available for download `here`_.
16 |
17 | .. _here: http://deeplearning.net/tutorial/code/logistic_sgd.py
18 |
19 | .. _shared variables: http://deeplearning.net/software/theano/tutorial/examples.html#using-shared-variables
20 |
21 | .. _basic arithmetic ops: http://deeplearning.net/software/theano/tutorial/adding.html#adding-two-scalars
22 |
23 | .. _T.grad: http://deeplearning.net/software/theano/tutorial/examples.html#computing-gradients
24 |
25 | .. _floatX: http://deeplearning.net/software/theano/library/config.html#config.floatX
26 |
27 | .. _GPU: http://deeplearning.net/software/theano/tutorial/using_gpu.html
28 |
29 | In this section, we show how Theano can be used to implement the most basic
30 | classifier: logistic regression. We start off with a quick primer on the
31 | model, which serves both as a refresher and as a way to anchor the notation and
32 | show how mathematical expressions are mapped onto Theano graphs.
33 |
34 | In the deepest of machine learning traditions, this tutorial will tackle the exciting
35 | problem of MNIST digit classification.
36 |
37 | The Model
38 | +++++++++
39 |
40 | Logistic regression is a probabilistic, linear classifier. It is parametrized
41 | by a weight matrix :math:`W` and a bias vector :math:`b`. Classification is
42 | done by projecting an input vector onto a set of hyperplanes, each of which
43 | corresponds to a class. The distance from the input to a hyperplane reflects
44 | the probability that the input is a member of the corresponding class.
45 |
46 | Mathematically, the probability that an input vector :math:`x` is a member of a
47 | class :math:`i`, a value of a stochastic variable :math:`Y`, can be written as:
48 |
49 | .. math::
50 | P(Y=i|x, W,b) &= softmax_i(W x + b) \\
51 | &= \frac {e^{W_i x + b_i}} {\sum_j e^{W_j x + b_j}}
52 |
53 | The model's prediction :math:`y_{pred}` is the class whose probability is maximal, specifically:
54 |
55 | .. math::
56 | y_{pred} = {\rm argmax}_i P(Y=i|x,W,b)
57 |
58 | The code to do this in Theano is the following:
59 |
60 | .. literalinclude:: ../code/logistic_sgd.py
61 | :start-after: start-snippet-1
62 | :end-before: end-snippet-1
63 |
64 | Since the parameters of the model must maintain a persistent state throughout
65 | training, we allocate shared variables for :math:`W,b`. This declares them both
66 | as being symbolic Theano variables, but also initializes their contents. The
67 | dot and softmax operators are then used to compute the vector :math:`P(Y|x,
68 | W,b)`. The result ``p_y_given_x`` is a symbolic variable of vector-type.
69 |
70 | To get the actual model prediction, we can use the ``T.argmax`` operator, which
71 | will return the index at which ``p_y_given_x`` is maximal (i.e. the class with
72 | maximum probability).
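
In code, these two steps amount to the following (this mirrors what the
``LogisticRegression`` class shown below does internally):

.. code-block:: python

    p_y_given_x = T.nnet.softmax(T.dot(x, W) + b)
    y_pred = T.argmax(p_y_given_x, axis=1)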
73 |
74 | Now of course, the model we have defined so far does not do anything useful
75 | yet, since its parameters are still in their initial state. The following
76 | section will thus cover how to learn the optimal parameters.
77 |
78 |
79 | .. note::
80 | For a complete list of Theano ops, see: `list of ops `_
81 |
82 |
83 | Defining a Loss Function
84 | ++++++++++++++++++++++++
85 |
86 | Learning optimal model parameters involves minimizing a loss function. In the
87 | case of multi-class logistic regression, it is very common to use the negative
88 | log-likelihood as the loss. This is equivalent to maximizing the likelihood of the
89 | data set :math:`\cal{D}` under the model parameterized by :math:`\theta`. Let
90 | us first start by defining the likelihood :math:`\cal{L}` and loss
91 | :math:`\ell`:
92 |
93 | .. math::
94 |
95 | \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
96 | \sum_{i=0}^{|\mathcal{D}|} \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
97 | \ell (\theta=\{W,b\}, \mathcal{D}) = - \mathcal{L} (\theta=\{W,b\}, \mathcal{D})
98 |
99 | While entire books are dedicated to the topic of minimization, gradient
100 | descent is by far the simplest method for minimizing arbitrary non-linear
101 | functions. This tutorial will use the method of stochastic gradient descent with
102 | mini-batches (MSGD). See :ref:`opt_SGD` for more details.
103 |
104 | The following Theano code defines the (symbolic) loss for a given minibatch:
105 |
106 | .. literalinclude:: ../code/logistic_sgd.py
107 | :start-after: start-snippet-2
108 | :end-before: end-snippet-2
109 |
110 | .. note::
111 |
112 |     Even though the loss is formally defined as the *sum*, over the data set,
113 |     of individual error terms, in practice, we use the *mean* (``T.mean``)
114 |     in the code. This allows for the learning rate choice to be less dependent
115 |     on the minibatch size.
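
For reference, the mean negative log-likelihood described in the note can be
written in one line (this matches the expression used in the tutorial's
``negative_log_likelihood`` method):

.. code-block:: python

    # mean NLL of the targets ``y`` under ``p_y_given_x``, over the minibatch
    nll = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])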
116 |
117 |
118 | Creating a LogisticRegression class
119 | +++++++++++++++++++++++++++++++++++
120 |
121 | We now have all the tools we need to define a ``LogisticRegression`` class, which
122 | encapsulates the basic behaviour of logistic regression. The code is very
123 | similar to what we have covered so far, and should be self explanatory.
124 |
125 | .. literalinclude:: ../code/logistic_sgd.py
126 | :pyobject: LogisticRegression
127 |
128 | We instantiate this class as follows:
129 |
130 | .. literalinclude:: ../code/logistic_sgd.py
131 | :start-after: index = T.lscalar()
132 | :end-before: # the cost we minimize during
133 |
134 | We start by allocating symbolic variables for the training inputs :math:`x` and
135 | their corresponding classes :math:`y`. Note that ``x`` and ``y`` are defined
136 | outside the scope of the ``LogisticRegression`` object. Since the class
137 | requires the input to build its graph, it is passed as a parameter of the
138 | ``__init__`` function. This is useful in case you want to connect instances of
139 | such classes to form a deep network. The output of one layer can be passed as
140 | the input of the layer above. (This tutorial does not build a multi-layer
141 | network, but this code will be reused in future tutorials that do.)
142 |
143 | Finally, we define a (symbolic) ``cost`` variable to minimize, using the instance
144 | method ``classifier.negative_log_likelihood``.
145 |
146 | .. literalinclude:: ../code/logistic_sgd.py
147 | :start-after: classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)
148 | :end-before: # compiling a Theano function that computes the mistakes
149 |
150 | Note that ``x`` is an implicit symbolic input to the definition of ``cost``,
151 | because the symbolic variables of ``classifier`` were defined in terms of ``x``
152 | at initialization.
153 |
154 | Learning the Model
155 | ++++++++++++++++++
156 |
157 | To implement MSGD in most programming languages (C/C++, Matlab, Python), one
158 | would start by manually deriving the expressions for the gradient of the loss
159 | with respect to the parameters: in this case :math:`\partial{\ell}/\partial{W}`,
160 | and :math:`\partial{\ell}/\partial{b}`. This can get pretty tricky for complex
161 | models, as the expressions for :math:`\partial{\ell}/\partial{\theta}` can become
162 | quite involved, especially once problems of numerical stability are taken
163 | into account.
164 |
165 | With Theano, this work is greatly simplified. It performs
166 | automatic differentiation and applies certain math transforms to improve
167 | numerical stability.
168 |
169 | To get the gradients :math:`\partial{\ell}/\partial{W}` and
170 | :math:`\partial{\ell}/\partial{b}` in Theano, simply do the following:
171 |
172 | .. literalinclude:: ../code/logistic_sgd.py
173 | :start-after: # compute the gradient of cost
174 | :end-before: # start-snippet-3
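
In essence, the included snippet amounts to two calls to ``T.grad``:

.. code-block:: python

    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)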
175 |
176 | ``g_W`` and ``g_b`` are symbolic variables, which can be used as part
177 | of a computation graph. The function ``train_model``, which performs one step
178 | of gradient descent, can then be defined as follows:
179 |
180 | .. literalinclude:: ../code/logistic_sgd.py
181 | :start-after: start-snippet-3
182 | :end-before: end-snippet-3
183 |
184 | ``updates`` is a list of pairs. In each pair, the first element is the symbolic
185 | variable to be updated in the step, and the second element is the symbolic
186 | expression for computing its new value. Similarly, ``givens`` is a dictionary
187 | whose keys are symbolic variables and whose values specify
188 | their replacements during the step. The function ``train_model`` is then defined such
189 | that:
190 |
191 | * the input is the mini-batch index ``index`` that, together with the batch
192 | size (which is not an input since it is fixed), defines :math:`x` with
193 | corresponding labels :math:`y`
194 | * the return value is the cost/loss associated with the x, y defined by
195 | the ``index``
196 | * on every function call, it will first replace ``x`` and ``y`` with the slices
197 | from the training set specified by ``index``. Then, it will evaluate the cost
198 | associated with that minibatch and apply the operations defined by the
199 | ``updates`` list.
200 |
201 | Each time ``train_model(index)`` is called, it will thus compute and return the
202 | cost of a minibatch, while also performing a step of MSGD. The entire learning
203 | algorithm thus consists of looping over all examples in the dataset, considering
204 | all the examples in one minibatch at a time,
205 | and repeatedly calling the ``train_model`` function.
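
Schematically, the resulting training loop looks as follows (a sketch; the
actual script also wraps this loop with the early-stopping logic discussed
later):

.. code-block:: python

    for epoch in range(n_epochs):
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)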
206 |
207 |
208 | Testing the model
209 | +++++++++++++++++
210 |
211 | As explained in :ref:`opt_learn_classifier`, when testing the model we are
212 | interested in the number of misclassified examples (and not only in the likelihood).
213 | The ``LogisticRegression`` class therefore has an extra instance method, which
214 | builds the symbolic graph for retrieving the number of misclassified examples in
215 | each minibatch.
216 |
217 | The code is as follows:
218 |
219 | .. literalinclude:: ../code/logistic_sgd.py
220 | :pyobject: LogisticRegression.errors
221 |
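The core of that graph is a single comparison between the vector of predictions
and the vector of correct labels (a sketch, written outside the class for
brevity):

.. code-block:: python

    # T.neq returns 1 where prediction and label disagree, 0 elsewhere;
    # the mean over the minibatch is the error rate
    errors = T.mean(T.neq(y_pred, y))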
222 | We then create a function ``test_model`` and a function ``validate_model``,
223 | which we can call to retrieve this value. As you will see shortly,
224 | ``validate_model`` is key to our early-stopping implementation (see
225 | :ref:`opt_early_stopping`). These functions take a minibatch index and compute,
226 | for the examples in that minibatch, the number that were misclassified by the
227 | model. The only difference between them is that ``test_model`` draws its
228 | minibatches from the testing set, while ``validate_model`` draws its minibatches from the
229 | validation set.
230 |
231 | .. literalinclude:: ../code/logistic_sgd.py
232 | :start-after: cost = classifier.negative_log_likelihood(y)
233 | :end-before: # compute the gradient of cost
234 |
235 | Putting it All Together
236 | +++++++++++++++++++++++
237 |
238 | The finished product is as follows.
239 |
240 | .. literalinclude:: ../code/logistic_sgd.py
241 |
242 | The user can learn to classify MNIST digits with SGD logistic regression by typing
243 | the following from within the DeepLearningTutorials folder:
244 |
245 | .. code-block:: bash
246 |
247 | python code/logistic_sgd.py
248 |
249 | The output one should expect is of the form:
250 |
251 | .. code-block:: bash
252 |
253 | ...
254 | epoch 72, minibatch 83/83, validation error 7.510417 %
255 | epoch 72, minibatch 83/83, test error of best model 7.510417 %
256 | epoch 73, minibatch 83/83, validation error 7.500000 %
257 | epoch 73, minibatch 83/83, test error of best model 7.489583 %
258 | Optimization complete with best validation score of 7.500000 %,with test performance 7.489583 %
259 | The code run for 74 epochs, with 1.936983 epochs/sec
260 |
261 |
262 | On an Intel(R) Core(TM)2 Duo CPU E8400 @ 3.00 GHz the code runs at
263 | approximately 1.936 epochs/sec and it took 75 epochs to reach a test
264 | error of 7.489%. On the GPU the code achieves almost 10.0 epochs/sec. For this
265 | instance we used a batch size of 600.
266 |
267 |
268 | Prediction Using a Trained Model
269 | ++++++++++++++++++++++++++++++++
270 |
271 | ``sgd_optimization_mnist`` serializes and pickles the model each time a new
272 | lowest validation error is reached. We can reload this model and predict
273 | labels of new data. The ``predict`` function shows an example of how
274 | this could be done.
275 |
276 | .. literalinclude:: ../code/logistic_sgd.py
277 | :pyobject: predict
278 |
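The essential steps are unpickling the trained classifier and compiling a
Theano function from its symbolic input to its prediction (a sketch; the
tutorial script saves the best model as ``best_model.pkl``):

.. code-block:: python

    import pickle
    import theano

    classifier = pickle.load(open('best_model.pkl', 'rb'))
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)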
279 |
280 | .. rubric:: Footnotes
281 |
282 | .. [#f1] For smaller datasets and simpler models, more sophisticated descent
283 | algorithms can be more effective. The sample code
284 | `logistic_cg.py `_
285 | demonstrates how to use SciPy's conjugate gradient solver with Theano
286 | on the logistic regression task.
287 |
--------------------------------------------------------------------------------
/doc/lstm.txt:
--------------------------------------------------------------------------------
1 | .. _lstm:
2 |
3 | LSTM Networks for Sentiment Analysis
4 | **********************************************
5 |
6 | Summary
7 | +++++++
8 |
9 | This tutorial aims to provide an example of how a Recurrent Neural Network
10 | (RNN) using the Long Short Term Memory (LSTM) architecture can be implemented
11 | using Theano. In this tutorial, this model is used to perform sentiment
12 | analysis on movie reviews from the `Large Movie Review Dataset
13 | `_, sometimes known as the
14 | IMDB dataset.
15 |
16 | In this task, given a movie review, the model attempts to predict whether it
17 | is positive or negative. This is a binary classification task.
18 |
19 | Data
20 | ++++
21 |
22 | As previously mentioned, the provided scripts are used to train an LSTM
23 | recurrent neural network on the Large Movie Review Dataset.
24 |
25 | While the dataset is public, in this tutorial we provide a copy of the dataset
26 | that has previously been preprocessed according to the needs of this LSTM
27 | implementation. Running the code provided in this tutorial will automatically
28 | download the data to the local directory. In order to use your own data, please
29 | use the `preprocessing script
30 | `_
31 | provided as part of this tutorial.
32 |
33 | Once the model is trained, you can test it with your own corpus using the
34 | word-index dictionary
35 | (`imdb.dict.pkl.gz `_)
36 | provided as part of this tutorial.
37 |
38 | Model
39 | +++++
40 |
41 | LSTM
42 | ====
43 |
44 | In a *traditional* recurrent neural network, during the gradient
45 | back-propagation phase, the gradient signal can end up being multiplied a
46 | large number of times (as many as the number of timesteps) by the weight
47 | matrix associated with the connections between the neurons of the recurrent
48 | hidden layer. This means that the magnitude of the weights in the transition
49 | matrix can have a strong impact on the learning process.
50 |
51 | If the weights in this matrix are small (or, more formally, if the leading
52 | eigenvalue of the weight matrix is smaller than 1.0), it can lead to a
53 | situation called *vanishing gradients* where the gradient signal gets so small
54 | that learning either becomes very slow or stops working altogether. It can
55 | also make it more difficult to learn long-term dependencies in the
56 | data. Conversely, if the weights in this matrix are large (or, again, more
57 | formally, if the leading eigenvalue of the weight matrix is larger than 1.0),
58 | it can lead to a situation where the gradient signal is so large that it can
59 | cause learning to diverge. This is often referred to as *exploding gradients*.
60 |
61 | These issues are the main motivation behind the LSTM model which introduces a
62 | new structure called a *memory cell* (see Figure 1 below). A memory cell is
63 | composed of four main elements: an input gate, a neuron with a self-recurrent
64 | connection (a connection to itself), a forget gate and an output gate. The
65 | self-recurrent connection has a weight of 1.0 and ensures that, barring any
66 | outside interference, the state of a memory cell can remain constant from one
67 | timestep to another. The gates serve to modulate the interactions between the
68 | memory cell itself and its environment. The input gate can allow incoming
69 | signal to alter the state of the memory cell or block it. On the other hand,
70 | the output gate can allow the state of the memory cell to have an effect on
71 | other neurons or prevent it. Finally, the forget gate can modulate the memory
72 | cell’s self-recurrent connection, allowing the cell to remember or forget its
73 | previous state, as needed.
74 |
75 | .. figure:: images/lstm_memorycell.png
76 | :align: center
77 |
78 | **Figure 1**: Illustration of an LSTM memory cell.
79 |
80 | The equations below describe how a layer of memory cells is updated at every
81 | timestep :math:`t`. In these equations:
82 |
83 | * :math:`x_t` is the input to the memory cell layer at time :math:`t`
84 | * :math:`W_i`, :math:`W_f`, :math:`W_c`, :math:`W_o`, :math:`U_i`,
85 | :math:`U_f`, :math:`U_c`, :math:`U_o` and :math:`V_o` are weight
86 | matrices
87 | * :math:`b_i`, :math:`b_f`, :math:`b_c` and :math:`b_o` are bias vectors
88 |
89 |
90 | First, we compute the values for :math:`i_t`, the input gate, and
91 | :math:`\widetilde{C_t}`, the candidate value for the states of the memory
92 | cells at time :math:`t`:
93 |
94 | .. math::
95 | :label: 1
96 |
97 | i_t = \sigma(W_i x_t + U_i h_{t-1} + b_i)
98 |
99 | .. math::
100 | :label: 2
101 |
102 | \widetilde{C_t} = \tanh(W_c x_t + U_c h_{t-1} + b_c)
103 |
104 | Second, we compute the value for :math:`f_t`, the activation of the memory
105 | cells' forget gates at time :math:`t`:
106 |
107 | .. math::
108 | :label: 3
109 |
110 | f_t = \sigma(W_f x_t + U_f h_{t-1} + b_f)
111 |
112 | Given the value of the input gate activation :math:`i_t`, the forget gate
113 | activation :math:`f_t` and the candidate state value :math:`\widetilde{C_t}`,
114 | we can compute :math:`C_t` the memory cells' new state at time :math:`t`:
115 |
116 | .. math::
117 | :label: 4
118 |
119 | C_t = i_t * \widetilde{C_t} + f_t * C_{t-1}
120 |
121 | With the new state of the memory cells, we can compute the value of their
122 | output gates and, subsequently, their outputs:
123 |
124 | .. math::
125 | :label: 5
126 |
127 | o_t = \sigma(W_o x_t + U_o h_{t-1} + V_o C_t + b_o)
128 |
129 | .. math::
130 | :label: 6
131 |
132 | h_t = o_t * \tanh(C_t)
133 |
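For concreteness, here is a minimal NumPy sketch of a single timestep
implementing equations :eq:`1` to :eq:`6`; the parameter container ``p`` and
all array names are hypothetical, not part of the tutorial code:

.. code-block:: python

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    def lstm_step(x_t, h_prev, C_prev, p):
        # p maps parameter names (e.g. 'W_i', 'U_i', 'b_i') to arrays
        i_t = sigmoid(p['W_i'].dot(x_t) + p['U_i'].dot(h_prev) + p['b_i'])      # eq. (1)
        C_tilde = np.tanh(p['W_c'].dot(x_t) + p['U_c'].dot(h_prev) + p['b_c'])  # eq. (2)
        f_t = sigmoid(p['W_f'].dot(x_t) + p['U_f'].dot(h_prev) + p['b_f'])      # eq. (3)
        C_t = i_t * C_tilde + f_t * C_prev                                      # eq. (4)
        o_t = sigmoid(p['W_o'].dot(x_t) + p['U_o'].dot(h_prev)
                      + p['V_o'].dot(C_t) + p['b_o'])                           # eq. (5)
        h_t = o_t * np.tanh(C_t)                                                # eq. (6)
        return h_t, C_t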
134 | Our model
135 | =========
136 |
137 | The model we used in this tutorial is a variation of the standard LSTM model.
138 | In this variant, the activation of a cell’s output gate does not depend on the
139 | memory cell’s state :math:`C_t`. This allows us to perform part of the
140 | computation more efficiently (see the implementation note, below, for
141 | details). This means that, in the variant we have implemented, there is no
142 | matrix :math:`V_o` and equation :eq:`5` is replaced by equation :eq:`5-alt`:
143 |
144 | .. math::
145 | :label: 5-alt
146 |
147 | o_t = \sigma(W_o x_t + U_o h_{t-1} + b_o)
148 |
149 | Our model is composed of a single LSTM layer followed by an average pooling
150 | and a logistic regression layer as illustrated in Figure 2 below. Thus, from
151 | an input sequence :math:`x_0, x_1, x_2, ..., x_n`, the memory cells in the
152 | LSTM layer will produce a representation sequence :math:`h_0, h_1, h_2, ...,
153 | h_n`. This representation sequence is then averaged over all timesteps,
154 | resulting in a representation :math:`h`. Finally, this representation is fed to a
155 | logistic regression layer whose target is the class label associated with the
156 | input sequence.
157 |
158 | .. figure:: images/lstm.png
159 | :align: center
160 |
161 | **Figure 2** : Illustration of the model used in this tutorial. It is
162 | composed of a single LSTM layer followed by mean pooling over time and
163 | logistic regression.
164 |
165 | **Implementation note**: In the code included in this tutorial, the equations
166 | :eq:`1`, :eq:`2`, :eq:`3` and :eq:`5-alt` are performed in parallel to make
167 | the computation more efficient. This is possible because none of these
168 | equations rely on a result produced by the other ones. It is achieved by
169 | concatenating the four matrices :math:`W_*` into a single weight matrix
170 | :math:`W` and performing the same concatenation on the weight matrices
171 | :math:`U_*` to produce the matrix :math:`U` and the bias vectors :math:`b_*`
172 | to produce the vector :math:`b`. Then, the pre-nonlinearity activations can
173 | be computed with:
174 |
175 | .. math::
176 |
177 | z = W x_t + U h_{t-1} + b
178 |
179 | The result is then sliced to obtain the pre-nonlinearity activations for
180 | :math:`i`, :math:`f`, :math:`\widetilde{C_t}`, and :math:`o` and the
181 | non-linearities are then applied independently for each.
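
A NumPy sketch of this trick (``dim`` is the number of memory cells; the block
order within the concatenated matrices is a convention, and the tutorial's
``lstm.py`` uses an analogous slicing helper):

.. code-block:: python

    import numpy as np

    def sigmoid(v):
        return 1.0 / (1.0 + np.exp(-v))

    def _slice(z, n, dim):
        # n-th block of size dim of the concatenated pre-activations
        return z[n * dim:(n + 1) * dim]

    def lstm_preactivations(x_t, h_prev, W, U, b, dim):
        # W: (4*dim, n_in), U: (4*dim, dim), b: (4*dim,) -- stacked parameters
        z = W.dot(x_t) + U.dot(h_prev) + b
        i_t = sigmoid(_slice(z, 0, dim))
        f_t = sigmoid(_slice(z, 1, dim))
        C_tilde = np.tanh(_slice(z, 2, dim))
        o_t = sigmoid(_slice(z, 3, dim))
        return i_t, f_t, C_tilde, o_t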
182 |
183 |
184 | Code - Citations - Contact
185 | ++++++++++++++++++++++++++
186 |
187 | Code
188 | ====
189 |
190 | The LSTM implementation can be found in the two following files:
191 |
192 | * `lstm.py `_: Main script. Defines and trains the model.
193 |
194 | * `imdb.py `_: Secondary script. Handles the loading and preprocessing of the IMDB dataset.
195 |
196 | After downloading both scripts and putting both in the same folder, the user
197 | can run the code by calling:
198 |
199 | .. code-block:: bash
200 |
201 | THEANO_FLAGS="floatX=float32" python lstm.py
202 |
203 | The script will automatically download the data and decompress it.
204 |
205 | **Note**: The provided code supports the Stochastic Gradient Descent (SGD),
206 | AdaDelta and RMSProp optimization methods. You are advised to use AdaDelta or
207 | RMSProp, because SGD appears to perform poorly on this task with this
208 | particular model.
209 |
210 | Papers
211 | ======
212 |
213 | If you use this tutorial, please cite the following papers.
214 |
215 | Introduction of the LSTM model:
216 |
217 | * `[pdf] `__ Hochreiter, S., & Schmidhuber, J. (1997). Long short-term memory. Neural computation, 9(8), 1735-1780.
218 |
219 | Addition of the forget gate to the LSTM model:
220 |
221 | * `[pdf] `__ Gers, F. A., Schmidhuber, J., & Cummins, F. (2000). Learning to forget: Continual prediction with LSTM. Neural computation, 12(10), 2451-2471.
222 |
223 | More recent LSTM paper:
224 |
225 | * `[pdf] `__ Graves, Alex. Supervised sequence labelling with recurrent neural networks. Vol. 385. Springer, 2012.
226 |
227 | Papers related to Theano:
228 |
229 | * `[pdf] `__ Bastien, Frédéric, Lamblin, Pascal, Pascanu, Razvan, Bergstra, James, Goodfellow, Ian, Bergeron, Arnaud, Bouchard, Nicolas, and Bengio, Yoshua. Theano: new features and speed improvements. NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2012.
230 |
231 | * `[pdf] `__ Bergstra, James, Breuleux, Olivier, Bastien, Frédéric, Lamblin, Pascal, Pascanu, Razvan, Desjardins, Guillaume, Turian, Joseph, Warde-Farley, David, and Bengio, Yoshua. Theano: a CPU and GPU math expression compiler. In Proceedings of the Python for Scientific Computing Conference (SciPy), June 2010.
232 |
233 | Thank you!
234 |
235 | Contact
236 | =======
237 |
238 | Please email `Pierre Luc Carrier `_ or
239 | `Kyunghyun Cho `_ for any problem report or
240 | feedback. We will be glad to hear from you.
241 |
242 | References
243 | ++++++++++
244 |
245 | * Hochreiter, S., & Schmidhuber, J. (1997). Long short-term memory. Neural computation, 9(8), 1735-1780.
246 |
247 | * Gers, F. A., Schmidhuber, J., & Cummins, F. (2000). Learning to forget: Continual prediction with LSTM. Neural computation, 12(10), 2451-2471.
248 |
249 | * Graves, A. (2012). Supervised sequence labelling with recurrent neural networks (Vol. 385). Springer.
250 |
251 | * Hochreiter, S., Bengio, Y., Frasconi, P., & Schmidhuber, J. (2001). Gradient flow in recurrent nets: the difficulty of learning long-term dependencies.
252 |
253 | * Bengio, Y., Simard, P., & Frasconi, P. (1994). Learning long-term dependencies with gradient descent is difficult. Neural Networks, IEEE Transactions on, 5(2), 157-166.
254 |
255 | * Maas, A. L., Daly, R. E., Pham, P. T., Huang, D., Ng, A. Y., & Potts, C. (2011, June). Learning word vectors for sentiment analysis. In Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies-Volume 1 (pp. 142-150). Association for Computational Linguistics.
256 |
--------------------------------------------------------------------------------
/doc/references.txt:
--------------------------------------------------------------------------------
1 | .. _references:
2 |
3 | ==========
4 | References
5 | ==========
6 |
7 | .. [Bengio07] Y. Bengio, P. Lamblin, D. Popovici and H. Larochelle, `Greedy Layer-Wise Training of Deep Networks `_, in Advances in Neural Information Processing Systems 19 (NIPS'06), pages 153-160, MIT Press 2007.
8 |
9 | .. [Bengio09] Y. Bengio, `Learning deep architectures for AI `_, Foundations and Trends in Machine Learning 1(2) pages 1-127.
10 |
11 | .. [BengioDelalleau09] Y. Bengio, O. Delalleau, Justifying and Generalizing Contrastive Divergence (2009), Neural Computation, 21(6): 1601-1621.
12 |
13 | .. [BoulangerLewandowski12] N Boulanger-Lewandowski, Y. Bengio and P. Vincent, `Modeling Temporal Dependencies in High-Dimensional Sequences: Application to Polyphonic Music Generation and Transcription `_, in Proceedings of the 29th International Conference on Machine Learning (ICML), 2012.
14 |
15 | .. [Fukushima] Fukushima, K. (1980). Neocognitron: A self-organizing neural network model for a mechanism of pattern recognition unaffected by shift in position. Biological Cybernetics, 36, 193–202.
16 |
17 | .. [Hinton06] G.E. Hinton and R.R. Salakhutdinov, `Reducing the Dimensionality of Data with Neural Networks `_, Science, 28 July 2006, Vol. 313. no. 5786, pp. 504 - 507.
18 |
19 | .. [Hinton07] G.E. Hinton, S. Osindero, and Y. Teh, "A fast learning algorithm for deep belief nets", Neural Computation, vol 18, 2006
20 |
21 | .. [Hubel68] Hubel, D. and Wiesel, T. (1968). Receptive fields and functional architecture of monkey striate cortex. Journal of Physiology (London), 195, 215–243.
22 |
23 | .. [LeCun98] LeCun, Y., Bottou, L., Bengio, Y., and Haffner, P. (1998d). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86(11), 2278–2324.
24 |
25 | .. [Lee08] H. Lee, C. Ekanadham, and A.Y. Ng., `Sparse deep belief net model for visual area V2 `_, in Advances in Neural Information Processing Systems (NIPS) 20, 2008.
26 |
27 | .. [Lee09] H. Lee, R. Grosse, R. Ranganath, and A.Y. Ng, "Convolutional deep belief networks for scalable unsupervised learning of hierarchical representations.", ICML 2009
28 |
29 | .. [Ranzato10] M. Ranzato, A. Krizhevsky, G. Hinton, "Factored 3-Way Restricted Boltzmann Machines for Modeling Natural Images". Proc. of the 13-th International Conference on Artificial Intelligence and Statistics (AISTATS 2010), Italy, 2010
30 |
31 | .. [Ranzato07] M.A. Ranzato, C. Poultney, S. Chopra and Y. LeCun, in J. Platt et al., `Efficient Learning of Sparse Representations with an Energy-Based Model `_, Advances in Neural Information Processing Systems (NIPS 2006), MIT Press, 2007.
32 |
33 | .. [Serre07] Serre, T., Wolf, L., Bileschi, S., and Riesenhuber, M. (2007). Robust object recog- nition with cortex-like mechanisms. IEEE Trans. Pattern Anal. Mach. Intell., 29(3), 411–426. Member-Poggio, Tomaso.
34 |
35 | .. [Vincent08] P. Vincent, H. Larochelle Y. Bengio and P.A. Manzagol, `Extracting and Composing Robust Features with Denoising Autoencoders `_, Proceedings of the Twenty-fifth International Conference on Machine Learning (ICML'08), pages 1096 - 1103, ACM, 2008.
36 |
37 | .. [Tieleman08] T. Tieleman, Training restricted boltzmann machines using approximations to the likelihood gradient, ICML 2008.
38 |
39 | .. [Xavier10] X. Glorot and Y. Bengio, Understanding the difficulty of training deep feedforward neural networks, AISTATS 2010.
40 |
--------------------------------------------------------------------------------
/doc/rnnrbm.txt:
--------------------------------------------------------------------------------
1 | .. _rnnrbm:
2 |
3 | Modeling and generating sequences of polyphonic music with the RNN-RBM
4 | ========================================================================
5 |
6 | .. note::
7 | This tutorial demonstrates a basic implementation of the RNN-RBM as described in [BoulangerLewandowski12]_
8 | (`pdf `_).
9 | We assume the reader is familiar with
10 | `recurrent neural networks using the scan op `_
11 | and `restricted Boltzmann machines (RBM) `_.
12 |
13 | .. note::
14 | The code for this section is available for download here: `rnnrbm.py `_.
15 |
16 | You will need the modified `Python MIDI package (GPL license) `_ in your ``$PYTHONPATH`` or in the working directory in order to convert MIDI files to and from piano-rolls.
17 | The script also assumes that the content of the `Nottingham Database of folk tunes `_ has been extracted in the ``../data`` directory.
18 | Alternative MIDI datasets are available `here `_.
19 |
20 | Note that both dependencies above can be set up automatically by running the `download.sh `_ script in the ``../data`` directory of the `Deep Learning Tutorials repository `_.
21 |
22 | .. caution::
23 | Needs Theano 0.6 or more recent.
24 |
25 |
26 | The RNN-RBM
27 | +++++++++++++++++++++++++
28 |
29 | The RNN-RBM is an energy-based model for density estimation of temporal sequences, where the feature vector :math:`v^{(t)}` at time step :math:`t` may be high-dimensional.
30 | It allows one to describe multimodal conditional distributions of :math:`v^{(t)}|\mathcal A^{(t)}`, where :math:`\mathcal A^{(t)}\equiv \{v_\tau|\tau<t\}` denotes the sequence history at time :math:`t`.
146 |
147 | .. figure:: images/sample2.png
148 | :scale: 60%
149 |
150 | Listen to `sample2.mid `_
151 |
152 |
153 | How to improve this code
154 | +++++++++++++++++++++++++
155 |
156 | The code shown in this tutorial is a stripped-down version that can be improved in the following ways:
157 |
158 | * Preprocessing: transposing the sequences into a common tonality (e.g. C major / minor) and normalizing the tempo in beats (quarter notes) per minute can have the most effect on the generative quality of the model.
159 | * Pretraining techniques: initialize the :math:`W,b_v,b_h` parameters with independent RBMs with fully shuffled frames (i.e. :math:`W_{uh}=W_{uv}=W_{uu}=W_{vu}=0`); initialize the :math:`W_{uv},W_{uu},W_{vu},b_u` parameters of the RNN with the auxiliary cross-entropy objective via either SGD or, preferably, Hessian-free optimization [BoulangerLewandowski12]_.
160 | * Optimization techniques: gradient clipping, Nesterov momentum and the use of NADE for conditional density estimation.
161 | * Hyperparameter search: learning rate (separately for the RBM and RNN parts), learning rate schedules, batch size, number of hidden units (recurrent and RBM), momentum coefficient, momentum schedule, Gibbs chain length :math:`k` and early stopping.
162 | * Learn the initial condition :math:`u^{(0)}` as a model parameter.
163 |
164 |
165 | A few samples generated with code including these features are available here: `sequences.zip `_.
166 |
167 |
--------------------------------------------------------------------------------
/doc/scripts/docgen.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import sys
3 | import os
4 | import shutil
5 |
6 | import getopt
7 | from collections import defaultdict
8 |
9 | if __name__ == '__main__':
10 |
11 | throot = "/".join(sys.path[0].split("/")[:-2])
12 |
13 | options = defaultdict(bool)
14 | output_arg = getopt.getopt(sys.argv[1:], 'o:', ['rst', 'help', 'nopdf'])[0]
15 | options.update(dict([x, y or True] for x, y in output_arg))
16 | if options['--help']:
17 | print('Usage: %s [OPTIONS]' % sys.argv[0])
18 | print(' -o : output the html files in the specified dir')
19 | print(' --rst: only compile the doc (requires sphinx)')
20 | print(' --nopdf: do not produce a PDF file from the doc, only HTML')
21 | print(' --help: this help')
22 | sys.exit(0)
23 |
24 | options['--all'] = not bool(options['--rst'])
25 |
26 | def mkdir(path):
27 | try:
28 | os.mkdir(path)
29 | except OSError:
30 | pass
31 |
32 | outdir = options['-o'] or (throot + '/html')
33 | mkdir(outdir)
34 | os.chdir(outdir)
35 | mkdir("doc")
36 |
37 | # Make sure the appropriate 'deeplearning' directory is in the PYTHONPATH
38 | pythonpath = os.environ.get('PYTHONPATH', '')
39 | pythonpath = throot + ':' + pythonpath
40 | os.environ['PYTHONPATH'] = pythonpath
41 |
42 | if options['--all'] or options['--rst']:
43 | import sphinx
44 | sys.path[0:0] = [os.path.join(throot, 'doc')]
45 | sphinx.main(['', '-E', os.path.join(throot, 'doc'), '.'])
46 |
47 | if not options['--nopdf']:
48 | # Generate latex file in a temp directory
49 | import tempfile
50 | workdir = tempfile.mkdtemp()
51 | sphinx.main(['', '-E', '-b', 'latex',
52 | os.path.join(throot, 'doc'), workdir])
53 | # Compile to PDF
54 | os.chdir(workdir)
55 | os.system('make')
56 | try:
57 | shutil.copy(os.path.join(workdir, 'deeplearning.pdf'), outdir)
58 | os.chdir(outdir)
59 | shutil.rmtree(workdir)
60 | except OSError as e:
61 | print('OSError:', e)
62 | except IOError as e:
63 | print('IOError:', e)
64 |
--------------------------------------------------------------------------------
/doc/unet.txt:
--------------------------------------------------------------------------------
1 | .. _unet:
2 |
3 | U-Net
4 | **********************************************
5 |
6 | .. note::
7 | This section assumes the reader has already read through :doc:`lenet` for
8 | convolutional networks motivation and :doc:`fcn_2D_segm` for segmentation
9 | network.
10 |
11 | Summary
12 | +++++++
13 |
14 | This tutorial provides a brief explanation of the U-Net architecture as well as a way to implement
15 | it using Theano and Lasagne. U-Net is a Fully Convolutional Network (FCN) that does image segmentation.
16 | Its goal is to predict each pixel's class. See :doc:`fcn_2D_segm` for differences between
17 | network architectures for classification and segmentation tasks.
18 |
19 | Data
20 | ++++
21 |
22 | The data is from the ISBI challenge and can be found `here `_.
23 | We use data augmentation for training, as specified
24 | in the default arguments in the code given below.
25 |
26 | Model
27 | +++++
28 |
29 | The U-Net architecture is built upon the Fully Convolutional Network and modified
30 | in a way that yields better segmentation in medical imaging.
31 | Compared to FCN-8, the two main differences are (1) U-net is symmetric and (2) the skip
32 | connections between the downsampling path and the upsampling path apply a concatenation
33 | operator instead of a sum. These skip connections intend to provide local information
34 | to the global information while upsampling.
35 | Because of its symmetry, the network has a large number of feature maps in the upsampling
36 | path, which allows it to transfer information. By comparison, the basic FCN architecture only had
37 | *number of classes* feature maps in its upsampling path.
38 |
39 | The U-Net owes its name to its symmetric shape, which is different from other FCN variants.
40 |
41 | The U-Net architecture is separated into 3 parts:
42 |
43 | - 1 : The contracting/downsampling path
44 | - 2 : Bottleneck
45 | - 3 : The expanding/upsampling path
46 |
47 | .. figure:: images/unet.jpg
48 | :align: center
49 | :scale: 60%
50 |
51 | **Figure 1** : Illustration of U-Net architecture (from U-Net paper)
52 |
53 |
54 | Contracting/downsampling path
55 | =============================
56 |
57 | The contracting path is composed of 4 blocks. Each block is composed of
58 |
59 | * 3x3 Convolution Layer + activation function (with batch normalization)
60 | * 3x3 Convolution Layer + activation function (with batch normalization)
61 | * 2x2 Max Pooling
62 |
63 | Note that the number of feature maps doubles at each pooling, starting with
64 | 64 feature maps for the first block, 128 for the second, and so on.
65 | The purpose of this contracting path is to capture the context of the input image
66 | in order to be able to do segmentation. This coarse contextual information will
67 | then be transferred to the upsampling path by means of skip connections.
68 |
69 |
70 | Bottleneck
71 | ==========
72 |
73 | This part of the network is between the contracting and expanding paths.
74 | The bottleneck is simply built from 2 convolutional layers (with batch
75 | normalization) and dropout.
76 |
77 |
78 | Expanding/upsampling path
79 | =========================
80 |
81 | The expanding path is also composed of 4 blocks. Each of these blocks is composed of
82 |
83 | * Deconvolution layer with stride 2
84 | * Concatenation with the corresponding cropped feature map from the contracting path
85 | * 3x3 Convolution layer + activation function (with batch normalization)
86 | * 3x3 Convolution layer + activation function (with batch normalization)
87 |
88 |
89 | The purpose of this expanding path is to enable precise localization combined
90 | with contextual information from the contracting path.
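
As an illustration, one such block could be written in Lasagne roughly as
follows (a sketch under assumed layer names; the actual definitions live in
``Unet_lasagne_recipes.py``):

.. code-block:: python

    from lasagne.layers import (Conv2DLayer, ConcatLayer,
                                TransposedConv2DLayer, batch_norm)

    def expanding_block(net, incoming, skip, name, num_filters):
        # upsample by a factor of 2 with a strided deconvolution
        net[name + '_deconv'] = TransposedConv2DLayer(
            incoming, num_filters, 2, stride=2)
        # skip connection: concatenate with the (center-cropped) feature
        # maps coming from the contracting path
        net[name + '_concat'] = ConcatLayer(
            [net[name + '_deconv'], skip], axis=1,
            cropping=[None, None, 'center', 'center'])
        # two 3x3 convolutions, each followed by batch normalization
        net[name + '_conv1'] = batch_norm(Conv2DLayer(
            net[name + '_concat'], num_filters, 3))
        net[name + '_conv2'] = batch_norm(Conv2DLayer(
            net[name + '_conv1'], num_filters, 3))
        return net[name + '_conv2']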
91 |
92 | Advantages
93 | ==========
94 |
95 | * The U-Net combines the location information from the downsampling path with the contextual information in the upsampling path, finally obtaining general information that combines localisation and context, which is necessary to predict a good segmentation map.
96 |
97 | * No dense layers, so images of different sizes can be used as input (since the only parameters to learn on convolution layers are the kernels, and the size of a kernel is independent of the input image's size).
98 |
99 | * The use of massive data augmentation is important in domains like biomedical segmentation, since the number of annotated samples is usually limited.
100 |
101 |
102 | Code
103 | ++++
104 |
105 | .. warning::
106 |
107 | * Current code works with Python 2 only.
108 | * If you use Theano with GPU backend (e.g. with Theano flag ``device=cuda``),
109 | you will need at least 12GB free in your video RAM.
110 |
111 | The U-Net implementation can be found in the following GitHub repo:
112 |
113 | * `Unet_lasagne_recipes.py <../code/unet/Unet_lasagne_recipes.py>`_, from original main script
114 | `Unet.py `_. Defines the model.
115 |
116 | * `train_unet.py <../code/unet/train_unet.py>`_ : Training loop (main script to use).
117 |
118 |
119 | The user must install `Lasagne `_,
120 | `SimpleITK `_ and
121 | clone the GitHub repo `Dataset Loaders `_.
122 |
123 | Change the ``dataset_loaders/config.ini`` file to set the right path for the dataset:
124 |
125 | .. code-block:: cfg
126 |
127 | [isbi_em_stacks]
128 | shared_path = /path/to/DeepLearningTutorials/data/isbi_challenge_em_stacks/
129 |
130 | The folder indicated in the ``[isbi_em_stacks]`` section should contain the files:
131 |
132 | * ``test-volume.tif``
133 | * ``train-labels.tif``
134 | * ``train-volume.tif``
135 |
136 | The user can now build a U-Net with a specified number of input channels and number of classes.
137 | First, include the Lasagne layers needed to define the U-Net architecture:
138 |
139 | .. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
140 | :start-after: start-snippet-1
141 | :end-before: end-snippet-1
142 |
143 | The *net* variable will be an ordered dictionary containing layer names as keys and layer instances as values.
144 | This is needed to be able to concatenate the feature maps from the contracting to the expanding path.
145 |
146 |
147 | First, the contracting path:
148 |
149 | .. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
150 | :start-after: start-snippet-downsampling
151 | :end-before: end-snippet-downsampling
152 |
153 | And then the bottleneck:
154 |
155 | .. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
156 | :start-after: start-snippet-bottleneck
157 | :end-before: end-snippet-bottleneck
158 |
159 | Followed by the expanding path:
160 |
161 | .. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
162 | :start-after: start-snippet-upsampling
163 | :end-before: end-snippet-upsampling
164 |
165 | And finally the output path (to obtain *number of classes* feature maps):
166 |
167 | .. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
168 | :start-after: start-snippet-output
169 | :end-before: end-snippet-output
170 |
171 | Running ``train_unet.py`` on a Titan X took around 60 minutes, ending with the following output:
172 |
173 | .. code-block:: text
174 |
175 | $ THEANO_FLAGS=device=cuda0,floatX=float32,dnn.conv.algo_fwd=time_once,dnn.conv.algo_bwd_data=time_once,dnn.conv.algo_bwd_filter=time_once,gpuarray.preallocate=1 python train_unet.py
176 | [...]
177 | EPOCH 364: Avg epoch training cost train 0.160667, cost val 0.265909, acc val 0.888796, jacc val class 0 0.636058, jacc val class 1 0.861970, jacc val 0.749014 took 4.379772 s
178 |
179 |
180 | References
181 | ++++++++++
182 |
183 | If you use this tutorial, please cite the following papers.
184 |
185 | * `[pdf] `__ Olaf Ronneberger, Philipp Fischer, Thomas Brox. U-Net: Convolutional Networks for Biomedical Image Segmentation. May 2015.
186 | * `[GitHub Repo] `__ Francesco Visin, Adriana Romero - Dataset loaders: a python library to load and preprocess datasets. 2017.
187 |
188 | Papers related to Theano/Lasagne:
189 |
190 | * `[pdf] `__ Theano Development Team. Theano: A Python framework for fast computation of mathematical expressions. May 2016.
191 | * `[website] `__ Sander Dieleman, Jan Schluter, Colin Raffel, Eben Olson, Søren Kaae Sønderby, Daniel Nouri, Daniel Maturana, Martin Thoma, Eric Battenberg, Jack Kelly, Jeffrey De Fauw, Michael Heilman, diogo149, Brian McFee, Hendrik Weideman, takacsg84, peterderivaz, Jon, instagibbs, Dr. Kashif Rasul, CongLiu, Britefury, and Jonas Degrave, “Lasagne: First release.” (2015).
192 |
193 |
194 | Thank you!
195 |
--------------------------------------------------------------------------------
/doc/utilities.txt:
--------------------------------------------------------------------------------
1 | =============
2 | Miscellaneous
3 | =============
4 |
5 | .. _how-to-plot:
6 |
7 | Plotting Samples and Filters
8 | ++++++++++++++++++++++++++++
9 |
10 | .. note::
11 | The code for this section is available for download `here`_.
12 |
13 | .. _here: http://deeplearning.net/tutorial/code/utils.py
14 |
15 |
16 | To plot a sample, what we need to do is take the visible units, which
17 | are a flattened image (there is no 2D structure to the visible units,
18 | just a 1D string of unit activations) and reshape them into a 2D image. The order in
19 | which the points from the 1D array go into the 2D image is given by the
20 | order in which the initial MNIST images were converted into a 1D array.
21 | Luckily for us, this is just a call to the ``numpy.reshape`` function.
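
For example, assuming ``sample`` is a length-784 array holding one MNIST
digit:

.. code-block:: python

    import numpy

    # 784 visible units -> 28x28 image, in the same row-major order in
    # which the original image was flattened
    img = numpy.asarray(sample).reshape((28, 28))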
22 |
23 | Plotting the weights is a bit more tricky. We have ``n_hidden`` hidden
24 | units, each of them corresponding to a column of the weight matrix. A
25 | column has the same shape as the visible units, where the weight corresponding
26 | to the connection with visible unit `j` is at position `j`. Therefore,
27 | if we reshape every such column, using ``numpy.reshape``, we get a
28 | filter image that tells us how this hidden unit is influenced by
29 | the input image.
30 |
31 | We need a utility function that takes a minibatch, or the weight matrix,
32 | and converts each row (for the weight matrix we do a transpose) into a
33 | 2D image, and then tiles these images together. Once we have converted the
34 | minibatch or the weights into this image of tiles, we can use PIL to plot
35 | and save. `PIL `_ is a standard
36 | Python library for dealing with images.
37 |
38 | Tiling minibatches together is done for us by the
39 | ``tile_raster_images`` function, which we provide here.
40 |
41 | .. code-block:: python
42 |
43 | import numpy
44 | def scale_to_unit_interval(ndar, eps=1e-8):
45 | """ Scales all values in the ndarray ndar to be between 0 and 1 """
46 | ndar = ndar.copy()
47 | ndar -= ndar.min()
48 | ndar *= 1.0 / (ndar.max() + eps)
49 | return ndar
50 |
51 |
52 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
53 | scale_rows_to_unit_interval=True,
54 | output_pixel_vals=True):
55 | """
56 | Transform an array with one flattened image per row, into an array in
57 | which images are reshaped and layed out like tiles on a floor.
58 |
59 | This function is useful for visualizing datasets whose rows are images,
60 | and also columns of matrices for transforming those rows
61 | (such as the first layer of a neural net).
62 |
63 | :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can
64 | be 2-D ndarrays or None;
65 | :param X: a 2-D array in which every row is a flattened image.
66 |
67 | :type img_shape: tuple; (height, width)
68 | :param img_shape: the original shape of each image
69 |
70 | :type tile_shape: tuple; (rows, cols)
71 | :param tile_shape: the number of images to tile (rows, cols)
72 |
73 | :param output_pixel_vals: if output should be pixel values (i.e. int8
74 | values) or floats
75 |
76 | :param scale_rows_to_unit_interval: if the values need to be scaled before
77 | being plotted to [0,1] or not
78 |
79 |
80 | :returns: array suitable for viewing as an image.
81 | (See:`Image.fromarray`.)
82 | :rtype: a 2-d array with same dtype as X.
83 |
84 | """
85 |
86 | assert len(img_shape) == 2
87 | assert len(tile_shape) == 2
88 | assert len(tile_spacing) == 2
89 |
90 | # The expression below can be re-written in a more C style as
91 | # follows :
92 | #
93 | # out_shape = [0,0]
94 | # out_shape[0] = (img_shape[0] + tile_spacing[0]) * tile_shape[0] -
95 | # tile_spacing[0]
96 | # out_shape[1] = (img_shape[1] + tile_spacing[1]) * tile_shape[1] -
97 | # tile_spacing[1]
98 | out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
99 | in zip(img_shape, tile_shape, tile_spacing)]
100 |
101 | if isinstance(X, tuple):
102 | assert len(X) == 4
103 | # Create an output numpy ndarray to store the image
104 | if output_pixel_vals:
105 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), dtype='uint8')
106 | else:
107 |             out_array = numpy.zeros((out_shape[0], out_shape[1], 4), dtype=[x.dtype for x in X if x is not None][0])  # X is a tuple here, not an ndarray
108 |
109 | #colors default to 0, alpha defaults to 1 (opaque)
110 | if output_pixel_vals:
111 | channel_defaults = [0, 0, 0, 255]
112 | else:
113 | channel_defaults = [0., 0., 0., 1.]
114 |
115 | for i in range(4):
116 | if X[i] is None:
117 | # if channel is None, fill it with zeros of the correct
118 | # dtype
119 | out_array[:, :, i] = numpy.zeros(out_shape,
120 | dtype='uint8' if output_pixel_vals else out_array.dtype
121 | ) + channel_defaults[i]
122 | else:
123 | # use a recurrent call to compute the channel and store it
124 | # in the output
125 | out_array[:, :, i] = tile_raster_images(X[i], img_shape, tile_shape, tile_spacing, scale_rows_to_unit_interval, output_pixel_vals)
126 | return out_array
127 |
128 | else:
129 | # if we are dealing with only one channel
130 | H, W = img_shape
131 | Hs, Ws = tile_spacing
132 |
133 | # generate a matrix to store the output
134 | out_array = numpy.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype)
135 |
136 |
137 | for tile_row in range(tile_shape[0]):
138 | for tile_col in range(tile_shape[1]):
139 | if tile_row * tile_shape[1] + tile_col < X.shape[0]:
140 | if scale_rows_to_unit_interval:
141 | # if we should scale values to be between 0 and 1
142 | # do this by calling the `scale_to_unit_interval`
143 | # function
144 | this_img = scale_to_unit_interval(X[tile_row * tile_shape[1] + tile_col].reshape(img_shape))
145 | else:
146 | this_img = X[tile_row * tile_shape[1] + tile_col].reshape(img_shape)
147 | # add the slice to the corresponding position in the
148 | # output array
149 | out_array[
150 | tile_row * (H+Hs): tile_row * (H + Hs) + H,
151 | tile_col * (W+Ws): tile_col * (W + Ws) + W
152 | ] \
153 | = this_img * (255 if output_pixel_vals else 1)
154 | return out_array
155 |
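As a usage sketch (``W`` standing for a hypothetical 784 x n_hidden weight
matrix), the tiled filters can be turned into an image file with PIL:

.. code-block:: python

    import numpy
    from PIL import Image

    # each row of W.T is one hidden unit's filter; tile 100 of them
    image = Image.fromarray(tile_raster_images(
        X=W.T[:100],
        img_shape=(28, 28), tile_shape=(10, 10),
        tile_spacing=(1, 1)))
    image.save('filters.png')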
--------------------------------------------------------------------------------
/issues_closed/2_RBM_cost_fn.txt:
--------------------------------------------------------------------------------
1 | Reported by : Razvan
2 |
3 | Cost function (delta of free energy) has a reversed sign (i.e. free_energy(positive) - free_energy(negative) ). I'm not sure
4 | where the minus pops in .. but it is confusing when going from theory to code.
5 |
6 |
7 | FIXED
8 |
--------------------------------------------------------------------------------
/issues_open/1_SdA_performance.txt:
--------------------------------------------------------------------------------
1 | Reported by : Razvan
2 |
3 | Best performance for SdA float64 CPU : 1.23%
4 | float32 CPU : 1.30%
5 | target : 1.10%
6 |
7 | Possible reasons:
8 | - bug !?
9 | - random seed / weights initialization / finetuning early stopping parameters
10 |
--------------------------------------------------------------------------------
/issues_open/3_RBM_scan_GPU.txt:
--------------------------------------------------------------------------------
1 | Reported by : Razvan
2 |
3 | Scan is not GPU ready.. making the RBM tutorial slow on GPU (not tested yet).
4 | Quick fix is an optimization that removes scan if you're doing CD-1.
5 |
--------------------------------------------------------------------------------
/issues_open/4_RBM_scan.txt:
--------------------------------------------------------------------------------
1 | Reported by : Razvan
2 |
3 | The bug can be reproduced if you do :
4 | z = scan(..)
5 | c = f(z[-1])
6 | gp = T.grad(c, p, consider_constant = [ z[-1] ] )
7 |
8 | In this case grad will not consider z[-1] constant. Workaround:
9 |
10 | z = scan(..)
11 | z_1 = z[-1]
12 | c = f(z_1)
13 | gp = T.grad(c,p, consider_constant = [z_1])
14 |
15 | Note : I need to make sure this actually happens .. it might have been an
16 | artifact of something else when I first got this.
17 |
--------------------------------------------------------------------------------
/issues_open/5_results.txt:
--------------------------------------------------------------------------------
1 | Reported by : Razvan
2 |
3 | We should produce results + time for CPU float32 / CPU float64 / GPU. We should also
4 | specify the batch size (or number of updates), pointing out that you can't always just
5 | compare the number of epochs.
6 |
--------------------------------------------------------------------------------
/issues_open/6_benchmarking_pybrain.txt:
--------------------------------------------------------------------------------
1 | Reported by : Razvan
2 |
3 | Observations:
4 |
5 | 1. First thing, working with their dataset model is a pain ! Either I have
6 | not figured it out, or it allows you to add only one datapoint at a time
7 | to the dataset. This seems to me highly suboptimal ...
8 |
9 | 2. You do not get batches for sgd ! The only thing you can do is compare with
10 | batch size of 1.
11 |
12 | 3. Their early stopping is different from ours. Differences:
13 | - You cannot set how often you do a pass on the validation set
14 | (i.e. ``patience`` in our case). You always do one epoch of training
15 | and then you go through the validation set.
16 | - You do not have an improvement threshold; any improvement in
17 | validation score leads to storing the new best parameters, and
18 | increasing the time you will still look for better parameters
19 | - The increase is not by multiplication but by summation. So if at
20 | epoch x you do better on the validation step, you will go on for
21 | x+y epochs to look for something better (we do x*y)
22 |
23 | 4. The errors returned by pyBrain are divided by the number of
24 | classes. So if you do classification, you take the number of
25 | errors and divide it by the number of test examples times the
26 | number of classes. For MNIST this yields 10 times smaller
27 | errors. Is this something standard .. should we do it? It
28 | definitely makes the error look smaller.
29 |
30 | 5. There is no straightforward way of adding L1/L2 regularization (from
31 | what I've seen), unless you go into their code and change it. That is not
32 | hard to do .. but for now I do not want to meddle with the library
33 |
34 | 6. The code for RBM is not ready (they say that it is work in progress). It seems to me that the
35 | code is wrong .. They have 3 loops, which to me would mean that the innermost is for CD-k (the
36 | second is for one epoch / the third for training). But they update the weights after each Gibbs
37 | step in CD-k .. which results in a strange form of CD-1 that sees the same example several times before
38 | moving to the next one. I could (?) potentially fix the code but it is outside the scope of
39 | benchmarking.
40 |
41 | 7. There are question marks over how easy it would be to implement an SdA (autoassociators might be
42 | easy to do though).
43 |
44 |
45 | RESULTS :
46 | logistic_sgd on maggie46
47 |
48 | Total error: 0.015611011103
49 | Total error: 0.00966772673335
50 | Total error: 0.00860664508883
51 | Time spend per epoch: 43.32
52 | Final error is : 10.44
53 | Time spend per epoch: 43.32
54 | Final error is : 10.44
55 |
56 | Arac :
57 |
58 | Total error: 0.0366924968888
59 | Total error: 0.0366576944937
60 | Total error: 0.0367442383338
61 | Time spend per epoch: 24.71
62 | Final error is : 69.28
63 | Time spend per epoch: 24.71
64 | Final error is : 69.28
65 |
66 |
67 | ** Our thing with batchsize =1 **
68 |
69 | test error of best model 8.45
70 | time : 12.99
71 | 12.01
72 |
73 |
74 |
75 |
76 | Results :
77 | mlp on maggie46
78 |
79 |
80 | pybrain ::
81 |
82 | Total error: 0.0124744609817
83 | Total error: 0.00722484141084
84 | Total error: 0.00599591269763
85 | Time spend per epoch : 1226.69
86 | Final error is : 8.68
87 | Time spend per epoch: 1226.69
88 | Final error is : 8.68
89 |
90 | 20.4448 min
91 |
92 | arac::
93 |
94 | Total error: 0.0318599056504
95 | Total error: 0.0316029246672
96 | Total error: 0.0315542295953
97 | Time spend per epoch: 860.336666667 (s)
98 | Final error is : 58.59
99 |
100 | our thing::
101 |
102 | test error of best model 3.88
103 | time: 381.92
104 |
105 |
--------------------------------------------------------------------------------
/misc/do_nightly_build:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # If not jenkins, set workspace to local Tmp
4 | if [ -z "$WORKSPACE" ]; then
5 |     if [ -z "$TMPDIR" ]; then
6 | TMPDIR=/tmp
7 | fi
8 | WORKSPACE=$TMPDIR
9 | fi
10 |
11 | date
12 | ROOT_CWD=$WORKSPACE/nightly_build
13 | COMPILEDIR=$WORKSPACE/compile/lisa_theano_compile_dir_deeplearning
14 | NOSETESTS=${ROOT_CWD}/Theano/bin/theano-nose
15 | XUNIT="--with-xunit --xunit-file="
16 |
17 | FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR}
18 | export PYTHONPATH=${ROOT_CWD}/Theano:${ROOT_CWD}/Pylearn:$PYTHONPATH
19 |
20 | cd ${ROOT_CWD}/DeepLearningTutorials/data
21 | ./download.sh
22 |
23 | cd ${ROOT_CWD}/Theano
24 | echo "git version for Theano:" `git rev-parse HEAD`
25 | cd ${ROOT_CWD}/DeepLearningTutorials/code
26 | echo "git version:" `git rev-parse HEAD`
27 |
28 | #echo "executing nosetests with mode=FAST_COMPILE"
29 | #THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS}
30 | echo "executing nosetests speed with mode=FAST_RUN"
31 | FILE=${ROOT_CWD}/dlt_tests.xml
32 | THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} test.py:speed
33 | #echo "executing nosetests speed with mode=FAST_RUN and OMP_NUM_THREADS=2"
34 | #OMP_NUM_THREADS=2 THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} test.py:speed
35 | echo "executing nosetests with mode=FAST_RUN,floatX=float32"
36 | FILE=${ROOT_CWD}/dlt_float32_tests.xml
37 | THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE}
38 |
39 | #we change the seed and record it everyday to test different combination. We record it to be able to reproduce bug caused by different seed. We don't want multiple test in DEBUG_MODE each day as this take too long.
40 | #seed=$RANDOM
41 | #echo "executing nosetests with mode=DEBUG_MODE with seed of the day $seed"
42 | #FILE=${ROOT_CWD}/'dlt_debug_tests.xml'
43 | #THEANO_DEBUGMODE_CHECK_STRIDES=0 THEANO_DEBUGMODE_PATIENCE=3 THEANO_COMPILEDIR=$WORKSPACE/lisa_theano_compile_dir_deeplearning THEANO_UNITTEST_SEED=$seed THEANO_DEFAULT_MODE=DEBUG_MODE ${NOSETESTS} ${XUNIT}${FILE}
44 |
45 |
--------------------------------------------------------------------------------