├── .gitignore
├── Dockerfile
├── LICENSE.md
├── README.md
├── Seminar1
    ├── Classwork_ru.ipynb
    ├── Homework 1 (Face Recognition).ipynb
    ├── faces_data.mat
    ├── inpainting.ipynb
    └── zebrafish_drawing_factory.py
├── Seminar10
    ├── Bonus-handcrafted-rnn.ipynb
    ├── README.md
    ├── Seminar10-RNN-homework-en.ipynb
    ├── Seminar10-RNN-homework-ru.ipynb
    ├── codex
    │   ├── Arbitrazhnyj_processualbnyj_kodeks_RF.txt
    │   ├── Grazhdanskij_kodeks_RF._Chastb_pervaja.txt
    │   ├── Grazhdanskij_kodeks_RF._Chastb_tretbja.txt
    │   ├── Grazhdanskij_kodeks_RF._Chastb_vtoraja.txt
    │   ├── Grazhdanskij_kodeks_RF.txt
    │   ├── Kodeks_RF_ob_administrativnyh_pravonarushenijah.txt
    │   ├── Konstitucija_RF.txt
    │   ├── Tamozhennyj_kodeks_RF.txt
    │   ├── Ustav_Patrulbno-Postovoj_Sluzhby_Milicii_Obwestvennoj_Bezopasnosti_RF.txt
    │   ├── Zakon_o_milicii.txt
    │   └── Zakon_ob_avtorskom_prave_i_smezhnyh_pravah.txt
    ├── data_copyright
    ├── molecules.tsv
    ├── mtg_card_names.txt
    ├── names
    └── seminar4-RNN-intro.ipynb
├── Seminar11
    ├── Seminar11_homework.ipynb
    ├── Seminar11_intro.ipynb
    ├── broadcast.py
    ├── pretrained_lenet.py
    ├── test.csv
    └── train.csv
├── Seminar2
    ├── README.md
    └── Seminar2.ipynb
├── Seminar3
    ├── HW3_Differentiation.ipynb
    ├── HW3_Modules.ipynb
    ├── HW3_main.ipynb
    ├── README.md
    ├── Seminar3_Differentiation.ipynb
    ├── Seminar3_NN.ipynb
    ├── autoencoder.png
    ├── googlenet.png
    ├── grad.png
    └── outdated
    │   ├── Seminar 3.ipynb
    │   └── interpolation.mp4
├── Seminar4
    ├── README.md
    ├── Seminar-intro-slide.ipynb
    ├── Seminar4-en.ipynb
    ├── Seminar4-ru.ipynb
    ├── bonus
    │   ├── Bonus-advanced-cnn.ipynb
    │   ├── Bonus-advanced-theano.ipynb
    │   └── cifar.py
    └── mnist.py
├── Seminar5
    ├── README.md
    ├── Seminar5.ipynb
    ├── classes.pkl
    ├── classes.txt
    └── sample_images
    │   ├── albatross.jpg
    │   ├── fox.jpg
    │   ├── frog.jpg
    │   ├── hen.jpg
    │   ├── kermit.jpg
    │   ├── kitten.jpg
    │   ├── puppy.jpg
    │   ├── steve_martin.jpg
    │   ├── teapot.jpg
    │   └── tiger.jpg
├── Seminar6
    ├── Seminar6.ipynb
    ├── custom
    │   ├── __init__.py
    │   ├── net.py
    │   ├── solver.py
    │   └── tester.py
    ├── data
    │   ├── .gitignore
    │   ├── pylintrc
    │   └── scripts
    │   │   └── fetch_selective_search_data.sh
    ├── experiments
    │   ├── cfgs
    │   │   └── rcnn.yml
    │   └── scripts
    │   │   ├── fast_rcnn.sh
    │   │   ├── test_rcnn.sh
    │   │   └── train_rcnn.sh
    ├── lib
    │   ├── Makefile
    │   ├── datasets
    │   │   ├── VOCdevkit-matlab-wrapper
    │   │   │   ├── get_voc_opts.m
    │   │   │   ├── voc_eval.m
    │   │   │   └── xVOCap.m
    │   │   ├── __init__.py
    │   │   ├── ds_utils.py
    │   │   ├── factory.py
    │   │   ├── imdb.py
    │   │   ├── pascal_voc.py
    │   │   ├── tools
    │   │   │   └── mcg_munge.py
    │   │   └── voc_eval.py
    │   ├── fast_rcnn
    │   │   ├── __init__.py
    │   │   ├── bbox_transform.py
    │   │   ├── config.py
    │   │   ├── nms_wrapper.py
    │   │   ├── test.py
    │   │   └── train.py
    │   ├── nms
    │   │   ├── .gitignore
    │   │   ├── __init__.py
    │   │   ├── cpu_nms.pyx
    │   │   ├── gpu_nms.hpp
    │   │   ├── gpu_nms.pyx
    │   │   ├── nms_kernel.cu
    │   │   └── py_cpu_nms.py
    │   ├── roi_data_layer
    │   │   ├── __init__.py
    │   │   ├── layer.py
    │   │   ├── minibatch.py
    │   │   └── roidb.py
    │   ├── setup.py
    │   ├── transform
    │   │   ├── __init__.py
    │   │   └── torch_image_transform_layer.py
    │   └── utils
    │   │   ├── .gitignore
    │   │   ├── __init__.py
    │   │   ├── bbox.pyx
    │   │   ├── blob.py
    │   │   └── timer.py
    ├── notebook
    │   └── img
    │   │   └── rcnn_slide.jpg
    └── tools
    │   ├── _init_paths.py
    │   ├── eval_recall.py
    │   ├── reval.py
    │   ├── test_net.py
    │   └── train_net.py
├── Seminar7
    ├── HW_GAN.ipynb
    ├── HW_textures_style.ipynb
    ├── README.md
    └── sem7.ipynb
├── Seminar8
    ├── Autoencoder_structure.png
    ├── GS.py
    ├── README.md
    ├── VAE_homework.ipynb
    ├── __init__.py
    ├── lfw_dataset.py
    └── linear.png
└── Seminar9
    ├── Bonus-seminar.ipynb
    ├── Seminar9_en.ipynb
    ├── Seminar9_ru.ipynb
    └── oracle.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # node and NPM
 2 | npm-debug.log
 3 | node_modules
 4 | 
 5 | # swap files
 6 | *~
 7 | *.swp
 8 | 
 9 | 
10 | 
11 | env.sh
12 | # Byte-compiled / optimized / DLL files
13 | __pycache__/
14 | *.py[cod]
15 | 
16 | # C extensions
17 | *.so
18 | 
19 | # Distribution / packaging
20 | .Python
21 | env/
22 | bin/
23 | build/
24 | develop-eggs/
25 | dist/
26 | eggs/
27 | lib/
28 | lib64/
29 | parts/
30 | sdist/
31 | var/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg/
35 | 
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 | 
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .coverage
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | 
48 | # Translations
49 | *.mo
50 | 
51 | # Mr Developer
52 | .mr.developer.cfg
53 | .project
54 | .pydevproject
55 | .idea
56 | .ipynb_checkpoints
57 | 
58 | # Rope
59 | .ropeproject
60 | 
61 | # Django stuff:
62 | *.log
63 | *.pot
64 | 
65 | # Sphinx documentation
66 | docs/_build/
67 | docs/tmp*
68 | 
69 | # OS X garbage
70 | .DS_Store
71 | 
72 | # Debian things
73 | debian/reproducible-experiment-platform
74 | debian/files
75 | *.substvars
76 | *.debhelper.log


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM andrewosh/binder-base
 2 | 
 3 | MAINTAINER Alexander Panin <justheuristic@gmail.com>
 4 | 
 5 | USER root
 6 | 
 7 | RUN apt-get update
 8 | RUN apt-get install -y htop
 9 | RUN apt-get install -y unzip
10 | RUN apt-get install -y cmake
11 | 
12 | USER main
13 | 
14 | RUN pip install --upgrade https://github.com/Theano/Theano/archive/master.zip
15 | RUN pip install --upgrade https://github.com/Lasagne/Lasagne/archive/master.zip
16 | RUN pip install --upgrade https://github.com/yandexdataschool/AgentNet/archive/master.zip
17 | 
18 | RUN /home/main/anaconda/envs/python3/bin/pip install --upgrade https://github.com/Theano/Theano/archive/master.zip
19 | RUN /home/main/anaconda/envs/python3/bin/pip install --upgrade https://github.com/Lasagne/Lasagne/archive/master.zip
20 | RUN /home/main/anaconda/envs/python3/bin/pip install --upgrade https://github.com/yandexdataschool/AgentNet/archive/master.zip
21 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016 Yandex School of Data Analysis and contributors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Deep Learning course
 2 | Lecture and seminar materials for each week are in ./Seminar* folders
 3 | 
 4 | Instant dive-in button: [![Binder](http://mybinder.org/badge.svg)](http://mybinder.org:/repo/ddtm/dl-course) 
 5 | (servers may be down time to time, sry)
 6 | 
 7 | 
 8 | # Coordinates
 9 | * YSDA every wednesday at 18-00
10 | * Skoltech TBA
11 | 
12 | # Announcements
13 | * First lecture will happen on 8.02 -- or will it?
14 | 
15 | # Syllabus
16 | - __week1__ lecturename
17 |   - [ ] Lecture: Intro
18 |   - [ ] Seminar: Unsupervised feature learning on faces dataset
19 |      - [ ] HW due: xx.yy.zz, 23:59.
20 |   - [ ] Please get bleeding edge theano+lasagne installed for the next seminar. 
21 |     - [Issue](https://github.com/yandexdataschool/HSE_deeplearning/issues/1)
22 |     - [Linux Guidelines](http://agentnet.readthedocs.io/en/latest/user/install.html)
23 | 
24 | # Contributors & course staff
25 | Course materials and teaching performed by
26 | - [Viktor Lempitsky](http://sites.skoltech.ru/compvision/members/vilem/)
27 | - [Dmitry Ulyanov](https://github.com/DmitryUlyanov) - seminars, homeworks 
28 | - [Vadim Lebedev](https://github.com/vadim-v-lebedev) - seminars, homeworks
29 | - [Victor Yurchenko](https://github.com/simflin) - seminars, homeworks
30 | - [Just Heuristic](https://github.com/justheuristic/) - seminars, homeworks
31 | 
32 | Contributors:
33 | - [Oleg Vasilev](https://github.com/Omrigan) - a lot of miscelaneous improvements
34 | - [Arseniy Ashukha](https://github.com/ars-ashuha) - image captioning, notes
35 | - [Mikhail Khalman](https://github.com/mihaha?tab=activity) - variational autoencoders, notes
36 | 


--------------------------------------------------------------------------------
/Seminar1/Classwork_ru.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Чем думает рыба?"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {
 14 |     "collapsed": false
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "!wget https://github.com/goto-ru/Unsupervised_ML/raw/20779daf2aebca80bfe38401bc87cf41fc7b493d/03_zebrafish/zebrafish.npy -O zebrafish.npy\n",
 19 |     "#alternative link: https://www.dropbox.com/s/hhep0wj4c11qibu/zebrafish.npy?dl=1"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "metadata": {},
 25 |    "source": [
 26 |     "# Данные\n",
 27 |     "\n",
 28 |     "* Сейчас в вашем распоряжении - данные о мозговой активности малька рыбы вида Danio Rerio https://en.wikipedia.org/wiki/Zebrafish .\n",
 29 |     "* Мальку введено вещество, которое светится от электрической активности (например, от спайков нейронов). Мальки почти прозрачны, поэтому такое свечение видно извне.\n",
 30 |     "* Сами данные содержат 240 фотографий головной части рыбки, на которых видна мозговая активность в каждой точке. Каждая фотография имеет размер __230 x 202__ пикселей\n",
 31 |     "* Ваша задача - попытаться восстановить структуру мозга рыбки. Для этого можно попытаться найти, например, группы нейронов, реагирующих вместе или с одинаковой частотой.\n",
 32 |     "* Никакой разметки в данных нет, поэтому вам придётся использовать методы понижения размерности и кластеризации, чтобы эффективно анализировать данные.\n",
 33 |     "\n",
 34 |     "![img](http://static1.squarespace.com/static/5355ec0de4b02760ee889a8f/t/5357cbfee4b03a3c7d9e4831/1398262791647/fish)"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": null,
 40 |    "metadata": {
 41 |     "collapsed": false
 42 |    },
 43 |    "outputs": [],
 44 |    "source": [
 45 |     "import numpy as np\n",
 46 |     "data = np.load(\"zebrafish.npy\")/255."
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": null,
 52 |    "metadata": {
 53 |     "collapsed": false
 54 |    },
 55 |    "outputs": [],
 56 |    "source": [
 57 |     "import matplotlib.pyplot as plt\n",
 58 |     "%matplotlib inline\n",
 59 |     "\n",
 60 |     "tick0 = data[:,0]\n",
 61 |     "tick0_image = tick0.reshape(230, 202)\n",
 62 |     "\n",
 63 |     "print \"размер 1 картинки:\", tick0_image.shape\n",
 64 |     "\n",
 65 |     "plt.imshow(tick0_image.T);"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": null,
 71 |    "metadata": {
 72 |     "collapsed": false,
 73 |     "scrolled": true
 74 |    },
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "#мини-библиотека для рисования рыбы\n",
 78 |     "from zebrafish_drawing_factory import draw_component\n",
 79 |     "\n",
 80 |     "draw_component(data[:,0])"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "markdown",
 85 |    "metadata": {},
 86 |    "source": [
 87 |     "# Временные ряды\n",
 88 |     "\n",
 89 |     "* Посмотрим на активность отдельных пикселей в течение времени:\n",
 90 |     "* Попробуйте вручную найти какие-то характерные группы нейронов"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": null,
 96 |    "metadata": {
 97 |     "collapsed": false
 98 |    },
 99 |    "outputs": [],
100 |    "source": [
101 |     "import matplotlib.pyplot as plt\n",
102 |     "%matplotlib inline\n",
103 |     "plt.figure(figsize=[10,10])\n",
104 |     "for i in range(0,240,10):\n",
105 |     "    plt.plot(data[i])\n"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "markdown",
110 |    "metadata": {
111 |     "collapsed": true
112 |    },
113 |    "source": [
114 |     "# Поищем характерные группы нейронов\n",
115 |     "\n",
116 |     "Давайте разложим временные ряды активности нейронов при помощи метода главных компонент.\n",
117 |     "\n",
118 |     "__Важно!__ в этой части задания объектом выборки является временной ряд активности 1 точки на картинке, а не картинка целиком."
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": null,
124 |    "metadata": {
125 |     "collapsed": false
126 |    },
127 |    "outputs": [],
128 |    "source": [
129 |     "from sklearn.decomposition import PCA\n",
130 |     "\n",
131 |     "pca = <создайте и обучите PCA с 20+ компонентами>"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": null,
137 |    "metadata": {
138 |     "collapsed": false
139 |    },
140 |    "outputs": [],
141 |    "source": [
142 |     "data_pca = <преобразуйте данные в пространство главных компонент pca.transform>"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "markdown",
147 |    "metadata": {},
148 |    "source": [
149 |     "## Визуализируем компоненты"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": null,
155 |    "metadata": {
156 |     "collapsed": false
157 |    },
158 |    "outputs": [],
159 |    "source": [
160 |     "draw_component(data_pca[:,1])"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": null,
166 |    "metadata": {
167 |     "collapsed": false
168 |    },
169 |    "outputs": [],
170 |    "source": [
171 |     "draw_component(data_pca[:,2])"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": null,
177 |    "metadata": {
178 |     "collapsed": false
179 |    },
180 |    "outputs": [],
181 |    "source": [
182 |     "from zebrafish_drawing_factory import draw_components\n",
183 |     "\n",
184 |     "draw_components(data_pca[:,2],data_pca[:,3])"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "markdown",
189 |    "metadata": {},
190 |    "source": [
191 |     "# Поищем фичи"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": null,
197 |    "metadata": {
198 |     "collapsed": true
199 |    },
200 |    "outputs": [],
201 |    "source": [
202 |     "def extract_features(impulses):\n",
203 |     "    \"\"\"given time series(array) of region activity, compute some feature representation of those time series\n",
204 |     "    Ideas:\n",
205 |     "      - fourier transform\n",
206 |     "      - mean, variance and percentiles\n",
207 |     "      - sums of every k-th element with shift b\n",
208 |     "      \"\"\"\n",
209 |     "    features = []<любые фичи>\n",
210 |     "    return features"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": null,
216 |    "metadata": {
217 |     "collapsed": false
218 |    },
219 |    "outputs": [],
220 |    "source": [
221 |     "data_features = np.array(list(map(extract_features, data)))\n",
222 |     "\n",
223 |     "print \"shape:\",data_features.shape"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": null,
229 |    "metadata": {
230 |     "collapsed": false
231 |    },
232 |    "outputs": [],
233 |    "source": [
234 |     "from sklearn.decomposition import PCA\n",
235 |     "\n",
236 |     "pca = <обучи PCA>"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": null,
242 |    "metadata": {
243 |     "collapsed": true
244 |    },
245 |    "outputs": [],
246 |    "source": [
247 |     "data_pca = <преобразуй в пространство PCA>"
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "code",
252 |    "execution_count": null,
253 |    "metadata": {
254 |     "collapsed": false
255 |    },
256 |    "outputs": [],
257 |    "source": [
258 |     "<визуализируй полученные компоненты>\n",
259 |     "draw_component(...)\n",
260 |     "draw_components(...)"
261 |    ]
262 |   },
263 |   {
264 |    "cell_type": "markdown",
265 |    "metadata": {},
266 |    "source": [
267 |     "# Bonus: clustering in PCA space"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "code",
272 |    "execution_count": null,
273 |    "metadata": {
274 |     "collapsed": true
275 |    },
276 |    "outputs": [],
277 |    "source": [
278 |     "from sklearn.cluster import KMeans\n",
279 |     "from sklearn.mixture import GMM\n",
280 |     "\n",
281 |     "<покластеризуй области изображения на основе двух полученных PCA-представлений, используй любой метод на выбор>"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "code",
286 |    "execution_count": null,
287 |    "metadata": {
288 |     "collapsed": true
289 |    },
290 |    "outputs": [],
291 |    "source": [
292 |     "cluster_ids = <предскажи номер кластера для каждого пикселя>"
293 |    ]
294 |   },
295 |   {
296 |    "cell_type": "code",
297 |    "execution_count": null,
298 |    "metadata": {
299 |     "collapsed": true
300 |    },
301 |    "outputs": [],
302 |    "source": [
303 |     "#cluster_ids должен содержать по 1 чиселке на пиксель\n",
304 |     "assert np.prod(cluster_ids.shape) == (230*202)"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": null,
310 |    "metadata": {
311 |     "collapsed": true
312 |    },
313 |    "outputs": [],
314 |    "source": [
315 |     "plt.imshow(cluster_ids.reshape(230,202),cmap='spectral')"
316 |    ]
317 |   }
318 |  ],
319 |  "metadata": {
320 |   "kernelspec": {
321 |    "display_name": "Python [Root]",
322 |    "language": "python",
323 |    "name": "Python [Root]"
324 |   },
325 |   "language_info": {
326 |    "codemirror_mode": {
327 |     "name": "ipython",
328 |     "version": 2
329 |    },
330 |    "file_extension": ".py",
331 |    "mimetype": "text/x-python",
332 |    "name": "python",
333 |    "nbconvert_exporter": "python",
334 |    "pygments_lexer": "ipython2",
335 |    "version": "2.7.12"
336 |   }
337 |  },
338 |  "nbformat": 4,
339 |  "nbformat_minor": 0
340 | }
341 | 


--------------------------------------------------------------------------------
/Seminar1/Homework 1 (Face Recognition).ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [
 10 |     {
 11 |      "name": "stdout",
 12 |      "output_type": "stream",
 13 |      "text": [
 14 |       "Populating the interactive namespace from numpy and matplotlib\n"
 15 |      ]
 16 |     }
 17 |    ],
 18 |    "source": [
 19 |     "%pylab inline\n",
 20 |     "\n",
 21 |     "import numpy as np\n",
 22 |     "from matplotlib import pyplot as plt"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "# Face recognition"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "markdown",
 34 |    "metadata": {},
 35 |    "source": [
 36 |     "The goal of this seminar is to build two simple (anv very similar) face recognition pipelines using **`scikit-learn`** package. Overall, we'd like to explore different representations and see which one works better. "
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "## Prepare dataset"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 2,
 49 |    "metadata": {
 50 |     "collapsed": false
 51 |    },
 52 |    "outputs": [
 53 |     {
 54 |      "name": "stdout",
 55 |      "output_type": "stream",
 56 |      "text": [
 57 |       "Dataset loaded.\n",
 58 |       "  Image size        : 32x32\n",
 59 |       "  Train images      : 280\n",
 60 |       "  Test images       : 120\n",
 61 |       "  Number of classes : 40\n"
 62 |      ]
 63 |     }
 64 |    ],
 65 |    "source": [
 66 |     "import scipy.io\n",
 67 |     "\n",
 68 |     "image_h, image_w = 32, 32\n",
 69 |     "\n",
 70 |     "data = scipy.io.loadmat('faces_data.mat')\n",
 71 |     "\n",
 72 |     "X_train = data['train_faces'].reshape((image_w, image_h, -1)).transpose((2, 1, 0)).reshape((-1, image_h * image_w))\n",
 73 |     "y_train = data['train_labels'] - 1\n",
 74 |     "X_test = data['test_faces'].reshape((image_w, image_h, -1)).transpose((2, 1, 0)).reshape((-1, image_h * image_w))\n",
 75 |     "y_test = data['test_labels'] - 1\n",
 76 |     "\n",
 77 |     "n_features = X_train.shape[1]\n",
 78 |     "n_train = len(y_train)\n",
 79 |     "n_test = len(y_test)\n",
 80 |     "n_classes = len(np.unique(y_train))\n",
 81 |     "\n",
 82 |     "print('Dataset loaded.')\n",
 83 |     "print('  Image size        : {}x{}'.format(image_h, image_w))\n",
 84 |     "print('  Train images      : {}'.format(n_train))\n",
 85 |     "print('  Test images       : {}'.format(n_test))\n",
 86 |     "print('  Number of classes : {}'.format(n_classes))"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "markdown",
 91 |    "metadata": {},
 92 |    "source": [
 93 |     "Now we are going to plot some samples from the dataset using the provided helper function."
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": null,
 99 |    "metadata": {
100 |     "collapsed": false
101 |    },
102 |    "outputs": [],
103 |    "source": [
104 |     "def plot_gallery(images, titles, h, w, n_row=3, n_col=6):\n",
105 |     "    \"\"\"Helper function to plot a gallery of portraits\"\"\"\n",
106 |     "    plt.figure(figsize=(1.5 * n_col, 1.7 * n_row))\n",
107 |     "    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)\n",
108 |     "    for i in range(n_row * n_col):\n",
109 |     "        plt.subplot(n_row, n_col, i + 1)\n",
110 |     "        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray, interpolation='nearest')\n",
111 |     "        plt.title(titles[i], size=12)\n",
112 |     "        plt.xticks(())\n",
113 |     "        plt.yticks(())"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": null,
119 |    "metadata": {
120 |     "collapsed": false
121 |    },
122 |    "outputs": [],
123 |    "source": [
124 |     "titles = [str(y[0]) for y in y_train]\n",
125 |     "\n",
126 |     "plot_gallery(X_train, titles, image_h, image_w)"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "markdown",
131 |    "metadata": {},
132 |    "source": [
133 |     "## Nearest Neighbour baseline"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "markdown",
138 |    "metadata": {},
139 |    "source": [
140 |     "The simplest way to do face recognition is to treat raw pixels as features and perform **Nearest Neighbor Search** in the Euclidean space. Let's use **`KNeighborsClassifier`** class."
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "code",
145 |    "execution_count": null,
146 |    "metadata": {
147 |     "collapsed": false
148 |    },
149 |    "outputs": [],
150 |    "source": [
151 |     "from sklearn.neighbors import KNeighborsClassifier\n",
152 |     "\n",
153 |     "# Use KNeighborsClassifier to calculate test score for the Nearest Neighbour classifier.\n",
154 |     "\n",
155 |     "print('Test score: {}'.format(test_score))"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "markdown",
160 |    "metadata": {},
161 |    "source": [
162 |     "Not very imperssive, is it?"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "markdown",
167 |    "metadata": {},
168 |    "source": [
169 |     "## Eigenfaces"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "markdown",
174 |    "metadata": {},
175 |    "source": [
176 |     "All the dirty work will be done by the scikit-learn package. First we need to learn a dictionary of codewords. For that we preprocess the training set by making each face normalized (zero mean and unit variance).."
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": null,
182 |    "metadata": {
183 |     "collapsed": true
184 |    },
185 |    "outputs": [],
186 |    "source": [
187 |     "# Populate variable 'X_train_processed' with samples each of which has zero mean and unit variance."
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "markdown",
192 |    "metadata": {},
193 |    "source": [
194 |     "Now we are going to apply **PCA** to obtain a dictionary of codewords. \n",
195 |     "**`RamdomizedPCA`** class is what we need."
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "code",
200 |    "execution_count": null,
201 |    "metadata": {
202 |     "collapsed": false
203 |    },
204 |    "outputs": [],
205 |    "source": [
206 |     "from sklearn.decomposition import RandomizedPCA\n",
207 |     "\n",
208 |     "n_components = 64\n",
209 |     "\n",
210 |     "# Populate 'pca' with a trained instance of RamdomizedPCA."
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "markdown",
215 |    "metadata": {},
216 |    "source": [
217 |     "We plot a bunch of principal components."
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "code",
222 |    "execution_count": null,
223 |    "metadata": {
224 |     "collapsed": false
225 |    },
226 |    "outputs": [],
227 |    "source": [
228 |     "# Visualize principal components."
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "markdown",
233 |    "metadata": {},
234 |    "source": [
235 |     "This time we don't have any restriction on number of non-zero coefficients in the vector decomposition, so the codes are not sparse anymore:"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": null,
241 |    "metadata": {
242 |     "collapsed": false
243 |    },
244 |    "outputs": [],
245 |    "source": [
246 |     "# Transform training data and plot decomposition coefficients."
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "markdown",
251 |    "metadata": {},
252 |    "source": [
253 |     "Train an SVM and apply it to the encoded test data."
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "code",
258 |    "execution_count": null,
259 |    "metadata": {
260 |     "collapsed": false
261 |    },
262 |    "outputs": [],
263 |    "source": [
264 |     "# Populate 'test_score' with test accuracy of an SVM classifier.\n",
265 |     "\n",
266 |     "print('Test score: {}'.format(test_score))"
267 |    ]
268 |   },
269 |   {
270 |    "cell_type": "markdown",
271 |    "metadata": {},
272 |    "source": [
273 |     "How many components are sufficient to reach the same accuracy level?"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "code",
278 |    "execution_count": null,
279 |    "metadata": {
280 |     "collapsed": false
281 |    },
282 |    "outputs": [],
283 |    "source": [
284 |     "n_components = [1, 2, 4, 8, 16, 32, 64]\n",
285 |     "accuracy = []\n",
286 |     "\n",
287 |     "# Try different numbers of components and populate 'accuracy' list.\n",
288 |     "    \n",
289 |     "plt.figure(figsize=(10, 6))\n",
290 |     "plt.plot(n_nonzero, accuracy)\n",
291 |     "\n",
292 |     "print('Max accuracy: {}'.format(max(accuracy)))"
293 |    ]
294 |   }
295 |  ],
296 |  "metadata": {
297 |   "kernelspec": {
298 |    "display_name": "Python 2",
299 |    "language": "python",
300 |    "name": "python2"
301 |   },
302 |   "language_info": {
303 |    "codemirror_mode": {
304 |     "name": "ipython",
305 |     "version": 2
306 |    },
307 |    "file_extension": ".py",
308 |    "mimetype": "text/x-python",
309 |    "name": "python",
310 |    "nbconvert_exporter": "python",
311 |    "pygments_lexer": "ipython2",
312 |    "version": "2.7.6"
313 |   }
314 |  },
315 |  "nbformat": 4,
316 |  "nbformat_minor": 0
317 | }
318 | 


--------------------------------------------------------------------------------
/Seminar1/faces_data.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar1/faces_data.mat


--------------------------------------------------------------------------------
/Seminar1/zebrafish_drawing_factory.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import matplotlib.pyplot as plt
 3 | import matplotlib.cm as cm
 4 | def preparePlot(xticks, yticks, figsize=(10.5, 6), hideLabels=False, gridColor='#999999',
 5 |                 gridWidth=1.0):
 6 |     """Template for generating the plot layout."""
 7 |     plt.close()
 8 |     fig, ax = plt.subplots(figsize=figsize, facecolor='white', edgecolor='white')
 9 |     ax.axes.tick_params(labelcolor='#999999', labelsize='10')
10 |     for axis, ticks in [(ax.get_xaxis(), xticks), (ax.get_yaxis(), yticks)]:
11 |         axis.set_ticks_position('none')
12 |         axis.set_ticks(ticks)
13 |         axis.label.set_color('#999999')
14 |         if hideLabels: axis.set_ticklabels([])
15 |     plt.grid(color=gridColor, linewidth=gridWidth, linestyle='-')
16 |     map(lambda position: ax.spines[position].set_visible(False), ['bottom', 'top', 'left', 'right'])
17 |     return fig, ax
18 | 
19 | def draw_component(component):
20 | 
21 |     image = component.reshape(230, 202).T
22 | 
23 |     fig, ax = preparePlot(numpy.arange(0, 10, 1), numpy.arange(0, 10, 1), figsize=(9.0, 7.2), hideLabels=True)
24 |     ax.grid(False)
25 |     image = plt.imshow(image,interpolation='nearest', aspect='auto', cmap=cm.gray)
26 |     plt.show()
27 |     
28 | # Adapted from python-thunder's Colorize.transform where cmap='polar'.
29 | # Checkout the library at: https://github.com/thunder-project/thunder and
30 | # http://thunder-project.org/
31 | import numpy as np
32 | def polarTransform(scale, img):
33 |     """Convert points from cartesian to polar coordinates and map to colors."""
34 |     from matplotlib.colors import hsv_to_rgb
35 |     
36 |     
37 |     img = np.asarray(img)
38 |     dims = img.shape
39 | 
40 |     phi = ((np.arctan2(-img[0], -img[1]) + np.pi/2) % (np.pi*2)) / (2 * np.pi)
41 |     rho = np.sqrt(img[0]**2 + img[1]**2)
42 |     saturation = np.ones((dims[1], dims[2]))
43 | 
44 |     out = hsv_to_rgb(np.dstack((phi, saturation, scale * rho)))
45 | 
46 |     return np.clip(out * scale, 0, 1)
47 | 
48 | def draw_components(*components):
49 |     assert len(components)==2,"this method only accepts 2 components at once"
50 |     components = [i.reshape(230, 202).T for i in components]
51 |     # Use the same transformation on the image data
52 |     # Try changing the first parameter to lower values
53 |     brainmap = polarTransform(2.0, components)
54 | 
55 |     # generate layout and plot data
56 |     fig, ax = preparePlot(np.arange(0, 10, 1), np.arange(0, 10, 1), figsize=(9.0, 7.2), hideLabels=True)
57 |     ax.grid(False)
58 |     image = plt.imshow(brainmap,interpolation='nearest', aspect='auto')
59 |     plt.show()


--------------------------------------------------------------------------------
/Seminar10/README.md:
--------------------------------------------------------------------------------
1 | More materials
2 | - http://karpathy.github.io/2015/05/21/rnn-effectiveness/
3 | - [moar advanced slides](http://www.machinelearning.ru/wiki/images/6/6c/RNN_and_LSTM_16102015.pdf) by bayesgroup
4 | - [random example](https://larseidnes.com/2015/10/13/auto-generating-clickbait-with-recurrent-neural-networks/) is random
5 | 


--------------------------------------------------------------------------------
/Seminar10/codex/Arbitrazhnyj_processualbnyj_kodeks_RF.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Arbitrazhnyj_processualbnyj_kodeks_RF.txt


--------------------------------------------------------------------------------
/Seminar10/codex/Grazhdanskij_kodeks_RF._Chastb_pervaja.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Grazhdanskij_kodeks_RF._Chastb_pervaja.txt


--------------------------------------------------------------------------------
/Seminar10/codex/Grazhdanskij_kodeks_RF._Chastb_tretbja.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Grazhdanskij_kodeks_RF._Chastb_tretbja.txt


--------------------------------------------------------------------------------
/Seminar10/codex/Grazhdanskij_kodeks_RF._Chastb_vtoraja.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Grazhdanskij_kodeks_RF._Chastb_vtoraja.txt


--------------------------------------------------------------------------------
/Seminar10/codex/Grazhdanskij_kodeks_RF.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Grazhdanskij_kodeks_RF.txt


--------------------------------------------------------------------------------
/Seminar10/codex/Kodeks_RF_ob_administrativnyh_pravonarushenijah.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Kodeks_RF_ob_administrativnyh_pravonarushenijah.txt


--------------------------------------------------------------------------------
/Seminar10/codex/Konstitucija_RF.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Konstitucija_RF.txt


--------------------------------------------------------------------------------
/Seminar10/codex/Tamozhennyj_kodeks_RF.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Tamozhennyj_kodeks_RF.txt


--------------------------------------------------------------------------------
/Seminar10/codex/Ustav_Patrulbno-Postovoj_Sluzhby_Milicii_Obwestvennoj_Bezopasnosti_RF.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Ustav_Patrulbno-Postovoj_Sluzhby_Milicii_Obwestvennoj_Bezopasnosti_RF.txt


--------------------------------------------------------------------------------
/Seminar10/codex/Zakon_o_milicii.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Zakon_o_milicii.txt


--------------------------------------------------------------------------------
/Seminar10/codex/Zakon_ob_avtorskom_prave_i_smezhnyh_pravah.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Zakon_ob_avtorskom_prave_i_smezhnyh_pravah.txt


--------------------------------------------------------------------------------
/Seminar10/data_copyright:
--------------------------------------------------------------------------------
1 | @names
2 | # Copyright (c) January 1991 by Mark Kantrowitz.
3 | # Thanks to Bill Ross for about 1000 additional names.
4 | # Version 1.3 (29-MAR-94)
5 | 
6 | @mtg cards
7 | https://mtgjson.com/
8 | 
9 | 


--------------------------------------------------------------------------------
/Seminar11/broadcast.py:
--------------------------------------------------------------------------------
  1 | from agentnet.utils.format import check_list
  2 | from lasagne.layers import Layer
  3 | import numpy as np
  4 | 
  5 | 
  6 | class BroadcastLayer(Layer):
  7 |     """
  8 |     Merges certain axes of network into first (batch) axis to allow broadcasting over them.
  9 |     :param incoming: layer to be broadcasted
 10 |     :type incoming: Layer
 11 |     :param broadcasted_axes: an axis (or axes) to be broadcasted
 12 |     :type broadcasted_axes: int or tuple of int
 13 |     :force_broadcastable_batch: if True, raises an eror whenever batch (0'th) axis is not included in broadcasted_axes
 14 | 
 15 |     """
 16 | 
 17 |     def __init__(self, incoming, broadcasted_axes, force_broadcastable_batch=True, **kwargs):
 18 | 
 19 |         self.incoming_ndim = len(incoming.output_shape)
 20 | 
 21 |         # axes that are to be broadcasted -- in ascending order
 22 |         # ax%self.incoming_ndim is used to replace negative axes with N-ax+1 so that -1 becomes last axis
 23 |         self.broadcasted_axes = sorted([ax % self.incoming_ndim for ax in check_list(broadcasted_axes)])
 24 | 
 25 |         # sanity checks
 26 |         assert max(self.broadcasted_axes) < self.incoming_ndim
 27 |         assert len(self.broadcasted_axes) > 0
 28 |         if force_broadcastable_batch and (0 not in self.broadcasted_axes):
 29 |             raise ValueError("BroadcastLayer was asked NOT to broadcast over batch (0'th) axis.\n"
 30 |                              "If you know what you're doing, set force_broadcastable_batch=False.\n"
 31 |                              "Otherwise just add 0 to the broadcasted_axes")
 32 | 
 33 |         # axed that are NOT broadcasted = all other axes in respective order
 34 |         self.non_broadcasted_axes = [ax for ax in range(self.incoming_ndim) if ax not in self.broadcasted_axes]
 35 | 
 36 | 
 37 |         super(BroadcastLayer, self).__init__(incoming, **kwargs)
 38 | 
 39 |     def get_output_for(self, input, **kwargs):
 40 |         """
 41 |         performs theanic magic (see layer description)
 42 |         :param input: activation to be reshaped into broadcastable shape
 43 |         :param kwargs: no effect
 44 |         :return: symbolic expression for reshaped layer activation
 45 |         """
 46 | 
 47 |         # save symbolic input shape for unbroadcaster
 48 |         self.symbolic_input_shape = input.shape
 49 | 
 50 |         # dimshuffle so that the new order is [ all_broadcasted_axes, all_non_broadcasted_axes]
 51 | 
 52 |         input = input.dimshuffle(self.broadcasted_axes + self.non_broadcasted_axes)
 53 | 
 54 |         # flatten broadcasted axes into a single axis
 55 |         input = input.reshape((-1,) + tuple(input.shape[len(self.broadcasted_axes):]))
 56 | 
 57 |         # now shape should be [ product(broadcasted_axes_shapes), non_broadcasted_axes ]
 58 | 
 59 |         return input
 60 | 
 61 |     def get_output_shape_for(self, input_shape):
 62 | 
 63 |         broadcasted_shapes = [input_shape[ax] for ax in self.broadcasted_axes]
 64 | 
 65 |         if None not in broadcasted_shapes:
 66 |             new_batch_size = np.prod(broadcasted_shapes)
 67 |         else:
 68 |             new_batch_size = None
 69 | 
 70 |         non_broadcasted_shapes = tuple(input_shape[ax] for ax in self.non_broadcasted_axes)
 71 | 
 72 |         return (new_batch_size,) + non_broadcasted_shapes
 73 | 
 74 | 
 75 | class UnbroadcastLayer(Layer):
 76 |     """
 77 |     Does the inverse of BroadcastLayer
 78 |     :param incoming: a layer to be unbroadcasted. (!) Must have same number of dimensions as before broadcasting
 79 |     :type incoming: Layer
 80 |     :param broadcast_layer: a broadcasting to be be undone
 81 |     :type broadcast_layer: BroadcastLayer
 82 | 
 83 |     """
 84 | 
 85 |     def __init__(self, incoming, broadcast_layer, **kwargs):
 86 |         self.broadcast_layer = broadcast_layer
 87 | 
 88 |         #assert that dimensionality is same as before broadcast
 89 |         assert len(incoming.output_shape) == len(self.broadcast_layer.output_shape)
 90 | 
 91 |         super(UnbroadcastLayer, self).__init__(incoming, **kwargs)
 92 | 
 93 |     def get_output_for(self, input, **kwargs):
 94 |         """
 95 |         Un-broadcasts the broadcast layer (see class description)
 96 |         :param input: input tensor
 97 |         :param kwargs: no effect
 98 |         :return: un-broadcasted tensor
 99 |         """
100 | 
101 |         if not hasattr(self.broadcast_layer,"symbolic_input_shape"):
102 |             raise ValueError("UnbroadcastLayer.get_output_for must be called after respective BroadcastLayer.get_output_for")
103 | 
104 |         # symbolic shape. dirty hack to handle "None" axes
105 |         pre_broadcast_shape = self.broadcast_layer.symbolic_input_shape
106 | 
107 |         broadcasted_axes_shapes = tuple(pre_broadcast_shape[ax] for ax in self.broadcast_layer.broadcasted_axes)
108 | 
109 |         # convert shape from [bc_ax0*bc_ax1*.., non_bc_ax0, non_bc_ax1,...] to [bc_ax0,bc_ax1,...,non_bc_ax0,non_bc_ax1,...]
110 |         unrolled_shape = broadcasted_axes_shapes + tuple(input.shape)[1:]
111 |         input = input.reshape(unrolled_shape)
112 | 
113 |         # rearrange axes to their order before broadcasting
114 |         current_dim_order = self.broadcast_layer.broadcasted_axes + self.broadcast_layer.non_broadcasted_axes
115 | 
116 |         dimshuffle_order = [current_dim_order.index(i) for i in range(len(current_dim_order))]
117 | 
118 |         return input.dimshuffle(dimshuffle_order)
119 | 
120 | 
121 |     def get_output_shape_for(self, input_shape, **kwargs):
122 | 
123 |         new_non_broadcast_shapes = input_shape[1:]
124 | 
125 |         # this one is NOT symbolic. list() is used as a shallow copy op.
126 |         original_shape = list(self.broadcast_layer.input_shape)
127 | 
128 |         # set new non-broadcasted axes shapes instead of old ones
129 |         for ax,new_ax_shape in zip(self.broadcast_layer.non_broadcasted_axes,
130 |                              new_non_broadcast_shapes):
131 |             original_shape[ax] = new_ax_shape
132 | 
133 |         #return updated shape
134 |         return tuple(original_shape)


--------------------------------------------------------------------------------
/Seminar11/pretrained_lenet.py:
--------------------------------------------------------------------------------
  1 | from lasagne.layers import InputLayer
  2 | from lasagne.layers import DenseLayer
  3 | from lasagne.layers import ConcatLayer
  4 | from lasagne.layers import NonlinearityLayer
  5 | from lasagne.layers import GlobalPoolLayer
  6 | from lasagne.layers import Conv2DLayer as ConvLayer
  7 | from lasagne.layers import MaxPool2DLayer as PoolLayerDNN
  8 | from lasagne.layers import MaxPool2DLayer as PoolLayer
  9 | from lasagne.layers import LocalResponseNormalization2DLayer as LRNLayer
 10 | from lasagne.nonlinearities import softmax, linear
 11 | 
 12 | 
 13 | def build_inception_module(name, input_layer, nfilters):
 14 |     # nfilters: (pool_proj, 1x1, 3x3_reduce, 3x3, 5x5_reduce, 5x5)
 15 |     net = {}
 16 |     net['pool'] = PoolLayerDNN(input_layer, pool_size=3, stride=1, pad=1)
 17 |     net['pool_proj'] = ConvLayer(net['pool'], nfilters[0], 1)
 18 | 
 19 |     net['1x1'] = ConvLayer(input_layer, nfilters[1], 1)
 20 | 
 21 |     net['3x3_reduce'] = ConvLayer(input_layer, nfilters[2], 1)
 22 |     net['3x3'] = ConvLayer(net['3x3_reduce'], nfilters[3], 3, pad=1)
 23 | 
 24 |     net['5x5_reduce'] = ConvLayer(input_layer, nfilters[4], 1)
 25 |     net['5x5'] = ConvLayer(net['5x5_reduce'], nfilters[5], 5, pad=2)
 26 | 
 27 |     net['output'] = ConcatLayer([
 28 |         net['1x1'],
 29 |         net['3x3'],
 30 |         net['5x5'],
 31 |         net['pool_proj'],
 32 |         ])
 33 | 
 34 |     return {'{}/{}'.format(name, k): v for k, v in net.items()}
 35 | 
 36 | 
 37 | def build_model():
 38 |     net = {}
 39 |     net['input'] = InputLayer((None, 3, None, None))
 40 |     net['conv1/7x7_s2'] = ConvLayer(net['input'], 64, 7, stride=2, pad=3)
 41 |     net['pool1/3x3_s2'] = PoolLayer(net['conv1/7x7_s2'],
 42 |                                     pool_size=3,
 43 |                                     stride=2,
 44 |                                     ignore_border=False)
 45 |     net['pool1/norm1'] = LRNLayer(net['pool1/3x3_s2'], alpha=0.00002, k=1)
 46 |     net['conv2/3x3_reduce'] = ConvLayer(net['pool1/norm1'], 64, 1)
 47 |     net['conv2/3x3'] = ConvLayer(net['conv2/3x3_reduce'], 192, 3, pad=1)
 48 |     net['conv2/norm2'] = LRNLayer(net['conv2/3x3'], alpha=0.00002, k=1)
 49 |     net['pool2/3x3_s2'] = PoolLayer(net['conv2/norm2'], pool_size=3, stride=2)
 50 | 
 51 |     net.update(build_inception_module('inception_3a',
 52 |                                       net['pool2/3x3_s2'],
 53 |                                       [32, 64, 96, 128, 16, 32]))
 54 |     net.update(build_inception_module('inception_3b',
 55 |                                       net['inception_3a/output'],
 56 |                                       [64, 128, 128, 192, 32, 96]))
 57 |     net['pool3/3x3_s2'] = PoolLayer(net['inception_3b/output'],
 58 |                                     pool_size=3, stride=2)
 59 | 
 60 |     net.update(build_inception_module('inception_4a',
 61 |                                       net['pool3/3x3_s2'],
 62 |                                       [64, 192, 96, 208, 16, 48]))
 63 |     net.update(build_inception_module('inception_4b',
 64 |                                       net['inception_4a/output'],
 65 |                                       [64, 160, 112, 224, 24, 64]))
 66 |     net.update(build_inception_module('inception_4c',
 67 |                                       net['inception_4b/output'],
 68 |                                       [64, 128, 128, 256, 24, 64]))
 69 |     net.update(build_inception_module('inception_4d',
 70 |                                       net['inception_4c/output'],
 71 |                                       [64, 112, 144, 288, 32, 64]))
 72 |     net.update(build_inception_module('inception_4e',
 73 |                                       net['inception_4d/output'],
 74 |                                       [128, 256, 160, 320, 32, 128]))
 75 |     net['pool4/3x3_s2'] = PoolLayer(net['inception_4e/output'],
 76 |                                     pool_size=3, stride=2)
 77 | 
 78 |     net.update(build_inception_module('inception_5a',
 79 |                                       net['pool4/3x3_s2'],
 80 |                                       [128, 256, 160, 320, 32, 128]))
 81 |     net.update(build_inception_module('inception_5b',
 82 |                                       net['inception_5a/output'],
 83 |                                       [128, 384, 192, 384, 48, 128]))
 84 | 
 85 |     net['pool5/7x7_s1'] = GlobalPoolLayer(net['inception_5b/output'])
 86 |     net['loss3/classifier'] = DenseLayer(net['pool5/7x7_s1'],
 87 |                                          num_units=1000,
 88 |                                          nonlinearity=linear)
 89 |     net['prob'] = NonlinearityLayer(net['loss3/classifier'],
 90 |                                     nonlinearity=softmax)
 91 |     return net
 92 | 
 93 | 
 94 | import skimage.transform
 95 | import numpy as np
 96 | MEAN_VALUES = np.array([104, 117, 123]).reshape((3,1,1))
 97 | def preprocess(im):
 98 |     if len(im.shape) == 2:
 99 |         im = im[:, :, np.newaxis]
100 |         im = np.repeat(im, 3, axis=2)
101 |     # Resize so smallest dim = 224, preserving aspect ratio
102 |     h, w, _ = im.shape
103 |     if h < w:
104 |         im = skimage.transform.resize(im, (224, w*224//h), preserve_range=True)
105 |     else:
106 |         im = skimage.transform.resize(im, (h*224//w, 224), preserve_range=True)
107 | 
108 |     # Central crop to 224x224
109 |     h, w, _ = im.shape
110 |     im = im[h//2-112:h//2+112, w//2-112:w//2+112]
111 |     
112 |     rawim = np.copy(im).astype('uint8')
113 |     
114 |     # Shuffle axes to c01
115 |     im = np.swapaxes(np.swapaxes(im, 1, 2), 0, 1)
116 |     
117 |     # Convert to BGR
118 |     im = im[::-1, :, :]
119 | 
120 |     im = im - MEAN_VALUES
121 |     return im[np.newaxis].astype('float32')
122 | 


--------------------------------------------------------------------------------
/Seminar2/README.md:
--------------------------------------------------------------------------------
1 | Materials you may want to view:
2 | - [main stuff from cs231](http://cs231n.github.io/linear-classify/)
3 | - [wikipedia :)](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), expecially the "extensions and variants" section
4 | - [RMSPROP video](https://www.youtube.com/watch?v=defQQqkXEfE)
5 | 


--------------------------------------------------------------------------------
/Seminar3/HW3_Differentiation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Home work 3: Differentiation "
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Since it easy to google every task please please please try to undestand what's going on. The \"just answer\" thing will be not counted, make sure to present derivation of your solution. It is absolutely OK if you found an answer on web then just exercise in $\\LaTeX$ copying it into here."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Useful links: \n",
 22 |     "[1](http://www.machinelearning.ru/wiki/images/2/2a/Matrix-Gauss.pdf)\n",
 23 |     "[2](http://www.atmos.washington.edu/~dennis/MatrixCalculus.pdf)\n",
 24 |     "[3](http://cal.cs.illinois.edu/~johannes/research/matrix%20calculus.pdf)\n",
 25 |     "[4](http://research.microsoft.com/en-us/um/people/cmbishop/prml/index.htm)"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "## ex. 1"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "metadata": {},
 38 |    "source": [
 39 |     "$$  \n",
 40 |     "y = x^Tx,  \\quad x \\in \\mathbb{R}^N \n",
 41 |     "$$"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "$$\n",
 49 |     "\\frac{dy}{dx} = \n",
 50 |     "$$ "
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": null,
 56 |    "metadata": {
 57 |     "collapsed": true
 58 |    },
 59 |    "outputs": [],
 60 |    "source": []
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "## ex. 2"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "markdown",
 71 |    "metadata": {},
 72 |    "source": [
 73 |     "$$ y = tr(AB) \\quad A,B \\in \\mathbb{R}^{N \\times N} $$ "
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "markdown",
 78 |    "metadata": {},
 79 |    "source": [
 80 |     "$$\n",
 81 |     "\\frac{dy}{dA} =\n",
 82 |     "$$"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": null,
 88 |    "metadata": {
 89 |     "collapsed": true
 90 |    },
 91 |    "outputs": [],
 92 |    "source": []
 93 |   },
 94 |   {
 95 |    "cell_type": "markdown",
 96 |    "metadata": {},
 97 |    "source": [
 98 |     "## ex. 3"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "markdown",
103 |    "metadata": {},
104 |    "source": [
105 |     "$$  \n",
106 |     "y = x^TAc , \\quad A\\in \\mathbb{R}^{N \\times N}, x\\in \\mathbb{R}^{N}, c\\in \\mathbb{R}^{N} \n",
107 |     "$$"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "metadata": {},
113 |    "source": [
114 |     "$$\n",
115 |     "\\frac{dy}{dx} =\n",
116 |     "$$"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "markdown",
121 |    "metadata": {},
122 |    "source": [
123 |     "$$\n",
124 |     "\\frac{dy}{dA} =\n",
125 |     "$$ "
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "markdown",
130 |    "metadata": {},
131 |    "source": [
132 |     "Hint for the latter (one of the ways): use *ex. 2* result and the fact \n",
133 |     "$$\n",
134 |     "tr(ABC) = tr (CAB)\n",
135 |     "$$"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": null,
141 |    "metadata": {
142 |     "collapsed": true
143 |    },
144 |    "outputs": [],
145 |    "source": []
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "## ex. 4"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "markdown",
156 |    "metadata": {},
157 |    "source": [
158 |     "Classic matrix factorization example. Given matrix $X$ you need to find $A$, $S$ to approximate $X$. This can be done by simple gradient descent iteratively alternating $A$ and $S$ updates.\n",
159 |     "$$\n",
160 |     "J = || X - AS ||_2^2  , \\quad A\\in \\mathbb{R}^{N \\times R} , \\quad S\\in \\mathbb{R}^{R \\times M}\n",
161 |     "$$\n",
162 |     "$$\n",
163 |     "\\frac{dJ}{dS} = ? \n",
164 |     "$$ "
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "markdown",
169 |    "metadata": {},
170 |    "source": [
171 |     "### First approach\n",
172 |     "Using ex.2 and the fact:\n",
173 |     "$$\n",
174 |     "|| X ||_2^2 = tr(XX^T) \n",
175 |     "$$ \n",
176 |     "it is easy to derive gradients (you can find it in one of the refs). "
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "markdown",
181 |    "metadata": {
182 |     "collapsed": true
183 |    },
184 |    "source": [
185 |     "### Second approach\n",
186 |     "You can use *slightly different techniques* if they suits you. Take a look at this derivation:\n",
187 |     "<img src=\"grad.png\">\n",
188 |     "(excerpt from [Handbook of blind source separation, Jutten, page 517](https://books.google.ru/books?id=PTbj03bYH6kC&printsec=frontcover&dq=Handbook+of+Blind+Source+Separation&hl=en&sa=X&ved=0ahUKEwi-q_apiJDLAhULvXIKHVXJDWcQ6AEIHDAA#v=onepage&q=Handbook%20of%20Blind%20Source%20Separation&f=false), open for better picture)."
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "markdown",
193 |    "metadata": {
194 |     "collapsed": true
195 |    },
196 |    "source": [
197 |     "### Third approach\n",
198 |     "And finally we can use chain rule! **YOUR TURN** to do it.\n",
199 |     "let $ F = AS $ \n",
200 |     "\n",
201 |     "**Find**\n",
202 |     "$$\n",
203 |     "\\frac{dJ}{dF} =  \n",
204 |     "$$ \n",
205 |     "and \n",
206 |     "$$\n",
207 |     "\\frac{dF}{dS} =  \n",
208 |     "$$ \n",
209 |     "(the shape should be $ NM \\times RM )$.\n",
210 |     "\n",
211 |     "Now it is easy do get desired gradients:\n",
212 |     "$$\n",
213 |     "\\frac{dJ}{dS} =  \n",
214 |     "$$ "
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": null,
220 |    "metadata": {
221 |     "collapsed": true
222 |    },
223 |    "outputs": [],
224 |    "source": []
225 |   }
226 |  ],
227 |  "metadata": {
228 |   "kernelspec": {
229 |    "display_name": "Python 2",
230 |    "language": "python",
231 |    "name": "python2"
232 |   },
233 |   "language_info": {
234 |    "codemirror_mode": {
235 |     "name": "ipython",
236 |     "version": 2
237 |    },
238 |    "file_extension": ".py",
239 |    "mimetype": "text/x-python",
240 |    "name": "python",
241 |    "nbconvert_exporter": "python",
242 |    "pygments_lexer": "ipython2",
243 |    "version": "2.7.11"
244 |   }
245 |  },
246 |  "nbformat": 4,
247 |  "nbformat_minor": 0
248 | }
249 | 


--------------------------------------------------------------------------------
/Seminar3/README.md:
--------------------------------------------------------------------------------
1 | Materials
2 | - [Backprop by cs231](http://cs231n.github.io/optimization-2/)
3 | - [Notation](http://cs231n.github.io/neural-networks-1/#nn)
4 | - pretty much all the module 1 of http://cs231n.github.io/
5 | 


--------------------------------------------------------------------------------
/Seminar3/Seminar3_Differentiation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Seminar 3: Differentiation"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Let's go from simple to complex. \n",
 15 |     "\n",
 16 |     "Consider a function $ y = f(x) $ and we want to find its derivative\n",
 17 |     "-   let $ f : \\mathbb{R} \\rightarrow \\mathbb{R} $, everyone knows what to do\n",
 18 |     "-   let $ f : \\mathbb{R}^n \\rightarrow \\mathbb{R} $\n",
 19 |     "    \n",
 20 |     "    now x is a vector $ [x_1, x_2, \\dots x_n] $. The gradient is defined as a vector of partial direvatives\n",
 21 |     "    $$ \\frac{df}{dx} = [\\frac{\\partial f}{\\partial x_1}, \\frac{\\partial f}{\\partial x_2}, \\dots, \\frac{\\partial f}{\\partial x_n} ] $$\n",
 22 |     "\n",
 23 |     "    ***Note***: there are different conventions on what shape the gradient will have (column or row), choose whatever easier for you, but keep in mind other people may prefer different convention.\n",
 24 |     "    \n",
 25 |     "    \n",
 26 |     "-   let $ \\mathbf{f} : \\mathbb{R}^n \\rightarrow \\mathbb{R}^m $\n",
 27 |     "    \n",
 28 |     "    now $x$ is a vector $ [x_1, x_2, \\dots x_n] $ *and* $y$ is a vector $ [y_1, y_2, \\dots y_n] $. The derivative is expressed by the jacobian *matrix*. \n",
 29 |     "    \n",
 30 |     "$$\n",
 31 |     "    \\frac{d\\mathbf f}{d\\mathbf x} = \\begin{bmatrix}\n",
 32 |     "    \\dfrac{\\partial \\mathbf{f}}{\\partial x_1} & \\cdots & \\dfrac{\\partial \\mathbf{f}}{\\partial x_n} \\end{bmatrix}\n",
 33 |     "= \\begin{bmatrix}\n",
 34 |     "    \\dfrac{\\partial f_1}{\\partial x_1} & \\cdots & \\dfrac{\\partial f_1}{\\partial x_n}\\\\\n",
 35 |     "    \\vdots & \\ddots & \\vdots\\\\\n",
 36 |     "    \\dfrac{\\partial f_m}{\\partial x_1} & \\cdots & \\dfrac{\\partial f_m}{\\partial x_n} \\end{bmatrix}\n",
 37 |     "$$\n",
 38 |     "\n",
 39 |     "-   let $ \\mathbf{f} : \\mathbb{R}^{n  \\times k}  \\rightarrow \\mathbb{R}^{ m \\times p} $\n",
 40 |     "    \n",
 41 |     "    think of $x$ as of vector with $nk$ elements, $y$ as of vector with $mp$ elements, it is previous case now."
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "# Chain rule\n",
 49 |     "\n",
 50 |     "Let $$ L(x) = g(f(x)) $$\n",
 51 |     "\n",
 52 |     "We aim to find $\\nabla_x L$. Obvious, if $f,g: \\mathbb{R} \\rightarrow \\mathbb{R}$ using rule:  \n",
 53 |     "\n",
 54 |     "$$ \\frac{dL}{dx} = \\frac{dg}{df}\\frac{df}{dx}$$\n",
 55 |     "\n",
 56 |     "and practical formula:\n",
 57 |     "\n",
 58 |     "$$ \\left.\\frac{dL}{dx}\\right|_{x=x_0} = \\left.\\frac{dg}{df}\\right|_{u = f(x_0)} \\cdot \\left.\\frac{df}{dx}\\right|_{x=x_0} $$\n",
 59 |     "\n",
 60 |     "What's up with multidimensional case ? Barely the same. It is the sum of 1-dimentional chains.\n",
 61 |     "$$\n",
 62 |     "\\frac{\\partial L}{\\partial x_i} = \\sum_{j = 1}^m \\frac{\\partial g}{\\partial f_j} \\frac{\\partial f_j}{\\partial x_i}.\n",
 63 |     "$$"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "markdown",
 68 |    "metadata": {},
 69 |    "source": [
 70 |     "### Seminar practice"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "markdown",
 75 |    "metadata": {},
 76 |    "source": [
 77 |     "#### ex.1 (dot product)"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "$$  \n",
 85 |     "y = a^Tx = \\sum_{i=1}^N a_i x_i \\\\\n",
 86 |     "\\frac{\\partial y}{\\partial x_i} = a_i \\\\\n",
 87 |     "\\frac{dy}{dx} =  a\n",
 88 |     "$$ "
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "#### ex.2 (Matrix-vector multiplication)"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "metadata": {},
101 |    "source": [
102 |     "$$  \n",
103 |     "y = Ax , \\quad A \\in \\mathbb{R}^{M \\times N} \\\\\n",
104 |     "y_i = a_{i,:}^T x \\\\\n",
105 |     "\\frac{dy}{dx} = \\begin{bmatrix}\n",
106 |     "    a_{11} & \\cdots & a_{1n}\\\\\n",
107 |     "    \\vdots & \\ddots & \\vdots\\\\\n",
108 |     "    a_{m1} & \\cdots & a_{mn} \\end{bmatrix}  = A \\\\\n",
109 |     "$$ "
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "markdown",
114 |    "metadata": {},
115 |    "source": [
116 |     "#### ex.3 (Matrix-Matrix multiplication)"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "markdown",
121 |    "metadata": {},
122 |    "source": [
123 |     "$$  \n",
124 |     "F = AS , \\quad A \\in \\mathbb{R}^{M \\times N}, S \\in \\mathbb{R}^{N \\times K} \\\\\n",
125 |     "\\frac{dF}{dS} = ?\n",
126 |     "$$\n",
127 |     "\n",
128 |     "The result should be of shape $\\frac{dF}{dS} \\in \\mathbb{R}^{MK \\times NK}$ and let us vectorize column by column.\n",
129 |     "\n",
130 |     "When $K = 1$ it fallbacks to the previous example. Let's try $K = 2$ to build an intuition.\n",
131 |     "\n",
132 |     "Notice, that first column in $F$ does not depend on second column in $S$, and second column in $F$ does not depend on first column in $S$. And we already know what dependence (in terms of gradient) is between corresponding columns. Thus the answer is block-diagonal matrix:\n",
133 |     "\n",
134 |     "$$\n",
135 |     "\\frac{dF}{dS} = \\begin{bmatrix}\n",
136 |     "    A &  0\\\\\n",
137 |     "    0 & A \\end{bmatrix} \\\\\n",
138 |     "$$ \n",
139 |     "And in general case:\n",
140 |     "$$\n",
141 |     "\\frac{dF}{dS} = \\begin{bmatrix}\n",
142 |     "    A & \\cdots & 0\\\\\n",
143 |     "    \\vdots & \\ddots & \\vdots\\\\\n",
144 |     "    0 & \\cdots & A \\end{bmatrix} \\\\\n",
145 |     "$$ "
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "markdown",
150 |    "metadata": {},
151 |    "source": [
152 |     "#### ex. 4 (Chain rule)"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "markdown",
157 |    "metadata": {},
158 |    "source": [
159 |     "In this example you can recognize a model! It is simple linear regression with multiple objectives. \n",
160 |     "$$  L = || Ax - y ||_2^2 , \\quad A \\in \\mathbb{R}^{M \\times N}, x \\in \\mathbb{R}^{N}  $$ \n",
161 |     "Let $f = Ax$. Find $\\frac{dL}{dA}$ using chain rule. \n",
162 |     "\n",
163 |     "- Note, that\n",
164 |     "$$\n",
165 |     "|| Ax - y ||_2^2 = \\sum_{i=1}^{M} (A_{i,:}x - y_i)^2 \n",
166 |     "$$ \n",
167 |     "so you can easily find the gradient with respect to each row (the gradient w.r.t. vector is easier, isn't it?) and then stack these gradients to obtain gradient w.r.t. matrix $A$. **But we will go the hard way** and do it straightforward using chain rule. Let $f = Ax$ \n",
168 |     "\n",
169 |     "$$\n",
170 |     "L = || f - y ||_2^2 = (f-y)^T(f-y) = f^Tf - 2f^Ty + y^Ty \\\\\n",
171 |     "\\frac{dL}{df} = 2(f-y)\n",
172 |     "$$\n",
173 |     "\n",
174 |     "- Now hint, look at *ex.3* last result (block-diag matrix), what if we multiply something by this matrix ? In fact, suppose we vectorized a given matrix $B$ into vector $B_{vec}$ of size $N^2$ and we multiply a block-diagonal matrix of size $N^2 \\times N^2$ with $C$ on diagonal by $B_{vec}$. The resulting vector $D_{vec}$ has $N^2$ elements but if reshaped is exactly $D = CB^T$. This can look idiosyncratic for the first time but it is easy.\n",
175 |     "\n",
176 |     "- So what we should learn from the example above? That $\\frac{df}{dA}$ is something block-diagonal-like with $x$ on diagonal and the resulting $\\frac{dL}{dA}$ is just a multiplication of $\\frac{dL}{df}$ and $x$ (transpose something to get correct dimentions). Finally, \n",
177 |     "\n",
178 |     "$$\n",
179 |     "\\frac{df}{dA} = 2(f-y)x^T \n",
180 |     "$$\n",
181 |     "\n",
182 |     "\n"
183 |    ]
184 |   }
185 |  ],
186 |  "metadata": {
187 |   "kernelspec": {
188 |    "display_name": "Python 2",
189 |    "language": "python",
190 |    "name": "python2"
191 |   },
192 |   "language_info": {
193 |    "codemirror_mode": {
194 |     "name": "ipython",
195 |     "version": 2
196 |    },
197 |    "file_extension": ".py",
198 |    "mimetype": "text/x-python",
199 |    "name": "python",
200 |    "nbconvert_exporter": "python",
201 |    "pygments_lexer": "ipython2",
202 |    "version": "2.7.11"
203 |   }
204 |  },
205 |  "nbformat": 4,
206 |  "nbformat_minor": 0
207 | }
208 | 


--------------------------------------------------------------------------------
/Seminar3/Seminar3_NN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Seminar 3: Basic Artificial Neural Networks"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Neural Networks (NN) became popular due to many facts. One of them is *extensibility*. NN is composed of modules (blocks), where each module implements some functionality. By combining these modules one can build state-of-the-art NNs with existing NN packages. Recent NN wonderful ideas often require just defining a new module or slightly changing an existing one. This notebook should help you to understand what the modules are and what other abstractions are used in NNs. "
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {
 20 |     "collapsed": true
 21 |    },
 22 |    "source": [
 23 |     "At first, let's think of NN as of black box model (we don't care or know how it works inside, but when we ask it to do something it politely does). What functionality then should the black box implement to be practical? Well, the same as other discriminative models! \n",
 24 |     "- it should be able to give a predictions (let's call it **output**) if provided with **input** data\n",
 25 |     "- it should be learnable (there should be a mean to adapt model to the given data)\n",
 26 |     "\n",
 27 |     "The first point implies the black box should implement a function (we call it **forward**).\n",
 28 |     "\n",
 29 |     "$$\\text{output = NN.forward(input)}$$\n",
 30 |     "\n",
 31 |     "The second point means the model should be able to compute a gradient with respect to (w.r.t.) its parameters and return them to us. We would use this gradient to perform parameters update. The computation of the gradient is done during **backward** call.\n",
 32 |     "\n",
 33 |     "$$\\text{NN.backward(input, criterion (output, target))}$$\n",
 34 |     "\n",
 35 |     "and gradients retrieved with, lets say:\n",
 36 |     "\n",
 37 |     "$$\\text{gradParameters = NN.getGradParameters()}$$\n",
 38 |     "\n",
 39 |     "the **criterion** should tell quantively how wrong your model is if predicting **output** when **target** expected. \n",
 40 |     "\n",
 41 |     "After the *Seminar 2* it should be clear, how we use the gradient: we use one of the **optimizers** (*sgd*, *adaGrad*, *Adam*, *nag*) to perform parametrs update. "
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "### Summary\n",
 49 |     "At this point we have seen three important abstractions: \n",
 50 |     "- black box\n",
 51 |     "- criterion\n",
 52 |     "- optimizer\n",
 53 |     "\n",
 54 |     "### Workflow\n",
 55 |     "The workflow is split into 3 steps (yeah, kind of abstractions):\n",
 56 |     "- forward pass\n",
 57 |     "- backward pass\n",
 58 |     "- parameters update\n",
 59 |     "\n",
 60 |     "Let's detail furthur the workflow."
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {},
 66 |    "source": [
 67 |     "Forward pass: \n",
 68 |     "\n",
 69 |     "$$\n",
 70 |     "\\text{output = NN.forward(input)} \\\\\n",
 71 |     "\\text{loss =  criterion.forward(output, target)}\n",
 72 |     "$$\n",
 73 |     "\n",
 74 |     "Backward pass: \n",
 75 |     "\n",
 76 |     "$$\n",
 77 |     "\\text{NNGrad = criterion.backward(output, target)} \\\\\n",
 78 |     "\\text{NN.forward(input, NNGrad)} \\\\\n",
 79 |     "$$\n",
 80 |     "\n",
 81 |     "Parameters update:\n",
 82 |     "\n",
 83 |     "$$\n",
 84 |     "\\text{gradParameters = NN.getGradParameters()} \\\\\n",
 85 |     "\\text{optimizer.update(currentParams, gradParameters)} \\\\\n",
 86 |     "$$\n",
 87 |     "\n",
 88 |     "There can be slight technical variations, but the high level idea is always the same. It should be clear about forward pass and parameters update, the most struggling is to understand backprop. "
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "# White box"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "metadata": {},
101 |    "source": [
102 |     "Last thing before discussing backprop is to whiten our black box, we are old enough to know the truth. \n",
103 |     "\n",
104 |     "As said in introduction NN is composed of modules and surprisingly these modules are NNs too by definition! Remember, left or right child in binary tree is also a tree, and the leaves are trees themselfs. Kind of the same logic it is here too, but is about directed acyclic graphs (you can think of a chain for the first time). You can find \"starter\" and \"final\" nodes in these graphs (start and end of a chain), the data goes through the graph according to the directions, each node applies its **forward** function till the last node is reached. On backward pass the graph is traversed form \"final\" nodes to \"starter\" and each node applies **backward** function to whatever previous node passed. \n",
105 |     "\n",
106 |     "Here is one of the real-world NNs, the data goes from left to right. \n",
107 |     "\n",
108 |     "<img src=\"googlenet.png\">\n",
109 |     "\n",
110 |     "So the cool thing is: each node is a NN, every connected subgraph is NN. We defined everything we need already, you just need a set of \"simple\" NNs which are used as building blocks for comlex models! That is exactly what the NN packges implements for you and what you are to do in homework."
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "markdown",
115 |    "metadata": {},
116 |    "source": [
117 |     "### Backprop"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "markdown",
122 |    "metadata": {},
123 |    "source": [
124 |     "**Be careful!** In this section the variable $x$ designates the parameters in NN and not the input data. Think that we fixed the data now, and loss is a function of parametrs, we try to find the best parameters to lower the loss.\n",
125 |     "\n",
126 |     "Let's define as $ f(x) $ the function NN applies to input data and $ g(o) $ is a criterion. Then\n",
127 |     "$$ L(x) = g(f(x); target) $$\n",
128 |     "\n",
129 |     "We aim to find $\\nabla_x L$. Obvious, if $f,g: \\mathbb{R} \\rightarrow \\mathbb{R}$ using chain rule:  \n",
130 |     "\n",
131 |     "$$ \\frac{dL}{dx} = \\frac{dg}{df}\\frac{df}{dx}$$\n",
132 |     "\n",
133 |     "and practical formula:\n",
134 |     "\n",
135 |     "$$ \\left.\\frac{dL}{dx}\\right|_{x=x_0} = \\left.\\frac{dg}{df}\\right|_{u = f(x_0)} \\cdot \\left.\\frac{df}{dx}\\right|_{x=x_0} $$\n",
136 |     "\n",
137 |     "What's up with multidimensional case ? Barely the same. It is the sum of 1-dimentional chains.\n",
138 |     "$$\n",
139 |     "\\frac{\\partial L}{\\partial x_i} = \\sum_{j = 1}^m \\frac{\\partial L}{\\partial f_j} \\frac{\\partial f_j}{\\partial x_i}.\n",
140 |     "$$\n",
141 |     "\n",
142 |     "Actually that is all you need to write backprop functions! Go to differenciation notebook to for some practice before homework."
143 |    ]
144 |   }
145 |  ],
146 |  "metadata": {
147 |   "kernelspec": {
148 |    "display_name": "Python 2",
149 |    "language": "python",
150 |    "name": "python2"
151 |   },
152 |   "language_info": {
153 |    "codemirror_mode": {
154 |     "name": "ipython",
155 |     "version": 2
156 |    },
157 |    "file_extension": ".py",
158 |    "mimetype": "text/x-python",
159 |    "name": "python",
160 |    "nbconvert_exporter": "python",
161 |    "pygments_lexer": "ipython2",
162 |    "version": "2.7.11"
163 |   }
164 |  },
165 |  "nbformat": 4,
166 |  "nbformat_minor": 0
167 | }
168 | 


--------------------------------------------------------------------------------
/Seminar3/autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar3/autoencoder.png


--------------------------------------------------------------------------------
/Seminar3/googlenet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar3/googlenet.png


--------------------------------------------------------------------------------
/Seminar3/grad.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar3/grad.png


--------------------------------------------------------------------------------
/Seminar3/outdated/interpolation.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar3/outdated/interpolation.mp4


--------------------------------------------------------------------------------
/Seminar4/README.md:
--------------------------------------------------------------------------------
1 | More materials:
2 | - http://cs231n.github.io/convolutional-networks/
3 | - http://cs231n.github.io/understanding-cnn/
4 | - [a deep learning neophite cheat sheet](http://www.kdnuggets.com/2016/03/must-know-tips-deep-learning-part-1.html)
5 | - [more stuff for vision](https://bavm2013.splashthat.com/img/events/46439/assets/34a7.ranzato.pdf)
6 | - a [CNN trainer in a browser](https://cs.stanford.edu/people/karpathy/convnetjs/demo/cifar10.html)
7 | 


--------------------------------------------------------------------------------
/Seminar4/Seminar-intro-slide.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "from lasagne.layers import *\n",
 12 |     "from lasagne.nonlinearities import *\n",
 13 |     "from lasagne import init"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 2,
 19 |    "metadata": {
 20 |     "collapsed": false
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "nn = InputLayer([None,3,100,100])\n",
 25 |     "\n",
 26 |     "nn = Conv2DLayer(nn,num_filters=512, filter_size=(3,3),\n",
 27 |     "                 W = init.Constant(0))\n",
 28 |     "\n",
 29 |     "nn = Conv2DLayer(nn,num_filters=128,filter_size=(3,3),\n",
 30 |     "                 W = init.Constant(0))\n",
 31 |     "\n",
 32 |     "nn = Conv2DLayer(nn,num_filters=32,filter_size=(3,3),\n",
 33 |     "                 W = init.Constant(0))\n",
 34 |     "\n",
 35 |     "nn = Pool2DLayer(nn,pool_size=(6,6),mode='max')\n",
 36 |     "\n",
 37 |     "nn = Conv2DLayer(nn,num_filters=8,filter_size=(10,10),\n",
 38 |     "                W = init.Normal(std=0.01))\n",
 39 |     "\n",
 40 |     "nn = Conv2DLayer(nn,num_filters=8,filter_size=(10,10),\n",
 41 |     "                W = init.Normal(std=0.01))\n",
 42 |     "\n",
 43 |     "nn = Pool2DLayer(nn,pool_size=(3,3),mode='max')\n",
 44 |     "\n",
 45 |     "nn = DenseLayer(nn,512,nonlinearity=softmax)\n",
 46 |     "\n",
 47 |     "nn = DropoutLayer(nn,p=0.5)\n",
 48 |     "\n",
 49 |     "nn = DenseLayer(nn,512,nonlinearity=softmax)\n",
 50 |     "\n",
 51 |     "nn = DenseLayer(nn,10,nonlinearity=sigmoid)\n",
 52 |     "\n",
 53 |     "nn = DropoutLayer(nn,p=0.5)"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "markdown",
 58 |    "metadata": {},
 59 |    "source": [
 60 |     "```\n",
 61 |     "\n",
 62 |     "```\n",
 63 |     "\n",
 64 |     "```\n",
 65 |     "\n",
 66 |     "```\n",
 67 |     "\n",
 68 |     "```\n",
 69 |     "\n",
 70 |     "```\n",
 71 |     "\n",
 72 |     "```\n",
 73 |     "\n",
 74 |     "```\n",
 75 |     "\n",
 76 |     "```\n",
 77 |     "\n",
 78 |     "```\n",
 79 |     "\n",
 80 |     "```\n",
 81 |     "\n",
 82 |     "```\n",
 83 |     "\n",
 84 |     "```\n",
 85 |     "\n",
 86 |     "```\n",
 87 |     "\n",
 88 |     "```\n",
 89 |     "\n",
 90 |     "```\n",
 91 |     "\n",
 92 |     "```\n",
 93 |     "\n",
 94 |     "```\n",
 95 |     "\n",
 96 |     "```\n",
 97 |     "\n",
 98 |     "```\n",
 99 |     "\n",
100 |     "```\n",
101 |     "\n",
102 |     "```\n",
103 |     "\n",
104 |     "```\n",
105 |     "\n",
106 |     "```\n",
107 |     "\n",
108 |     "```\n",
109 |     "\n",
110 |     "```\n",
111 |     "\n",
112 |     "```\n",
113 |     "\n",
114 |     "```\n",
115 |     "\n",
116 |     "```\n",
117 |     "\n",
118 |     "```\n",
119 |     "\n",
120 |     "\n",
121 |     "# Book of grudges\n",
122 |     "* zero init for weights will cause symmetry effect\n",
123 |     "* Too many filters for first 3x3 convolution - will lead to enormous matrix while there's just not enough relevant combinations of 3x3 images (overkill).\n",
124 |     "* Usually the further you go, the more filters you need.\n",
125 |     "* large filters (10x10 is generally a bad pactice, and you definitely need more than 10 of them\n",
126 |     "* the second of 10x10 convolution gets 8x6x6 image as input, so it's technically unable to perform such convolution.\n",
127 |     "* Softmax nonlinearity effectively makes only 1 or a few neurons from the entire layer to \"fire\", rendering 512-neuron layer almost useless. Softmax at the output layer is okay though\n",
128 |     "* Dropout after probability prediciton is just lame. A few random classes get probability of 0, so your probabilities no longer sum to 1 and crossentropy goes -inf."
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": null,
134 |    "metadata": {
135 |     "collapsed": true
136 |    },
137 |    "outputs": [],
138 |    "source": []
139 |   }
140 |  ],
141 |  "metadata": {
142 |   "kernelspec": {
143 |    "display_name": "Python [Root]",
144 |    "language": "python",
145 |    "name": "Python [Root]"
146 |   },
147 |   "language_info": {
148 |    "codemirror_mode": {
149 |     "name": "ipython",
150 |     "version": 2
151 |    },
152 |    "file_extension": ".py",
153 |    "mimetype": "text/x-python",
154 |    "name": "python",
155 |    "nbconvert_exporter": "python",
156 |    "pygments_lexer": "ipython2",
157 |    "version": "2.7.12"
158 |   }
159 |  },
160 |  "nbformat": 4,
161 |  "nbformat_minor": 0
162 | }
163 | 


--------------------------------------------------------------------------------
/Seminar4/bonus/cifar.py:
--------------------------------------------------------------------------------
 1 | """I load some cifar"""
 2 | 
 3 | import numpy as np
 4 | from sklearn.cross_validation import train_test_split
 5 | import urllib2
 6 | import urllib
 7 | def unpickle(file):
 8 |     import cPickle
 9 |     fo = open(file, 'rb')
10 |     dict = cPickle.load(fo)
11 |     fo.close()
12 |     return dict
13 | 
14 | 
15 | import os
16 | def download_cifar10(path,
17 |                      url='https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz',
18 |                      tarname='cifar-10-python.tar.gz'):
19 |     import tarfile
20 |     if not os.path.exists(path):
21 |         os.mkdir(path)
22 |     
23 |         
24 | 
25 |     urllib.urlretrieve(url, os.path.join(path,tarname))
26 |     tfile = tarfile.open(os.path.join(path,tarname))
27 |     tfile.extractall(path=path)
28 |     
29 | 
30 | def load_cifar10(data_path=".",test_size=0.2,random_state=1337):
31 |     
32 |     test_path = os.path.join(data_path,"cifar-10-batches-py/test_batch")
33 |     train_paths = [os.path.join(data_path,"cifar-10-batches-py/data_batch_%i"%i) for i in range(1,6)]
34 |     
35 |     if not os.path.exists(test_path) or not all(list(map(os.path.exists, train_paths))):
36 |         print "Dataset not found. Downloading..."
37 |         download_cifar10(data_path)
38 | 
39 |     train_batches = list(map(unpickle,train_paths))
40 |     test_batch = unpickle(test_path)
41 | 
42 |     X = np.concatenate([batch["data"] for batch in train_batches]).reshape([-1,3,32,32]).astype('float32')/255
43 |     y = np.concatenate([batch["labels"] for batch in train_batches]).astype('int32')
44 |     X_train,X_val,y_train,y_val = train_test_split(X,y,
45 |                                                    test_size=test_size,
46 |                                                    random_state=random_state)
47 |     
48 |     X_test = test_batch["data"].reshape([-1,3,32,32]).astype('float32')/255
49 |     y_test = np.array(test_batch["labels"]).astype('int32')
50 |     
51 |     return X_train,y_train,X_val,y_val,X_test,y_test
52 |     


--------------------------------------------------------------------------------
/Seminar4/mnist.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import time
 4 | 
 5 | import numpy as np
 6 | 
 7 | __doc__="""taken from https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py"""
 8 | 
 9 | def load_dataset():
10 |     # We first define a download function, supporting both Python 2 and 3.
11 |     if sys.version_info[0] == 2:
12 |         from urllib import urlretrieve
13 |     else:
14 |         from urllib.request import urlretrieve
15 | 
16 |     def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
17 |         print("Downloading %s" % filename)
18 |         urlretrieve(source + filename, filename)
19 | 
20 |     # We then define functions for loading MNIST images and labels.
21 |     # For convenience, they also download the requested files if needed.
22 |     import gzip
23 | 
24 |     def load_mnist_images(filename):
25 |         if not os.path.exists(filename):
26 |             download(filename)
27 |         # Read the inputs in Yann LeCun's binary format.
28 |         with gzip.open(filename, 'rb') as f:
29 |             data = np.frombuffer(f.read(), np.uint8, offset=16)
30 |         # The inputs are vectors now, we reshape them to monochrome 2D images,
31 |         # following the shape convention: (examples, channels, rows, columns)
32 |         data = data.reshape(-1, 1, 28, 28)
33 |         # The inputs come as bytes, we convert them to float32 in range [0,1].
34 |         # (Actually to range [0, 255/256], for compatibility to the version
35 |         # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
36 |         return data / np.float32(256)
37 | 
38 |     def load_mnist_labels(filename):
39 |         if not os.path.exists(filename):
40 |             download(filename)
41 |         # Read the labels in Yann LeCun's binary format.
42 |         with gzip.open(filename, 'rb') as f:
43 |             data = np.frombuffer(f.read(), np.uint8, offset=8)
44 |         # The labels are vectors of integers now, that's exactly what we want.
45 |         return data
46 | 
47 |     # We can now download and read the training and test set images and labels.
48 |     X_train = load_mnist_images('train-images-idx3-ubyte.gz')
49 |     y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
50 |     X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
51 |     y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')
52 | 
53 |     # We reserve the last 10000 training examples for validation.
54 |     X_train, X_val = X_train[:-10000], X_train[-10000:]
55 |     y_train, y_val = y_train[:-10000], y_train[-10000:]
56 | 
57 |     # We just return all the arrays in order, as expected in main().
58 |     # (It doesn't matter how we do this as long as we can read them again.)
59 |     return X_train, y_train, X_val, y_val, X_test, y_test
60 | 
61 | 
62 | 
63 | 
64 | 


--------------------------------------------------------------------------------
/Seminar5/README.md:
--------------------------------------------------------------------------------
1 | More materials:
2 | - http://cs231n.github.io/transfer-learning/
3 | - lasagne [recipes](https://github.com/Lasagne/Recipes)
4 | - [a few words on soft-targets](http://www.kdnuggets.com/2015/05/dark-knowledge-neural-network.html)
5 | 


--------------------------------------------------------------------------------
/Seminar5/sample_images/albatross.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/albatross.jpg


--------------------------------------------------------------------------------
/Seminar5/sample_images/fox.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/fox.jpg


--------------------------------------------------------------------------------
/Seminar5/sample_images/frog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/frog.jpg


--------------------------------------------------------------------------------
/Seminar5/sample_images/hen.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/hen.jpg


--------------------------------------------------------------------------------
/Seminar5/sample_images/kermit.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/kermit.jpg


--------------------------------------------------------------------------------
/Seminar5/sample_images/kitten.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/kitten.jpg


--------------------------------------------------------------------------------
/Seminar5/sample_images/puppy.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/puppy.jpg


--------------------------------------------------------------------------------
/Seminar5/sample_images/steve_martin.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/steve_martin.jpg


--------------------------------------------------------------------------------
/Seminar5/sample_images/teapot.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/teapot.jpg


--------------------------------------------------------------------------------
/Seminar5/sample_images/tiger.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/tiger.jpg


--------------------------------------------------------------------------------
/Seminar6/custom/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar6/custom/__init__.py


--------------------------------------------------------------------------------
/Seminar6/custom/net.py:
--------------------------------------------------------------------------------
 1 | ################################################# You MIGHT need these imports.
 2 | import cPickle
 3 | 
 4 | from fast_rcnn.config import cfg
 5 | 
 6 | class Net(object):
 7 |   """A class for holding a symbolic representation of the neural network.
 8 |   Instances of this class are going to be used both in the solver and 
 9 |   in the tester. 
10 |   """
11 | 
12 |   def __init__(self, snapshot_path=None):
13 |     """Constructs a symbolic graph for a neural network.
14 | 
15 |     Arguments:
16 |       snapshot_path (str): path to the pretrained network
17 |     """
18 |     pass
19 | 
20 |   def save(self, filename):
21 |     """Saves model weights."""
22 |     pass
23 | 
24 |   @property
25 |   def input(self):
26 |     """Returns symbolic inputs of the model."""
27 |     pass
28 | 
29 |   @property
30 |   def prediction(self):
31 |     """Returns symbolic variable containing the model predictions."""
32 |     pass
33 | 
34 |   @property
35 |   def params(self):
36 |     """Returns shared variables containing the model weights."""
37 |     pass
38 | 
39 |   @property
40 |   def param_values(self):
41 |     """Returns a list of the model weights (values)."""
42 |     pass
43 | 


--------------------------------------------------------------------------------
/Seminar6/custom/solver.py:
--------------------------------------------------------------------------------
 1 | ################################################# You MIGHT need these imports.
 2 | from fast_rcnn.config import cfg
 3 | from net import Net
 4 | from roi_data_layer.layer import RoIDataLayer
 5 | 
 6 | class Solver(object):
 7 |   def __init__(self):
 8 |     # Holds current iteration number. 
 9 |     self.iter = 0
10 | 
11 |     # How frequently we should print the training info.
12 |     self.display_freq = 1
13 | 
14 |     # Holds the path prefix for snapshots.
15 |     self.snapshot_prefix = 'snapshot'
16 | 
17 |     ###################################################### Your code goes here.
18 | 
19 |   # This might be a useful static method to have.
20 |   @staticmethod
21 |   def build_step_fn(net):
22 |     """Takes a symbolic network and compiles a function for weights updates."""
23 |     pass
24 | 
25 |   def get_training_batch(self):
26 |     """Uses ROIDataLayer to fetch a training batch.
27 | 
28 |     Returns:
29 |       input_data (ndarray): input data suitable for R-CNN processing
30 |       labels (ndarray): batch labels (of type int32)
31 |     """
32 | 
33 |     ###################################################### Your code goes here.
34 | 
35 |     return input_data, labels
36 | 
37 |   def step(self):
38 |     """Conducts a single step of SGD."""
39 |     
40 |     ###################################################### Your code goes here.
41 |     # Among other things, assign the current loss value to self.loss.
42 | 
43 |     self.iter += 1
44 |     if self.iter % self.display_freq == 0:
45 |       print 'Iteration {:<5} Train loss: {}'.format(self.iter, self.loss)
46 | 
47 |   def save(self, filename):
48 |     """Saves model weights."""
49 |     pass
50 | 


--------------------------------------------------------------------------------
/Seminar6/custom/tester.py:
--------------------------------------------------------------------------------
 1 | ################################################### You MIGHT need this import.
 2 | from net import Net
 3 | 
 4 | class Tester(object):
 5 |   def __init__(self, snapshot_path):
 6 |     # The original Girshick's code requires this field to exist.
 7 |     self.name = ''
 8 | 
 9 |     ###################################################### Your code goes here.
10 |     # Load your network into, say, self.net.
11 | 
12 |   def forward(self, data, rois):
13 |     """Performs a forward pass through the neural network.
14 | 
15 |     Arguments:
16 |       data (ndarray): tensor containing the whole scenes (images)
17 |       rois (ndarray): tensor containg ROIs; rois[:, 0] are indices of scenes 
18 |                       in data, the rest are (left, top, bottom, right) 
19 |                       coordinates
20 | 
21 |     Returns:
22 |       output (dict): a dictionary with a single key 'cls_prob' holding
23 |                      probability distributions produced by the network
24 |     """
25 | 
26 |     ###################################################### Your code goes here.
27 |     # You should have the following line:
28 |     # output = {'cls_prob': net_output}.
29 | 
30 |     return output
31 | 


--------------------------------------------------------------------------------
/Seminar6/data/.gitignore:
--------------------------------------------------------------------------------
1 | selective_search*
2 | imagenet_models*
3 | fast_rcnn_models*
4 | VOCdevkit*
5 | cache
6 | 


--------------------------------------------------------------------------------
/Seminar6/data/pylintrc:
--------------------------------------------------------------------------------
1 | [TYPECHECK]
2 | 
3 | ignored-modules = numpy, numpy.random, cv2
4 | 


--------------------------------------------------------------------------------
/Seminar6/data/scripts/fetch_selective_search_data.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
 4 | cd $DIR
 5 | 
 6 | FILE=selective_search_data.tgz
 7 | URL=http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/$FILE
 8 | CHECKSUM=7078c1db87a7851b31966b96774cd9b9
 9 | 
10 | if [ -f $FILE ]; then
11 |   echo "File already exists. Checking md5..."
12 |   os=`uname -s`
13 |   if [ "$os" = "Linux" ]; then
14 |     checksum=`md5sum $FILE | awk '{ print $1 }'`
15 |   elif [ "$os" = "Darwin" ]; then
16 |     checksum=`cat $FILE | md5`
17 |   fi
18 |   if [ "$checksum" = "$CHECKSUM" ]; then
19 |     echo "Checksum is correct. No need to download."
20 |     exit 0
21 |   else
22 |     echo "Checksum is incorrect. Need to download again."
23 |   fi
24 | fi
25 | 
26 | echo "Downloading precomputed selective search boxes (0.5G)..."
27 | 
28 | wget $URL -O $FILE
29 | 
30 | echo "Unzipping..."
31 | 
32 | tar zxvf $FILE
33 | 
34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
35 | 


--------------------------------------------------------------------------------
/Seminar6/experiments/cfgs/rcnn.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: rcnn
2 | TRAIN:
3 |   PROPOSAL_METHOD: 'selective_search' # or 'rpn'
4 |   SNAPSHOT_ITERS: 5000
5 |   BBOX_REG: False
6 | TEST:
7 |   PROPOSAL_METHOD: 'selective_search' # or 'rpn'
8 |   BBOX_REG: False


--------------------------------------------------------------------------------
/Seminar6/experiments/scripts/fast_rcnn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Usage:
 3 | # ./experiments/scripts/fast_rcnn.sh DATASET [options args to {train,test}_net.py]
 4 | # DATASET is either pascal_voc or coco.
 5 | #
 6 | # Example:
 7 | # ./experiments/scripts/fast_rcnn.sh pascal_voc
 8 | 
 9 | set -x
10 | set -e
11 | 
12 | export PYTHONUNBUFFERED="True"
13 | 
14 | DATASET=$1
15 | 
16 | array=( $@ )
17 | len=${#array[@]}
18 | EXTRA_ARGS=${array[@]:3:$len}
19 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
20 | 
21 | case $DATASET in
22 |   pascal_voc)
23 |     TRAIN_IMDB="voc_2007_trainval"
24 |     TEST_IMDB="voc_2007_test"
25 |     ITERS=40000
26 |     ;;
27 |   *)
28 |     echo "No dataset given"
29 |     exit
30 |     ;;
31 | esac
32 | 
33 | LOG="experiments/logs/fast_rcnn_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
34 | exec &> >(tee -a "$LOG")
35 | echo Logging output to "$LOG"
36 | 
37 | time ./tools/train_net.py \
38 |   --imdb ${TRAIN_IMDB} \
39 |   --iters ${ITERS} \
40 |   ${EXTRA_ARGS}
41 | 
42 | set +x
43 | NET_FINAL=`grep -B 1 "done solving" ${LOG} | grep "Wrote snapshot" | awk '{print $4}'`
44 | set -x
45 | 
46 | time ./tools/test_net.py \
47 |   --imdb ${TEST_IMDB} \
48 |   ${EXTRA_ARGS}
49 | 


--------------------------------------------------------------------------------
/Seminar6/experiments/scripts/test_rcnn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -x
 3 | set -e
 4 | 
 5 | export PYTHONUNBUFFERED="True"
 6 | 
 7 | SNAPSHOT=$1
 8 | 
 9 | TEST_IMDB="voc_2007_test"
10 | 
11 | LOG="experiments/logs/rcnn.test.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
12 | exec &> >(tee -a "$LOG")
13 | echo Logging output to "$LOG"
14 | 
15 | time ./tools/test_net.py \
16 |   --snapshot $SNAPSHOT \
17 |   --imdb ${TEST_IMDB} \
18 |   --cfg ./experiments/cfgs/rcnn.yml
19 | 


--------------------------------------------------------------------------------
/Seminar6/experiments/scripts/train_rcnn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -x
 3 | set -e
 4 | 
 5 | export PYTHONUNBUFFERED="True"
 6 | 
 7 | ITERS=$1
 8 | 
 9 | TRAIN_IMDB="voc_2007_trainval"
10 | 
11 | LOG="experiments/logs/rcnn.train.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
12 | exec &> >(tee -a "$LOG")
13 | echo Logging output to "$LOG"
14 | 
15 | time ./tools/train_net.py \
16 |   --imdb ${TRAIN_IMDB} \
17 |   --iters ${ITERS} \
18 |   --cfg ./experiments/cfgs/rcnn.yml
19 | 


--------------------------------------------------------------------------------
/Seminar6/lib/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	python setup.py build_ext --inplace
3 | 	rm -rf build
4 | 
5 | clean:
6 | 	find ./ -name *.so -type f -delete
7 | 


--------------------------------------------------------------------------------
/Seminar6/lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m:
--------------------------------------------------------------------------------
 1 | function VOCopts = get_voc_opts(path)
 2 | 
 3 | tmp = pwd;
 4 | cd(path);
 5 | try
 6 |   addpath('VOCcode');
 7 |   VOCinit;
 8 | catch
 9 |   rmpath('VOCcode');
10 |   cd(tmp);
11 |   error(sprintf('VOCcode directory not found under %s', path));
12 | end
13 | rmpath('VOCcode');
14 | cd(tmp);
15 | 


--------------------------------------------------------------------------------
/Seminar6/lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m:
--------------------------------------------------------------------------------
 1 | function res = voc_eval(path, comp_id, test_set, output_dir)
 2 | 
 3 | VOCopts = get_voc_opts(path);
 4 | VOCopts.testset = test_set;
 5 | 
 6 | for i = 1:length(VOCopts.classes)
 7 |   cls = VOCopts.classes{i};
 8 |   res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir);
 9 | end
10 | 
11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
12 | fprintf('Results:\n');
13 | aps = [res(:).ap]';
14 | fprintf('%.1f\n', aps * 100);
15 | fprintf('%.1f\n', mean(aps) * 100);
16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n');
17 | 
18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)
19 | 
20 | test_set = VOCopts.testset;
21 | year = VOCopts.dataset(4:end);
22 | 
23 | addpath(fullfile(VOCopts.datadir, 'VOCcode'));
24 | 
25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls);
26 | 
27 | recall = [];
28 | prec = [];
29 | ap = 0;
30 | ap_auc = 0;
31 | 
32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');
33 | if do_eval
34 |   % Bug in VOCevaldet requires that tic has been called first
35 |   tic;
36 |   [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
37 |   ap_auc = xVOCap(recall, prec);
38 | 
39 |   % force plot limits
40 |   ylim([0 1]);
41 |   xlim([0 1]);
42 | 
43 |   print(gcf, '-djpeg', '-r0', ...
44 |         [output_dir '/' cls '_pr.jpg']);
45 | end
46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);
47 | 
48 | res.recall = recall;
49 | res.prec = prec;
50 | res.ap = ap;
51 | res.ap_auc = ap_auc;
52 | 
53 | save([output_dir '/' cls '_pr.mat'], ...
54 |      'res', 'recall', 'prec', 'ap', 'ap_auc');
55 | 
56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode'));
57 | 


--------------------------------------------------------------------------------
/Seminar6/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m:
--------------------------------------------------------------------------------
 1 | function ap = xVOCap(rec,prec)
 2 | % From the PASCAL VOC 2011 devkit
 3 | 
 4 | mrec=[0 ; rec ; 1];
 5 | mpre=[0 ; prec ; 0];
 6 | for i=numel(mpre)-1:-1:1
 7 |     mpre(i)=max(mpre(i),mpre(i+1));
 8 | end
 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1;
10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
11 | 


--------------------------------------------------------------------------------
/Seminar6/lib/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/Seminar6/lib/datasets/ds_utils.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast/er R-CNN
 3 | # Licensed under The MIT License [see LICENSE for details]
 4 | # Written by Ross Girshick
 5 | # --------------------------------------------------------
 6 | 
 7 | import numpy as np
 8 | 
 9 | def unique_boxes(boxes, scale=1.0):
10 |     """Return indices of unique boxes."""
11 |     v = np.array([1, 1e3, 1e6, 1e9])
12 |     hashes = np.round(boxes * scale).dot(v)
13 |     _, index = np.unique(hashes, return_index=True)
14 |     return np.sort(index)
15 | 
16 | def xywh_to_xyxy(boxes):
17 |     """Convert [x y w h] box format to [x1 y1 x2 y2] format."""
18 |     return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1))
19 | 
20 | def xyxy_to_xywh(boxes):
21 |     """Convert [x1 y1 x2 y2] box format to [x y w h] format."""
22 |     return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1))
23 | 
24 | def validate_boxes(boxes, width=0, height=0):
25 |     """Check that a set of boxes are valid."""
26 |     x1 = boxes[:, 0]
27 |     y1 = boxes[:, 1]
28 |     x2 = boxes[:, 2]
29 |     y2 = boxes[:, 3]
30 |     assert (x1 >= 0).all()
31 |     assert (y1 >= 0).all()
32 |     assert (x2 >= x1).all()
33 |     assert (y2 >= y1).all()
34 |     assert (x2 < width).all()
35 |     assert (y2 < height).all()
36 | 
37 | def filter_small_boxes(boxes, min_size):
38 |     w = boxes[:, 2] - boxes[:, 0]
39 |     h = boxes[:, 3] - boxes[:, 1]
40 |     keep = np.where((w >= min_size) & (h > min_size))[0]
41 |     return keep
42 | 


--------------------------------------------------------------------------------
/Seminar6/lib/datasets/factory.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Factory method for easily getting imdbs by name."""
 9 | 
10 | __sets = {}
11 | 
12 | from datasets.pascal_voc import pascal_voc
13 | import numpy as np
14 | 
15 | # Set up voc_<year>_<split> using selective search "fast" mode
16 | for year in ['2007', '2012']:
17 |     for split in ['train', 'val', 'trainval', 'test']:
18 |         name = 'voc_{}_{}'.format(year, split)
19 |         __sets[name] = (lambda split=split, year=year: pascal_voc(split, year))
20 | 
21 | def get_imdb(name):
22 |     """Get an imdb (image database) by name."""
23 |     if not __sets.has_key(name):
24 |         raise KeyError('Unknown dataset: {}'.format(name))
25 |     return __sets[name]()
26 | 
27 | def list_imdbs():
28 |     """List all registered imdbs."""
29 |     return __sets.keys()
30 | 


--------------------------------------------------------------------------------
/Seminar6/lib/datasets/imdb.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | import os
  9 | import os.path as osp
 10 | import PIL
 11 | from utils.cython_bbox import bbox_overlaps
 12 | import numpy as np
 13 | import scipy.sparse
 14 | from fast_rcnn.config import cfg
 15 | 
 16 | class imdb(object):
 17 |     """Image database."""
 18 | 
 19 |     def __init__(self, name):
 20 |         self._name = name
 21 |         self._num_classes = 0
 22 |         self._classes = []
 23 |         self._image_index = []
 24 |         self._obj_proposer = 'selective_search'
 25 |         self._roidb = None
 26 |         self._roidb_handler = self.default_roidb
 27 |         # Use this dict for storing dataset specific config options
 28 |         self.config = {}
 29 | 
 30 |     @property
 31 |     def name(self):
 32 |         return self._name
 33 | 
 34 |     @property
 35 |     def num_classes(self):
 36 |         return len(self._classes)
 37 | 
 38 |     @property
 39 |     def classes(self):
 40 |         return self._classes
 41 | 
 42 |     @property
 43 |     def image_index(self):
 44 |         return self._image_index
 45 | 
 46 |     @property
 47 |     def roidb_handler(self):
 48 |         return self._roidb_handler
 49 | 
 50 |     @roidb_handler.setter
 51 |     def roidb_handler(self, val):
 52 |         self._roidb_handler = val
 53 | 
 54 |     def set_proposal_method(self, method):
 55 |         method = eval('self.' + method + '_roidb')
 56 |         self.roidb_handler = method
 57 | 
 58 |     @property
 59 |     def roidb(self):
 60 |         # A roidb is a list of dictionaries, each with the following keys:
 61 |         #   boxes
 62 |         #   gt_overlaps
 63 |         #   gt_classes
 64 |         #   flipped
 65 |         if self._roidb is not None:
 66 |             return self._roidb
 67 |         self._roidb = self.roidb_handler()
 68 |         return self._roidb
 69 | 
 70 |     @property
 71 |     def cache_path(self):
 72 |         cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache'))
 73 |         if not os.path.exists(cache_path):
 74 |             os.makedirs(cache_path)
 75 |         return cache_path
 76 | 
 77 |     @property
 78 |     def num_images(self):
 79 |       return len(self.image_index)
 80 | 
 81 |     def image_path_at(self, i):
 82 |         raise NotImplementedError
 83 | 
 84 |     def default_roidb(self):
 85 |         raise NotImplementedError
 86 | 
 87 |     def evaluate_detections(self, all_boxes, output_dir=None):
 88 |         """
 89 |         all_boxes is a list of length number-of-classes.
 90 |         Each list element is a list of length number-of-images.
 91 |         Each of those list elements is either an empty list []
 92 |         or a numpy array of detection.
 93 | 
 94 |         all_boxes[class][image] = [] or np.array of shape #dets x 5
 95 |         """
 96 |         raise NotImplementedError
 97 | 
 98 |     def _get_widths(self):
 99 |       return [PIL.Image.open(self.image_path_at(i)).size[0]
100 |               for i in xrange(self.num_images)]
101 | 
102 |     def append_flipped_images(self):
103 |         num_images = self.num_images
104 |         widths = self._get_widths()
105 |         for i in xrange(num_images):
106 |             boxes = self.roidb[i]['boxes'].copy()
107 |             oldx1 = boxes[:, 0].copy()
108 |             oldx2 = boxes[:, 2].copy()
109 |             boxes[:, 0] = widths[i] - oldx2 - 1
110 |             boxes[:, 2] = widths[i] - oldx1 - 1
111 |             assert (boxes[:, 2] >= boxes[:, 0]).all()
112 |             entry = {'boxes' : boxes,
113 |                      'gt_overlaps' : self.roidb[i]['gt_overlaps'],
114 |                      'gt_classes' : self.roidb[i]['gt_classes'],
115 |                      'flipped' : True}
116 |             self.roidb.append(entry)
117 |         self._image_index = self._image_index * 2
118 | 
119 |     def evaluate_recall(self, candidate_boxes=None, thresholds=None,
120 |                         area='all', limit=None):
121 |         """Evaluate detection proposal recall metrics.
122 | 
123 |         Returns:
124 |             results: dictionary of results with keys
125 |                 'ar': average recall
126 |                 'recalls': vector recalls at each IoU overlap threshold
127 |                 'thresholds': vector of IoU overlap thresholds
128 |                 'gt_overlaps': vector of all ground-truth overlaps
129 |         """
130 |         # Record max overlap value for each gt box
131 |         # Return vector of overlap values
132 |         areas = { 'all': 0, 'small': 1, 'medium': 2, 'large': 3,
133 |                   '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
134 |         area_ranges = [ [0**2, 1e5**2],    # all
135 |                         [0**2, 32**2],     # small
136 |                         [32**2, 96**2],    # medium
137 |                         [96**2, 1e5**2],   # large
138 |                         [96**2, 128**2],   # 96-128
139 |                         [128**2, 256**2],  # 128-256
140 |                         [256**2, 512**2],  # 256-512
141 |                         [512**2, 1e5**2],  # 512-inf
142 |                       ]
143 |         assert areas.has_key(area), 'unknown area range: {}'.format(area)
144 |         area_range = area_ranges[areas[area]]
145 |         gt_overlaps = np.zeros(0)
146 |         num_pos = 0
147 |         for i in xrange(self.num_images):
148 |             # Checking for max_overlaps == 1 avoids including crowd annotations
149 |             # (...pretty hacking :/)
150 |             max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
151 |             gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
152 |                                (max_gt_overlaps == 1))[0]
153 |             gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
154 |             gt_areas = self.roidb[i]['seg_areas'][gt_inds]
155 |             valid_gt_inds = np.where((gt_areas >= area_range[0]) &
156 |                                      (gt_areas <= area_range[1]))[0]
157 |             gt_boxes = gt_boxes[valid_gt_inds, :]
158 |             num_pos += len(valid_gt_inds)
159 | 
160 |             if candidate_boxes is None:
161 |                 # If candidate_boxes is not supplied, the default is to use the
162 |                 # non-ground-truth boxes from this roidb
163 |                 non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
164 |                 boxes = self.roidb[i]['boxes'][non_gt_inds, :]
165 |             else:
166 |                 boxes = candidate_boxes[i]
167 |             if boxes.shape[0] == 0:
168 |                 continue
169 |             if limit is not None and boxes.shape[0] > limit:
170 |                 boxes = boxes[:limit, :]
171 | 
172 |             overlaps = bbox_overlaps(boxes.astype(np.float),
173 |                                      gt_boxes.astype(np.float))
174 | 
175 |             _gt_overlaps = np.zeros((gt_boxes.shape[0]))
176 |             for j in xrange(gt_boxes.shape[0]):
177 |                 # find which proposal box maximally covers each gt box
178 |                 argmax_overlaps = overlaps.argmax(axis=0)
179 |                 # and get the iou amount of coverage for each gt box
180 |                 max_overlaps = overlaps.max(axis=0)
181 |                 # find which gt box is 'best' covered (i.e. 'best' = most iou)
182 |                 gt_ind = max_overlaps.argmax()
183 |                 gt_ovr = max_overlaps.max()
184 |                 assert(gt_ovr >= 0)
185 |                 # find the proposal box that covers the best covered gt box
186 |                 box_ind = argmax_overlaps[gt_ind]
187 |                 # record the iou coverage of this gt box
188 |                 _gt_overlaps[j] = overlaps[box_ind, gt_ind]
189 |                 assert(_gt_overlaps[j] == gt_ovr)
190 |                 # mark the proposal box and the gt box as used
191 |                 overlaps[box_ind, :] = -1
192 |                 overlaps[:, gt_ind] = -1
193 |             # append recorded iou coverage level
194 |             gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))
195 | 
196 |         gt_overlaps = np.sort(gt_overlaps)
197 |         if thresholds is None:
198 |             step = 0.05
199 |             thresholds = np.arange(0.5, 0.95 + 1e-5, step)
200 |         recalls = np.zeros_like(thresholds)
201 |         # compute recall for each iou threshold
202 |         for i, t in enumerate(thresholds):
203 |             recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
204 |         # ar = 2 * np.trapz(recalls, thresholds)
205 |         ar = recalls.mean()
206 |         return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
207 |                 'gt_overlaps': gt_overlaps}
208 | 
209 |     def create_roidb_from_box_list(self, box_list, gt_roidb):
210 |         assert len(box_list) == self.num_images, \
211 |                 'Number of boxes must match number of ground-truth images'
212 |         roidb = []
213 |         for i in xrange(self.num_images):
214 |             boxes = box_list[i]
215 |             num_boxes = boxes.shape[0]
216 |             overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)
217 | 
218 |             if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
219 |                 gt_boxes = gt_roidb[i]['boxes']
220 |                 gt_classes = gt_roidb[i]['gt_classes']
221 |                 gt_overlaps = bbox_overlaps(boxes.astype(np.float),
222 |                                             gt_boxes.astype(np.float))
223 |                 argmaxes = gt_overlaps.argmax(axis=1)
224 |                 maxes = gt_overlaps.max(axis=1)
225 |                 I = np.where(maxes > 0)[0]
226 |                 overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
227 | 
228 |             overlaps = scipy.sparse.csr_matrix(overlaps)
229 |             roidb.append({
230 |                 'boxes' : boxes,
231 |                 'gt_classes' : np.zeros((num_boxes,), dtype=np.int32),
232 |                 'gt_overlaps' : overlaps,
233 |                 'flipped' : False,
234 |                 'seg_areas' : np.zeros((num_boxes,), dtype=np.float32),
235 |             })
236 |         return roidb
237 | 
238 |     @staticmethod
239 |     def merge_roidbs(a, b):
240 |         assert len(a) == len(b)
241 |         for i in xrange(len(a)):
242 |             a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes']))
243 |             a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'],
244 |                                             b[i]['gt_classes']))
245 |             a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'],
246 |                                                        b[i]['gt_overlaps']])
247 |             a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'],
248 |                                            b[i]['seg_areas']))
249 |         return a
250 | 
251 |     def competition_mode(self, on):
252 |         """Turn competition mode on or off."""
253 |         pass
254 | 


--------------------------------------------------------------------------------
/Seminar6/lib/datasets/tools/mcg_munge.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | 
 4 | """Hacky tool to convert file system layout of MCG boxes downloaded from
 5 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/
 6 | so that it's consistent with those computed by Jan Hosang (see:
 7 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-
 8 |   computing/research/object-recognition-and-scene-understanding/how-
 9 |   good-are-detection-proposals-really/)
10 | 
11 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order.
12 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order.
13 | """
14 | 
15 | def munge(src_dir):
16 |     # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat
17 |     # want:      ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat
18 | 
19 |     files = os.listdir(src_dir)
20 |     for fn in files:
21 |         base, ext = os.path.splitext(fn)
22 |         # first 14 chars / first 22 chars / all chars + .mat
23 |         # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat
24 |         first = base[:14]
25 |         second = base[:22]
26 |         dst_dir = os.path.join('MCG', 'mat', first, second)
27 |         if not os.path.exists(dst_dir):
28 |             os.makedirs(dst_dir)
29 |         src = os.path.join(src_dir, fn)
30 |         dst = os.path.join(dst_dir, fn)
31 |         print 'MV: {} -> {}'.format(src, dst)
32 |         os.rename(src, dst)
33 | 
34 | if __name__ == '__main__':
35 |     # src_dir should look something like:
36 |     #  src_dir = 'MCG-COCO-val2014-boxes'
37 |     src_dir = sys.argv[1]
38 |     munge(src_dir)
39 | 


--------------------------------------------------------------------------------
/Seminar6/lib/datasets/voc_eval.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast/er R-CNN
  3 | # Licensed under The MIT License [see LICENSE for details]
  4 | # Written by Bharath Hariharan
  5 | # --------------------------------------------------------
  6 | 
  7 | import xml.etree.ElementTree as ET
  8 | import os
  9 | import cPickle
 10 | import numpy as np
 11 | 
 12 | def parse_rec(filename):
 13 |     """ Parse a PASCAL VOC xml file """
 14 |     tree = ET.parse(filename)
 15 |     objects = []
 16 |     for obj in tree.findall('object'):
 17 |         obj_struct = {}
 18 |         obj_struct['name'] = obj.find('name').text
 19 |         obj_struct['pose'] = obj.find('pose').text
 20 |         obj_struct['truncated'] = int(obj.find('truncated').text)
 21 |         obj_struct['difficult'] = int(obj.find('difficult').text)
 22 |         bbox = obj.find('bndbox')
 23 |         obj_struct['bbox'] = [int(bbox.find('xmin').text),
 24 |                               int(bbox.find('ymin').text),
 25 |                               int(bbox.find('xmax').text),
 26 |                               int(bbox.find('ymax').text)]
 27 |         objects.append(obj_struct)
 28 | 
 29 |     return objects
 30 | 
 31 | def voc_ap(rec, prec, use_07_metric=False):
 32 |     """ ap = voc_ap(rec, prec, [use_07_metric])
 33 |     Compute VOC AP given precision and recall.
 34 |     If use_07_metric is true, uses the
 35 |     VOC 07 11 point method (default:False).
 36 |     """
 37 |     if use_07_metric:
 38 |         # 11 point metric
 39 |         ap = 0.
 40 |         for t in np.arange(0., 1.1, 0.1):
 41 |             if np.sum(rec >= t) == 0:
 42 |                 p = 0
 43 |             else:
 44 |                 p = np.max(prec[rec >= t])
 45 |             ap = ap + p / 11.
 46 |     else:
 47 |         # correct AP calculation
 48 |         # first append sentinel values at the end
 49 |         mrec = np.concatenate(([0.], rec, [1.]))
 50 |         mpre = np.concatenate(([0.], prec, [0.]))
 51 | 
 52 |         # compute the precision envelope
 53 |         for i in range(mpre.size - 1, 0, -1):
 54 |             mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
 55 | 
 56 |         # to calculate area under PR curve, look for points
 57 |         # where X axis (recall) changes value
 58 |         i = np.where(mrec[1:] != mrec[:-1])[0]
 59 | 
 60 |         # and sum (\Delta recall) * prec
 61 |         ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
 62 |     return ap
 63 | 
 64 | def voc_eval(detpath,
 65 |              annopath,
 66 |              imagesetfile,
 67 |              classname,
 68 |              cachedir,
 69 |              ovthresh=0.5,
 70 |              use_07_metric=False):
 71 |     """rec, prec, ap = voc_eval(detpath,
 72 |                                 annopath,
 73 |                                 imagesetfile,
 74 |                                 classname,
 75 |                                 [ovthresh],
 76 |                                 [use_07_metric])
 77 | 
 78 |     Top level function that does the PASCAL VOC evaluation.
 79 | 
 80 |     detpath: Path to detections
 81 |         detpath.format(classname) should produce the detection results file.
 82 |     annopath: Path to annotations
 83 |         annopath.format(imagename) should be the xml annotations file.
 84 |     imagesetfile: Text file containing the list of images, one image per line.
 85 |     classname: Category name (duh)
 86 |     cachedir: Directory for caching the annotations
 87 |     [ovthresh]: Overlap threshold (default = 0.5)
 88 |     [use_07_metric]: Whether to use VOC07's 11 point AP computation
 89 |         (default False)
 90 |     """
 91 |     # assumes detections are in detpath.format(classname)
 92 |     # assumes annotations are in annopath.format(imagename)
 93 |     # assumes imagesetfile is a text file with each line an image name
 94 |     # cachedir caches the annotations in a pickle file
 95 | 
 96 |     # first load gt
 97 |     if not os.path.isdir(cachedir):
 98 |         os.mkdir(cachedir)
 99 |     cachefile = os.path.join(cachedir, 'annots.pkl')
100 |     # read list of images
101 |     with open(imagesetfile, 'r') as f:
102 |         lines = f.readlines()
103 |     imagenames = [x.strip() for x in lines]
104 | 
105 |     if not os.path.isfile(cachefile):
106 |         # load annots
107 |         recs = {}
108 |         for i, imagename in enumerate(imagenames):
109 |             recs[imagename] = parse_rec(annopath.format(imagename))
110 |             if i % 100 == 0:
111 |                 print 'Reading annotation for {:d}/{:d}'.format(
112 |                     i + 1, len(imagenames))
113 |         # save
114 |         print 'Saving cached annotations to {:s}'.format(cachefile)
115 |         with open(cachefile, 'w') as f:
116 |             cPickle.dump(recs, f)
117 |     else:
118 |         # load
119 |         with open(cachefile, 'r') as f:
120 |             recs = cPickle.load(f)
121 | 
122 |     # extract gt objects for this class
123 |     class_recs = {}
124 |     npos = 0
125 |     for imagename in imagenames:
126 |         R = [obj for obj in recs[imagename] if obj['name'] == classname]
127 |         bbox = np.array([x['bbox'] for x in R])
128 |         difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
129 |         det = [False] * len(R)
130 |         npos = npos + sum(~difficult)
131 |         class_recs[imagename] = {'bbox': bbox,
132 |                                  'difficult': difficult,
133 |                                  'det': det}
134 | 
135 |     # read dets
136 |     detfile = detpath.format(classname)
137 |     with open(detfile, 'r') as f:
138 |         lines = f.readlines()
139 | 
140 |     splitlines = [x.strip().split(' ') for x in lines]
141 |     image_ids = [x[0] for x in splitlines]
142 |     confidence = np.array([float(x[1]) for x in splitlines])
143 |     BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
144 |     print BB
145 | 
146 |     # sort by confidence
147 |     sorted_ind = np.argsort(-confidence)
148 |     sorted_scores = np.sort(-confidence)
149 |     BB = BB[sorted_ind, :]
150 |     image_ids = [image_ids[x] for x in sorted_ind]
151 | 
152 |     # go down dets and mark TPs and FPs
153 |     nd = len(image_ids)
154 |     tp = np.zeros(nd)
155 |     fp = np.zeros(nd)
156 |     for d in range(nd):
157 |         R = class_recs[image_ids[d]]
158 |         bb = BB[d, :].astype(float)
159 |         ovmax = -np.inf
160 |         BBGT = R['bbox'].astype(float)
161 | 
162 |         if BBGT.size > 0:
163 |             # compute overlaps
164 |             # intersection
165 |             ixmin = np.maximum(BBGT[:, 0], bb[0])
166 |             iymin = np.maximum(BBGT[:, 1], bb[1])
167 |             ixmax = np.minimum(BBGT[:, 2], bb[2])
168 |             iymax = np.minimum(BBGT[:, 3], bb[3])
169 |             iw = np.maximum(ixmax - ixmin + 1., 0.)
170 |             ih = np.maximum(iymax - iymin + 1., 0.)
171 |             inters = iw * ih
172 | 
173 |             # union
174 |             uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
175 |                    (BBGT[:, 2] - BBGT[:, 0] + 1.) *
176 |                    (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
177 | 
178 |             overlaps = inters / uni
179 |             ovmax = np.max(overlaps)
180 |             jmax = np.argmax(overlaps)
181 | 
182 |         if ovmax > ovthresh:
183 |             if not R['difficult'][jmax]:
184 |                 if not R['det'][jmax]:
185 |                     tp[d] = 1.
186 |                     R['det'][jmax] = 1
187 |                 else:
188 |                     fp[d] = 1.
189 |         else:
190 |             fp[d] = 1.
191 | 
192 |     # compute precision recall
193 |     fp = np.cumsum(fp)
194 |     tp = np.cumsum(tp)
195 |     rec = tp / float(npos)
196 |     # avoid divide by zero in case the first detection matches a difficult
197 |     # ground truth
198 |     prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
199 |     ap = voc_ap(rec, prec, use_07_metric)
200 | 
201 |     return rec, prec, ap
202 | 


--------------------------------------------------------------------------------
/Seminar6/lib/fast_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/Seminar6/lib/fast_rcnn/bbox_transform.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | 
10 | def bbox_transform(ex_rois, gt_rois):
11 |     ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
12 |     ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
13 |     ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
14 |     ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
15 | 
16 |     gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
17 |     gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
18 |     gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
19 |     gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
20 | 
21 |     targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
22 |     targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
23 |     targets_dw = np.log(gt_widths / ex_widths)
24 |     targets_dh = np.log(gt_heights / ex_heights)
25 | 
26 |     targets = np.vstack(
27 |         (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
28 |     return targets
29 | 
30 | def bbox_transform_inv(boxes, deltas):
31 |     if boxes.shape[0] == 0:
32 |         return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
33 | 
34 |     boxes = boxes.astype(deltas.dtype, copy=False)
35 | 
36 |     widths = boxes[:, 2] - boxes[:, 0] + 1.0
37 |     heights = boxes[:, 3] - boxes[:, 1] + 1.0
38 |     ctr_x = boxes[:, 0] + 0.5 * widths
39 |     ctr_y = boxes[:, 1] + 0.5 * heights
40 | 
41 |     dx = deltas[:, 0::4]
42 |     dy = deltas[:, 1::4]
43 |     dw = deltas[:, 2::4]
44 |     dh = deltas[:, 3::4]
45 | 
46 |     pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
47 |     pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
48 |     pred_w = np.exp(dw) * widths[:, np.newaxis]
49 |     pred_h = np.exp(dh) * heights[:, np.newaxis]
50 | 
51 |     pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
52 |     # x1
53 |     pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
54 |     # y1
55 |     pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
56 |     # x2
57 |     pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
58 |     # y2
59 |     pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
60 | 
61 |     return pred_boxes
62 | 
63 | def clip_boxes(boxes, im_shape):
64 |     """
65 |     Clip boxes to image boundaries.
66 |     """
67 | 
68 |     # x1 >= 0
69 |     boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
70 |     # y1 >= 0
71 |     boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
72 |     # x2 < im_shape[1]
73 |     boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
74 |     # y2 < im_shape[0]
75 |     boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
76 |     return boxes
77 | 


--------------------------------------------------------------------------------
/Seminar6/lib/fast_rcnn/config.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | """Fast R-CNN config system.
  9 | 
 10 | This file specifies default config options for Fast R-CNN. You should not
 11 | change values in this file. Instead, you should write a config file (in yaml)
 12 | and use cfg_from_file(yaml_file) to load it and override the default options.
 13 | 
 14 | Most tools in $ROOT/tools take a --cfg option to specify an override file.
 15 |     - See tools/{train,test}_net.py for example code that uses cfg_from_file()
 16 |     - See experiments/cfgs/*.yml for example YAML config override files
 17 | """
 18 | 
 19 | import os
 20 | import os.path as osp
 21 | import numpy as np
 22 | # `pip install easydict` if you don't have it
 23 | from easydict import EasyDict as edict
 24 | 
 25 | __C = edict()
 26 | # Consumers can get config by:
 27 | #   from fast_rcnn_config import cfg
 28 | cfg = __C
 29 | 
 30 | #
 31 | # Training options
 32 | #
 33 | 
 34 | __C.TRAIN = edict()
 35 | 
 36 | # Scales to use during training (can list multiple scales)
 37 | # Each scale is the pixel size of an image's shortest side
 38 | __C.TRAIN.SCALES = (600,)
 39 | 
 40 | # Max pixel size of the longest side of a scaled input image
 41 | __C.TRAIN.MAX_SIZE = 1000
 42 | 
 43 | # Images to use per minibatch
 44 | __C.TRAIN.IMS_PER_BATCH = 2
 45 | 
 46 | # Minibatch size (number of regions of interest [ROIs])
 47 | __C.TRAIN.BATCH_SIZE = 128
 48 | 
 49 | # Fraction of minibatch that is labeled foreground (i.e. class > 0)
 50 | __C.TRAIN.FG_FRACTION = 0.25
 51 | 
 52 | # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
 53 | __C.TRAIN.FG_THRESH = 0.5
 54 | 
 55 | # Overlap threshold for a ROI to be considered background (class = 0 if
 56 | # overlap in [LO, HI))
 57 | __C.TRAIN.BG_THRESH_HI = 0.5
 58 | __C.TRAIN.BG_THRESH_LO = 0.1
 59 | 
 60 | # Use horizontally-flipped images during training?
 61 | __C.TRAIN.USE_FLIPPED = True
 62 | 
 63 | # Train bounding-box regressors
 64 | __C.TRAIN.BBOX_REG = True
 65 | 
 66 | # Overlap required between a ROI and ground-truth box in order for that ROI to
 67 | # be used as a bounding-box regression training example
 68 | __C.TRAIN.BBOX_THRESH = 0.5
 69 | 
 70 | # Iterations between snapshots
 71 | __C.TRAIN.SNAPSHOT_ITERS = 10000
 72 | 
 73 | # solver.prototxt specifies the snapshot path prefix, this adds an optional
 74 | # infix to yield the path: <prefix>[_<infix>]_iters_XYZ.caffemodel
 75 | __C.TRAIN.SNAPSHOT_INFIX = ''
 76 | 
 77 | # Normalize the targets (subtract empirical mean, divide by empirical stddev)
 78 | __C.TRAIN.BBOX_NORMALIZE_TARGETS = True
 79 | # Deprecated (inside weights)
 80 | __C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
 81 | # Normalize the targets using "precomputed" (or made up) means and stdevs
 82 | # (BBOX_NORMALIZE_TARGETS must also be True)
 83 | __C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = False
 84 | __C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
 85 | __C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)
 86 | 
 87 | # Train using these proposals
 88 | __C.TRAIN.PROPOSAL_METHOD = 'selective_search'
 89 | 
 90 | # Make minibatches from images that have similar aspect ratios (i.e. both
 91 | # tall and thin or both short and wide) in order to avoid wasting computation
 92 | # on zero-padding.
 93 | __C.TRAIN.ASPECT_GROUPING = True
 94 | 
 95 | #
 96 | # Testing options
 97 | #
 98 | 
 99 | __C.TEST = edict()
100 | 
101 | # Scales to use during testing (can list multiple scales)
102 | # Each scale is the pixel size of an image's shortest side
103 | __C.TEST.SCALES = (600,)
104 | 
105 | # Max pixel size of the longest side of a scaled input image
106 | __C.TEST.MAX_SIZE = 1000
107 | 
108 | # Overlap threshold used for non-maximum suppression (suppress boxes with
109 | # IoU >= this threshold)
110 | __C.TEST.NMS = 0.3
111 | 
112 | # Experimental: treat the (K+1) units in the cls_score layer as linear
113 | # predictors (trained, eg, with one-vs-rest SVMs).
114 | __C.TEST.SVM = False
115 | 
116 | # Test using bounding-box regressors
117 | __C.TEST.BBOX_REG = True
118 | 
119 | # Test using these proposals
120 | __C.TEST.PROPOSAL_METHOD = 'selective_search'
121 | 
122 | #
123 | # MISC
124 | #
125 | 
126 | # Number of classes in the dataset.
127 | # For Pascal VOC 2007 it is 20 + 1 (background class).
128 | __C.NUM_CLASSES = 21
129 | 
130 | # The mapping from image coordinates to feature map coordinates might cause
131 | # some boxes that are distinct in image space to become identical in feature
132 | # coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor
133 | # for identifying duplicate boxes.
134 | # 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16
135 | __C.DEDUP_BOXES = 1./16.
136 | 
137 | # Pixel mean values (BGR order) as a (1, 1, 3) array
138 | # We use the same pixel mean for all networks even though it's not exactly what
139 | # they were trained with
140 | __C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
141 | 
142 | # For reproducibility
143 | __C.RNG_SEED = 3
144 | 
145 | # A small number that's used many times
146 | __C.EPS = 1e-14
147 | 
148 | # Root directory of project
149 | __C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
150 | 
151 | # Data directory
152 | __C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))
153 | 
154 | # Model directory
155 | __C.MODELS_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'models', 'pascal_voc'))
156 | 
157 | # Place outputs under an experiments directory
158 | __C.EXP_DIR = 'default'
159 | 
160 | # Use GPU implementation of non-maximum suppression
161 | __C.USE_GPU_NMS = False
162 | 
163 | # Default GPU device id
164 | __C.GPU_ID = 0
165 | 
166 | 
167 | def get_output_dir(imdb, net=None):
168 |     """Return the directory where experimental artifacts are placed.
169 |     If the directory does not exist, it is created.
170 | 
171 |     A canonical path is built using the name from an imdb and a network
172 |     (if not None).
173 |     """
174 |     outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name))
175 |     if net is not None:
176 |         outdir = osp.join(outdir, net.name)
177 |     if not os.path.exists(outdir):
178 |         os.makedirs(outdir)
179 |     return outdir
180 | 
181 | def _merge_a_into_b(a, b):
182 |     """Merge config dictionary a into config dictionary b, clobbering the
183 |     options in b whenever they are also specified in a.
184 |     """
185 |     if type(a) is not edict:
186 |         return
187 | 
188 |     for k, v in a.iteritems():
189 |         # a must specify keys that are in b
190 |         if not b.has_key(k):
191 |             raise KeyError('{} is not a valid config key'.format(k))
192 | 
193 |         # the types must match, too
194 |         old_type = type(b[k])
195 |         if old_type is not type(v):
196 |             if isinstance(b[k], np.ndarray):
197 |                 v = np.array(v, dtype=b[k].dtype)
198 |             else:
199 |                 raise ValueError(('Type mismatch ({} vs. {}) '
200 |                                 'for config key: {}').format(type(b[k]),
201 |                                                             type(v), k))
202 | 
203 |         # recursively merge dicts
204 |         if type(v) is edict:
205 |             try:
206 |                 _merge_a_into_b(a[k], b[k])
207 |             except:
208 |                 print('Error under config key: {}'.format(k))
209 |                 raise
210 |         else:
211 |             b[k] = v
212 | 
213 | def cfg_from_file(filename):
214 |     """Load a config file and merge it into the default options."""
215 |     import yaml
216 |     with open(filename, 'r') as f:
217 |         yaml_cfg = edict(yaml.load(f))
218 | 
219 |     _merge_a_into_b(yaml_cfg, __C)
220 | 
221 | def cfg_from_list(cfg_list):
222 |     """Set config keys via list (e.g., from command line)."""
223 |     from ast import literal_eval
224 |     assert len(cfg_list) % 2 == 0
225 |     for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
226 |         key_list = k.split('.')
227 |         d = __C
228 |         for subkey in key_list[:-1]:
229 |             assert d.has_key(subkey)
230 |             d = d[subkey]
231 |         subkey = key_list[-1]
232 |         assert d.has_key(subkey)
233 |         try:
234 |             value = literal_eval(v)
235 |         except:
236 |             # handle the case when v is a string literal
237 |             value = v
238 |         assert type(value) == type(d[subkey]), \
239 |             'type {} does not match original type {}'.format(
240 |             type(value), type(d[subkey]))
241 |         d[subkey] = value
242 | 


--------------------------------------------------------------------------------
/Seminar6/lib/fast_rcnn/nms_wrapper.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | from fast_rcnn.config import cfg
 9 | from nms.cpu_nms import cpu_nms
10 | 
11 | def nms(dets, thresh, force_cpu=False):
12 |     """Dispatch to either CPU or GPU NMS implementations."""
13 | 
14 |     if dets.shape[0] == 0:
15 |         return []
16 |     
17 |     return cpu_nms(dets, thresh)
18 | 


--------------------------------------------------------------------------------
/Seminar6/lib/fast_rcnn/test.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | """Test a Fast R-CNN network on an imdb (image database)."""
  9 | 
 10 | from fast_rcnn.config import cfg, get_output_dir
 11 | from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv
 12 | import argparse
 13 | from utils.timer import Timer
 14 | import numpy as np
 15 | import cv2
 16 | from fast_rcnn.nms_wrapper import nms
 17 | import cPickle
 18 | from utils.blob import im_list_to_blob
 19 | import os
 20 | 
 21 | def _get_image_blob(im):
 22 |     """Converts an image into a network input.
 23 | 
 24 |     Arguments:
 25 |         im (ndarray): a color image in BGR order
 26 | 
 27 |     Returns:
 28 |         blob (ndarray): a data blob holding an image pyramid
 29 |         im_scale_factors (list): list of image scales (relative to im) used
 30 |             in the image pyramid
 31 |     """
 32 |     im_orig = im.astype(np.float32, copy=True)
 33 |     im_orig -= cfg.PIXEL_MEANS
 34 | 
 35 |     im_shape = im_orig.shape
 36 |     im_size_min = np.min(im_shape[0:2])
 37 |     im_size_max = np.max(im_shape[0:2])
 38 | 
 39 |     processed_ims = []
 40 |     im_scale_factors = []
 41 | 
 42 |     for target_size in cfg.TEST.SCALES:
 43 |         im_scale = float(target_size) / float(im_size_min)
 44 |         # Prevent the biggest axis from being more than MAX_SIZE
 45 |         if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
 46 |             im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
 47 |         im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
 48 |                         interpolation=cv2.INTER_LINEAR)
 49 |         im_scale_factors.append(im_scale)
 50 |         processed_ims.append(im)
 51 | 
 52 |     # Create a blob to hold the input images
 53 |     blob = im_list_to_blob(processed_ims)
 54 | 
 55 |     return blob, np.array(im_scale_factors)
 56 | 
 57 | def _get_rois_blob(im_rois, im_scale_factors):
 58 |     """Converts RoIs into network inputs.
 59 | 
 60 |     Arguments:
 61 |         im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
 62 |         im_scale_factors (list): scale factors as returned by _get_image_blob
 63 | 
 64 |     Returns:
 65 |         blob (ndarray): R x 5 matrix of RoIs in the image pyramid
 66 |     """
 67 |     rois, levels = _project_im_rois(im_rois, im_scale_factors)
 68 |     rois_blob = np.hstack((levels, rois))
 69 |     return rois_blob.astype(np.float32, copy=False)
 70 | 
 71 | def _project_im_rois(im_rois, scales):
 72 |     """Project image RoIs into the image pyramid built by _get_image_blob.
 73 | 
 74 |     Arguments:
 75 |         im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
 76 |         scales (list): scale factors as returned by _get_image_blob
 77 | 
 78 |     Returns:
 79 |         rois (ndarray): R x 4 matrix of projected RoI coordinates
 80 |         levels (list): image pyramid levels used by each projected RoI
 81 |     """
 82 |     im_rois = im_rois.astype(np.float, copy=False)
 83 | 
 84 |     if len(scales) > 1:
 85 |         widths = im_rois[:, 2] - im_rois[:, 0] + 1
 86 |         heights = im_rois[:, 3] - im_rois[:, 1] + 1
 87 | 
 88 |         areas = widths * heights
 89 |         scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2)
 90 |         diff_areas = np.abs(scaled_areas - 224 * 224)
 91 |         levels = diff_areas.argmin(axis=1)[:, np.newaxis]
 92 |     else:
 93 |         levels = np.zeros((im_rois.shape[0], 1), dtype=np.int)
 94 | 
 95 |     rois = im_rois * scales[levels]
 96 | 
 97 |     return rois, levels
 98 | 
 99 | def _get_blobs(im, rois):
100 |     """Convert an image and RoIs within that image into network inputs."""
101 |     blobs = {'data' : None, 'rois' : None}
102 |     blobs['data'], im_scale_factors = _get_image_blob(im)
103 |     blobs['rois'] = _get_rois_blob(rois, im_scale_factors)
104 |     return blobs, im_scale_factors
105 | 
106 | def im_detect(net, im, boxes):
107 |     """Detect object classes in an image given object proposals.
108 | 
109 |     Arguments:
110 |         net (caffe.Net): Fast R-CNN network to use
111 |         im (ndarray): color image to test (in BGR order)
112 |         boxes (ndarray): R x 4 array of object proposals
113 | 
114 |     Returns:
115 |         scores (ndarray): R x K array of object class scores (K includes
116 |             background as object category 0)
117 |         boxes (ndarray): R x (4*K) array of predicted bounding boxes
118 |     """
119 |     blobs, im_scales = _get_blobs(im, boxes)
120 | 
121 |     # When mapping from image ROIs to feature map ROIs, there's some aliasing
122 |     # (some distinct image ROIs get mapped to the same feature ROI).
123 |     # Here, we identify duplicate feature ROIs, so we only compute features
124 |     # on the unique subset.
125 |     if cfg.DEDUP_BOXES > 0:
126 |         v = np.array([1, 1e3, 1e6, 1e9, 1e12])
127 |         hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
128 |         _, index, inv_index = np.unique(hashes, return_index=True,
129 |                                         return_inverse=True)
130 |         blobs['rois'] = blobs['rois'][index, :]
131 |         boxes = boxes[index, :]
132 | 
133 |     # do forward
134 |     forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
135 |     forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
136 |     blobs_out = net.forward(**forward_kwargs)
137 | 
138 |     # use softmax estimated probabilities
139 |     scores = blobs_out['cls_prob']
140 | 
141 |     if cfg.TEST.BBOX_REG:
142 |         # Apply bounding-box regression deltas
143 |         box_deltas = blobs_out['bbox_pred']
144 |         pred_boxes = bbox_transform_inv(boxes, box_deltas)
145 |         pred_boxes = clip_boxes(pred_boxes, im.shape)
146 |     else:
147 |         # Simply repeat the boxes, once for each class
148 |         pred_boxes = np.tile(boxes, (1, scores.shape[1]))
149 | 
150 |     if cfg.DEDUP_BOXES > 0:
151 |         # Map scores and predictions back to the original set of boxes
152 |         scores = scores[inv_index, :]
153 |         pred_boxes = pred_boxes[inv_index, :]
154 | 
155 |     return scores, pred_boxes
156 | 
157 | def vis_detections(im, class_name, dets, thresh=0.3):
158 |     """Visual debugging of detections."""
159 |     import matplotlib.pyplot as plt
160 |     im = im[:, :, (2, 1, 0)]
161 |     for i in xrange(np.minimum(10, dets.shape[0])):
162 |         bbox = dets[i, :4]
163 |         score = dets[i, -1]
164 |         if score > thresh:
165 |             plt.cla()
166 |             plt.imshow(im)
167 |             plt.gca().add_patch(
168 |                 plt.Rectangle((bbox[0], bbox[1]),
169 |                               bbox[2] - bbox[0],
170 |                               bbox[3] - bbox[1], fill=False,
171 |                               edgecolor='g', linewidth=3)
172 |                 )
173 |             plt.title('{}  {:.3f}'.format(class_name, score))
174 |             plt.show()
175 | 
176 | def apply_nms(all_boxes, thresh):
177 |     """Apply non-maximum suppression to all predicted boxes output by the
178 |     test_net method.
179 |     """
180 |     num_classes = len(all_boxes)
181 |     num_images = len(all_boxes[0])
182 |     nms_boxes = [[[] for _ in xrange(num_images)]
183 |                  for _ in xrange(num_classes)]
184 |     for cls_ind in xrange(num_classes):
185 |         for im_ind in xrange(num_images):
186 |             dets = all_boxes[cls_ind][im_ind]
187 |             if dets == []:
188 |                 continue
189 |             # CPU NMS is much faster than GPU NMS when the number of boxes
190 |             # is relative small (e.g., < 10k)
191 |             # TODO(rbg): autotune NMS dispatch
192 |             keep = nms(dets, thresh, force_cpu=True)
193 |             if len(keep) == 0:
194 |                 continue
195 |             nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
196 |     return nms_boxes
197 | 
198 | def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
199 |     """Test a Fast R-CNN network on an image database."""
200 |     num_images = len(imdb.image_index)
201 |     # all detections are collected into:
202 |     #    all_boxes[cls][image] = N x 5 array of detections in
203 |     #    (x1, y1, x2, y2, score)
204 |     all_boxes = [[[] for _ in xrange(num_images)]
205 |                  for _ in xrange(imdb.num_classes)]
206 | 
207 |     output_dir = get_output_dir(imdb, net)
208 | 
209 |     # timers
210 |     _t = {'im_detect' : Timer(), 'misc' : Timer()}
211 | 
212 |     roidb = imdb.roidb
213 | 
214 |     for i in xrange(num_images):
215 |         # filter out any ground truth boxes
216 | 
217 |         # The roidb may contain ground-truth rois (for example, if the roidb
218 |         # comes from the training or val split). We only want to evaluate
219 |         # detection on the *non*-ground-truth rois. We select those the rois
220 |         # that have the gt_classes field set to 0, which means there's no
221 |         # ground truth.
222 |         box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]
223 | 
224 |         im = cv2.imread(imdb.image_path_at(i))
225 |         _t['im_detect'].tic()
226 |         scores, boxes = im_detect(net, im, box_proposals)
227 |         _t['im_detect'].toc()
228 | 
229 |         _t['misc'].tic()
230 |         # skip j = 0, because it's the background class
231 |         for j in xrange(1, imdb.num_classes):
232 |             inds = np.where(scores[:, j] > thresh)[0]
233 |             cls_scores = scores[inds, j]
234 |             cls_boxes = boxes[inds, j*4:(j+1)*4]
235 |             cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
236 |                 .astype(np.float32, copy=False)
237 |             keep = nms(cls_dets, cfg.TEST.NMS)
238 |             cls_dets = cls_dets[keep, :]
239 |             if vis:
240 |                 vis_detections(im, imdb.classes[j], cls_dets)
241 |             all_boxes[j][i] = cls_dets
242 | 
243 |         # Limit to max_per_image detections *over all classes*
244 |         if max_per_image > 0:
245 |             image_scores = np.hstack([all_boxes[j][i][:, -1]
246 |                                       for j in xrange(1, imdb.num_classes)])
247 |             if len(image_scores) > max_per_image:
248 |                 image_thresh = np.sort(image_scores)[-max_per_image]
249 |                 for j in xrange(1, imdb.num_classes):
250 |                     keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
251 |                     all_boxes[j][i] = all_boxes[j][i][keep, :]
252 |         _t['misc'].toc()
253 | 
254 |         print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
255 |               .format(i + 1, num_images, _t['im_detect'].average_time,
256 |                       _t['misc'].average_time)
257 | 
258 |     det_file = os.path.join(output_dir, 'detections.pkl')
259 |     with open(det_file, 'wb') as f:
260 |         cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)
261 | 
262 |     print 'Evaluating detections'
263 |     imdb.evaluate_detections(all_boxes, output_dir)
264 | 


--------------------------------------------------------------------------------
/Seminar6/lib/fast_rcnn/train.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | """Train a Fast R-CNN network."""
  9 | 
 10 | from fast_rcnn.config import cfg
 11 | import roi_data_layer.roidb as rdl_roidb
 12 | from utils.timer import Timer
 13 | import numpy as np
 14 | import os
 15 | 
 16 | from custom.solver import Solver
 17 | 
 18 | class SolverWrapper(object):
 19 |     """A simple wrapper around Caffe's solver.
 20 |     This wrapper gives us control over he snapshotting process, which we
 21 |     use to unnormalize the learned bounding-box regression weights.
 22 |     """
 23 | 
 24 |     def __init__(self, roidb, output_dir):
 25 |         """Initialize the SolverWrapper."""
 26 |         self.output_dir = output_dir
 27 | 
 28 |         if cfg.TRAIN.BBOX_REG:
 29 |             print 'Computing bounding-box regression targets...'
 30 |             self.bbox_means, self.bbox_stds = \
 31 |                     rdl_roidb.add_bbox_regression_targets(roidb)
 32 |             print 'done'
 33 | 
 34 |         ################ You MIGHT want to instantiate your custom solver here.
 35 |         # Don't forget to supply roidb to the ROIPoolingLayer!
 36 |         # You should have the following line:
 37 |         # self.solver = Solver()
 38 | 
 39 |     def snapshot(self):
 40 |         """Saves the state the solver state."""
 41 | 
 42 |         infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
 43 |                  if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
 44 |         filename = (self.solver.snapshot_prefix + infix +
 45 |                     '_iter_{:d}'.format(self.solver.iter) + '.pkl')
 46 |         filename = os.path.join(self.output_dir, filename)
 47 | 
 48 |         self.solver.save(str(filename))
 49 |         print 'Wrote snapshot to: {:s}'.format(filename)
 50 | 
 51 |         return filename
 52 | 
 53 |     def train_model(self, max_iters):
 54 |         """Network training loop."""
 55 |         last_snapshot_iter = -1
 56 |         timer = Timer()
 57 |         model_paths = []
 58 |         while self.solver.iter < max_iters:
 59 |             # Make one SGD update
 60 |             timer.tic()
 61 | 
 62 |             self.solver.step()
 63 |             
 64 |             timer.toc()
 65 |             if self.solver.iter % (10 * self.solver.display_freq) == 0:
 66 |                 print 'speed: {:.3f}s / iter'.format(timer.average_time)
 67 | 
 68 |             if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
 69 |                 last_snapshot_iter = self.solver.iter
 70 |                 model_paths.append(self.snapshot())
 71 | 
 72 |         if last_snapshot_iter != self.solver.iter:
 73 |             model_paths.append(self.snapshot())
 74 |         return model_paths
 75 | 
 76 | def get_training_roidb(imdb):
 77 |     """Returns a roidb (Region of Interest database) for use in training."""
 78 |     if cfg.TRAIN.USE_FLIPPED:
 79 |         print 'Appending horizontally-flipped training examples...'
 80 |         imdb.append_flipped_images()
 81 |         print 'done'
 82 | 
 83 |     print 'Preparing training data...'
 84 |     rdl_roidb.prepare_roidb(imdb)
 85 |     print 'done'
 86 | 
 87 |     return imdb.roidb
 88 | 
 89 | def filter_roidb(roidb):
 90 |     """Remove roidb entries that have no usable RoIs."""
 91 | 
 92 |     def is_valid(entry):
 93 |         # Valid images have:
 94 |         #   (1) At least one foreground RoI OR
 95 |         #   (2) At least one background RoI
 96 |         overlaps = entry['max_overlaps']
 97 |         # find boxes with sufficient overlap
 98 |         fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
 99 |         # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
100 |         bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
101 |                            (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
102 |         # image is only valid if such boxes exist
103 |         valid = len(fg_inds) > 0 or len(bg_inds) > 0
104 |         return valid
105 | 
106 |     num = len(roidb)
107 |     filtered_roidb = [entry for entry in roidb if is_valid(entry)]
108 |     num_after = len(filtered_roidb)
109 |     print 'Filtered {} roidb entries: {} -> {}'.format(num - num_after,
110 |                                                        num, num_after)
111 |     return filtered_roidb
112 | 
113 | def train_net(roidb, output_dir, max_iters=40000):
114 |     """Train a Fast R-CNN network."""
115 | 
116 |     roidb = filter_roidb(roidb)
117 |     sw = SolverWrapper(roidb, output_dir)
118 | 
119 |     print 'Solving...'
120 |     model_paths = sw.train_model(max_iters)
121 |     print 'done solving'
122 |     return model_paths
123 | 


--------------------------------------------------------------------------------
/Seminar6/lib/nms/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 | 


--------------------------------------------------------------------------------
/Seminar6/lib/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar6/lib/nms/__init__.py


--------------------------------------------------------------------------------
/Seminar6/lib/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | cimport numpy as np
10 | 
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 |     return a if a >= b else b
13 | 
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 |     return a if a <= b else b
16 | 
17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
18 |     cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 |     cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 |     cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 |     cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 |     cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23 | 
24 |     cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 |     cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 | 
27 |     cdef int ndets = dets.shape[0]
28 |     cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 |             np.zeros((ndets), dtype=np.int)
30 | 
31 |     # nominal indices
32 |     cdef int _i, _j
33 |     # sorted indices
34 |     cdef int i, j
35 |     # temp variables for box i's (the box currently under consideration)
36 |     cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 |     # variables for computing overlap with box j (lower scoring box)
38 |     cdef np.float32_t xx1, yy1, xx2, yy2
39 |     cdef np.float32_t w, h
40 |     cdef np.float32_t inter, ovr
41 | 
42 |     keep = []
43 |     for _i in range(ndets):
44 |         i = order[_i]
45 |         if suppressed[i] == 1:
46 |             continue
47 |         keep.append(i)
48 |         ix1 = x1[i]
49 |         iy1 = y1[i]
50 |         ix2 = x2[i]
51 |         iy2 = y2[i]
52 |         iarea = areas[i]
53 |         for _j in range(_i + 1, ndets):
54 |             j = order[_j]
55 |             if suppressed[j] == 1:
56 |                 continue
57 |             xx1 = max(ix1, x1[j])
58 |             yy1 = max(iy1, y1[j])
59 |             xx2 = min(ix2, x2[j])
60 |             yy2 = min(iy2, y2[j])
61 |             w = max(0.0, xx2 - xx1 + 1)
62 |             h = max(0.0, yy2 - yy1 + 1)
63 |             inter = w * h
64 |             ovr = inter / (iarea + areas[j] - inter)
65 |             if ovr >= thresh:
66 |                 suppressed[j] = 1
67 | 
68 |     return keep
69 | 


--------------------------------------------------------------------------------
/Seminar6/lib/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
2 |           int boxes_dim, float nms_overlap_thresh, int device_id);
3 | 


--------------------------------------------------------------------------------
/Seminar6/lib/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Faster R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | cimport numpy as np
10 | 
11 | assert sizeof(int) == sizeof(np.int32_t)
12 | 
13 | cdef extern from "gpu_nms.hpp":
14 |     void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 | 
16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
17 |             np.int32_t device_id=0):
18 |     cdef int boxes_num = dets.shape[0]
19 |     cdef int boxes_dim = dets.shape[1]
20 |     cdef int num_out
21 |     cdef np.ndarray[np.int32_t, ndim=1] \
22 |         keep = np.zeros(boxes_num, dtype=np.int32)
23 |     cdef np.ndarray[np.float32_t, ndim=1] \
24 |         scores = dets[:, 4]
25 |     cdef np.ndarray[np.int_t, ndim=1] \
26 |         order = scores.argsort()[::-1]
27 |     cdef np.ndarray[np.float32_t, ndim=2] \
28 |         sorted_dets = dets[order, :]
29 |     _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
30 |     keep = keep[:num_out]
31 |     return list(order[keep])
32 | 


--------------------------------------------------------------------------------
/Seminar6/lib/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
  1 | // ------------------------------------------------------------------
  2 | // Faster R-CNN
  3 | // Copyright (c) 2015 Microsoft
  4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
  5 | // Written by Shaoqing Ren
  6 | // ------------------------------------------------------------------
  7 | 
  8 | #include "gpu_nms.hpp"
  9 | #include <vector>
 10 | #include <iostream>
 11 | 
 12 | #define CUDA_CHECK(condition) \
 13 |   /* Code block avoids redefinition of cudaError_t error */ \
 14 |   do { \
 15 |     cudaError_t error = condition; \
 16 |     if (error != cudaSuccess) { \
 17 |       std::cout << cudaGetErrorString(error) << std::endl; \
 18 |     } \
 19 |   } while (0)
 20 | 
 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
 23 | 
 24 | __device__ inline float devIoU(float const * const a, float const * const b) {
 25 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
 26 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 27 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 28 |   float interS = width * height;
 29 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 30 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 31 |   return interS / (Sa + Sb - interS);
 32 | }
 33 | 
 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
 35 |                            const float *dev_boxes, unsigned long long *dev_mask) {
 36 |   const int row_start = blockIdx.y;
 37 |   const int col_start = blockIdx.x;
 38 | 
 39 |   // if (row_start > col_start) return;
 40 | 
 41 |   const int row_size =
 42 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 43 |   const int col_size =
 44 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 45 | 
 46 |   __shared__ float block_boxes[threadsPerBlock * 5];
 47 |   if (threadIdx.x < col_size) {
 48 |     block_boxes[threadIdx.x * 5 + 0] =
 49 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
 50 |     block_boxes[threadIdx.x * 5 + 1] =
 51 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
 52 |     block_boxes[threadIdx.x * 5 + 2] =
 53 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
 54 |     block_boxes[threadIdx.x * 5 + 3] =
 55 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
 56 |     block_boxes[threadIdx.x * 5 + 4] =
 57 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
 58 |   }
 59 |   __syncthreads();
 60 | 
 61 |   if (threadIdx.x < row_size) {
 62 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 63 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
 64 |     int i = 0;
 65 |     unsigned long long t = 0;
 66 |     int start = 0;
 67 |     if (row_start == col_start) {
 68 |       start = threadIdx.x + 1;
 69 |     }
 70 |     for (i = start; i < col_size; i++) {
 71 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
 72 |         t |= 1ULL << i;
 73 |       }
 74 |     }
 75 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
 76 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
 77 |   }
 78 | }
 79 | 
 80 | void _set_device(int device_id) {
 81 |   int current_device;
 82 |   CUDA_CHECK(cudaGetDevice(&current_device));
 83 |   if (current_device == device_id) {
 84 |     return;
 85 |   }
 86 |   // The call to cudaSetDevice must come before any calls to Get, which
 87 |   // may perform initialization using the GPU.
 88 |   CUDA_CHECK(cudaSetDevice(device_id));
 89 | }
 90 | 
 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
 92 |           int boxes_dim, float nms_overlap_thresh, int device_id) {
 93 |   _set_device(device_id);
 94 | 
 95 |   float* boxes_dev = NULL;
 96 |   unsigned long long* mask_dev = NULL;
 97 | 
 98 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
 99 | 
100 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
101 |                         boxes_num * boxes_dim * sizeof(float)));
102 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
103 |                         boxes_host,
104 |                         boxes_num * boxes_dim * sizeof(float),
105 |                         cudaMemcpyHostToDevice));
106 | 
107 |   CUDA_CHECK(cudaMalloc(&mask_dev,
108 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
109 | 
110 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 |               DIVUP(boxes_num, threadsPerBlock));
112 |   dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 |                                   nms_overlap_thresh,
115 |                                   boxes_dev,
116 |                                   mask_dev);
117 | 
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 |                         mask_dev,
121 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
122 |                         cudaMemcpyDeviceToHost));
123 | 
124 |   std::vector<unsigned long long> remv(col_blocks);
125 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 | 
127 |   int num_to_keep = 0;
128 |   for (int i = 0; i < boxes_num; i++) {
129 |     int nblock = i / threadsPerBlock;
130 |     int inblock = i % threadsPerBlock;
131 | 
132 |     if (!(remv[nblock] & (1ULL << inblock))) {
133 |       keep_out[num_to_keep++] = i;
134 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
135 |       for (int j = nblock; j < col_blocks; j++) {
136 |         remv[j] |= p[j];
137 |       }
138 |     }
139 |   }
140 |   *num_out = num_to_keep;
141 | 
142 |   CUDA_CHECK(cudaFree(boxes_dev));
143 |   CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 | 


--------------------------------------------------------------------------------
/Seminar6/lib/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | 
10 | def py_cpu_nms(dets, thresh):
11 |     """Pure Python NMS baseline."""
12 |     x1 = dets[:, 0]
13 |     y1 = dets[:, 1]
14 |     x2 = dets[:, 2]
15 |     y2 = dets[:, 3]
16 |     scores = dets[:, 4]
17 | 
18 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 |     order = scores.argsort()[::-1]
20 | 
21 |     keep = []
22 |     while order.size > 0:
23 |         i = order[0]
24 |         keep.append(i)
25 |         xx1 = np.maximum(x1[i], x1[order[1:]])
26 |         yy1 = np.maximum(y1[i], y1[order[1:]])
27 |         xx2 = np.minimum(x2[i], x2[order[1:]])
28 |         yy2 = np.minimum(y2[i], y2[order[1:]])
29 | 
30 |         w = np.maximum(0.0, xx2 - xx1 + 1)
31 |         h = np.maximum(0.0, yy2 - yy1 + 1)
32 |         inter = w * h
33 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 | 
35 |         inds = np.where(ovr <= thresh)[0]
36 |         order = order[inds + 1]
37 | 
38 |     return keep
39 | 


--------------------------------------------------------------------------------
/Seminar6/lib/roi_data_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/Seminar6/lib/roi_data_layer/layer.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # Adapted for Theano usage by Yaroslav Ganin
  7 | # --------------------------------------------------------
  8 | 
  9 | """The data layer used during training to train a Fast R-CNN network.
 10 | 
 11 | RoIDataLayer implements a Caffe Python layer.
 12 | """
 13 | 
 14 | from fast_rcnn.config import cfg
 15 | from roi_data_layer.minibatch import get_minibatch
 16 | import numpy as np
 17 | import yaml
 18 | 
 19 | class RoIDataLayer(object):
 20 |     """Fast R-CNN data layer used for training."""
 21 | 
 22 |     def __init__(self):
 23 |         self.top = []
 24 | 
 25 |     def _shuffle_roidb_inds(self):
 26 |         """Randomly permute the training roidb."""
 27 |         if cfg.TRAIN.ASPECT_GROUPING:
 28 |             widths = np.array([r['width'] for r in self._roidb])
 29 |             heights = np.array([r['height'] for r in self._roidb])
 30 |             horz = (widths >= heights)
 31 |             vert = np.logical_not(horz)
 32 |             horz_inds = np.where(horz)[0]
 33 |             vert_inds = np.where(vert)[0]
 34 |             inds = np.hstack((
 35 |                 np.random.permutation(horz_inds),
 36 |                 np.random.permutation(vert_inds)))
 37 |             inds = np.reshape(inds, (-1, 2))
 38 |             row_perm = np.random.permutation(np.arange(inds.shape[0]))
 39 |             inds = np.reshape(inds[row_perm, :], (-1,))
 40 |             self._perm = inds
 41 |         else:
 42 |             self._perm = np.random.permutation(np.arange(len(self._roidb)))
 43 |         self._cur = 0
 44 | 
 45 |     def _get_next_minibatch_inds(self):
 46 |         """Return the roidb indices for the next minibatch."""
 47 |         if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb):
 48 |             self._shuffle_roidb_inds()
 49 | 
 50 |         db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH]
 51 |         self._cur += cfg.TRAIN.IMS_PER_BATCH
 52 |         return db_inds
 53 | 
 54 |     def _get_next_minibatch(self):
 55 |         """Return the blobs to be used for the next minibatch."""
 56 |         db_inds = self._get_next_minibatch_inds()
 57 |         minibatch_db = [self._roidb[i] for i in db_inds]
 58 |         return get_minibatch(minibatch_db, self._num_classes)
 59 | 
 60 |     def set_roidb(self, roidb):
 61 |         """Set the roidb to be used by this layer during training."""
 62 |         self._roidb = roidb
 63 |         self._shuffle_roidb_inds()
 64 | 
 65 |     def setup(self):
 66 |         """Setup the RoIDataLayer."""
 67 | 
 68 |         top = self.top
 69 | 
 70 |         self._num_classes = 21
 71 | 
 72 |         self._name_to_top_map = {}
 73 | 
 74 |         # data blob: holds a batch of N images, each with 3 channels
 75 |         idx = 0
 76 |         top.append(np.zeros((cfg.TRAIN.IMS_PER_BATCH, 3,
 77 |             max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE), dtype=np.single))
 78 |         self._name_to_top_map['data'] = idx
 79 |         idx += 1
 80 | 
 81 |         # rois blob: holds R regions of interest, each is a 5-tuple
 82 |         # (n, x1, y1, x2, y2) specifying an image batch index n and a
 83 |         # rectangle (x1, y1, x2, y2)
 84 |         top.append(np.zeros((1, 5), dtype=np.single))
 85 |         self._name_to_top_map['rois'] = idx
 86 |         idx += 1
 87 | 
 88 |         # labels blob: R categorical labels in [0, ..., K] for K foreground
 89 |         # classes plus background
 90 |         top.append(np.zeros((1,), dtype=np.single))
 91 |         self._name_to_top_map['labels'] = idx
 92 |         idx += 1
 93 | 
 94 |         if cfg.TRAIN.BBOX_REG:
 95 |             # bbox_targets blob: R bounding-box regression targets with 4
 96 |             # targets per class
 97 |             top.append(np.zeros((1, self._num_classes * 4), dtype=np.single))
 98 |             self._name_to_top_map['bbox_targets'] = idx
 99 |             idx += 1
100 | 
101 |             # bbox_inside_weights blob: At most 4 targets per roi are active;
102 |             # thisbinary vector sepcifies the subset of active targets
103 |             top.append(np.zeros((1, self._num_classes * 4), dtype=np.single))
104 |             self._name_to_top_map['bbox_inside_weights'] = idx
105 |             idx += 1
106 | 
107 |             top.append(np.zeros((1, self._num_classes * 4), dtype=np.single))
108 |             self._name_to_top_map['bbox_outside_weights'] = idx
109 |             idx += 1
110 | 
111 |         print 'RoiDataLayer: name_to_top:', self._name_to_top_map
112 |         assert len(top) == len(self._name_to_top_map)
113 | 
114 |     def forward(self):
115 |         """Get blobs and copy them into this layer's top blob vector."""
116 |         blobs = self._get_next_minibatch()
117 | 
118 |         top = self.top
119 | 
120 |         for blob_name, blob in blobs.iteritems():
121 |             top_ind = self._name_to_top_map[blob_name]
122 |             # Reshape net's input blobs
123 |             top[top_ind].resize(blob.shape)
124 |             # Copy data into net's input blobs
125 |             top[top_ind][...] = blob.astype(np.float32, copy=False)
126 | 


--------------------------------------------------------------------------------
/Seminar6/lib/roi_data_layer/minibatch.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | """Compute minibatch blobs for training a Fast R-CNN network."""
  9 | 
 10 | import numpy as np
 11 | import numpy.random as npr
 12 | import cv2
 13 | from fast_rcnn.config import cfg
 14 | from utils.blob import prep_im_for_blob, im_list_to_blob
 15 | 
 16 | def get_minibatch(roidb, num_classes):
 17 |     """Given a roidb, construct a minibatch sampled from it."""
 18 |     num_images = len(roidb)
 19 |     # Sample random scales to use for each image in this batch
 20 |     random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
 21 |                                     size=num_images)
 22 |     assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
 23 |         'num_images ({}) must divide BATCH_SIZE ({})'. \
 24 |         format(num_images, cfg.TRAIN.BATCH_SIZE)
 25 |     rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
 26 |     fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
 27 | 
 28 |     # Get the input image blob, formatted for caffe
 29 |     im_blob, im_scales = _get_image_blob(roidb, random_scale_inds)
 30 | 
 31 |     blobs = {'data': im_blob}
 32 | 
 33 |     # Now, build the region of interest and label blobs
 34 |     rois_blob = np.zeros((0, 5), dtype=np.float32)
 35 |     labels_blob = np.zeros((0), dtype=np.float32)
 36 |     bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32)
 37 |     bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32)
 38 |     # all_overlaps = []
 39 |     for im_i in xrange(num_images):
 40 |         labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \
 41 |             = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image,
 42 |                            num_classes)
 43 | 
 44 |         # Add to RoIs blob
 45 |         rois = _project_im_rois(im_rois, im_scales[im_i])
 46 |         batch_ind = im_i * np.ones((rois.shape[0], 1))
 47 |         rois_blob_this_image = np.hstack((batch_ind, rois))
 48 |         rois_blob = np.vstack((rois_blob, rois_blob_this_image))
 49 | 
 50 |         # Add to labels, bbox targets, and bbox loss blobs
 51 |         labels_blob = np.hstack((labels_blob, labels))
 52 |         if cfg.TRAIN.BBOX_REG:
 53 |             bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets))
 54 |             bbox_inside_blob = np.vstack((bbox_inside_blob, 
 55 |                                           bbox_inside_weights))
 56 |         # all_overlaps = np.hstack((all_overlaps, overlaps))
 57 | 
 58 |     # For debug visualizations
 59 |     # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps)
 60 | 
 61 |     blobs['rois'] = rois_blob
 62 |     blobs['labels'] = labels_blob
 63 | 
 64 |     if cfg.TRAIN.BBOX_REG:
 65 |         blobs['bbox_targets'] = bbox_targets_blob
 66 |         blobs['bbox_inside_weights'] = bbox_inside_blob
 67 |         blobs['bbox_outside_weights'] = \
 68 |             np.array(bbox_inside_blob > 0).astype(np.float32)
 69 | 
 70 |     return blobs
 71 | 
 72 | def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes):
 73 |     """Generate a random sample of RoIs comprising foreground and background
 74 |     examples.
 75 |     """
 76 |     # label = class RoI has max overlap with
 77 |     labels = roidb['max_classes']
 78 |     overlaps = roidb['max_overlaps']
 79 |     rois = roidb['boxes']
 80 | 
 81 |     # Select foreground RoIs as those with >= FG_THRESH overlap
 82 |     fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
 83 |     # Guard against the case when an image has fewer than fg_rois_per_image
 84 |     # foreground RoIs
 85 |     fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size)
 86 |     # Sample foreground regions without replacement
 87 |     if fg_inds.size > 0:
 88 |         fg_inds = npr.choice(
 89 |                 fg_inds, size=fg_rois_per_this_image, replace=False)
 90 | 
 91 |     # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
 92 |     bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
 93 |                        (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
 94 |     # Compute number of background RoIs to take from this image (guarding
 95 |     # against there being fewer than desired)
 96 |     bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
 97 |     bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
 98 |                                         bg_inds.size)
 99 |     # Sample foreground regions without replacement
100 |     if bg_inds.size > 0:
101 |         bg_inds = npr.choice(
102 |                 bg_inds, size=bg_rois_per_this_image, replace=False)
103 | 
104 |     # The indices that we're selecting (both fg and bg)
105 |     keep_inds = np.append(fg_inds, bg_inds)
106 |     # Select sampled values from various arrays:
107 |     labels = labels[keep_inds]
108 |     # Clamp labels for the background RoIs to 0
109 |     labels[fg_rois_per_this_image:] = 0
110 |     overlaps = overlaps[keep_inds]
111 |     rois = rois[keep_inds]
112 | 
113 |     if cfg.TRAIN.BBOX_REG:
114 |         bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(
115 |                 roidb['bbox_targets'][keep_inds, :], num_classes)
116 |     else:
117 |         bbox_targets, bbox_inside_weights = [], []
118 | 
119 |     return labels, overlaps, rois, bbox_targets, bbox_inside_weights
120 | 
121 | def _get_image_blob(roidb, scale_inds):
122 |     """Builds an input blob from the images in the roidb at the specified
123 |     scales.
124 |     """
125 |     num_images = len(roidb)
126 |     processed_ims = []
127 |     im_scales = []
128 |     for i in xrange(num_images):
129 |         im = cv2.imread(roidb[i]['image'])
130 |         if roidb[i]['flipped']:
131 |             im = im[:, ::-1, :]
132 |         target_size = cfg.TRAIN.SCALES[scale_inds[i]]
133 |         im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
134 |                                         cfg.TRAIN.MAX_SIZE)
135 |         im_scales.append(im_scale)
136 |         processed_ims.append(im)
137 | 
138 |     # Create a blob to hold the input images
139 |     blob = im_list_to_blob(processed_ims)
140 | 
141 |     return blob, im_scales
142 | 
143 | def _project_im_rois(im_rois, im_scale_factor):
144 |     """Project image RoIs into the rescaled training image."""
145 |     rois = im_rois * im_scale_factor
146 |     return rois
147 | 
148 | def _get_bbox_regression_labels(bbox_target_data, num_classes):
149 |     """Bounding-box regression targets are stored in a compact form in the
150 |     roidb.
151 | 
152 |     This function expands those targets into the 4-of-4*K representation used
153 |     by the network (i.e. only one class has non-zero targets). The loss weights
154 |     are similarly expanded.
155 | 
156 |     Returns:
157 |         bbox_target_data (ndarray): N x 4K blob of regression targets
158 |         bbox_inside_weights (ndarray): N x 4K blob of loss weights
159 |     """
160 |     clss = bbox_target_data[:, 0]
161 |     bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
162 |     bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
163 |     inds = np.where(clss > 0)[0]
164 |     for ind in inds:
165 |         cls = clss[ind]
166 |         start = 4 * cls
167 |         end = start + 4
168 |         bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
169 |         bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS
170 |     return bbox_targets, bbox_inside_weights
171 | 
172 | def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps):
173 |     """Visualize a mini-batch for debugging."""
174 |     import matplotlib.pyplot as plt
175 |     for i in xrange(rois_blob.shape[0]):
176 |         rois = rois_blob[i, :]
177 |         im_ind = rois[0]
178 |         roi = rois[1:]
179 |         im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy()
180 |         im += cfg.PIXEL_MEANS
181 |         im = im[:, :, (2, 1, 0)]
182 |         im = im.astype(np.uint8)
183 |         cls = labels_blob[i]
184 |         plt.imshow(im)
185 |         print 'class: ', cls, ' overlap: ', overlaps[i]
186 |         plt.gca().add_patch(
187 |             plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0],
188 |                           roi[3] - roi[1], fill=False,
189 |                           edgecolor='r', linewidth=3)
190 |             )
191 |         plt.show()
192 | 


--------------------------------------------------------------------------------
/Seminar6/lib/roi_data_layer/roidb.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata."""
  9 | 
 10 | import numpy as np
 11 | from fast_rcnn.config import cfg
 12 | from fast_rcnn.bbox_transform import bbox_transform
 13 | from utils.cython_bbox import bbox_overlaps
 14 | import PIL
 15 | 
 16 | def prepare_roidb(imdb):
 17 |     """Enrich the imdb's roidb by adding some derived quantities that
 18 |     are useful for training. This function precomputes the maximum
 19 |     overlap, taken over ground-truth boxes, between each ROI and
 20 |     each ground-truth box. The class with maximum overlap is also
 21 |     recorded.
 22 |     """
 23 |     sizes = [PIL.Image.open(imdb.image_path_at(i)).size
 24 |              for i in xrange(imdb.num_images)]
 25 |     roidb = imdb.roidb
 26 |     for i in xrange(len(imdb.image_index)):
 27 |         roidb[i]['image'] = imdb.image_path_at(i)
 28 |         roidb[i]['width'] = sizes[i][0]
 29 |         roidb[i]['height'] = sizes[i][1]
 30 |         # need gt_overlaps as a dense array for argmax
 31 |         gt_overlaps = roidb[i]['gt_overlaps'].toarray()
 32 |         # max overlap with gt over classes (columns)
 33 |         max_overlaps = gt_overlaps.max(axis=1)
 34 |         # gt class that had the max overlap
 35 |         max_classes = gt_overlaps.argmax(axis=1)
 36 |         roidb[i]['max_classes'] = max_classes
 37 |         roidb[i]['max_overlaps'] = max_overlaps
 38 |         # sanity checks
 39 |         # max overlap of 0 => class should be zero (background)
 40 |         zero_inds = np.where(max_overlaps == 0)[0]
 41 |         assert all(max_classes[zero_inds] == 0)
 42 |         # max overlap > 0 => class should not be zero (must be a fg class)
 43 |         nonzero_inds = np.where(max_overlaps > 0)[0]
 44 |         assert all(max_classes[nonzero_inds] != 0)
 45 | 
 46 | def add_bbox_regression_targets(roidb):
 47 |     """Add information needed to train bounding-box regressors."""
 48 |     assert len(roidb) > 0
 49 |     assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'
 50 | 
 51 |     num_images = len(roidb)
 52 |     # Infer number of classes from the number of columns in gt_overlaps
 53 |     num_classes = roidb[0]['gt_overlaps'].shape[1]
 54 |     for im_i in xrange(num_images):
 55 |         rois = roidb[im_i]['boxes']
 56 |         max_overlaps = roidb[im_i]['max_overlaps']
 57 |         max_classes = roidb[im_i]['max_classes']
 58 |         roidb[im_i]['bbox_targets'] = \
 59 |                 _compute_targets(rois, max_overlaps, max_classes)
 60 | 
 61 |     if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
 62 |         # Use fixed / precomputed "means" and "stds" instead of empirical values
 63 |         means = np.tile(
 64 |                 np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1))
 65 |         stds = np.tile(
 66 |                 np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1))
 67 |     else:
 68 |         # Compute values needed for means and stds
 69 |         # var(x) = E(x^2) - E(x)^2
 70 |         class_counts = np.zeros((num_classes, 1)) + cfg.EPS
 71 |         sums = np.zeros((num_classes, 4))
 72 |         squared_sums = np.zeros((num_classes, 4))
 73 |         for im_i in xrange(num_images):
 74 |             targets = roidb[im_i]['bbox_targets']
 75 |             for cls in xrange(1, num_classes):
 76 |                 cls_inds = np.where(targets[:, 0] == cls)[0]
 77 |                 if cls_inds.size > 0:
 78 |                     class_counts[cls] += cls_inds.size
 79 |                     sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
 80 |                     squared_sums[cls, :] += \
 81 |                             (targets[cls_inds, 1:] ** 2).sum(axis=0)
 82 | 
 83 |         means = sums / class_counts
 84 |         stds = np.sqrt(squared_sums / class_counts - means ** 2)
 85 | 
 86 |     print 'bbox target means:'
 87 |     print means
 88 |     print means[1:, :].mean(axis=0) # ignore bg class
 89 |     print 'bbox target stdevs:'
 90 |     print stds
 91 |     print stds[1:, :].mean(axis=0) # ignore bg class
 92 | 
 93 |     # Normalize targets
 94 |     if cfg.TRAIN.BBOX_NORMALIZE_TARGETS:
 95 |         print "Normalizing targets"
 96 |         for im_i in xrange(num_images):
 97 |             targets = roidb[im_i]['bbox_targets']
 98 |             for cls in xrange(1, num_classes):
 99 |                 cls_inds = np.where(targets[:, 0] == cls)[0]
100 |                 roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :]
101 |                 roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :]
102 |     else:
103 |         print "NOT normalizing targets"
104 | 
105 |     # These values will be needed for making predictions
106 |     # (the predicts will need to be unnormalized and uncentered)
107 |     return means.ravel(), stds.ravel()
108 | 
109 | def _compute_targets(rois, overlaps, labels):
110 |     """Compute bounding-box regression targets for an image."""
111 |     # Indices of ground-truth ROIs
112 |     gt_inds = np.where(overlaps == 1)[0]
113 |     if len(gt_inds) == 0:
114 |         # Bail if the image has no ground-truth ROIs
115 |         return np.zeros((rois.shape[0], 5), dtype=np.float32)
116 |     # Indices of examples for which we try to make predictions
117 |     ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
118 | 
119 |     # Get IoU overlap between each ex ROI and gt ROI
120 |     ex_gt_overlaps = bbox_overlaps(
121 |         np.ascontiguousarray(rois[ex_inds, :], dtype=np.float),
122 |         np.ascontiguousarray(rois[gt_inds, :], dtype=np.float))
123 | 
124 |     # Find which gt ROI each ex ROI has max overlap with:
125 |     # this will be the ex ROI's gt target
126 |     gt_assignment = ex_gt_overlaps.argmax(axis=1)
127 |     gt_rois = rois[gt_inds[gt_assignment], :]
128 |     ex_rois = rois[ex_inds, :]
129 | 
130 |     targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
131 |     targets[ex_inds, 0] = labels[ex_inds]
132 |     targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
133 |     return targets
134 | 


--------------------------------------------------------------------------------
/Seminar6/lib/setup.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import os
 9 | from os.path import join as pjoin
10 | from setuptools import setup
11 | from distutils.extension import Extension
12 | from Cython.Distutils import build_ext
13 | import subprocess
14 | import numpy as np
15 | 
16 | def find_in_path(name, path):
17 |     "Find a file in a search path"
18 |     # Adapted fom
19 |     # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
20 |     for dir in path.split(os.pathsep):
21 |         binpath = pjoin(dir, name)
22 |         if os.path.exists(binpath):
23 |             return os.path.abspath(binpath)
24 |     return None
25 | 
26 | # Obtain the numpy include directory.  This logic works across numpy versions.
27 | try:
28 |     numpy_include = np.get_include()
29 | except AttributeError:
30 |     numpy_include = np.get_numpy_include()
31 | 
32 | def customize_compiler_for_nvcc(self):
33 |     """inject deep into distutils to customize how the dispatch
34 |     to gcc/nvcc works.
35 | 
36 |     If you subclass UnixCCompiler, it's not trivial to get your subclass
37 |     injected in, and still have the right customizations (i.e.
38 |     distutils.sysconfig.customize_compiler) run on it. So instead of going
39 |     the OO route, I have this. Note, it's kindof like a wierd functional
40 |     subclassing going on."""
41 | 
42 |     # tell the compiler it can processes .cu
43 |     self.src_extensions.append('.cu')
44 | 
45 |     # save references to the default compiler_so and _comple methods
46 |     default_compiler_so = self.compiler_so
47 |     super = self._compile
48 | 
49 |     # now redefine the _compile method. This gets executed for each
50 |     # object but distutils doesn't have the ability to change compilers
51 |     # based on source extension: we add it.
52 |     def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
53 |         if os.path.splitext(src)[1] == '.cu':
54 |             # use the cuda for .cu files
55 |             self.set_executable('compiler_so', CUDA['nvcc'])
56 |             # use only a subset of the extra_postargs, which are 1-1 translated
57 |             # from the extra_compile_args in the Extension class
58 |             postargs = extra_postargs['nvcc']
59 |         else:
60 |             postargs = extra_postargs['gcc']
61 | 
62 |         super(obj, src, ext, cc_args, postargs, pp_opts)
63 |         # reset the default compiler_so, which we might have changed for cuda
64 |         self.compiler_so = default_compiler_so
65 | 
66 |     # inject our redefined _compile method into the class
67 |     self._compile = _compile
68 | 
69 | 
70 | # run the customize_compiler
71 | class custom_build_ext(build_ext):
72 |     def build_extensions(self):
73 |         customize_compiler_for_nvcc(self.compiler)
74 |         build_ext.build_extensions(self)
75 | 
76 | 
77 | ext_modules = [
78 |     Extension(
79 |         "utils.cython_bbox",
80 |         ["utils/bbox.pyx"],
81 |         extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
82 |         include_dirs = [numpy_include]
83 |     ),
84 |     Extension(
85 |         "nms.cpu_nms",
86 |         ["nms/cpu_nms.pyx"],
87 |         extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
88 |         include_dirs = [numpy_include]
89 |     ),
90 | ]
91 | 
92 | setup(
93 |     name='fast_rcnn',
94 |     ext_modules=ext_modules,
95 |     # inject our custom trigger
96 |     cmdclass={'build_ext': custom_build_ext},
97 | )
98 | 


--------------------------------------------------------------------------------
/Seminar6/lib/transform/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar6/lib/transform/__init__.py


--------------------------------------------------------------------------------
/Seminar6/lib/transform/torch_image_transform_layer.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast/er R-CNN
 3 | # Licensed under The MIT License [see LICENSE for details]
 4 | # --------------------------------------------------------
 5 | 
 6 | """ Transform images for compatibility with models trained with
 7 | https://github.com/facebook/fb.resnet.torch.
 8 | 
 9 | Usage in model prototxt:
10 | 
11 | layer {
12 |   name: 'data_xform'
13 |   type: 'Python'
14 |   bottom: 'data_caffe'
15 |   top: 'data'
16 |   python_param {
17 |     module: 'transform.torch_image_transform_layer'
18 |     layer: 'TorchImageTransformLayer'
19 |   }
20 | }
21 | """
22 | 
23 | import caffe
24 | from fast_rcnn.config import cfg
25 | import numpy as np
26 | 
27 | class TorchImageTransformLayer(caffe.Layer):
28 |     def setup(self, bottom, top):
29 |         # (1, 3, 1, 1) shaped arrays
30 |         self.PIXEL_MEANS = \
31 |             np.array([[[[0.48462227599918]],
32 |                        [[0.45624044862054]],
33 |                        [[0.40588363755159]]]])
34 |         self.PIXEL_STDS = \
35 |             np.array([[[[0.22889466674951]],
36 |                        [[0.22446679341259]],
37 |                        [[0.22495548344775]]]])
38 |         # The default ("old") pixel means that were already subtracted
39 |         channel_swap = (0, 3, 1, 2)
40 |         self.OLD_PIXEL_MEANS = \
41 |             cfg.PIXEL_MEANS[np.newaxis, :, :, :].transpose(channel_swap)
42 | 
43 |         top[0].reshape(*(bottom[0].shape))
44 | 
45 |     def forward(self, bottom, top):
46 |         ims = bottom[0].data
47 |         # Invert the channel means that were already subtracted
48 |         ims += self.OLD_PIXEL_MEANS
49 |         # 1. Permute BGR to RGB and normalize to [0, 1]
50 |         ims = ims[:, [2, 1, 0], :, :] / 255.0
51 |         # 2. Remove channel means
52 |         ims -= self.PIXEL_MEANS
53 |         # 3. Standardize channels
54 |         ims /= self.PIXEL_STDS
55 |         top[0].reshape(*(ims.shape))
56 |         top[0].data[...] = ims
57 | 
58 |     def backward(self, top, propagate_down, bottom):
59 |         """This layer does not propagate gradients."""
60 |         pass
61 | 
62 |     def reshape(self, bottom, top):
63 |         """Reshaping happens during the call to forward."""
64 |         pass
65 | 


--------------------------------------------------------------------------------
/Seminar6/lib/utils/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.so
3 | 


--------------------------------------------------------------------------------
/Seminar6/lib/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/Seminar6/lib/utils/bbox.pyx:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Sergey Karayev
 6 | # --------------------------------------------------------
 7 | 
 8 | cimport cython
 9 | import numpy as np
10 | cimport numpy as np
11 | 
12 | DTYPE = np.float
13 | ctypedef np.float_t DTYPE_t
14 | 
15 | def bbox_overlaps(
16 |         np.ndarray[DTYPE_t, ndim=2] boxes,
17 |         np.ndarray[DTYPE_t, ndim=2] query_boxes):
18 |     """
19 |     Parameters
20 |     ----------
21 |     boxes: (N, 4) ndarray of float
22 |     query_boxes: (K, 4) ndarray of float
23 |     Returns
24 |     -------
25 |     overlaps: (N, K) ndarray of overlap between boxes and query_boxes
26 |     """
27 |     cdef unsigned int N = boxes.shape[0]
28 |     cdef unsigned int K = query_boxes.shape[0]
29 |     cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
30 |     cdef DTYPE_t iw, ih, box_area
31 |     cdef DTYPE_t ua
32 |     cdef unsigned int k, n
33 |     for k in range(K):
34 |         box_area = (
35 |             (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
36 |             (query_boxes[k, 3] - query_boxes[k, 1] + 1)
37 |         )
38 |         for n in range(N):
39 |             iw = (
40 |                 min(boxes[n, 2], query_boxes[k, 2]) -
41 |                 max(boxes[n, 0], query_boxes[k, 0]) + 1
42 |             )
43 |             if iw > 0:
44 |                 ih = (
45 |                     min(boxes[n, 3], query_boxes[k, 3]) -
46 |                     max(boxes[n, 1], query_boxes[k, 1]) + 1
47 |                 )
48 |                 if ih > 0:
49 |                     ua = float(
50 |                         (boxes[n, 2] - boxes[n, 0] + 1) *
51 |                         (boxes[n, 3] - boxes[n, 1] + 1) +
52 |                         box_area - iw * ih
53 |                     )
54 |                     overlaps[n, k] = iw * ih / ua
55 |     return overlaps
56 | 


--------------------------------------------------------------------------------
/Seminar6/lib/utils/blob.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Blob helper functions."""
 9 | 
10 | import numpy as np
11 | import cv2
12 | 
13 | def im_list_to_blob(ims):
14 |     """Convert a list of images into a network input.
15 | 
16 |     Assumes images are already prepared (means subtracted, BGR order, ...).
17 |     """
18 |     max_shape = np.array([im.shape for im in ims]).max(axis=0)
19 |     num_images = len(ims)
20 |     blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
21 |                     dtype=np.float32)
22 |     for i in xrange(num_images):
23 |         im = ims[i]
24 |         blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
25 |     # Move channels (axis 3) to axis 1
26 |     # Axis order will become: (batch elem, channel, height, width)
27 |     channel_swap = (0, 3, 1, 2)
28 |     blob = blob.transpose(channel_swap)
29 |     return blob
30 | 
31 | def prep_im_for_blob(im, pixel_means, target_size, max_size):
32 |     """Mean subtract and scale an image for use in a blob."""
33 |     im = im.astype(np.float32, copy=False)
34 |     im -= pixel_means
35 |     im_shape = im.shape
36 |     im_size_min = np.min(im_shape[0:2])
37 |     im_size_max = np.max(im_shape[0:2])
38 |     im_scale = float(target_size) / float(im_size_min)
39 |     # Prevent the biggest axis from being more than MAX_SIZE
40 |     if np.round(im_scale * im_size_max) > max_size:
41 |         im_scale = float(max_size) / float(im_size_max)
42 |     im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
43 |                     interpolation=cv2.INTER_LINEAR)
44 | 
45 |     return im, im_scale
46 | 


--------------------------------------------------------------------------------
/Seminar6/lib/utils/timer.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import time
 9 | 
10 | class Timer(object):
11 |     """A simple timer."""
12 |     def __init__(self):
13 |         self.total_time = 0.
14 |         self.calls = 0
15 |         self.start_time = 0.
16 |         self.diff = 0.
17 |         self.average_time = 0.
18 | 
19 |     def tic(self):
20 |         # using time.time instead of time.clock because time time.clock
21 |         # does not normalize for multithreading
22 |         self.start_time = time.time()
23 | 
24 |     def toc(self, average=True):
25 |         self.diff = time.time() - self.start_time
26 |         self.total_time += self.diff
27 |         self.calls += 1
28 |         self.average_time = self.total_time / self.calls
29 |         if average:
30 |             return self.average_time
31 |         else:
32 |             return self.diff
33 | 


--------------------------------------------------------------------------------
/Seminar6/notebook/img/rcnn_slide.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar6/notebook/img/rcnn_slide.jpg


--------------------------------------------------------------------------------
/Seminar6/tools/_init_paths.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Set up paths for Fast R-CNN."""
 9 | 
10 | import os.path as osp
11 | import sys
12 | 
13 | def add_path(path):
14 |     if path not in sys.path:
15 |         sys.path.insert(0, path)
16 | 
17 | this_dir = osp.dirname(__file__)
18 | 
19 | # Add lib to PYTHONPATH
20 | lib_path = osp.join(this_dir, '..', 'lib')
21 | add_path(lib_path)
22 | 
23 | # Add root dir to PYTHONPATH
24 | lib_path = osp.join(this_dir, '..')
25 | add_path(lib_path)
26 | 


--------------------------------------------------------------------------------
/Seminar6/tools/eval_recall.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import _init_paths
 4 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list
 5 | from datasets.factory import get_imdb
 6 | import argparse
 7 | import time, os, sys
 8 | import numpy as np
 9 | 
10 | def parse_args():
11 |     """
12 |     Parse input arguments
13 |     """
14 |     parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
15 |     parser.add_argument('--imdb', dest='imdb_name',
16 |                         help='dataset to test',
17 |                         default='voc_2007_test', type=str)
18 |     parser.add_argument('--method', dest='method',
19 |                         help='proposal method',
20 |                         default='selective_search', type=str)
21 |     parser.add_argument('--rpn-file', dest='rpn_file',
22 |                         default=None, type=str)
23 | 
24 |     if len(sys.argv) == 1:
25 |         parser.print_help()
26 |         sys.exit(1)
27 | 
28 |     args = parser.parse_args()
29 |     return args
30 | 
31 | if __name__ == '__main__':
32 |     args = parse_args()
33 | 
34 |     print('Called with args:')
35 |     print(args)
36 | 
37 |     imdb = get_imdb(args.imdb_name)
38 |     imdb.set_proposal_method(args.method)
39 |     if args.rpn_file is not None:
40 |         imdb.config['rpn_file'] = args.rpn_file
41 | 
42 |     candidate_boxes = None
43 |     if 0:
44 |         import scipy.io as sio
45 |         filename = 'debug/stage1_rpn_voc_2007_test.mat'
46 |         raw_data = sio.loadmat(filename)['aboxes'].ravel()
47 |         candidate_boxes = raw_data
48 | 
49 |     ar, gt_overlaps, recalls, thresholds = \
50 |         imdb.evaluate_recall(candidate_boxes=candidate_boxes)
51 |     print 'Method: {}'.format(args.method)
52 |     print 'AverageRec: {:.3f}'.format(ar)
53 | 
54 |     def recall_at(t):
55 |         ind = np.where(thresholds > t - 1e-5)[0][0]
56 |         assert np.isclose(thresholds[ind], t)
57 |         return recalls[ind]
58 | 
59 |     print 'Recall@0.5: {:.3f}'.format(recall_at(0.5))
60 |     print 'Recall@0.6: {:.3f}'.format(recall_at(0.6))
61 |     print 'Recall@0.7: {:.3f}'.format(recall_at(0.7))
62 |     print 'Recall@0.8: {:.3f}'.format(recall_at(0.8))
63 |     print 'Recall@0.9: {:.3f}'.format(recall_at(0.9))
64 |     # print again for easy spreadsheet copying
65 |     print '{:.3f}'.format(ar)
66 |     print '{:.3f}'.format(recall_at(0.5))
67 |     print '{:.3f}'.format(recall_at(0.6))
68 |     print '{:.3f}'.format(recall_at(0.7))
69 |     print '{:.3f}'.format(recall_at(0.8))
70 |     print '{:.3f}'.format(recall_at(0.9))
71 | 


--------------------------------------------------------------------------------
/Seminar6/tools/reval.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # --------------------------------------------------------
 4 | # Fast R-CNN
 5 | # Copyright (c) 2015 Microsoft
 6 | # Licensed under The MIT License [see LICENSE for details]
 7 | # Written by Ross Girshick
 8 | # --------------------------------------------------------
 9 | 
10 | """Reval = re-eval. Re-evaluate saved detections."""
11 | 
12 | import _init_paths
13 | from fast_rcnn.test import apply_nms
14 | from fast_rcnn.config import cfg
15 | from datasets.factory import get_imdb
16 | import cPickle
17 | import os, sys, argparse
18 | import numpy as np
19 | 
20 | def parse_args():
21 |     """
22 |     Parse input arguments
23 |     """
24 |     parser = argparse.ArgumentParser(description='Re-evaluate results')
25 |     parser.add_argument('output_dir', nargs=1, help='results directory',
26 |                         type=str)
27 |     parser.add_argument('--imdb', dest='imdb_name',
28 |                         help='dataset to re-evaluate',
29 |                         default='voc_2007_test', type=str)
30 |     parser.add_argument('--matlab', dest='matlab_eval',
31 |                         help='use matlab for evaluation',
32 |                         action='store_true')
33 |     parser.add_argument('--comp', dest='comp_mode', help='competition mode',
34 |                         action='store_true')
35 |     parser.add_argument('--nms', dest='apply_nms', help='apply nms',
36 |                         action='store_true')
37 | 
38 |     if len(sys.argv) == 1:
39 |         parser.print_help()
40 |         sys.exit(1)
41 | 
42 |     args = parser.parse_args()
43 |     return args
44 | 
45 | def from_dets(imdb_name, output_dir, args):
46 |     imdb = get_imdb(imdb_name)
47 |     imdb.competition_mode(args.comp_mode)
48 |     imdb.config['matlab_eval'] = args.matlab_eval
49 |     with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f:
50 |         dets = cPickle.load(f)
51 | 
52 |     if args.apply_nms:
53 |         print 'Applying NMS to all detections'
54 |         nms_dets = apply_nms(dets, cfg.TEST.NMS)
55 |     else:
56 |         nms_dets = dets
57 | 
58 |     print 'Evaluating detections'
59 |     imdb.evaluate_detections(nms_dets, output_dir)
60 | 
61 | if __name__ == '__main__':
62 |     args = parse_args()
63 | 
64 |     output_dir = os.path.abspath(args.output_dir[0])
65 |     imdb_name = args.imdb_name
66 |     from_dets(imdb_name, output_dir, args)
67 | 


--------------------------------------------------------------------------------
/Seminar6/tools/test_net.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # --------------------------------------------------------
 4 | # Fast R-CNN
 5 | # Copyright (c) 2015 Microsoft
 6 | # Licensed under The MIT License [see LICENSE for details]
 7 | # Written by Ross Girshick
 8 | # --------------------------------------------------------
 9 | 
10 | """Test a Fast R-CNN network on an image database."""
11 | 
12 | import _init_paths
13 | from fast_rcnn.test import test_net
14 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list
15 | from datasets.factory import get_imdb
16 | import argparse
17 | import pprint
18 | import time, os, sys
19 | 
20 | from custom.tester import Tester
21 | 
22 | def parse_args():
23 |     """
24 |     Parse input arguments
25 |     """
26 |     parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
27 |     parser.add_argument('--snapshot', dest='snapshot',
28 |                         help='model to test',
29 |                         default=None, type=str)
30 |     parser.add_argument('--cfg', dest='cfg_file',
31 |                         help='optional config file', default=None, type=str)
32 |     parser.add_argument('--wait', dest='wait',
33 |                         help='wait until net file exists',
34 |                         default=True, type=bool)
35 |     parser.add_argument('--imdb', dest='imdb_name',
36 |                         help='dataset to test',
37 |                         default='voc_2007_test', type=str)
38 |     parser.add_argument('--comp', dest='comp_mode', help='competition mode',
39 |                         action='store_true')
40 |     parser.add_argument('--set', dest='set_cfgs',
41 |                         help='set config keys', default=None,
42 |                         nargs=argparse.REMAINDER)
43 |     parser.add_argument('--vis', dest='vis', help='visualize detections',
44 |                         action='store_true')
45 |     parser.add_argument('--num_dets', dest='max_per_image',
46 |                         help='max number of detections per image',
47 |                         default=100, type=int)
48 | 
49 |     if len(sys.argv) == 1:
50 |         parser.print_help()
51 |         sys.exit(1)
52 | 
53 |     args = parser.parse_args()
54 |     return args
55 | 
56 | if __name__ == '__main__':
57 |     args = parse_args()
58 | 
59 |     print('Called with args:')
60 |     print(args)
61 | 
62 |     if args.cfg_file is not None:
63 |         cfg_from_file(args.cfg_file)
64 |     if args.set_cfgs is not None:
65 |         cfg_from_list(args.set_cfgs)
66 | 
67 |     print('Using config:')
68 |     pprint.pprint(cfg)
69 | 
70 |     tester = Tester(args.snapshot)
71 |     tester.name = os.path.splitext(os.path.basename(args.snapshot))[0]
72 | 
73 |     imdb = get_imdb(args.imdb_name)
74 |     imdb.competition_mode(args.comp_mode)
75 |     imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)
76 | 
77 |     test_net(tester, imdb, max_per_image=args.max_per_image, vis=args.vis)
78 | 


--------------------------------------------------------------------------------
/Seminar6/tools/train_net.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # --------------------------------------------------------
 4 | # Fast R-CNN
 5 | # Copyright (c) 2015 Microsoft
 6 | # Licensed under The MIT License [see LICENSE for details]
 7 | # Written by Ross Girshick
 8 | # --------------------------------------------------------
 9 | 
10 | """Train a Fast R-CNN network on a region of interest database."""
11 | 
12 | import _init_paths
13 | from fast_rcnn.train import get_training_roidb, train_net
14 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
15 | from datasets.factory import get_imdb
16 | import datasets.imdb
17 | import argparse
18 | import pprint
19 | import numpy as np
20 | import sys
21 | 
22 | def parse_args():
23 |     """
24 |     Parse input arguments
25 |     """
26 |     parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
27 |     parser.add_argument('--iters', dest='max_iters',
28 |                         help='number of iterations to train',
29 |                         default=40000, type=int)
30 |     parser.add_argument('--cfg', dest='cfg_file',
31 |                         help='optional config file',
32 |                         default=None, type=str)
33 |     parser.add_argument('--imdb', dest='imdb_name',
34 |                         help='dataset to train on',
35 |                         default='voc_2007_trainval', type=str)
36 |     parser.add_argument('--rand', dest='randomize',
37 |                         help='randomize (do not use a fixed seed)',
38 |                         action='store_true')
39 |     parser.add_argument('--set', dest='set_cfgs',
40 |                         help='set config keys', default=None,
41 |                         nargs=argparse.REMAINDER)
42 | 
43 |     if len(sys.argv) == 1:
44 |         parser.print_help()
45 |         sys.exit(1)
46 | 
47 |     args = parser.parse_args()
48 |     return args
49 | 
50 | def combined_roidb(imdb_names):
51 |     def get_roidb(imdb_name):
52 |         imdb = get_imdb(imdb_name)
53 |         print 'Loaded dataset `{:s}` for training'.format(imdb.name)
54 |         imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
55 |         print 'Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)
56 |         roidb = get_training_roidb(imdb)
57 |         return roidb
58 | 
59 |     roidbs = [get_roidb(s) for s in imdb_names.split('+')]
60 |     roidb = roidbs[0]
61 |     if len(roidbs) > 1:
62 |         for r in roidbs[1:]:
63 |             roidb.extend(r)
64 |         imdb = datasets.imdb.imdb(imdb_names)
65 |     else:
66 |         imdb = get_imdb(imdb_names)
67 |     return imdb, roidb
68 | 
69 | if __name__ == '__main__':
70 |     args = parse_args()
71 | 
72 |     print('Called with args:')
73 |     print(args)
74 | 
75 |     if args.cfg_file is not None:
76 |         cfg_from_file(args.cfg_file)
77 |     if args.set_cfgs is not None:
78 |         cfg_from_list(args.set_cfgs)
79 | 
80 |     print('Using config:')
81 |     pprint.pprint(cfg)
82 | 
83 |     imdb, roidb = combined_roidb(args.imdb_name)
84 |     print '{:d} roidb entries'.format(len(roidb))
85 | 
86 |     output_dir = get_output_dir(imdb)
87 |     print 'Output will be saved to `{:s}`'.format(output_dir)
88 | 
89 |     train_net(roidb, output_dir,
90 |               max_iters=args.max_iters)
91 | 


--------------------------------------------------------------------------------
/Seminar7/HW_GAN.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "This requires you to write a 2D GAN game. I let you to get into the topic yourself, whitout any explonations from my side. You can watch lecture, seminar, read papers and tutorials (fun, fun, fun)."
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "### Homework"
15 |    ]
16 |   },
17 |   {
18 |    "cell_type": "markdown",
19 |    "metadata": {
20 |     "collapsed": true
21 |    },
22 |    "source": [
23 |     "I want you to implement a simple 2D GAN game. The kind of animation, I want to see is like in [this video](https://www.youtube.com/watch?v=KeJINHjyzOU) at 15:30 or in [here](https://habrahabr.ru/post/275429/) but in 2D. You can google, search code at github, whatever, but the network should be based on Theano. \n",
24 |     "\n",
25 |     "Basically you will need to come up with true distribution $P$, say mixture of gaussians (surprise me), sample some data from it. Visualize it as a heatmap. To visualize $G$ density you can fix $N$ noise vectors $\\{z_i\\} \\quad i=1,\\dots, N$ and draw a circle for each $G(z_i)$. It is also funny to visualize discriminator as a vector field (can be done with `plt.arrow`, `plt.quiver plo). Look how it should be in the middle of [this page](http://www.inference.vc/an-alternative-update-rule-for-generative-adversarial-networks/).\n",
26 |     "\n",
27 |     "Please, make sure your code works if 'Run All' is pressed and it draws some animation.\n",
28 |     "\n",
29 |     "Good luck!"
30 |    ]
31 |   }
32 |  ],
33 |  "metadata": {
34 |   "kernelspec": {
35 |    "display_name": "Python 2",
36 |    "language": "python",
37 |    "name": "python2"
38 |   },
39 |   "language_info": {
40 |    "codemirror_mode": {
41 |     "name": "ipython",
42 |     "version": 2
43 |    },
44 |    "file_extension": ".py",
45 |    "mimetype": "text/x-python",
46 |    "name": "python",
47 |    "nbconvert_exporter": "python",
48 |    "pygments_lexer": "ipython2",
49 |    "version": "2.7.11"
50 |   }
51 |  },
52 |  "nbformat": 4,
53 |  "nbformat_minor": 0
54 | }
55 | 


--------------------------------------------------------------------------------
/Seminar7/HW_textures_style.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Texture synthesis and artistic style transfer\n",
  8 |     "\n",
  9 |     "In this homework you are to imlement [A Neural algorithm of artistic style](http://arxiv.org/pdf/1508.06576v2.pdf). This is an extension of [Texture Synthesis Using Convolutional Neural Networks](http://arxiv.org/pdf/1505.07376v3.pdf) method.\n",
 10 |     "\n",
 11 |     "The core of the method -- VGG and constrained optimization. The constrains are of two types: *content* and *style*. Given a content image **C** and style image **S** we want to generate an image **X** with content from **C** and style (whatever it really means) from **S**. \n",
 12 |     "\n",
 13 |     "We want to design a loss function for the optimization process. Considering \\[1\\], \\[2\\], an input image is easily invertable from the outputs at intermediate layers. This explains the idea of making an intermediate representation $F_X$ of **X** close to **C** representation $F_C$. \n",
 14 |     "\n",
 15 |     "$$\n",
 16 |     "   L_{content} = || F_X - F_C || \\rightarrow \\min_X\n",
 17 |     "$$\n",
 18 |     "\n",
 19 |     "Note, that representation $F$ preserve spatial information. Idea: let us dismiss it, so we will know what objects are there on the picture, but will not be able to reestablish their localtion. The style can be thought as something independent of content, something we are left with if we let the content off. L. Gatys suggests to dismiss spatial information by computing correlations between the feature maps $F$. If $F$ has dimensions `CxWxH`, then correlation matrix will be `CxC`, and look there's no spatial dimentions. So the style term will be responsible for mathing these correlation (Gram) matrices. \n",
 20 |     "\n",
 21 |     "$$\n",
 22 |     "   L_{style} = || Gram(F_X) - Gram(F_C) || \\rightarrow \\min_X\n",
 23 |     "$$\n",
 24 |     "\n",
 25 |     "And finaly we combine the two.\n",
 26 |     "\n",
 27 |     "$$\n",
 28 |     "   L = \\alpha L_{content} + \\beta L_{style} \\min_X\n",
 29 |     "$$\n",
 30 |     "\n",
 31 |     "Read the paper and the code for the details on layers, features $F$ are got from."
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "markdown",
 36 |    "metadata": {},
 37 |    "source": [
 38 |     "#### A little bit of history behind this texture generation method\n",
 39 |     "\n",
 40 |     "Actually the idea comes from 90th, when mathematical models of texures were developed \\[3\\]. They defined a probabolistic model for texture generation. They used an idea, that two images are indeed two samples of a particular texture iff their statistics match. The statistics used are histograms of given texture $I$ filtered with a number of filters: $\\{hist(F_i * I), \\quad i = 1,\\dots, k\\}$. And whatever image has the same statistics is thought as a sample of texture $I$. The main drawback was the Gibbs sampling was employed (which is very slow). \\[4\\] suggested exactly the scheme we use now: starting from a random image, let's adjust its statistics iteratively so they match the desired. \n",
 41 |     "\n",
 42 |     "Now, what is changed: the filters. \\[4\\] used carefully crafted set of filters, and now we use neural network based non-linear filters. We still use the idea of matching statistics, but the statistics improved. \n",
 43 |     "\n",
 44 |     "\\[1\\] *A.Mahendran, A.Vedaldi [Understanding Deep Image Representations by Inverting Them](https://www.robots.ox.ac.uk/~vgg/publications/2015/Mahendran15/mahendran15.pdf)*\n",
 45 |     "\n",
 46 |     "\\[2\\] *A.Dosovitsky, T.Brox [Inverting Visual Representations with Convolutional Networks](http://arxiv.org/pdf/1506.02753v3.pdf)*\n",
 47 |     "\n",
 48 |     "\\[3\\] *Zhu et. al. Filters, 1997 [Random Fields and Maximum Entropy (FRAME):\n",
 49 |     "Towards a Unified Theory for Texture Modeling](http://www.stat.ucla.edu/~ywu/research/papers/ijcv.pdf)*\n",
 50 |     "\n",
 51 |     "\\[4\\] *Portilla & Simoncelli, 2000  [A Parametric Texture Model Based on Joint Statistics\n",
 52 |     "of Complex Wavelet Coefficients](http://www.cns.nyu.edu/pub/lcv/portilla99-reprint.pdf)*"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "metadata": {},
 58 |    "source": [
 59 |     "# Homework"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "To prevent you from technical problems, you may use a [complete code for the method](https://github.com/Lasagne/Recipes/tree/master/examples/styletransfer). \n",
 67 |     "Your task will be to play around with it. "
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "### First part\n",
 75 |     "**Common mandatory part**:\n",
 76 |     "- Generate your favourite texture (please, do not use starry night). All you need to do is to set content weight to 0.  \n",
 77 |     "- Stylize your favourite photo with your favourite style (hope you use something interesting).\n",
 78 |     "- Give an explanation for matching Gram matrices. What does it mean to minimize distance between them in terms of random variables? Assume a true distripution $P$, and model distibution $Q$. What class does $Q$ belong when matching gram matrices? Show, that $KL (P || Q)$ is minimized when Gram matrices are matched. In other words you need to come up with $Q$ such that $KL$ divergence is minimized when models gram matrix is equal to a target Gram matix. If you do not understand the question spend more time, please. If you want a hint after all, here is a [Telegram bot for you](https://telegram.me/rdl_hw7_bot) (send /hint to him)."
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "markdown",
 83 |    "metadata": {},
 84 |    "source": [
 85 |     "### Second part\n",
 86 |     "We give you **two options** for the second part.\n",
 87 |     "\n",
 88 |     "**First one** (if you are lazy or do not have GPU do just this):\n",
 89 |     "- Implement **Mean** and **Covariance** matching functions instead of $Gram$ matching. That is: \n",
 90 |     "    - *Mean* is a vector of size `C` which containes means over feature maps\n",
 91 |     "    - *Covariance* matrix is a *Gram* matrix of $Feats-mean$\n",
 92 |     "- What is $Q$ now? \n",
 93 |     "- Generate texture and stylize with $mean$ loss only; with $mean$ + $Covariance$ loss. Plot the results, side by side (3 textures and 3 stylized). What do you think? Actually, $Gram$ matrix or $Mean$ or $Mean$ + $Covariance$ matrix can be thought as texture descriptors. Does $mean$ encoding have enough parameters to represent texures?  \n",
 94 |     "- ***OR*** come up with your method to remove spatial information instead of above.\n",
 95 |     "- Bonus: you can mix several styles, averaging their representations. It can be fun. Some examples are [here](https://github.com/jcjohnson/neural-style).\n",
 96 |     "\n",
 97 |     "**Second one** (hardcore):\n",
 98 |     "- Substitute gram matrices with discriminator as in GAN. That is, you match distributions matching gram matrices and discriminator is designed to match distributions. Probably $Q$, we have defined is weak or too constraintive. Neural network based discriminator should be more flexible in this sense.\n",
 99 |     "    - The procedure will be a little bit unusual: we will optimize NN inside optimization loop w.r.t. image.\n",
100 |     "    - You need to define a pixel level discriminator (at each layer you have $WH$ objects, each with $C$ features). Basically it should decide whether a pixel came from style image or from current image $X$. \n",
101 |     "    - So the process is like that: \n",
102 |     "        - At each image optimization iteration update D (actually you do not need to do minibatches updates here, you can simulate fully-connected layers with 1x1 convolutions, softmax with sigmoids). You will need to find a trade-off, for how long and how frequent should updates be. \n",
103 |     "        - Then propagate gradient just like in GAN when optimizing $G$ i.e. swap labels (another strategy is in [here](https://www.robots.ox.ac.uk/~vgg/rg/papers/Tzeng_ICCV2015.pdf), eq. 4).\n",
104 |     "        - Let L-BFGS (or whatever, probably adam will be more stable) update $X$.\n",
105 |     "    - Discriminator architecture is up to you. It's better to start with logistic regression which should emulate $Mean$ + $Cov$ matching (isn't it?). \n",
106 |     "    - I tried this myself without content loss only."
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "**Do everything in this notebook, I need your code as well as the generated images**"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "markdown",
118 |    "metadata": {},
119 |    "source": [
120 |     "HINTS: \n",
121 |     "\n",
122 |     "- In case you do not have GPU, you need to substitute the line:\n",
123 |     "  \n",
124 |     "  `from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer`\n",
125 |     "\n",
126 |     "   with\n",
127 |     "\n",
128 |     "   `from lasagne.layers import Conv2DLayer as ConvLayer`\n",
129 |     "   \n",
130 |     "   \n",
131 |     "- If you do not have GPU, resize your images to 256x256 and no more. Even at this resolution it may take an hour. You can decrease the number of iterations if it takes too long. "
132 |    ]
133 |   }
134 |  ],
135 |  "metadata": {
136 |   "kernelspec": {
137 |    "display_name": "Python 2",
138 |    "language": "python",
139 |    "name": "python2"
140 |   },
141 |   "language_info": {
142 |    "codemirror_mode": {
143 |     "name": "ipython",
144 |     "version": 2
145 |    },
146 |    "file_extension": ".py",
147 |    "mimetype": "text/x-python",
148 |    "name": "python",
149 |    "nbconvert_exporter": "python",
150 |    "pygments_lexer": "ipython2",
151 |    "version": "2.7.11"
152 |   }
153 |  },
154 |  "nbformat": 4,
155 |  "nbformat_minor": 0
156 | }
157 | 


--------------------------------------------------------------------------------
/Seminar7/README.md:
--------------------------------------------------------------------------------
1 | More reading:
2 | - [likemo.net](http://likemo.net/)
3 | - Conditional GAN [example](https://github.com/TIXFeniks/Recipes/blob/4b83a1248a9eb73ca70777333f54f2598e762c6b/examples/Generating%20fonts%20with%20adversarial%20networks/Generating%2Bfonts%2Bwith%2Badversarial%2Bnetworks.ipynb) from the zoo
4 | - Some example from [habr](https://habrahabr.ru/post/278425/)
5 | - Fast neural doodle [repo](https://github.com/DmitryUlyanov/fast-neural-doodle)
6 | 


--------------------------------------------------------------------------------
/Seminar7/sem7.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Seminar 7"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Definition\n",
 15 |     "$$\n",
 16 |     "min_G max_D V(D,G) =  \\mathbb{E}_{x\\sim P} \\log D(x) + \\mathbb{E}_{z\\sim \\mathcal{N}} \\log(1 - D(G(z)))\n",
 17 |     "$$"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "markdown",
 22 |    "metadata": {},
 23 |    "source": [
 24 |     "Let generator $G$ have parameters $\\theta$ and discriminator $D$ paramenters $\\psi$."
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "markdown",
 29 |    "metadata": {},
 30 |    "source": [
 31 |     "## Learning"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "markdown",
 36 |    "metadata": {},
 37 |    "source": [
 38 |     "### D update\n",
 39 |     "Theory requires:\n",
 40 |     "$$\n",
 41 |     "\\psi_{t+1} \\leftarrow \\operatorname{argmax}_{\\psi} \\mathbb{E}_{x\\sim P} \\log D\\left(x;\\psi\\right) + \\mathbb{E}_{z\\sim \\mathcal{N}} \\log \\left(1 - D\\left(G(z;\\theta_t);\\psi\\right)\\right)\n",
 42 |     "$$\n",
 43 |     "\n",
 44 |     "In practice gradient step only. "
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "metadata": {},
 50 |    "source": [
 51 |     "### G update, variant 1\n",
 52 |     "$$\n",
 53 |     "\\theta_{t+1} \\leftarrow \\theta_t - \\epsilon_t \\frac{\\partial}{\\partial\\theta} \\mathbb{E}_{z\\sim \\mathcal{N}} \\log \\left(1 - D\\left(G(z;\\theta_t);\\psi_{t+1}\\right)\\right)\n",
 54 |     "$$\n",
 55 |     "### G update, variant 2\n",
 56 |     "$$\n",
 57 |     "\\theta_{t+1} \\leftarrow \\theta_t + \\epsilon_t \\frac{\\partial}{\\partial\\theta} \\mathbb{E}_{z\\sim \\mathcal{N}} \\log D\\left(G(z;\\theta_t);\\psi_{t+1}\\right)$$"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "markdown",
 62 |    "metadata": {},
 63 |    "source": [
 64 |     "The first corresponds to definition. What does the second correspond to? \n",
 65 |     "\n",
 66 |     "- $$\n",
 67 |     "min_G max_D V(D,G) =  \\mathbb{E}_{x\\sim P} \\log D(x) - \\mathbb{E}_{z\\sim \\mathcal{N}} \\log(D(G(z)))\n",
 68 |     "$$\n",
 69 |     "- $$\n",
 70 |     "max_G max_D V(D,G) =  \\mathbb{E}_{x\\sim P} \\log D(x) + \\mathbb{E}_{z\\sim \\mathcal{N}} \\log(D(G(z)))\n",
 71 |     "$$\n"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "markdown",
 76 |    "metadata": {},
 77 |    "source": [
 78 |     "[Nice article](http://www.inference.vc/an-alternative-update-rule-for-generative-adversarial-networks/) about GAN (not tutorial)."
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "markdown",
 83 |    "metadata": {},
 84 |    "source": [
 85 |     "# Evaluating generative models"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "markdown",
 90 |    "metadata": {},
 91 |    "source": [
 92 |     "[Theis](http://arxiv.org/pdf/1511.01844v2.pdf)"
 93 |    ]
 94 |   }
 95 |  ],
 96 |  "metadata": {
 97 |   "kernelspec": {
 98 |    "display_name": "Python 2",
 99 |    "language": "python",
100 |    "name": "python2"
101 |   },
102 |   "language_info": {
103 |    "codemirror_mode": {
104 |     "name": "ipython",
105 |     "version": 2
106 |    },
107 |    "file_extension": ".py",
108 |    "mimetype": "text/x-python",
109 |    "name": "python",
110 |    "nbconvert_exporter": "python",
111 |    "pygments_lexer": "ipython2",
112 |    "version": "2.7.11"
113 |   }
114 |  },
115 |  "nbformat": 4,
116 |  "nbformat_minor": 0
117 | }
118 | 


--------------------------------------------------------------------------------
/Seminar8/Autoencoder_structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar8/Autoencoder_structure.png


--------------------------------------------------------------------------------
/Seminar8/GS.py:
--------------------------------------------------------------------------------
 1 | import lasagne
 2 | import theano.tensor as T
 3 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
 4 | 
 5 | class GaussianSampleLayer(lasagne.layers.MergeLayer):
 6 |     def __init__(self, mu, logsigma, **kwargs):
 7 |         self.rng = RandomStreams(lasagne.random.get_rng().randint(1,2147462579))
 8 |         super(GaussianSampleLayer, self).__init__([mu, logsigma], **kwargs)
 9 | 
10 |     def get_output_shape_for(self, input_shapes):
11 |         return input_shapes[0]
12 | 
13 |     def get_output_for(self, inputs, deterministic=False, **kwargs):
14 |         mu, logsigma = inputs
15 |         shape=(self.input_shapes[0][0] or inputs[0].shape[0],
16 |                 self.input_shapes[0][1] or inputs[0].shape[1])
17 |         if deterministic:
18 |             return mu
19 |         return mu + T.exp(logsigma) * self.rng.normal(shape)


--------------------------------------------------------------------------------
/Seminar8/README.md:
--------------------------------------------------------------------------------
1 | More materials:
2 | - [VAE explained](http://kvfrans.com/variational-autoencoders-explained/)
3 | - [VAEs for "categorical" variables](http://blog.evjang.com/2016/11/tutorial-categorical-variational.html)
4 | 


--------------------------------------------------------------------------------
/Seminar8/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar8/__init__.py


--------------------------------------------------------------------------------
/Seminar8/lfw_dataset.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import os
 3 | from scipy.misc import imread,imresize
 4 | import pandas as pd
 5 | 
 6 | def fetch_lfw_dataset(attrs_name = "lfw_attributes.txt",
 7 |                       images_name = "lfw-deepfunneled",
 8 |                       dx=80,dy=80,
 9 |                       dimx=45,dimy=45
10 |     ):#sad smile
11 | 
12 |     #download if not exists
13 |     if not os.path.exists(images_name):
14 |         print("images not found, donwloading...")
15 |         os.system("wget http://vis-www.cs.umass.edu/lfw/lfw-deepfunneled.tgz -O tmp.tgz")
16 |         print("extracting...")
17 |         os.system("tar xvzf tmp.tgz && rm tmp.tgz")
18 |         print("done")
19 |         assert os.path.exists(images_name)
20 | 
21 |     if not os.path.exists(attrs_name):
22 |         print("attributes not found, downloading...")
23 |         os.system("wget http://www.cs.columbia.edu/CAVE/databases/pubfig/download/%s"%attrs_name)
24 |         print("done")
25 | 
26 |     #read attrs
27 |     df_attrs = pd.read_csv("lfw_attributes.txt",sep='\t',skiprows=1,) 
28 |     df_attrs = pd.DataFrame(df_attrs.iloc[:,:-1].values, columns = df_attrs.columns[1:])
29 | 
30 | 
31 |     #read photos
32 |     photo_ids = []
33 |     for dirpath, dirnames, filenames in os.walk(images_name):
34 |         for fname in filenames:
35 |             if fname.endswith(".jpg"):
36 |                 fpath = os.path.join(dirpath,fname)
37 |                 photo_id = fname[:-4].replace('_',' ').split()
38 |                 person_id = ' '.join(photo_id[:-1])
39 |                 photo_number = int(photo_id[-1])
40 |                 photo_ids.append({'person':person_id,'imagenum':photo_number,'photo_path':fpath})
41 | 
42 |     photo_ids = pd.DataFrame(photo_ids)
43 | 
44 |     #mass-merge
45 |     #(photos now have same order as attributes)
46 |     df = pd.merge(df_attrs,photo_ids,on=('person','imagenum'))
47 | 
48 |     assert len(df)==len(df_attrs),"lost some data when merging dataframes"
49 | 
50 |     #image preprocessing
51 |     all_photos =df['photo_path'].apply(imread)\
52 |                                 .apply(lambda img:img[dy:-dy,dx:-dx])\
53 |                                 .apply(lambda img: imresize(img,[dimx,dimy]))
54 | 
55 |     all_photos = np.stack(all_photos.values).astype('uint8')
56 |     all_attrs = df.drop(["photo_path","person","imagenum"],axis=1)
57 |     
58 |     return all_photos,all_attrs
59 |     
60 | 


--------------------------------------------------------------------------------
/Seminar8/linear.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar8/linear.png


--------------------------------------------------------------------------------
/Seminar9/oracle.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import numpy as np
  3 | 
  4 | def APatK ( y_true,y_predicted, K =32500):
  5 |     """Calculates AP@k given true Y and predictions (probabilities).
  6 |     Sorts answers by y_predicted to obtain ranking"""
  7 | 
  8 |     sort_by_ypred = np.argsort(-y_predicted)
  9 |     
 10 |     y_true = y_true[sort_by_ypred]
 11 |     y_predicted = y_predicted[sort_by_ypred]
 12 |     
 13 |     countRelevants = 0
 14 |     listOfPrecisions = []
 15 |     
 16 |     for i in range(min(K,len(y_true))):
 17 |         currentk = i + 1.0
 18 |         if y_true[i] !=0:
 19 |             countRelevants+=1
 20 |         precisionAtK = countRelevants / currentk 
 21 |         listOfPrecisions.append(precisionAtK)
 22 |     return np.sum( listOfPrecisions ) / min(K,len(y_true)) 
 23 | 
 24 | 
 25 | 
 26 | 
 27 | 
 28 | import sys
 29 | import socket
 30 | 
 31 | def score(final_accuracy,final_auc,final_apatk):
 32 |     
 33 |     print "\nAUC:"
 34 |     if final_auc >= 0.99:
 35 |         print "\tПиши статью. (great)"
 36 |     elif final_auc >= 0.97:
 37 |         print "\tОтличное решение! (good)"
 38 |     elif final_auc >= 0.95:
 39 |         print "\tСойдёт, хотя можно ещё поднажать (ok)"
 40 |     elif final_auc >= 0.9:
 41 |         print "\tНеплохо, но ты можешь лучше! (not ok)"
 42 |     elif final_auc > 0.8:
 43 |         print "\tТы на правильном пути! (not ok)"
 44 |     elif final_auc > 0.65:
 45 |         print "\tДобавь жару! (not ok)"
 46 |     else:
 47 |         print "\tМожет быть, она недоучилась? Ну или слишком маленькая? Или в детстве болела? (not ok)"
 48 |     
 49 |         
 50 |     print "\nAccuracy:"
 51 |     if final_accuracy >= 0.97:
 52 |         print "\tОчешуенно! (great)"
 53 |     elif final_accuracy >= 0.95:
 54 |         print "\tОтличный результат! (good)"
 55 |     elif final_accuracy >= 0.9:
 56 |         print "\tВсё ок (ok)"
 57 |     else:
 58 |         print "Надо бы подтянуть. (not ok)"
 59 | 
 60 |     print "\nAverage precision at K:"
 61 |     if final_apatk > 0.99:
 62 |         print "\tЗасабмить на kaggle! (great) \n\t Нет, ну честно - выкачай avito_test.tsv, засабмить и скажи, что вышло."
 63 |     elif final_apatk > 0.95:
 64 |         print "\tОтличный результат (good)"
 65 |     elif final_apatk > 0.92:
 66 |         print "\tВы побили baseline (ok)"
 67 |     else:
 68 |         print "\tНадо бы поднажать (not ok)"
 69 |         
 70 |     if socket.gethostname().startswith("cv-gpu" ):
 71 |         if final_apatk>0.92 and final_accuracy >0.9 and final_auc > 0.95 :
 72 |             warn_them()
 73 |         else:
 74 |             print 
 75 | 
 76 | def warn_them():
 77 |     """warns some users of what is impending"""
 78 |     sys.stderr.write("""\nWe Are Watching You!
 79 |                                                       .             ..                           
 80 |                                     .      ...;c:,::..  '  ':oococ:. ..                          
 81 |                               ..    .':dodxkkxxxxxxxkxddxkkkkkkkkkxkkdl:.                        
 82 |                              .'':lodxxxdxxxxxddxxddodddxkkxdxxxxxxxxxxxxk:d;,.                   
 83 |                           .;dxkxxxxxxxxk000Okdooooooloodxxdddxxkkxxddddodkkkkd'  .               
 84 |                        .:lkkkkkkkkkkkOOOkkxxxxdollllllloooddxxkOOOOkxdddldkkkOxc.                
 85 |                      .lkkkOOOkkkkOOOkkxdollcccccccccccccccllddxkOOOOkddddddxxxddd;.              
 86 |                    .cxkkOOOOOOOOkkkdolc:::;;;;;;;;;;;;;::::ccclodxkOOxddooodddooodo'             
 87 |                   ,dxkkOOOOOOOOkxolc::;;;;;;;;;;;;;;;;;;;;::::cclodxkkxddooolooollooc..          
 88 |                  ,xxkkkOOO0OOkxdol:;;;;;;,,,,;;;;;;;;;;;;;;;;;::ccloxxxxdoooollllollod,          
 89 |                 .xxkkkkOOOkxxollc;;;;,,,,,,,,,,,,,;,,;;;;;;;;;;::cclodxxddoollllcclllclc.        
 90 |                 ;xkkxkOOkdoolllc;;;,,,,,,,,,,,,,,;;;,,;;;;;;;;;:::clloddddoooolccc::lcclo:       
 91 |                .ddkkxOOxdoolllc:;;;;;;;;;;;;,,,,,;;,,,;;;;;;;::::::ccodlcdddooolccc::cllll..     
 92 |               'xdddxxOOkxolclc::;;:coooooollllc:;;,,;;;;::ccoddddddoollc;coxkxoc:::cc:clcco.     
 93 |               oxddddxkxxdoc::::;:lolccccccccloolc;,,;:ccloddxxxkkOOOkdl::lxkkkxoc:;:cccccoo.     
 94 |               xdododdkkxdlc::c::coc::::clooollllc:;;:cllldxxxddddddxkkdodxkkkxxkdl:;::::odc      
 95 |              ,dddoodkkkdlc::cc::lccccldkkOOxdddol:;;:codkOO000OkdooodkkxxkOkxxkkkdlc:::;cd.      
 96 |              :odoodkkxolc:ccc::cllloxkdollloooddoc;;:lxO00OkxxxxkxdddkkkxkkkxkO0Okxolc;;:c .     
 97 |              oddodkOxolc:::c::::cloddo;;:c:;:llc::,,;lxOOko:;:cccdxxkOkkkkkxkkOO00kdllc;:o..     
 98 |              cddddkxdlc:::c::;;::cc:cc;:clccc::;;,,,;:oxkxc;:cloloxkkkkkxxkxdxollkxl;:cc:c;.     
 99 |              .oddxkkdc::cc::;;;;;;;;;cllllllcc:;,,,,;codxkdllooddxkxddddddxkooc::dOd:;:lolc'     
100 |               'dddxxolcllcc:;;,,,,;;;;;;;:;;;;;;,,,,;:loddoccllodddlccclodxkxl:::dOdl::codlc     
101 |                cxdddooodolo:;;,,,,,,,,,,,,,,,;;;,',,;:codol:;::::c::;:clodxxko::cdkolc;ccolc.    
102 |                  cdxxxdkdodl:;,,,,,,,''''',::ll:;,;:llodddoc;;,;;;;::cclloxkkxlcokxolc:lcll:.    
103 |                   ldoddkxodl::;,,,,,,,',;:c::ooccldxkO0K0kxxo:;,;;;:ccllclxkkxlcdkxol::dol:      
104 |                   .dddoddoxc::;;,;;;,,;:cc;;;;;;:lxkkkkkkkkkko;;;;:cloooccxkxxocxkooocc;cd.      
105 |                    ,cldddl:;;:;;;;;;;;:c:;,,,,,,,;;:cllodddxddc:::clooddlcxOxoclxkooool  :       
106 |                      'xdd:;;;::;;;;;;;::;,,,,,,,,;;;;;:cccllllcc:cclooddl:xkxoloxxdodoc          
107 |                   ..  cdxoccc::;;;;;;;;;;;;::cccc:ccllloooodolccc:clloodccxxdoodxxdodc .         
108 |                        .;;  .::;;;;;;;;;:lxO0kdollodxxdkkOOOkdlcc;:cclodclodoodxdl:,.            
109 |                             .c::;;;;;,;ccodOxlc:codkOOOOO0KK0xc:;;:cllddc,.do;.',.  .            
110 |                            .clc::;;;;;;;;;::::::cclcllodxddooc,;;:clldxo:..;,                    
111 |                          .;clllcc:::;;,,,,;;;;;;;::cccclodddl;;;:looxkxl.                        
112 |                         .c:ccooclcc::;;,,,;;;:::ccllooddddol:::coxxkkkc .  .                     
113 |                         :l:clodccllcc:;;;;,;;;;:::cccccc:::cclodkkkkx:                           
114 |                        ;ll:clddcccloolc:;;,,,,;,,,,;;:::::cldxkkkOkc                             
115 |                       .ddcclld,lcclloooolc;;;,,,;;;;:ccclldxxdc;,;.                              
116 |                       :docclod,'lccclloddxdollclllloooool:;.                                     
117 |                      .dxlcclldo :cccllloodxkd.   ....                                            
118 |                      ;dxlcclodx.'lcclllloodxx.                     
119 |  \n""")
120 |     sys.stderr.write("""
121 |                            ______________________________________
122 |                  _\|/^    /       Молодцы, а теперь слезайте     \    \|| /
123 |                   (_oo   /              с казённой GPU            \   oo   /  
124 |                    |     \________________________________________/  О_    --
125 |                   /|\                                                 )    =
126 |                    |                                                 (.   --
127 |                    LL                                                 1 1\ 
128 |                 mborisyak@                                           jheuristic@
129 |     """)
130 | 
131 | 


--------------------------------------------------------------------------------