├── .gitignore ├── Dockerfile ├── LICENSE.md ├── README.md ├── Seminar1 ├── Classwork_ru.ipynb ├── Homework 1 (Face Recognition).ipynb ├── faces_data.mat ├── inpainting.ipynb └── zebrafish_drawing_factory.py ├── Seminar10 ├── Bonus-handcrafted-rnn.ipynb ├── README.md ├── Seminar10-RNN-homework-en.ipynb ├── Seminar10-RNN-homework-ru.ipynb ├── codex │ ├── Arbitrazhnyj_processualbnyj_kodeks_RF.txt │ ├── Grazhdanskij_kodeks_RF._Chastb_pervaja.txt │ ├── Grazhdanskij_kodeks_RF._Chastb_tretbja.txt │ ├── Grazhdanskij_kodeks_RF._Chastb_vtoraja.txt │ ├── Grazhdanskij_kodeks_RF.txt │ ├── Kodeks_RF_ob_administrativnyh_pravonarushenijah.txt │ ├── Konstitucija_RF.txt │ ├── Tamozhennyj_kodeks_RF.txt │ ├── Ustav_Patrulbno-Postovoj_Sluzhby_Milicii_Obwestvennoj_Bezopasnosti_RF.txt │ ├── Zakon_o_milicii.txt │ └── Zakon_ob_avtorskom_prave_i_smezhnyh_pravah.txt ├── data_copyright ├── molecules.tsv ├── mtg_card_names.txt ├── names └── seminar4-RNN-intro.ipynb ├── Seminar11 ├── Seminar11_homework.ipynb ├── Seminar11_intro.ipynb ├── broadcast.py ├── pretrained_lenet.py ├── test.csv └── train.csv ├── Seminar2 ├── README.md └── Seminar2.ipynb ├── Seminar3 ├── HW3_Differentiation.ipynb ├── HW3_Modules.ipynb ├── HW3_main.ipynb ├── README.md ├── Seminar3_Differentiation.ipynb ├── Seminar3_NN.ipynb ├── autoencoder.png ├── googlenet.png ├── grad.png └── outdated │ ├── Seminar 3.ipynb │ └── interpolation.mp4 ├── Seminar4 ├── README.md ├── Seminar-intro-slide.ipynb ├── Seminar4-en.ipynb ├── Seminar4-ru.ipynb ├── bonus │ ├── Bonus-advanced-cnn.ipynb │ ├── Bonus-advanced-theano.ipynb │ └── cifar.py └── mnist.py ├── Seminar5 ├── README.md ├── Seminar5.ipynb ├── classes.pkl ├── classes.txt └── sample_images │ ├── albatross.jpg │ ├── fox.jpg │ ├── frog.jpg │ ├── hen.jpg │ ├── kermit.jpg │ ├── kitten.jpg │ ├── puppy.jpg │ ├── steve_martin.jpg │ ├── teapot.jpg │ └── tiger.jpg ├── Seminar6 ├── Seminar6.ipynb ├── custom │ ├── __init__.py │ ├── net.py │ ├── solver.py │ └── tester.py ├── data │ ├── .gitignore │ ├── pylintrc │ └── scripts │ │ └── fetch_selective_search_data.sh ├── experiments │ ├── cfgs │ │ └── rcnn.yml │ └── scripts │ │ ├── fast_rcnn.sh │ │ ├── test_rcnn.sh │ │ └── train_rcnn.sh ├── lib │ ├── Makefile │ ├── datasets │ │ ├── VOCdevkit-matlab-wrapper │ │ │ ├── get_voc_opts.m │ │ │ ├── voc_eval.m │ │ │ └── xVOCap.m │ │ ├── __init__.py │ │ ├── ds_utils.py │ │ ├── factory.py │ │ ├── imdb.py │ │ ├── pascal_voc.py │ │ ├── tools │ │ │ └── mcg_munge.py │ │ └── voc_eval.py │ ├── fast_rcnn │ │ ├── __init__.py │ │ ├── bbox_transform.py │ │ ├── config.py │ │ ├── nms_wrapper.py │ │ ├── test.py │ │ └── train.py │ ├── nms │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── cpu_nms.pyx │ │ ├── gpu_nms.hpp │ │ ├── gpu_nms.pyx │ │ ├── nms_kernel.cu │ │ └── py_cpu_nms.py │ ├── roi_data_layer │ │ ├── __init__.py │ │ ├── layer.py │ │ ├── minibatch.py │ │ └── roidb.py │ ├── setup.py │ ├── transform │ │ ├── __init__.py │ │ └── torch_image_transform_layer.py │ └── utils │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── bbox.pyx │ │ ├── blob.py │ │ └── timer.py ├── notebook │ └── img │ │ └── rcnn_slide.jpg └── tools │ ├── _init_paths.py │ ├── eval_recall.py │ ├── reval.py │ ├── test_net.py │ └── train_net.py ├── Seminar7 ├── HW_GAN.ipynb ├── HW_textures_style.ipynb ├── README.md └── sem7.ipynb ├── Seminar8 ├── Autoencoder_structure.png ├── GS.py ├── README.md ├── VAE_homework.ipynb ├── __init__.py ├── lfw_dataset.py └── linear.png └── Seminar9 ├── Bonus-seminar.ipynb ├── Seminar9_en.ipynb ├── Seminar9_ru.ipynb └── oracle.py /.gitignore: 
-------------------------------------------------------------------------------- 1 | # node and NPM 2 | npm-debug.log 3 | node_modules 4 | 5 | # swap files 6 | *~ 7 | *.swp 8 | 9 | 10 | 11 | env.sh 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | env/ 22 | bin/ 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg/ 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | 48 | # Translations 49 | *.mo 50 | 51 | # Mr Developer 52 | .mr.developer.cfg 53 | .project 54 | .pydevproject 55 | .idea 56 | .ipynb_checkpoints 57 | 58 | # Rope 59 | .ropeproject 60 | 61 | # Django stuff: 62 | *.log 63 | *.pot 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | docs/tmp* 68 | 69 | # OS X garbage 70 | .DS_Store 71 | 72 | # Debian things 73 | debian/reproducible-experiment-platform 74 | debian/files 75 | *.substvars 76 | *.debhelper.log -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM andrewosh/binder-base 2 | 3 | MAINTAINER Alexander Panin 4 | 5 | USER root 6 | 7 | RUN apt-get update 8 | RUN apt-get install -y htop 9 | RUN apt-get install -y unzip 10 | RUN apt-get install -y cmake 11 | 12 | USER main 13 | 14 | RUN pip install --upgrade https://github.com/Theano/Theano/archive/master.zip 15 | RUN pip install --upgrade https://github.com/Lasagne/Lasagne/archive/master.zip 16 | RUN pip install --upgrade https://github.com/yandexdataschool/AgentNet/archive/master.zip 17 | 18 | RUN /home/main/anaconda/envs/python3/bin/pip install --upgrade https://github.com/Theano/Theano/archive/master.zip 19 | RUN /home/main/anaconda/envs/python3/bin/pip install --upgrade https://github.com/Lasagne/Lasagne/archive/master.zip 20 | RUN /home/main/anaconda/envs/python3/bin/pip install --upgrade https://github.com/yandexdataschool/AgentNet/archive/master.zip 21 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Yandex School of Data Analysis and contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning course 2 | Lecture and seminar materials for each week are in ./Seminar* folders 3 | 4 | Instant dive-in button: [![Binder](http://mybinder.org/badge.svg)](http://mybinder.org:/repo/ddtm/dl-course) 5 | (servers may be down from time to time, sorry) 6 | 7 | 8 | # Coordinates 9 | * YSDA every Wednesday at 18:00 10 | * Skoltech TBA 11 | 12 | # Announcements 13 | * First lecture will happen on 8.02 -- or will it? 14 | 15 | # Syllabus 16 | - __week1__ lecturename 17 | - [ ] Lecture: Intro 18 | - [ ] Seminar: Unsupervised feature learning on faces dataset 19 | - [ ] HW due: xx.yy.zz, 23:59. 20 | - [ ] Please get bleeding-edge Theano+Lasagne installed for the next seminar. 21 | - [Issue](https://github.com/yandexdataschool/HSE_deeplearning/issues/1) 22 | - [Linux Guidelines](http://agentnet.readthedocs.io/en/latest/user/install.html) 23 | 24 | # Contributors & course staff 25 | Course materials and teaching by 26 | - [Viktor Lempitsky](http://sites.skoltech.ru/compvision/members/vilem/) 27 | - [Dmitry Ulyanov](https://github.com/DmitryUlyanov) - seminars, homeworks 28 | - [Vadim Lebedev](https://github.com/vadim-v-lebedev) - seminars, homeworks 29 | - [Victor Yurchenko](https://github.com/simflin) - seminars, homeworks 30 | - [Just Heuristic](https://github.com/justheuristic/) - seminars, homeworks 31 | 32 | Contributors: 33 | - [Oleg Vasilev](https://github.com/Omrigan) - a lot of miscellaneous improvements 34 | - [Arseniy Ashukha](https://github.com/ars-ashuha) - image captioning, notes 35 | - [Mikhail Khalman](https://github.com/mihaha?tab=activity) - variational autoencoders, notes 36 | -------------------------------------------------------------------------------- /Seminar1/Classwork_ru.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# What does a fish think with?" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "!wget https://github.com/goto-ru/Unsupervised_ML/raw/20779daf2aebca80bfe38401bc87cf41fc7b493d/03_zebrafish/zebrafish.npy -O zebrafish.npy\n", 19 | "#alternative link: https://www.dropbox.com/s/hhep0wj4c11qibu/zebrafish.npy?dl=1" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# Data\n", 27 | "\n", 28 | "* You now have at your disposal recordings of the brain activity of a larva of the zebrafish, Danio rerio: https://en.wikipedia.org/wiki/Zebrafish .\n", 29 | "* The larva was injected with a substance that glows in response to electrical activity (e.g. neuron spikes). The larvae are almost transparent, so this glow is visible from the outside.\n", 30 | "* The data itself contains 240 snapshots of the head of the fish, showing the brain activity at every point. Each snapshot is __230 x 202__ pixels.\n", 31 | "* Your task is to try to reconstruct the structure of the fish's brain.
To do this, you could, for example, look for groups of neurons that fire together or at the same frequency.\n", 32 | "* There are no labels in the data whatsoever, so you will have to use dimensionality reduction and clustering methods to analyze the data effectively.\n", 33 | "\n", 34 | "![img](http://static1.squarespace.com/static/5355ec0de4b02760ee889a8f/t/5357cbfee4b03a3c7d9e4831/1398262791647/fish)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "collapsed": false 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "import numpy as np\n", 46 | "data = np.load(\"zebrafish.npy\")/255." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "import matplotlib.pyplot as plt\n", 58 | "%matplotlib inline\n", 59 | "\n", 60 | "tick0 = data[:,0]\n", 61 | "tick0_image = tick0.reshape(230, 202)\n", 62 | "\n", 63 | "print \"size of 1 image:\", tick0_image.shape\n", 64 | "\n", 65 | "plt.imshow(tick0_image.T);" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "collapsed": false, 73 | "scrolled": true 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "#a mini-library for drawing the fish\n", 78 | "from zebrafish_drawing_factory import draw_component\n", 79 | "\n", 80 | "draw_component(data[:,0])" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "# Time series\n", 88 | "\n", 89 | "* Let's look at the activity of individual pixels over time:\n", 90 | "* Try to find some characteristic groups of neurons by hand" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": false 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "import matplotlib.pyplot as plt\n", 102 | "%matplotlib inline\n", 103 | "plt.figure(figsize=[10,10])\n", 104 | "for i in range(0,240,10):\n", 105 | " plt.plot(data[i])\n" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": { 111 | "collapsed": true 112 | }, 113 | "source": [ 114 | "# Let's look for characteristic groups of neurons\n", 115 | "\n", 116 | "Let's decompose the neural activity time series with principal component analysis.\n", 117 | "\n", 118 | "__Important!__ in this part of the assignment a sample is the activity time series of 1 point of the image, not the whole image."
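A minimal sketch of the intended call, assuming `data` of shape `(230*202, 240)` as loaded above (the placeholders in the cells below are still yours to fill in):

```python
from sklearn.decomposition import PCA

# samples = pixels, features = 240 time ticks
pca = PCA(n_components=20).fit(data)
data_pca = pca.transform(data)  # shape: (230*202, 20)
```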
119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": false 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "from sklearn.decomposition import PCA\n", 130 | "\n", 131 | "pca = <create and fit a PCA with 20+ components>" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": false 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "data_pca = <transform the data into the principal component space with pca.transform>" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "## Visualize the components" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "draw_component(data_pca[:,1])" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "collapsed": false 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "draw_component(data_pca[:,2])" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": { 178 | "collapsed": false 179 | }, 180 | "outputs": [], 181 | "source": [ 182 | "from zebrafish_drawing_factory import draw_components\n", 183 | "\n", 184 | "draw_components(data_pca[:,2],data_pca[:,3])" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "# Let's look for features" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "collapsed": true 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "def extract_features(impulses):\n", 203 | " \"\"\"given time series(array) of region activity, compute some feature representation of those time series\n", 204 | " Ideas:\n", 205 | " - fourier transform\n", 206 | " - mean, variance and percentiles\n", 207 | " - sums of every k-th element with shift b\n", 208 | " \"\"\"\n", 209 | " features = []<any features you like>\n", 210 | " return features" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": { 217 | "collapsed": false 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "data_features = np.array(list(map(extract_features, data)))\n", 222 | "\n", 223 | "print \"shape:\",data_features.shape" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": { 230 | "collapsed": false 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "from sklearn.decomposition import PCA\n", 235 | "\n", 236 | "pca = <fit a PCA>" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": { 243 | "collapsed": true 244 | }, 245 | "outputs": [], 246 | "source": [ 247 | "data_pca = <transform into the PCA space>" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": { 254 | "collapsed": false 255 | }, 256 | "outputs": [], 257 | "source": [ 258 | "<visualize the resulting components>\n", 259 | "draw_component(...)\n", 260 | "draw_components(...)" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "# Bonus: clustering in PCA space" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": { 274 | "collapsed": true 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "from sklearn.cluster import KMeans\n",
"from sklearn.mixture import GMM\n", 280 | "\n", 281 | "<покластеризуй области изображения на основе двух полученных PCA-представлений, используй любой метод на выбор>" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": { 288 | "collapsed": true 289 | }, 290 | "outputs": [], 291 | "source": [ 292 | "cluster_ids = <предскажи номер кластера для каждого пикселя>" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "metadata": { 299 | "collapsed": true 300 | }, 301 | "outputs": [], 302 | "source": [ 303 | "#cluster_ids должен содержать по 1 чиселке на пиксель\n", 304 | "assert np.prod(cluster_ids.shape) == (230*202)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": { 311 | "collapsed": true 312 | }, 313 | "outputs": [], 314 | "source": [ 315 | "plt.imshow(cluster_ids.reshape(230,202),cmap='spectral')" 316 | ] 317 | } 318 | ], 319 | "metadata": { 320 | "kernelspec": { 321 | "display_name": "Python [Root]", 322 | "language": "python", 323 | "name": "Python [Root]" 324 | }, 325 | "language_info": { 326 | "codemirror_mode": { 327 | "name": "ipython", 328 | "version": 2 329 | }, 330 | "file_extension": ".py", 331 | "mimetype": "text/x-python", 332 | "name": "python", 333 | "nbconvert_exporter": "python", 334 | "pygments_lexer": "ipython2", 335 | "version": "2.7.12" 336 | } 337 | }, 338 | "nbformat": 4, 339 | "nbformat_minor": 0 340 | } 341 | -------------------------------------------------------------------------------- /Seminar1/Homework 1 (Face Recognition).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Populating the interactive namespace from numpy and matplotlib\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "%pylab inline\n", 20 | "\n", 21 | "import numpy as np\n", 22 | "from matplotlib import pyplot as plt" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Face recognition" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "The goal of this seminar is to build two simple (anv very similar) face recognition pipelines using **`scikit-learn`** package. Overall, we'd like to explore different representations and see which one works better. 
" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Prepare dataset" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 2, 49 | "metadata": { 50 | "collapsed": false 51 | }, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "Dataset loaded.\n", 58 | " Image size : 32x32\n", 59 | " Train images : 280\n", 60 | " Test images : 120\n", 61 | " Number of classes : 40\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "import scipy.io\n", 67 | "\n", 68 | "image_h, image_w = 32, 32\n", 69 | "\n", 70 | "data = scipy.io.loadmat('faces_data.mat')\n", 71 | "\n", 72 | "X_train = data['train_faces'].reshape((image_w, image_h, -1)).transpose((2, 1, 0)).reshape((-1, image_h * image_w))\n", 73 | "y_train = data['train_labels'] - 1\n", 74 | "X_test = data['test_faces'].reshape((image_w, image_h, -1)).transpose((2, 1, 0)).reshape((-1, image_h * image_w))\n", 75 | "y_test = data['test_labels'] - 1\n", 76 | "\n", 77 | "n_features = X_train.shape[1]\n", 78 | "n_train = len(y_train)\n", 79 | "n_test = len(y_test)\n", 80 | "n_classes = len(np.unique(y_train))\n", 81 | "\n", 82 | "print('Dataset loaded.')\n", 83 | "print(' Image size : {}x{}'.format(image_h, image_w))\n", 84 | "print(' Train images : {}'.format(n_train))\n", 85 | "print(' Test images : {}'.format(n_test))\n", 86 | "print(' Number of classes : {}'.format(n_classes))" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "Now we are going to plot some samples from the dataset using the provided helper function." 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "def plot_gallery(images, titles, h, w, n_row=3, n_col=6):\n", 105 | " \"\"\"Helper function to plot a gallery of portraits\"\"\"\n", 106 | " plt.figure(figsize=(1.5 * n_col, 1.7 * n_row))\n", 107 | " plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)\n", 108 | " for i in range(n_row * n_col):\n", 109 | " plt.subplot(n_row, n_col, i + 1)\n", 110 | " plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray, interpolation='nearest')\n", 111 | " plt.title(titles[i], size=12)\n", 112 | " plt.xticks(())\n", 113 | " plt.yticks(())" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "collapsed": false 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "titles = [str(y[0]) for y in y_train]\n", 125 | "\n", 126 | "plot_gallery(X_train, titles, image_h, image_w)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "## Nearest Neighbour baseline" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "The simplest way to do face recognition is to treat raw pixels as features and perform **Nearest Neighbor Search** in the Euclidean space. Let's use **`KNeighborsClassifier`** class." 
141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": false 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "from sklearn.neighbors import KNeighborsClassifier\n", 152 | "\n", 153 | "# Use KNeighborsClassifier to calculate test score for the Nearest Neighbour classifier.\n", 154 | "\n", 155 | "print('Test score: {}'.format(test_score))" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "Not very impressive, is it?" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "## Eigenfaces" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "All the dirty work will be done by the scikit-learn package. First we need to learn a dictionary of codewords. For that we preprocess the training set by normalizing each face (zero mean and unit variance)." 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "collapsed": true 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "# Populate variable 'X_train_processed' with samples each of which has zero mean and unit variance." 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "Now we are going to apply **PCA** to obtain a dictionary of codewords. \n", 195 | "The **`RandomizedPCA`** class is what we need." 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": { 202 | "collapsed": false 203 | }, 204 | "outputs": [], 205 | "source": [ 206 | "from sklearn.decomposition import RandomizedPCA\n", 207 | "\n", 208 | "n_components = 64\n", 209 | "\n", 210 | "# Populate 'pca' with a trained instance of RandomizedPCA." 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "We plot a bunch of principal components." 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": { 224 | "collapsed": false 225 | }, 226 | "outputs": [], 227 | "source": [ 228 | "# Visualize principal components." 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "This time we don't have any restriction on the number of non-zero coefficients in the vector decomposition, so the codes are not sparse anymore:" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "collapsed": false 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "# Transform training data and plot decomposition coefficients." 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "Train an SVM and apply it to the encoded test data." 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": { 260 | "collapsed": false 261 | }, 262 | "outputs": [], 263 | "source": [ 264 | "# Populate 'test_score' with test accuracy of an SVM classifier.\n", 265 | "\n", 266 | "print('Test score: {}'.format(test_score))" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "How many components are sufficient to reach the same accuracy level?"
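For reference, a minimal sketch of the whole eigenfaces pipeline the cells above ask for (one possible solution, not the only one; `RandomizedPCA` was later merged into `PCA(svd_solver='randomized')`, which is used here for portability):

```python
import numpy as np
from sklearn.decomposition import PCA
from sklearn.svm import SVC

def normalize(X):
    # per-sample normalization: zero mean and unit variance for each face
    return (X - X.mean(axis=1, keepdims=True)) / X.std(axis=1, keepdims=True)

X_train_processed = normalize(X_train)
X_test_processed = normalize(X_test)

pca = PCA(n_components=64, svd_solver='randomized').fit(X_train_processed)
svm = SVC(kernel='linear').fit(pca.transform(X_train_processed), y_train.ravel())
test_score = svm.score(pca.transform(X_test_processed), y_test.ravel())
```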
274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": { 280 | "collapsed": false 281 | }, 282 | "outputs": [], 283 | "source": [ 284 | "n_components = [1, 2, 4, 8, 16, 32, 64]\n", 285 | "accuracy = []\n", 286 | "\n", 287 | "# Try different numbers of components and populate 'accuracy' list.\n", 288 | " \n", 289 | "plt.figure(figsize=(10, 6))\n", 290 | "plt.plot(n_components, accuracy)\n", 291 | "\n", 292 | "print('Max accuracy: {}'.format(max(accuracy)))" 293 | ] 294 | } 295 | ], 296 | "metadata": { 297 | "kernelspec": { 298 | "display_name": "Python 2", 299 | "language": "python", 300 | "name": "python2" 301 | }, 302 | "language_info": { 303 | "codemirror_mode": { 304 | "name": "ipython", 305 | "version": 2 306 | }, 307 | "file_extension": ".py", 308 | "mimetype": "text/x-python", 309 | "name": "python", 310 | "nbconvert_exporter": "python", 311 | "pygments_lexer": "ipython2", 312 | "version": "2.7.6" 313 | } 314 | }, 315 | "nbformat": 4, 316 | "nbformat_minor": 0 317 | } 318 | -------------------------------------------------------------------------------- /Seminar1/faces_data.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar1/faces_data.mat -------------------------------------------------------------------------------- /Seminar1/zebrafish_drawing_factory.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import matplotlib.pyplot as plt 3 | import matplotlib.cm as cm 4 | def preparePlot(xticks, yticks, figsize=(10.5, 6), hideLabels=False, gridColor='#999999', 5 | gridWidth=1.0): 6 | """Template for generating the plot layout.""" 7 | plt.close() 8 | fig, ax = plt.subplots(figsize=figsize, facecolor='white', edgecolor='white') 9 | ax.axes.tick_params(labelcolor='#999999', labelsize='10') 10 | for axis, ticks in [(ax.get_xaxis(), xticks), (ax.get_yaxis(), yticks)]: 11 | axis.set_ticks_position('none') 12 | axis.set_ticks(ticks) 13 | axis.label.set_color('#999999') 14 | if hideLabels: axis.set_ticklabels([]) 15 | plt.grid(color=gridColor, linewidth=gridWidth, linestyle='-') 16 | map(lambda position: ax.spines[position].set_visible(False), ['bottom', 'top', 'left', 'right']) 17 | return fig, ax 18 | 19 | def draw_component(component): 20 | 21 | image = component.reshape(230, 202).T 22 | 23 | fig, ax = preparePlot(numpy.arange(0, 10, 1), numpy.arange(0, 10, 1), figsize=(9.0, 7.2), hideLabels=True) 24 | ax.grid(False) 25 | image = plt.imshow(image,interpolation='nearest', aspect='auto', cmap=cm.gray) 26 | plt.show() 27 | 28 | # Adapted from python-thunder's Colorize.transform where cmap='polar'.
29 | # Checkout the library at: https://github.com/thunder-project/thunder and 30 | # http://thunder-project.org/ 31 | import numpy as np 32 | def polarTransform(scale, img): 33 | """Convert points from cartesian to polar coordinates and map to colors.""" 34 | from matplotlib.colors import hsv_to_rgb 35 | 36 | 37 | img = np.asarray(img) 38 | dims = img.shape 39 | 40 | phi = ((np.arctan2(-img[0], -img[1]) + np.pi/2) % (np.pi*2)) / (2 * np.pi) 41 | rho = np.sqrt(img[0]**2 + img[1]**2) 42 | saturation = np.ones((dims[1], dims[2])) 43 | 44 | out = hsv_to_rgb(np.dstack((phi, saturation, scale * rho))) 45 | 46 | return np.clip(out * scale, 0, 1) 47 | 48 | def draw_components(*components): 49 | assert len(components)==2,"this method only accepts 2 components at once" 50 | components = [i.reshape(230, 202).T for i in components] 51 | # Use the same transformation on the image data 52 | # Try changing the first parameter to lower values 53 | brainmap = polarTransform(2.0, components) 54 | 55 | # generate layout and plot data 56 | fig, ax = preparePlot(np.arange(0, 10, 1), np.arange(0, 10, 1), figsize=(9.0, 7.2), hideLabels=True) 57 | ax.grid(False) 58 | image = plt.imshow(brainmap,interpolation='nearest', aspect='auto') 59 | plt.show() -------------------------------------------------------------------------------- /Seminar10/README.md: -------------------------------------------------------------------------------- 1 | More materials 2 | - http://karpathy.github.io/2015/05/21/rnn-effectiveness/ 3 | - [moar advanced slides](http://www.machinelearning.ru/wiki/images/6/6c/RNN_and_LSTM_16102015.pdf) by bayesgroup 4 | - [random example](https://larseidnes.com/2015/10/13/auto-generating-clickbait-with-recurrent-neural-networks/) is random 5 | -------------------------------------------------------------------------------- /Seminar10/codex/Arbitrazhnyj_processualbnyj_kodeks_RF.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Arbitrazhnyj_processualbnyj_kodeks_RF.txt -------------------------------------------------------------------------------- /Seminar10/codex/Grazhdanskij_kodeks_RF._Chastb_pervaja.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Grazhdanskij_kodeks_RF._Chastb_pervaja.txt -------------------------------------------------------------------------------- /Seminar10/codex/Grazhdanskij_kodeks_RF._Chastb_tretbja.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Grazhdanskij_kodeks_RF._Chastb_tretbja.txt -------------------------------------------------------------------------------- /Seminar10/codex/Grazhdanskij_kodeks_RF._Chastb_vtoraja.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Grazhdanskij_kodeks_RF._Chastb_vtoraja.txt -------------------------------------------------------------------------------- /Seminar10/codex/Grazhdanskij_kodeks_RF.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Grazhdanskij_kodeks_RF.txt -------------------------------------------------------------------------------- /Seminar10/codex/Kodeks_RF_ob_administrativnyh_pravonarushenijah.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Kodeks_RF_ob_administrativnyh_pravonarushenijah.txt -------------------------------------------------------------------------------- /Seminar10/codex/Konstitucija_RF.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Konstitucija_RF.txt -------------------------------------------------------------------------------- /Seminar10/codex/Tamozhennyj_kodeks_RF.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Tamozhennyj_kodeks_RF.txt -------------------------------------------------------------------------------- /Seminar10/codex/Ustav_Patrulbno-Postovoj_Sluzhby_Milicii_Obwestvennoj_Bezopasnosti_RF.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Ustav_Patrulbno-Postovoj_Sluzhby_Milicii_Obwestvennoj_Bezopasnosti_RF.txt -------------------------------------------------------------------------------- /Seminar10/codex/Zakon_o_milicii.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Zakon_o_milicii.txt -------------------------------------------------------------------------------- /Seminar10/codex/Zakon_ob_avtorskom_prave_i_smezhnyh_pravah.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar10/codex/Zakon_ob_avtorskom_prave_i_smezhnyh_pravah.txt -------------------------------------------------------------------------------- /Seminar10/data_copyright: -------------------------------------------------------------------------------- 1 | @names 2 | # Copyright (c) January 1991 by Mark Kantrowitz. 3 | # Thanks to Bill Ross for about 1000 additional names. 4 | # Version 1.3 (29-MAR-94) 5 | 6 | @mtg cards 7 | https://mtgjson.com/ 8 | 9 | -------------------------------------------------------------------------------- /Seminar11/broadcast.py: -------------------------------------------------------------------------------- 1 | from agentnet.utils.format import check_list 2 | from lasagne.layers import Layer 3 | import numpy as np 4 | 5 | 6 | class BroadcastLayer(Layer): 7 | """ 8 | Merges certain axes of network into first (batch) axis to allow broadcasting over them. 
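    Usage example (an illustrative sketch, not part of the original code): to apply a
    DenseLayer to every timestep of a (batch, time, features) input:

        bc = BroadcastLayer(l_in, broadcasted_axes=(0, 1))  # -> (batch*time, features)
        hid = DenseLayer(bc, num_units=64)
        out = UnbroadcastLayer(hid, broadcast_layer=bc)     # -> (batch, time, 64)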
9 | :param incoming: layer to be broadcasted 10 | :type incoming: Layer 11 | :param broadcasted_axes: an axis (or axes) to be broadcasted 12 | :type broadcasted_axes: int or tuple of int 13 | :param force_broadcastable_batch: if True, raises an error whenever the batch (0'th) axis is not included in broadcasted_axes 14 | 15 | """ 16 | 17 | def __init__(self, incoming, broadcasted_axes, force_broadcastable_batch=True, **kwargs): 18 | 19 | self.incoming_ndim = len(incoming.output_shape) 20 | 21 | # axes that are to be broadcasted -- in ascending order 22 | # ax % self.incoming_ndim replaces negative axes with ndim + ax, so that -1 becomes the last axis 23 | self.broadcasted_axes = sorted([ax % self.incoming_ndim for ax in check_list(broadcasted_axes)]) 24 | 25 | # sanity checks 26 | assert max(self.broadcasted_axes) < self.incoming_ndim 27 | assert len(self.broadcasted_axes) > 0 28 | if force_broadcastable_batch and (0 not in self.broadcasted_axes): 29 | raise ValueError("BroadcastLayer was asked NOT to broadcast over batch (0'th) axis.\n" 30 | "If you know what you're doing, set force_broadcastable_batch=False.\n" 31 | "Otherwise just add 0 to the broadcasted_axes") 32 | 33 | # axes that are NOT broadcasted = all other axes in respective order 34 | self.non_broadcasted_axes = [ax for ax in range(self.incoming_ndim) if ax not in self.broadcasted_axes] 35 | 36 | 37 | super(BroadcastLayer, self).__init__(incoming, **kwargs) 38 | 39 | def get_output_for(self, input, **kwargs): 40 | """ 41 | performs theanic magic (see layer description) 42 | :param input: activation to be reshaped into broadcastable shape 43 | :param kwargs: no effect 44 | :return: symbolic expression for reshaped layer activation 45 | """ 46 | 47 | # save symbolic input shape for unbroadcaster 48 | self.symbolic_input_shape = input.shape 49 | 50 | # dimshuffle so that the new order is [ all_broadcasted_axes, all_non_broadcasted_axes] 51 | 52 | input = input.dimshuffle(self.broadcasted_axes + self.non_broadcasted_axes) 53 | 54 | # flatten broadcasted axes into a single axis 55 | input = input.reshape((-1,) + tuple(input.shape[len(self.broadcasted_axes):])) 56 | 57 | # now shape should be [ product(broadcasted_axes_shapes), non_broadcasted_axes ] 58 | 59 | return input 60 | 61 | def get_output_shape_for(self, input_shape): 62 | 63 | broadcasted_shapes = [input_shape[ax] for ax in self.broadcasted_axes] 64 | 65 | if None not in broadcasted_shapes: 66 | new_batch_size = np.prod(broadcasted_shapes) 67 | else: 68 | new_batch_size = None 69 | 70 | non_broadcasted_shapes = tuple(input_shape[ax] for ax in self.non_broadcasted_axes) 71 | 72 | return (new_batch_size,) + non_broadcasted_shapes 73 | 74 | 75 | class UnbroadcastLayer(Layer): 76 | """ 77 | Does the inverse of BroadcastLayer 78 | :param incoming: a layer to be unbroadcasted. (!)
Must have the same number of dimensions as before broadcasting 79 | :type incoming: Layer 80 | :param broadcast_layer: the broadcast operation to be undone 81 | :type broadcast_layer: BroadcastLayer 82 | 83 | """ 84 | 85 | def __init__(self, incoming, broadcast_layer, **kwargs): 86 | self.broadcast_layer = broadcast_layer 87 | 88 | # assert that dimensionality is the same as before the broadcast 89 | assert len(incoming.output_shape) == len(self.broadcast_layer.output_shape) 90 | 91 | super(UnbroadcastLayer, self).__init__(incoming, **kwargs) 92 | 93 | def get_output_for(self, input, **kwargs): 94 | """ 95 | Un-broadcasts the broadcast layer (see class description) 96 | :param input: input tensor 97 | :param kwargs: no effect 98 | :return: un-broadcasted tensor 99 | """ 100 | 101 | if not hasattr(self.broadcast_layer,"symbolic_input_shape"): 102 | raise ValueError("UnbroadcastLayer.get_output_for must be called after respective BroadcastLayer.get_output_for") 103 | 104 | # symbolic shape. dirty hack to handle "None" axes 105 | pre_broadcast_shape = self.broadcast_layer.symbolic_input_shape 106 | 107 | broadcasted_axes_shapes = tuple(pre_broadcast_shape[ax] for ax in self.broadcast_layer.broadcasted_axes) 108 | 109 | # convert shape from [bc_ax0*bc_ax1*.., non_bc_ax0, non_bc_ax1,...] to [bc_ax0,bc_ax1,...,non_bc_ax0,non_bc_ax1,...] 110 | unrolled_shape = broadcasted_axes_shapes + tuple(input.shape)[1:] 111 | input = input.reshape(unrolled_shape) 112 | 113 | # rearrange axes to their order before broadcasting 114 | current_dim_order = self.broadcast_layer.broadcasted_axes + self.broadcast_layer.non_broadcasted_axes 115 | 116 | dimshuffle_order = [current_dim_order.index(i) for i in range(len(current_dim_order))] 117 | 118 | return input.dimshuffle(dimshuffle_order) 119 | 120 | 121 | def get_output_shape_for(self, input_shape, **kwargs): 122 | 123 | new_non_broadcast_shapes = input_shape[1:] 124 | 125 | # this one is NOT symbolic. list() is used as a shallow copy op.
126 | original_shape = list(self.broadcast_layer.input_shape) 127 | 128 | # set new non-broadcasted axes shapes instead of old ones 129 | for ax,new_ax_shape in zip(self.broadcast_layer.non_broadcasted_axes, 130 | new_non_broadcast_shapes): 131 | original_shape[ax] = new_ax_shape 132 | 133 | #return updated shape 134 | return tuple(original_shape) -------------------------------------------------------------------------------- /Seminar11/pretrained_lenet.py: -------------------------------------------------------------------------------- 1 | from lasagne.layers import InputLayer 2 | from lasagne.layers import DenseLayer 3 | from lasagne.layers import ConcatLayer 4 | from lasagne.layers import NonlinearityLayer 5 | from lasagne.layers import GlobalPoolLayer 6 | from lasagne.layers import Conv2DLayer as ConvLayer 7 | from lasagne.layers import MaxPool2DLayer as PoolLayerDNN 8 | from lasagne.layers import MaxPool2DLayer as PoolLayer 9 | from lasagne.layers import LocalResponseNormalization2DLayer as LRNLayer 10 | from lasagne.nonlinearities import softmax, linear 11 | 12 | 13 | def build_inception_module(name, input_layer, nfilters): 14 | # nfilters: (pool_proj, 1x1, 3x3_reduce, 3x3, 5x5_reduce, 5x5) 15 | net = {} 16 | net['pool'] = PoolLayerDNN(input_layer, pool_size=3, stride=1, pad=1) 17 | net['pool_proj'] = ConvLayer(net['pool'], nfilters[0], 1) 18 | 19 | net['1x1'] = ConvLayer(input_layer, nfilters[1], 1) 20 | 21 | net['3x3_reduce'] = ConvLayer(input_layer, nfilters[2], 1) 22 | net['3x3'] = ConvLayer(net['3x3_reduce'], nfilters[3], 3, pad=1) 23 | 24 | net['5x5_reduce'] = ConvLayer(input_layer, nfilters[4], 1) 25 | net['5x5'] = ConvLayer(net['5x5_reduce'], nfilters[5], 5, pad=2) 26 | 27 | net['output'] = ConcatLayer([ 28 | net['1x1'], 29 | net['3x3'], 30 | net['5x5'], 31 | net['pool_proj'], 32 | ]) 33 | 34 | return {'{}/{}'.format(name, k): v for k, v in net.items()} 35 | 36 | 37 | def build_model(): 38 | net = {} 39 | net['input'] = InputLayer((None, 3, None, None)) 40 | net['conv1/7x7_s2'] = ConvLayer(net['input'], 64, 7, stride=2, pad=3) 41 | net['pool1/3x3_s2'] = PoolLayer(net['conv1/7x7_s2'], 42 | pool_size=3, 43 | stride=2, 44 | ignore_border=False) 45 | net['pool1/norm1'] = LRNLayer(net['pool1/3x3_s2'], alpha=0.00002, k=1) 46 | net['conv2/3x3_reduce'] = ConvLayer(net['pool1/norm1'], 64, 1) 47 | net['conv2/3x3'] = ConvLayer(net['conv2/3x3_reduce'], 192, 3, pad=1) 48 | net['conv2/norm2'] = LRNLayer(net['conv2/3x3'], alpha=0.00002, k=1) 49 | net['pool2/3x3_s2'] = PoolLayer(net['conv2/norm2'], pool_size=3, stride=2) 50 | 51 | net.update(build_inception_module('inception_3a', 52 | net['pool2/3x3_s2'], 53 | [32, 64, 96, 128, 16, 32])) 54 | net.update(build_inception_module('inception_3b', 55 | net['inception_3a/output'], 56 | [64, 128, 128, 192, 32, 96])) 57 | net['pool3/3x3_s2'] = PoolLayer(net['inception_3b/output'], 58 | pool_size=3, stride=2) 59 | 60 | net.update(build_inception_module('inception_4a', 61 | net['pool3/3x3_s2'], 62 | [64, 192, 96, 208, 16, 48])) 63 | net.update(build_inception_module('inception_4b', 64 | net['inception_4a/output'], 65 | [64, 160, 112, 224, 24, 64])) 66 | net.update(build_inception_module('inception_4c', 67 | net['inception_4b/output'], 68 | [64, 128, 128, 256, 24, 64])) 69 | net.update(build_inception_module('inception_4d', 70 | net['inception_4c/output'], 71 | [64, 112, 144, 288, 32, 64])) 72 | net.update(build_inception_module('inception_4e', 73 | net['inception_4d/output'], 74 | [128, 256, 160, 320, 32, 128])) 75 | net['pool4/3x3_s2'] = 
PoolLayer(net['inception_4e/output'], 76 | pool_size=3, stride=2) 77 | 78 | net.update(build_inception_module('inception_5a', 79 | net['pool4/3x3_s2'], 80 | [128, 256, 160, 320, 32, 128])) 81 | net.update(build_inception_module('inception_5b', 82 | net['inception_5a/output'], 83 | [128, 384, 192, 384, 48, 128])) 84 | 85 | net['pool5/7x7_s1'] = GlobalPoolLayer(net['inception_5b/output']) 86 | net['loss3/classifier'] = DenseLayer(net['pool5/7x7_s1'], 87 | num_units=1000, 88 | nonlinearity=linear) 89 | net['prob'] = NonlinearityLayer(net['loss3/classifier'], 90 | nonlinearity=softmax) 91 | return net 92 | 93 | 94 | import skimage.transform 95 | import numpy as np 96 | MEAN_VALUES = np.array([104, 117, 123]).reshape((3,1,1)) 97 | def preprocess(im): 98 | if len(im.shape) == 2: 99 | im = im[:, :, np.newaxis] 100 | im = np.repeat(im, 3, axis=2) 101 | # Resize so smallest dim = 224, preserving aspect ratio 102 | h, w, _ = im.shape 103 | if h < w: 104 | im = skimage.transform.resize(im, (224, w*224//h), preserve_range=True) 105 | else: 106 | im = skimage.transform.resize(im, (h*224//w, 224), preserve_range=True) 107 | 108 | # Central crop to 224x224 109 | h, w, _ = im.shape 110 | im = im[h//2-112:h//2+112, w//2-112:w//2+112] 111 | 112 | rawim = np.copy(im).astype('uint8') 113 | 114 | # Shuffle axes to c01 115 | im = np.swapaxes(np.swapaxes(im, 1, 2), 0, 1) 116 | 117 | # Convert to BGR 118 | im = im[::-1, :, :] 119 | 120 | im = im - MEAN_VALUES 121 | return im[np.newaxis].astype('float32') 122 | -------------------------------------------------------------------------------- /Seminar2/README.md: -------------------------------------------------------------------------------- 1 | Materials you may want to view: 2 | - [main stuff from cs231](http://cs231n.github.io/linear-classify/) 3 | - [wikipedia :)](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), especially the "extensions and variants" section 4 | - [RMSPROP video](https://www.youtube.com/watch?v=defQQqkXEfE) 5 | -------------------------------------------------------------------------------- /Seminar3/HW3_Differentiation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Homework 3: Differentiation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Since it is easy to google every task, please please please try to understand what's going on. A \"just the answer\" submission will not be counted; make sure to present the derivation of your solution. It is absolutely OK if you found an answer on the web: then just exercise your $\\LaTeX$ skills while copying it into here." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "Useful links: \n", 22 | "[1](http://www.machinelearning.ru/wiki/images/2/2a/Matrix-Gauss.pdf)\n", 23 | "[2](http://www.atmos.washington.edu/~dennis/MatrixCalculus.pdf)\n", 24 | "[3](http://cal.cs.illinois.edu/~johannes/research/matrix%20calculus.pdf)\n", 25 | "[4](http://research.microsoft.com/en-us/um/people/cmbishop/prml/index.htm)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## ex.
1" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "$$ \n", 40 | "y = x^Tx, \\quad x \\in \\mathbb{R}^N \n", 41 | "$$" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "$$\n", 49 | "\\frac{dy}{dx} = \n", 50 | "$$ " 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "collapsed": true 58 | }, 59 | "outputs": [], 60 | "source": [] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## ex. 2" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "$$ y = tr(AB) \\quad A,B \\in \\mathbb{R}^{N \\times N} $$ " 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "$$\n", 81 | "\\frac{dy}{dA} =\n", 82 | "$$" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": { 89 | "collapsed": true 90 | }, 91 | "outputs": [], 92 | "source": [] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## ex. 3" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "$$ \n", 106 | "y = x^TAc , \\quad A\\in \\mathbb{R}^{N \\times N}, x\\in \\mathbb{R}^{N}, c\\in \\mathbb{R}^{N} \n", 107 | "$$" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "$$\n", 115 | "\\frac{dy}{dx} =\n", 116 | "$$" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "$$\n", 124 | "\\frac{dy}{dA} =\n", 125 | "$$ " 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "Hint for the latter (one of the ways): use *ex. 2* result and the fact \n", 133 | "$$\n", 134 | "tr(ABC) = tr (CAB)\n", 135 | "$$" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": { 142 | "collapsed": true 143 | }, 144 | "outputs": [], 145 | "source": [] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "## ex. 4" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "Classic matrix factorization example. Given matrix $X$ you need to find $A$, $S$ to approximate $X$. This can be done by simple gradient descent iteratively alternating $A$ and $S$ updates.\n", 159 | "$$\n", 160 | "J = || X - AS ||_2^2 , \\quad A\\in \\mathbb{R}^{N \\times R} , \\quad S\\in \\mathbb{R}^{R \\times M}\n", 161 | "$$\n", 162 | "$$\n", 163 | "\\frac{dJ}{dS} = ? \n", 164 | "$$ " 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "### First approach\n", 172 | "Using ex.2 and the fact:\n", 173 | "$$\n", 174 | "|| X ||_2^2 = tr(XX^T) \n", 175 | "$$ \n", 176 | "it is easy to derive gradients (you can find it in one of the refs). " 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": { 182 | "collapsed": true 183 | }, 184 | "source": [ 185 | "### Second approach\n", 186 | "You can use *slightly different techniques* if they suits you. 
Take a look at this derivation:\n", 187 | "\n", 188 | "(excerpt from [Handbook of blind source separation, Jutten, page 517](https://books.google.ru/books?id=PTbj03bYH6kC&printsec=frontcover&dq=Handbook+of+Blind+Source+Separation&hl=en&sa=X&ved=0ahUKEwi-q_apiJDLAhULvXIKHVXJDWcQ6AEIHDAA#v=onepage&q=Handbook%20of%20Blind%20Source%20Separation&f=false), open the link for a better picture)." 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": { 194 | "collapsed": true 195 | }, 196 | "source": [ 197 | "### Third approach\n", 198 | "And finally we can use the chain rule! **YOUR TURN** to do it.\n", 199 | "let $ F = AS $ \n", 200 | "\n", 201 | "**Find**\n", 202 | "$$\n", 203 | "\\frac{dJ}{dF} = \n", 204 | "$$ \n", 205 | "and \n", 206 | "$$\n", 207 | "\\frac{dF}{dS} = \n", 208 | "$$ \n", 209 | "(the shape should be $ NM \\times RM $).\n", 210 | "\n", 211 | "Now it is easy to get the desired gradients:\n", 212 | "$$\n", 213 | "\\frac{dJ}{dS} = \n", 214 | "$$ " 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": { 221 | "collapsed": true 222 | }, 223 | "outputs": [], 224 | "source": [] 225 | } 226 | ], 227 | "metadata": { 228 | "kernelspec": { 229 | "display_name": "Python 2", 230 | "language": "python", 231 | "name": "python2" 232 | }, 233 | "language_info": { 234 | "codemirror_mode": { 235 | "name": "ipython", 236 | "version": 2 237 | }, 238 | "file_extension": ".py", 239 | "mimetype": "text/x-python", 240 | "name": "python", 241 | "nbconvert_exporter": "python", 242 | "pygments_lexer": "ipython2", 243 | "version": "2.7.11" 244 | } 245 | }, 246 | "nbformat": 4, 247 | "nbformat_minor": 0 248 | } 249 | -------------------------------------------------------------------------------- /Seminar3/README.md: -------------------------------------------------------------------------------- 1 | Materials 2 | - [Backprop by cs231](http://cs231n.github.io/optimization-2/) 3 | - [Notation](http://cs231n.github.io/neural-networks-1/#nn) 4 | - pretty much all of module 1 of http://cs231n.github.io/ 5 | -------------------------------------------------------------------------------- /Seminar3/Seminar3_Differentiation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Seminar 3: Differentiation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Let's go from simple to complex. \n", 15 | "\n", 16 | "Consider a function $ y = f(x) $ whose derivative we want to find\n", 17 | "- let $ f : \\mathbb{R} \\rightarrow \\mathbb{R} $, everyone knows what to do\n", 18 | "- let $ f : \\mathbb{R}^n \\rightarrow \\mathbb{R} $\n", 19 | " \n", 20 | " now x is a vector $ [x_1, x_2, \\dots x_n] $. The gradient is defined as a vector of partial derivatives\n", 21 | " $$ \\frac{df}{dx} = [\\frac{\\partial f}{\\partial x_1}, \\frac{\\partial f}{\\partial x_2}, \\dots, \\frac{\\partial f}{\\partial x_n} ] $$\n", 22 | "\n", 23 | " ***Note***: there are different conventions on what shape the gradient should have (column or row); choose whichever is easier for you, but keep in mind that other people may prefer a different convention.\n", 24 | " \n", 25 | " \n", 26 | "- let $ \\mathbf{f} : \\mathbb{R}^n \\rightarrow \\mathbb{R}^m $\n", 27 | " \n", 28 | " now $x$ is a vector $ [x_1, x_2, \\dots x_n] $ *and* $y$ is a vector $ [y_1, y_2, \\dots y_m] $. The derivative is expressed by the Jacobian *matrix*.
\n", 29 | " \n", 30 | "$$\n", 31 | " \\frac{d\\mathbf f}{d\\mathbf x} = \\begin{bmatrix}\n", 32 | " \\dfrac{\\partial \\mathbf{f}}{\\partial x_1} & \\cdots & \\dfrac{\\partial \\mathbf{f}}{\\partial x_n} \\end{bmatrix}\n", 33 | "= \\begin{bmatrix}\n", 34 | " \\dfrac{\\partial f_1}{\\partial x_1} & \\cdots & \\dfrac{\\partial f_1}{\\partial x_n}\\\\\n", 35 | " \\vdots & \\ddots & \\vdots\\\\\n", 36 | " \\dfrac{\\partial f_m}{\\partial x_1} & \\cdots & \\dfrac{\\partial f_m}{\\partial x_n} \\end{bmatrix}\n", 37 | "$$\n", 38 | "\n", 39 | "- let $ \\mathbf{f} : \\mathbb{R}^{n \\times k} \\rightarrow \\mathbb{R}^{ m \\times p} $\n", 40 | " \n", 41 | " think of $x$ as of vector with $nk$ elements, $y$ as of vector with $mp$ elements, it is previous case now." 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "# Chain rule\n", 49 | "\n", 50 | "Let $$ L(x) = g(f(x)) $$\n", 51 | "\n", 52 | "We aim to find $\\nabla_x L$. Obvious, if $f,g: \\mathbb{R} \\rightarrow \\mathbb{R}$ using rule: \n", 53 | "\n", 54 | "$$ \\frac{dL}{dx} = \\frac{dg}{df}\\frac{df}{dx}$$\n", 55 | "\n", 56 | "and practical formula:\n", 57 | "\n", 58 | "$$ \\left.\\frac{dL}{dx}\\right|_{x=x_0} = \\left.\\frac{dg}{df}\\right|_{u = f(x_0)} \\cdot \\left.\\frac{df}{dx}\\right|_{x=x_0} $$\n", 59 | "\n", 60 | "What's up with multidimensional case ? Barely the same. It is the sum of 1-dimentional chains.\n", 61 | "$$\n", 62 | "\\frac{\\partial L}{\\partial x_i} = \\sum_{j = 1}^m \\frac{\\partial g}{\\partial f_j} \\frac{\\partial f_j}{\\partial x_i}.\n", 63 | "$$" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Seminar practice" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "#### ex.1 (dot product)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "$$ \n", 85 | "y = a^Tx = \\sum_{i=1}^N a_i x_i \\\\\n", 86 | "\\frac{\\partial y}{\\partial x_i} = a_i \\\\\n", 87 | "\\frac{dy}{dx} = a\n", 88 | "$$ " 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "#### ex.2 (Matrix-vector multiplication)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "$$ \n", 103 | "y = Ax , \\quad A \\in \\mathbb{R}^{M \\times N} \\\\\n", 104 | "y_i = a_{i,:}^T x \\\\\n", 105 | "\\frac{dy}{dx} = \\begin{bmatrix}\n", 106 | " a_{11} & \\cdots & a_{1n}\\\\\n", 107 | " \\vdots & \\ddots & \\vdots\\\\\n", 108 | " a_{m1} & \\cdots & a_{mn} \\end{bmatrix} = A \\\\\n", 109 | "$$ " 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "#### ex.3 (Matrix-Matrix multiplication)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "$$ \n", 124 | "F = AS , \\quad A \\in \\mathbb{R}^{M \\times N}, S \\in \\mathbb{R}^{N \\times K} \\\\\n", 125 | "\\frac{dF}{dS} = ?\n", 126 | "$$\n", 127 | "\n", 128 | "The result should be of shape $\\frac{dF}{dS} \\in \\mathbb{R}^{MK \\times NK}$ and let us vectorize column by column.\n", 129 | "\n", 130 | "When $K = 1$ it fallbacks to the previous example. Let's try $K = 2$ to build an intuition.\n", 131 | "\n", 132 | "Notice, that first column in $F$ does not depend on second column in $S$, and second column in $F$ does not depend on first column in $S$. And we already know what dependence (in terms of gradient) is between corresponding columns. 
Thus the answer is a block-diagonal matrix:\n", 133 | "\n", 134 | "$$\n", 135 | "\\frac{dF}{dS} = \\begin{bmatrix}\n", 136 | " A & 0\\\\\n", 137 | " 0 & A \\end{bmatrix} \\\\\n", 138 | "$$ \n", 139 | "And in the general case:\n", 140 | "$$\n", 141 | "\\frac{dF}{dS} = \\begin{bmatrix}\n", 142 | " A & \\cdots & 0\\\\\n", 143 | " \\vdots & \\ddots & \\vdots\\\\\n", 144 | " 0 & \\cdots & A \\end{bmatrix} \\\\\n", 145 | "$$ " 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "#### ex. 4 (Chain rule)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "In this example you can recognize a model! It is a simple linear regression with multiple objectives. \n", 160 | "$$ L = || Ax - y ||_2^2 , \\quad A \\in \\mathbb{R}^{M \\times N}, x \\in \\mathbb{R}^{N} $$ \n", 161 | "Let $f = Ax$. Find $\\frac{dL}{dA}$ using the chain rule. \n", 162 | "\n", 163 | "- Note that\n", 164 | "$$\n", 165 | "|| Ax - y ||_2^2 = \\sum_{i=1}^{M} (A_{i,:}x - y_i)^2 \n", 166 | "$$ \n", 167 | "so you can easily find the gradient with respect to each row (the gradient w.r.t. a vector is easier, isn't it?) and then stack these gradients to obtain the gradient w.r.t. the matrix $A$. **But we will go the hard way** and do it straightforwardly using the chain rule. Let $f = Ax$ \n", 168 | "\n", 169 | "$$\n", 170 | "L = || f - y ||_2^2 = (f-y)^T(f-y) = f^Tf - 2f^Ty + y^Ty \\\\\n", 171 | "\\frac{dL}{df} = 2(f-y)\n", 172 | "$$\n", 173 | "\n", 174 | "- Now a hint: look at the last result of *ex.3* (the block-diagonal matrix). What if we multiply something by this matrix? In fact, suppose we vectorized a given matrix $B$ into a vector $B_{vec}$ of size $N^2$ and we multiply a block-diagonal matrix of size $N^2 \\times N^2$ with $C$ on the diagonal by $B_{vec}$. The resulting vector $D_{vec}$ has $N^2$ elements, but if reshaped (column by column, as above) it is exactly $D = CB$. This can look idiosyncratic at first, but it is easy.\n", 175 | "\n", 176 | "- So what should we learn from the example above? That $\\frac{df}{dA}$ is something block-diagonal-like with $x$ on the diagonal, and the resulting $\\frac{dL}{dA}$ is just a multiplication of $\\frac{dL}{df}$ and $x$ (transpose something to get the correct dimensions). Finally, \n", 177 | "\n", 178 | "$$\n", 179 | "\\frac{dL}{dA} = 2(f-y)x^T \n", 180 | "$$\n", 181 | "\n", 182 | "\n" 183 | ] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Python 2", 189 | "language": "python", 190 | "name": "python2" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 2 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython2", 202 | "version": "2.7.11" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 0 207 | } 208 | -------------------------------------------------------------------------------- /Seminar3/Seminar3_NN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Seminar 3: Basic Artificial Neural Networks" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Neural Networks (NN) became popular due to many factors. One of them is *extensibility*. A NN is composed of modules (blocks), where each module implements some functionality.
By combining these modules one can build state-of-the-art NNs with existing NN packages. Many wonderful recent NN ideas often require just defining a new module or slightly changing an existing one. This notebook should help you understand what the modules are and what other abstractions are used in NNs. " 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "source": [ 23 | "At first, let's think of a NN as a black-box model (we don't care or know how it works inside, but when we ask it to do something, it politely does). What functionality should the black box then implement to be practical? Well, the same as other discriminative models! \n", 24 | "- it should be able to give predictions (let's call them **output**) if provided with **input** data\n", 25 | "- it should be learnable (there should be a means to adapt the model to the given data)\n", 26 | "\n", 27 | "The first point implies the black box should implement a function (we call it **forward**).\n", 28 | "\n", 29 | "$$\\text{output = NN.forward(input)}$$\n", 30 | "\n", 31 | "The second point means the model should be able to compute gradients with respect to (w.r.t.) its parameters and return them to us. We will use these gradients to perform the parameter update. The computation of the gradients is done during the **backward** call.\n", 32 | "\n", 33 | "$$\\text{NN.backward(input, criterion (output, target))}$$\n", 34 | "\n", 35 | "and the gradients are retrieved with, let's say:\n", 36 | "\n", 37 | "$$\\text{gradParameters = NN.getGradParameters()}$$\n", 38 | "\n", 39 | "The **criterion** should tell quantitatively how wrong your model is when it predicts **output** while **target** is expected. \n", 40 | "\n", 41 | "After *Seminar 2* it should be clear how we use the gradient: we use one of the **optimizers** (*sgd*, *adaGrad*, *Adam*, *nag*) to perform the parameter update. " 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "### Summary\n", 49 | "At this point we have seen three important abstractions: \n", 50 | "- black box\n", 51 | "- criterion\n", 52 | "- optimizer\n", 53 | "\n", 54 | "### Workflow\n", 55 | "The workflow is split into 3 steps (yeah, kind of abstractions too):\n", 56 | "- forward pass\n", 57 | "- backward pass\n", 58 | "- parameters update\n", 59 | "\n", 60 | "Let's detail the workflow further." 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "Forward pass: \n", 68 | "\n", 69 | "$$\n", 70 | "\\text{output = NN.forward(input)} \\\\\n", 71 | "\\text{loss = criterion.forward(output, target)}\n", 72 | "$$\n", 73 | "\n", 74 | "Backward pass: \n", 75 | "\n", 76 | "$$\n", 77 | "\\text{NNGrad = criterion.backward(output, target)} \\\\\n", 78 | "\\text{NN.backward(input, NNGrad)} \\\\\n", 79 | "$$\n", 80 | "\n", 81 | "Parameters update:\n", 82 | "\n", 83 | "$$\n", 84 | "\\text{gradParameters = NN.getGradParameters()} \\\\\n", 85 | "\\text{optimizer.update(currentParams, gradParameters)} \\\\\n", 86 | "$$\n", 87 | "\n", 88 | "There can be slight technical variations, but the high-level idea is always the same. The forward pass and the parameter update should be clear; the hardest part is understanding backprop. A minimal end-to-end sketch of this workflow follows below.
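Here is a minimal numpy sketch of that workflow, assuming a single `Linear` module and an MSE criterion; all class and method names below are illustrative stand-ins for the homework interface, not a real package API:

```python
import numpy as np

np.random.seed(0)

class Linear:
    """A toy module: output = input.dot(W)."""
    def __init__(self, n_in, n_out):
        self.W = np.random.randn(n_in, n_out) * 0.1
    def forward(self, input):
        return input.dot(self.W)
    def backward(self, input, grad_output):
        self.gradW = input.T.dot(grad_output)   # gradient w.r.t. parameters
        return grad_output.dot(self.W.T)        # gradient w.r.t. input, passed further back
    def get_grad_parameters(self):
        return self.gradW

class MSECriterion:
    def forward(self, output, target):
        return np.mean((output - target) ** 2)
    def backward(self, output, target):
        return 2.0 * (output - target) / output.size

nn, criterion = Linear(3, 1), MSECriterion()
X = np.random.randn(16, 3)
Y = X.sum(axis=1, keepdims=True)                # the "true" model: sum of the features

for step in range(200):
    output = nn.forward(X)                      # forward pass
    loss = criterion.forward(output, Y)
    nn_grad = criterion.backward(output, Y)     # backward pass
    nn.backward(X, nn_grad)
    nn.W -= 0.1 * nn.get_grad_parameters()      # parameter update (plain sgd)

print(loss)                                     # should be close to zero
```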
" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "# White box" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "Last thing before discussing backprop is to whiten our black box, we are old enough to know the truth. \n", 103 | "\n", 104 | "As said in introduction NN is composed of modules and surprisingly these modules are NNs too by definition! Remember, left or right child in binary tree is also a tree, and the leaves are trees themselfs. Kind of the same logic it is here too, but is about directed acyclic graphs (you can think of a chain for the first time). You can find \"starter\" and \"final\" nodes in these graphs (start and end of a chain), the data goes through the graph according to the directions, each node applies its **forward** function till the last node is reached. On backward pass the graph is traversed form \"final\" nodes to \"starter\" and each node applies **backward** function to whatever previous node passed. \n", 105 | "\n", 106 | "Here is one of the real-world NNs, the data goes from left to right. \n", 107 | "\n", 108 | "\n", 109 | "\n", 110 | "So the cool thing is: each node is a NN, every connected subgraph is NN. We defined everything we need already, you just need a set of \"simple\" NNs which are used as building blocks for comlex models! That is exactly what the NN packges implements for you and what you are to do in homework." 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "### Backprop" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "**Be careful!** In this section the variable $x$ designates the parameters in NN and not the input data. Think that we fixed the data now, and loss is a function of parametrs, we try to find the best parameters to lower the loss.\n", 125 | "\n", 126 | "Let's define as $ f(x) $ the function NN applies to input data and $ g(o) $ is a criterion. Then\n", 127 | "$$ L(x) = g(f(x); target) $$\n", 128 | "\n", 129 | "We aim to find $\\nabla_x L$. Obvious, if $f,g: \\mathbb{R} \\rightarrow \\mathbb{R}$ using chain rule: \n", 130 | "\n", 131 | "$$ \\frac{dL}{dx} = \\frac{dg}{df}\\frac{df}{dx}$$\n", 132 | "\n", 133 | "and practical formula:\n", 134 | "\n", 135 | "$$ \\left.\\frac{dL}{dx}\\right|_{x=x_0} = \\left.\\frac{dg}{df}\\right|_{u = f(x_0)} \\cdot \\left.\\frac{df}{dx}\\right|_{x=x_0} $$\n", 136 | "\n", 137 | "What's up with multidimensional case ? Barely the same. It is the sum of 1-dimentional chains.\n", 138 | "$$\n", 139 | "\\frac{\\partial L}{\\partial x_i} = \\sum_{j = 1}^m \\frac{\\partial L}{\\partial f_j} \\frac{\\partial f_j}{\\partial x_i}.\n", 140 | "$$\n", 141 | "\n", 142 | "Actually that is all you need to write backprop functions! Go to differenciation notebook to for some practice before homework." 
143 | ] 144 | } 145 | ], 146 | "metadata": { 147 | "kernelspec": { 148 | "display_name": "Python 2", 149 | "language": "python", 150 | "name": "python2" 151 | }, 152 | "language_info": { 153 | "codemirror_mode": { 154 | "name": "ipython", 155 | "version": 2 156 | }, 157 | "file_extension": ".py", 158 | "mimetype": "text/x-python", 159 | "name": "python", 160 | "nbconvert_exporter": "python", 161 | "pygments_lexer": "ipython2", 162 | "version": "2.7.11" 163 | } 164 | }, 165 | "nbformat": 4, 166 | "nbformat_minor": 0 167 | } 168 | -------------------------------------------------------------------------------- /Seminar3/autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar3/autoencoder.png -------------------------------------------------------------------------------- /Seminar3/googlenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar3/googlenet.png -------------------------------------------------------------------------------- /Seminar3/grad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar3/grad.png -------------------------------------------------------------------------------- /Seminar3/outdated/interpolation.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar3/outdated/interpolation.mp4 -------------------------------------------------------------------------------- /Seminar4/README.md: -------------------------------------------------------------------------------- 1 | More materials: 2 | - http://cs231n.github.io/convolutional-networks/ 3 | - http://cs231n.github.io/understanding-cnn/ 4 | - [a deep learning neophyte cheat sheet](http://www.kdnuggets.com/2016/03/must-know-tips-deep-learning-part-1.html) 5 | - [more stuff for vision](https://bavm2013.splashthat.com/img/events/46439/assets/34a7.ranzato.pdf) 6 | - a [CNN trainer in a browser](https://cs.stanford.edu/people/karpathy/convnetjs/demo/cifar10.html) 7 | -------------------------------------------------------------------------------- /Seminar4/Seminar-intro-slide.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from lasagne.layers import *\n", 12 | "from lasagne.nonlinearities import *\n", 13 | "from lasagne import init" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "nn = InputLayer([None,3,100,100])\n", 25 | "\n", 26 | "nn = Conv2DLayer(nn,num_filters=512, filter_size=(3,3),\n", 27 | " W = init.Constant(0))\n", 28 | "\n", 29 | "nn = Conv2DLayer(nn,num_filters=128,filter_size=(3,3),\n", 30 | " W = init.Constant(0))\n", 31 | "\n", 32 | "nn = Conv2DLayer(nn,num_filters=32,filter_size=(3,3),\n", 33 | " W = init.Constant(0))\n", 34 | "\n", 35 | "nn = Pool2DLayer(nn,pool_size=(6,6),mode='max')\n", 36 | "\n", 37 | "nn = Conv2DLayer(nn,num_filters=8,filter_size=(10,10),\n", 38 | 
" W = init.Normal(std=0.01))\n", 39 | "\n", 40 | "nn = Conv2DLayer(nn,num_filters=8,filter_size=(10,10),\n", 41 | " W = init.Normal(std=0.01))\n", 42 | "\n", 43 | "nn = Pool2DLayer(nn,pool_size=(3,3),mode='max')\n", 44 | "\n", 45 | "nn = DenseLayer(nn,512,nonlinearity=softmax)\n", 46 | "\n", 47 | "nn = DropoutLayer(nn,p=0.5)\n", 48 | "\n", 49 | "nn = DenseLayer(nn,512,nonlinearity=softmax)\n", 50 | "\n", 51 | "nn = DenseLayer(nn,10,nonlinearity=sigmoid)\n", 52 | "\n", 53 | "nn = DropoutLayer(nn,p=0.5)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "```\n", 61 | "\n", 62 | "```\n", 63 | "\n", 64 | "```\n", 65 | "\n", 66 | "```\n", 67 | "\n", 68 | "```\n", 69 | "\n", 70 | "```\n", 71 | "\n", 72 | "```\n", 73 | "\n", 74 | "```\n", 75 | "\n", 76 | "```\n", 77 | "\n", 78 | "```\n", 79 | "\n", 80 | "```\n", 81 | "\n", 82 | "```\n", 83 | "\n", 84 | "```\n", 85 | "\n", 86 | "```\n", 87 | "\n", 88 | "```\n", 89 | "\n", 90 | "```\n", 91 | "\n", 92 | "```\n", 93 | "\n", 94 | "```\n", 95 | "\n", 96 | "```\n", 97 | "\n", 98 | "```\n", 99 | "\n", 100 | "```\n", 101 | "\n", 102 | "```\n", 103 | "\n", 104 | "```\n", 105 | "\n", 106 | "```\n", 107 | "\n", 108 | "```\n", 109 | "\n", 110 | "```\n", 111 | "\n", 112 | "```\n", 113 | "\n", 114 | "```\n", 115 | "\n", 116 | "```\n", 117 | "\n", 118 | "```\n", 119 | "\n", 120 | "\n", 121 | "# Book of grudges\n", 122 | "* zero init for weights will cause symmetry effect\n", 123 | "* Too many filters for first 3x3 convolution - will lead to enormous matrix while there's just not enough relevant combinations of 3x3 images (overkill).\n", 124 | "* Usually the further you go, the more filters you need.\n", 125 | "* large filters (10x10 is generally a bad pactice, and you definitely need more than 10 of them\n", 126 | "* the second of 10x10 convolution gets 8x6x6 image as input, so it's technically unable to perform such convolution.\n", 127 | "* Softmax nonlinearity effectively makes only 1 or a few neurons from the entire layer to \"fire\", rendering 512-neuron layer almost useless. Softmax at the output layer is okay though\n", 128 | "* Dropout after probability prediciton is just lame. A few random classes get probability of 0, so your probabilities no longer sum to 1 and crossentropy goes -inf." 
129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": true 136 | }, 137 | "outputs": [], 138 | "source": [] 139 | } 140 | ], 141 | "metadata": { 142 | "kernelspec": { 143 | "display_name": "Python [Root]", 144 | "language": "python", 145 | "name": "Python [Root]" 146 | }, 147 | "language_info": { 148 | "codemirror_mode": { 149 | "name": "ipython", 150 | "version": 2 151 | }, 152 | "file_extension": ".py", 153 | "mimetype": "text/x-python", 154 | "name": "python", 155 | "nbconvert_exporter": "python", 156 | "pygments_lexer": "ipython2", 157 | "version": "2.7.12" 158 | } 159 | }, 160 | "nbformat": 4, 161 | "nbformat_minor": 0 162 | } 163 | -------------------------------------------------------------------------------- /Seminar4/bonus/cifar.py: -------------------------------------------------------------------------------- 1 | """I load some cifar""" 2 | 3 | import numpy as np 4 | from sklearn.cross_validation import train_test_split 5 | import urllib2 6 | import urllib 7 | def unpickle(file): 8 | import cPickle 9 | fo = open(file, 'rb') 10 | dict = cPickle.load(fo) 11 | fo.close() 12 | return dict 13 | 14 | 15 | import os 16 | def download_cifar10(path, 17 | url='https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz', 18 | tarname='cifar-10-python.tar.gz'): 19 | import tarfile 20 | if not os.path.exists(path): 21 | os.mkdir(path) 22 | 23 | 24 | 25 | urllib.urlretrieve(url, os.path.join(path,tarname)) 26 | tfile = tarfile.open(os.path.join(path,tarname)) 27 | tfile.extractall(path=path) 28 | 29 | 30 | def load_cifar10(data_path=".",test_size=0.2,random_state=1337): 31 | 32 | test_path = os.path.join(data_path,"cifar-10-batches-py/test_batch") 33 | train_paths = [os.path.join(data_path,"cifar-10-batches-py/data_batch_%i"%i) for i in range(1,6)] 34 | 35 | if not os.path.exists(test_path) or not all(list(map(os.path.exists, train_paths))): 36 | print "Dataset not found. Downloading..." 37 | download_cifar10(data_path) 38 | 39 | train_batches = list(map(unpickle,train_paths)) 40 | test_batch = unpickle(test_path) 41 | 42 | X = np.concatenate([batch["data"] for batch in train_batches]).reshape([-1,3,32,32]).astype('float32')/255 43 | y = np.concatenate([batch["labels"] for batch in train_batches]).astype('int32') 44 | X_train,X_val,y_train,y_val = train_test_split(X,y, 45 | test_size=test_size, 46 | random_state=random_state) 47 | 48 | X_test = test_batch["data"].reshape([-1,3,32,32]).astype('float32')/255 49 | y_test = np.array(test_batch["labels"]).astype('int32') 50 | 51 | return X_train,y_train,X_val,y_val,X_test,y_test 52 | -------------------------------------------------------------------------------- /Seminar4/mnist.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import time 4 | 5 | import numpy as np 6 | 7 | __doc__="""taken from https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py""" 8 | 9 | def load_dataset(): 10 | # We first define a download function, supporting both Python 2 and 3. 11 | if sys.version_info[0] == 2: 12 | from urllib import urlretrieve 13 | else: 14 | from urllib.request import urlretrieve 15 | 16 | def download(filename, source='http://yann.lecun.com/exdb/mnist/'): 17 | print("Downloading %s" % filename) 18 | urlretrieve(source + filename, filename) 19 | 20 | # We then define functions for loading MNIST images and labels. 21 | # For convenience, they also download the requested files if needed. 
22 | import gzip 23 | 24 | def load_mnist_images(filename): 25 | if not os.path.exists(filename): 26 | download(filename) 27 | # Read the inputs in Yann LeCun's binary format. 28 | with gzip.open(filename, 'rb') as f: 29 | data = np.frombuffer(f.read(), np.uint8, offset=16) 30 | # The inputs are vectors now, we reshape them to monochrome 2D images, 31 | # following the shape convention: (examples, channels, rows, columns) 32 | data = data.reshape(-1, 1, 28, 28) 33 | # The inputs come as bytes, we convert them to float32 in range [0,1]. 34 | # (Actually to range [0, 255/256], for compatibility to the version 35 | # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.) 36 | return data / np.float32(256) 37 | 38 | def load_mnist_labels(filename): 39 | if not os.path.exists(filename): 40 | download(filename) 41 | # Read the labels in Yann LeCun's binary format. 42 | with gzip.open(filename, 'rb') as f: 43 | data = np.frombuffer(f.read(), np.uint8, offset=8) 44 | # The labels are vectors of integers now, that's exactly what we want. 45 | return data 46 | 47 | # We can now download and read the training and test set images and labels. 48 | X_train = load_mnist_images('train-images-idx3-ubyte.gz') 49 | y_train = load_mnist_labels('train-labels-idx1-ubyte.gz') 50 | X_test = load_mnist_images('t10k-images-idx3-ubyte.gz') 51 | y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz') 52 | 53 | # We reserve the last 10000 training examples for validation. 54 | X_train, X_val = X_train[:-10000], X_train[-10000:] 55 | y_train, y_val = y_train[:-10000], y_train[-10000:] 56 | 57 | # We just return all the arrays in order, as expected in main(). 58 | # (It doesn't matter how we do this as long as we can read them again.) 59 | return X_train, y_train, X_val, y_val, X_test, y_test 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /Seminar5/README.md: -------------------------------------------------------------------------------- 1 | More materials: 2 | - http://cs231n.github.io/transfer-learning/ 3 | - lasagne [recipes](https://github.com/Lasagne/Recipes) 4 | - [a few words on soft-targets](http://www.kdnuggets.com/2015/05/dark-knowledge-neural-network.html) 5 | -------------------------------------------------------------------------------- /Seminar5/sample_images/albatross.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/albatross.jpg -------------------------------------------------------------------------------- /Seminar5/sample_images/fox.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/fox.jpg -------------------------------------------------------------------------------- /Seminar5/sample_images/frog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/frog.jpg -------------------------------------------------------------------------------- /Seminar5/sample_images/hen.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/hen.jpg 
-------------------------------------------------------------------------------- /Seminar5/sample_images/kermit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/kermit.jpg -------------------------------------------------------------------------------- /Seminar5/sample_images/kitten.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/kitten.jpg -------------------------------------------------------------------------------- /Seminar5/sample_images/puppy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/puppy.jpg -------------------------------------------------------------------------------- /Seminar5/sample_images/steve_martin.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/steve_martin.jpg -------------------------------------------------------------------------------- /Seminar5/sample_images/teapot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/teapot.jpg -------------------------------------------------------------------------------- /Seminar5/sample_images/tiger.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar5/sample_images/tiger.jpg -------------------------------------------------------------------------------- /Seminar6/custom/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar6/custom/__init__.py -------------------------------------------------------------------------------- /Seminar6/custom/net.py: -------------------------------------------------------------------------------- 1 | ################################################# You MIGHT need these imports. 2 | import cPickle 3 | 4 | from fast_rcnn.config import cfg 5 | 6 | class Net(object): 7 | """A class for holding a symbolic representation of the neural network. 8 | Instances of this class are going to be used both in the solver and 9 | in the tester. 10 | """ 11 | 12 | def __init__(self, snapshot_path=None): 13 | """Constructs a symbolic graph for a neural network. 
14 | 15 | Arguments: 16 | snapshot_path (str): path to the pretrained network 17 | """ 18 | pass 19 | 20 | def save(self, filename): 21 | """Saves model weights.""" 22 | pass 23 | 24 | @property 25 | def input(self): 26 | """Returns symbolic inputs of the model.""" 27 | pass 28 | 29 | @property 30 | def prediction(self): 31 | """Returns symbolic variable containing the model predictions.""" 32 | pass 33 | 34 | @property 35 | def params(self): 36 | """Returns shared variables containing the model weights.""" 37 | pass 38 | 39 | @property 40 | def param_values(self): 41 | """Returns a list of the model weights (values).""" 42 | pass 43 | -------------------------------------------------------------------------------- /Seminar6/custom/solver.py: -------------------------------------------------------------------------------- 1 | ################################################# You MIGHT need these imports. 2 | from fast_rcnn.config import cfg 3 | from net import Net 4 | from roi_data_layer.layer import RoIDataLayer 5 | 6 | class Solver(object): 7 | def __init__(self): 8 | # Holds current iteration number. 9 | self.iter = 0 10 | 11 | # How frequently we should print the training info. 12 | self.display_freq = 1 13 | 14 | # Holds the path prefix for snapshots. 15 | self.snapshot_prefix = 'snapshot' 16 | 17 | ###################################################### Your code goes here. 18 | 19 | # This might be a useful static method to have. 20 | @staticmethod 21 | def build_step_fn(net): 22 | """Takes a symbolic network and compiles a function for weights updates.""" 23 | pass 24 | 25 | def get_training_batch(self): 26 | """Uses ROIDataLayer to fetch a training batch. 27 | 28 | Returns: 29 | input_data (ndarray): input data suitable for R-CNN processing 30 | labels (ndarray): batch labels (of type int32) 31 | """ 32 | 33 | ###################################################### Your code goes here. 34 | 35 | return input_data, labels 36 | 37 | def step(self): 38 | """Conducts a single step of SGD.""" 39 | 40 | ###################################################### Your code goes here. 41 | # Among other things, assign the current loss value to self.loss. 42 | 43 | self.iter += 1 44 | if self.iter % self.display_freq == 0: 45 | print 'Iteration {:<5} Train loss: {}'.format(self.iter, self.loss) 46 | 47 | def save(self, filename): 48 | """Saves model weights.""" 49 | pass 50 | -------------------------------------------------------------------------------- /Seminar6/custom/tester.py: -------------------------------------------------------------------------------- 1 | ################################################### You MIGHT need this import. 2 | from net import Net 3 | 4 | class Tester(object): 5 | def __init__(self, snapshot_path): 6 | # The original Girshick's code requires this field to exist. 7 | self.name = '' 8 | 9 | ###################################################### Your code goes here. 10 | # Load your network into, say, self.net. 11 | 12 | def forward(self, data, rois): 13 | """Performs a forward pass through the neural network. 
14 | 15 | Arguments: 16 | data (ndarray): tensor containing the whole scenes (images) 17 | rois (ndarray): tensor containing ROIs; rois[:, 0] are indices of scenes 18 | in data, the rest are (left, top, right, bottom) 19 | coordinates 20 | 21 | Returns: 22 | output (dict): a dictionary with a single key 'cls_prob' holding 23 | probability distributions produced by the network 24 | """ 25 | 26 | ###################################################### Your code goes here. 27 | # You should have the following line: 28 | # output = {'cls_prob': net_output}. 29 | 30 | return output 31 | -------------------------------------------------------------------------------- /Seminar6/data/.gitignore: -------------------------------------------------------------------------------- 1 | selective_search* 2 | imagenet_models* 3 | fast_rcnn_models* 4 | VOCdevkit* 5 | cache 6 | -------------------------------------------------------------------------------- /Seminar6/data/pylintrc: -------------------------------------------------------------------------------- 1 | [TYPECHECK] 2 | 3 | ignored-modules = numpy, numpy.random, cv2 4 | -------------------------------------------------------------------------------- /Seminar6/data/scripts/fetch_selective_search_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | FILE=selective_search_data.tgz 7 | URL=http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/$FILE 8 | CHECKSUM=7078c1db87a7851b31966b96774cd9b9 9 | 10 | if [ -f $FILE ]; then 11 | echo "File already exists. Checking md5..." 12 | os=`uname -s` 13 | if [ "$os" = "Linux" ]; then 14 | checksum=`md5sum $FILE | awk '{ print $1 }'` 15 | elif [ "$os" = "Darwin" ]; then 16 | checksum=`cat $FILE | md5` 17 | fi 18 | if [ "$checksum" = "$CHECKSUM" ]; then 19 | echo "Checksum is correct. No need to download." 20 | exit 0 21 | else 22 | echo "Checksum is incorrect. Need to download again." 23 | fi 24 | fi 25 | 26 | echo "Downloading precomputed selective search boxes (0.5G)..." 27 | 28 | wget $URL -O $FILE 29 | 30 | echo "Unzipping..." 31 | 32 | tar zxvf $FILE 33 | 34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM." 35 | -------------------------------------------------------------------------------- /Seminar6/experiments/cfgs/rcnn.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: rcnn 2 | TRAIN: 3 | PROPOSAL_METHOD: 'selective_search' # or 'rpn' 4 | SNAPSHOT_ITERS: 5000 5 | BBOX_REG: False 6 | TEST: 7 | PROPOSAL_METHOD: 'selective_search' # or 'rpn' 8 | BBOX_REG: False -------------------------------------------------------------------------------- /Seminar6/experiments/scripts/fast_rcnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Usage: 3 | # ./experiments/scripts/fast_rcnn.sh DATASET [options args to {train,test}_net.py] 4 | # DATASET is either pascal_voc or coco.
5 | # 6 | # Example: 7 | # ./experiments/scripts/fast_rcnn.sh pascal_voc 8 | 9 | set -x 10 | set -e 11 | 12 | export PYTHONUNBUFFERED="True" 13 | 14 | DATASET=$1 15 | 16 | array=( $@ ) 17 | len=${#array[@]} 18 | EXTRA_ARGS=${array[@]:3:$len} 19 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 20 | 21 | case $DATASET in 22 | pascal_voc) 23 | TRAIN_IMDB="voc_2007_trainval" 24 | TEST_IMDB="voc_2007_test" 25 | ITERS=40000 26 | ;; 27 | *) 28 | echo "No dataset given" 29 | exit 30 | ;; 31 | esac 32 | 33 | LOG="experiments/logs/fast_rcnn_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 34 | exec &> >(tee -a "$LOG") 35 | echo Logging output to "$LOG" 36 | 37 | time ./tools/train_net.py \ 38 | --imdb ${TRAIN_IMDB} \ 39 | --iters ${ITERS} \ 40 | ${EXTRA_ARGS} 41 | 42 | set +x 43 | NET_FINAL=`grep -B 1 "done solving" ${LOG} | grep "Wrote snapshot" | awk '{print $4}'` 44 | set -x 45 | 46 | time ./tools/test_net.py \ 47 | --imdb ${TEST_IMDB} \ 48 | ${EXTRA_ARGS} 49 | -------------------------------------------------------------------------------- /Seminar6/experiments/scripts/test_rcnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -e 4 | 5 | export PYTHONUNBUFFERED="True" 6 | 7 | SNAPSHOT=$1 8 | 9 | TEST_IMDB="voc_2007_test" 10 | 11 | LOG="experiments/logs/rcnn.test.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 12 | exec &> >(tee -a "$LOG") 13 | echo Logging output to "$LOG" 14 | 15 | time ./tools/test_net.py \ 16 | --snapshot $SNAPSHOT \ 17 | --imdb ${TEST_IMDB} \ 18 | --cfg ./experiments/cfgs/rcnn.yml 19 | -------------------------------------------------------------------------------- /Seminar6/experiments/scripts/train_rcnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -e 4 | 5 | export PYTHONUNBUFFERED="True" 6 | 7 | ITERS=$1 8 | 9 | TRAIN_IMDB="voc_2007_trainval" 10 | 11 | LOG="experiments/logs/rcnn.train.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 12 | exec &> >(tee -a "$LOG") 13 | echo Logging output to "$LOG" 14 | 15 | time ./tools/train_net.py \ 16 | --imdb ${TRAIN_IMDB} \ 17 | --iters ${ITERS} \ 18 | --cfg ./experiments/cfgs/rcnn.yml 19 | -------------------------------------------------------------------------------- /Seminar6/lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | 5 | clean: 6 | find ./ -name *.so -type f -delete 7 | -------------------------------------------------------------------------------- /Seminar6/lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /Seminar6/lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | 
fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /Seminar6/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /Seminar6/lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /Seminar6/lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | 7 | import numpy as np 8 | 9 | def unique_boxes(boxes, scale=1.0): 10 | """Return indices of unique boxes.""" 11 | v = np.array([1, 1e3, 1e6, 1e9]) 12 | hashes = np.round(boxes * scale).dot(v) 13 | _, index = np.unique(hashes, return_index=True) 14 | return np.sort(index) 15 | 16 | def xywh_to_xyxy(boxes): 17 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 18 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 19 | 20 | def xyxy_to_xywh(boxes): 21 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 22 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 23 | 24 | def validate_boxes(boxes, width=0, height=0): 25 | """Check that a set of boxes are valid.""" 26 | x1 = boxes[:, 0] 27 | y1 = boxes[:, 1] 28 | x2 = boxes[:, 2] 29 | y2 = boxes[:, 3] 30 | assert (x1 >= 0).all() 31 | assert 
(y1 >= 0).all() 32 | assert (x2 >= x1).all() 33 | assert (y2 >= y1).all() 34 | assert (x2 < width).all() 35 | assert (y2 < height).all() 36 | 37 | def filter_small_boxes(boxes, min_size): 38 | w = boxes[:, 2] - boxes[:, 0] 39 | h = boxes[:, 3] - boxes[:, 1] 40 | keep = np.where((w >= min_size) & (h > min_size))[0] 41 | return keep 42 | -------------------------------------------------------------------------------- /Seminar6/lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | 10 | __sets = {} 11 | 12 | from datasets.pascal_voc import pascal_voc 13 | import numpy as np 14 | 15 | # Set up voc__ using selective search "fast" mode 16 | for year in ['2007', '2012']: 17 | for split in ['train', 'val', 'trainval', 'test']: 18 | name = 'voc_{}_{}'.format(year, split) 19 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) 20 | 21 | def get_imdb(name): 22 | """Get an imdb (image database) by name.""" 23 | if not __sets.has_key(name): 24 | raise KeyError('Unknown dataset: {}'.format(name)) 25 | return __sets[name]() 26 | 27 | def list_imdbs(): 28 | """List all registered imdbs.""" 29 | return __sets.keys() 30 | -------------------------------------------------------------------------------- /Seminar6/lib/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import os.path as osp 10 | import PIL 11 | from utils.cython_bbox import bbox_overlaps 12 | import numpy as np 13 | import scipy.sparse 14 | from fast_rcnn.config import cfg 15 | 16 | class imdb(object): 17 | """Image database.""" 18 | 19 | def __init__(self, name): 20 | self._name = name 21 | self._num_classes = 0 22 | self._classes = [] 23 | self._image_index = [] 24 | self._obj_proposer = 'selective_search' 25 | self._roidb = None 26 | self._roidb_handler = self.default_roidb 27 | # Use this dict for storing dataset specific config options 28 | self.config = {} 29 | 30 | @property 31 | def name(self): 32 | return self._name 33 | 34 | @property 35 | def num_classes(self): 36 | return len(self._classes) 37 | 38 | @property 39 | def classes(self): 40 | return self._classes 41 | 42 | @property 43 | def image_index(self): 44 | return self._image_index 45 | 46 | @property 47 | def roidb_handler(self): 48 | return self._roidb_handler 49 | 50 | @roidb_handler.setter 51 | def roidb_handler(self, val): 52 | self._roidb_handler = val 53 | 54 | def set_proposal_method(self, method): 55 | method = eval('self.' 
+ method + '_roidb') 56 | self.roidb_handler = method 57 | 58 | @property 59 | def roidb(self): 60 | # A roidb is a list of dictionaries, each with the following keys: 61 | # boxes 62 | # gt_overlaps 63 | # gt_classes 64 | # flipped 65 | if self._roidb is not None: 66 | return self._roidb 67 | self._roidb = self.roidb_handler() 68 | return self._roidb 69 | 70 | @property 71 | def cache_path(self): 72 | cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache')) 73 | if not os.path.exists(cache_path): 74 | os.makedirs(cache_path) 75 | return cache_path 76 | 77 | @property 78 | def num_images(self): 79 | return len(self.image_index) 80 | 81 | def image_path_at(self, i): 82 | raise NotImplementedError 83 | 84 | def default_roidb(self): 85 | raise NotImplementedError 86 | 87 | def evaluate_detections(self, all_boxes, output_dir=None): 88 | """ 89 | all_boxes is a list of length number-of-classes. 90 | Each list element is a list of length number-of-images. 91 | Each of those list elements is either an empty list [] 92 | or a numpy array of detection. 93 | 94 | all_boxes[class][image] = [] or np.array of shape #dets x 5 95 | """ 96 | raise NotImplementedError 97 | 98 | def _get_widths(self): 99 | return [PIL.Image.open(self.image_path_at(i)).size[0] 100 | for i in xrange(self.num_images)] 101 | 102 | def append_flipped_images(self): 103 | num_images = self.num_images 104 | widths = self._get_widths() 105 | for i in xrange(num_images): 106 | boxes = self.roidb[i]['boxes'].copy() 107 | oldx1 = boxes[:, 0].copy() 108 | oldx2 = boxes[:, 2].copy() 109 | boxes[:, 0] = widths[i] - oldx2 - 1 110 | boxes[:, 2] = widths[i] - oldx1 - 1 111 | assert (boxes[:, 2] >= boxes[:, 0]).all() 112 | entry = {'boxes' : boxes, 113 | 'gt_overlaps' : self.roidb[i]['gt_overlaps'], 114 | 'gt_classes' : self.roidb[i]['gt_classes'], 115 | 'flipped' : True} 116 | self.roidb.append(entry) 117 | self._image_index = self._image_index * 2 118 | 119 | def evaluate_recall(self, candidate_boxes=None, thresholds=None, 120 | area='all', limit=None): 121 | """Evaluate detection proposal recall metrics. 
122 | 123 | Returns: 124 | results: dictionary of results with keys 125 | 'ar': average recall 126 | 'recalls': vector recalls at each IoU overlap threshold 127 | 'thresholds': vector of IoU overlap thresholds 128 | 'gt_overlaps': vector of all ground-truth overlaps 129 | """ 130 | # Record max overlap value for each gt box 131 | # Return vector of overlap values 132 | areas = { 'all': 0, 'small': 1, 'medium': 2, 'large': 3, 133 | '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7} 134 | area_ranges = [ [0**2, 1e5**2], # all 135 | [0**2, 32**2], # small 136 | [32**2, 96**2], # medium 137 | [96**2, 1e5**2], # large 138 | [96**2, 128**2], # 96-128 139 | [128**2, 256**2], # 128-256 140 | [256**2, 512**2], # 256-512 141 | [512**2, 1e5**2], # 512-inf 142 | ] 143 | assert areas.has_key(area), 'unknown area range: {}'.format(area) 144 | area_range = area_ranges[areas[area]] 145 | gt_overlaps = np.zeros(0) 146 | num_pos = 0 147 | for i in xrange(self.num_images): 148 | # Checking for max_overlaps == 1 avoids including crowd annotations 149 | # (...pretty hacking :/) 150 | max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1) 151 | gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) & 152 | (max_gt_overlaps == 1))[0] 153 | gt_boxes = self.roidb[i]['boxes'][gt_inds, :] 154 | gt_areas = self.roidb[i]['seg_areas'][gt_inds] 155 | valid_gt_inds = np.where((gt_areas >= area_range[0]) & 156 | (gt_areas <= area_range[1]))[0] 157 | gt_boxes = gt_boxes[valid_gt_inds, :] 158 | num_pos += len(valid_gt_inds) 159 | 160 | if candidate_boxes is None: 161 | # If candidate_boxes is not supplied, the default is to use the 162 | # non-ground-truth boxes from this roidb 163 | non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0] 164 | boxes = self.roidb[i]['boxes'][non_gt_inds, :] 165 | else: 166 | boxes = candidate_boxes[i] 167 | if boxes.shape[0] == 0: 168 | continue 169 | if limit is not None and boxes.shape[0] > limit: 170 | boxes = boxes[:limit, :] 171 | 172 | overlaps = bbox_overlaps(boxes.astype(np.float), 173 | gt_boxes.astype(np.float)) 174 | 175 | _gt_overlaps = np.zeros((gt_boxes.shape[0])) 176 | for j in xrange(gt_boxes.shape[0]): 177 | # find which proposal box maximally covers each gt box 178 | argmax_overlaps = overlaps.argmax(axis=0) 179 | # and get the iou amount of coverage for each gt box 180 | max_overlaps = overlaps.max(axis=0) 181 | # find which gt box is 'best' covered (i.e. 
'best' = most iou) 182 | gt_ind = max_overlaps.argmax() 183 | gt_ovr = max_overlaps.max() 184 | assert(gt_ovr >= 0) 185 | # find the proposal box that covers the best covered gt box 186 | box_ind = argmax_overlaps[gt_ind] 187 | # record the iou coverage of this gt box 188 | _gt_overlaps[j] = overlaps[box_ind, gt_ind] 189 | assert(_gt_overlaps[j] == gt_ovr) 190 | # mark the proposal box and the gt box as used 191 | overlaps[box_ind, :] = -1 192 | overlaps[:, gt_ind] = -1 193 | # append recorded iou coverage level 194 | gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) 195 | 196 | gt_overlaps = np.sort(gt_overlaps) 197 | if thresholds is None: 198 | step = 0.05 199 | thresholds = np.arange(0.5, 0.95 + 1e-5, step) 200 | recalls = np.zeros_like(thresholds) 201 | # compute recall for each iou threshold 202 | for i, t in enumerate(thresholds): 203 | recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) 204 | # ar = 2 * np.trapz(recalls, thresholds) 205 | ar = recalls.mean() 206 | return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds, 207 | 'gt_overlaps': gt_overlaps} 208 | 209 | def create_roidb_from_box_list(self, box_list, gt_roidb): 210 | assert len(box_list) == self.num_images, \ 211 | 'Number of boxes must match number of ground-truth images' 212 | roidb = [] 213 | for i in xrange(self.num_images): 214 | boxes = box_list[i] 215 | num_boxes = boxes.shape[0] 216 | overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32) 217 | 218 | if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0: 219 | gt_boxes = gt_roidb[i]['boxes'] 220 | gt_classes = gt_roidb[i]['gt_classes'] 221 | gt_overlaps = bbox_overlaps(boxes.astype(np.float), 222 | gt_boxes.astype(np.float)) 223 | argmaxes = gt_overlaps.argmax(axis=1) 224 | maxes = gt_overlaps.max(axis=1) 225 | I = np.where(maxes > 0)[0] 226 | overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] 227 | 228 | overlaps = scipy.sparse.csr_matrix(overlaps) 229 | roidb.append({ 230 | 'boxes' : boxes, 231 | 'gt_classes' : np.zeros((num_boxes,), dtype=np.int32), 232 | 'gt_overlaps' : overlaps, 233 | 'flipped' : False, 234 | 'seg_areas' : np.zeros((num_boxes,), dtype=np.float32), 235 | }) 236 | return roidb 237 | 238 | @staticmethod 239 | def merge_roidbs(a, b): 240 | assert len(a) == len(b) 241 | for i in xrange(len(a)): 242 | a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes'])) 243 | a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'], 244 | b[i]['gt_classes'])) 245 | a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'], 246 | b[i]['gt_overlaps']]) 247 | a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'], 248 | b[i]['seg_areas'])) 249 | return a 250 | 251 | def competition_mode(self, on): 252 | """Turn competition mode on or off.""" 253 | pass 254 | -------------------------------------------------------------------------------- /Seminar6/lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | """Hacky tool to convert file system layout of MCG boxes downloaded from 5 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 6 | so that it's consistent with those computed by Jan Hosang (see: 7 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 8 | computing/research/object-recognition-and-scene-understanding/how- 9 | good-are-detection-proposals-really/) 10 | 11 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 12 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 
13 | """ 14 | 15 | def munge(src_dir): 16 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 17 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 18 | 19 | files = os.listdir(src_dir) 20 | for fn in files: 21 | base, ext = os.path.splitext(fn) 22 | # first 14 chars / first 22 chars / all chars + .mat 23 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 24 | first = base[:14] 25 | second = base[:22] 26 | dst_dir = os.path.join('MCG', 'mat', first, second) 27 | if not os.path.exists(dst_dir): 28 | os.makedirs(dst_dir) 29 | src = os.path.join(src_dir, fn) 30 | dst = os.path.join(dst_dir, fn) 31 | print 'MV: {} -> {}'.format(src, dst) 32 | os.rename(src, dst) 33 | 34 | if __name__ == '__main__': 35 | # src_dir should look something like: 36 | # src_dir = 'MCG-COCO-val2014-boxes' 37 | src_dir = sys.argv[1] 38 | munge(src_dir) 39 | -------------------------------------------------------------------------------- /Seminar6/lib/datasets/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | 7 | import xml.etree.ElementTree as ET 8 | import os 9 | import cPickle 10 | import numpy as np 11 | 12 | def parse_rec(filename): 13 | """ Parse a PASCAL VOC xml file """ 14 | tree = ET.parse(filename) 15 | objects = [] 16 | for obj in tree.findall('object'): 17 | obj_struct = {} 18 | obj_struct['name'] = obj.find('name').text 19 | obj_struct['pose'] = obj.find('pose').text 20 | obj_struct['truncated'] = int(obj.find('truncated').text) 21 | obj_struct['difficult'] = int(obj.find('difficult').text) 22 | bbox = obj.find('bndbox') 23 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 24 | int(bbox.find('ymin').text), 25 | int(bbox.find('xmax').text), 26 | int(bbox.find('ymax').text)] 27 | objects.append(obj_struct) 28 | 29 | return objects 30 | 31 | def voc_ap(rec, prec, use_07_metric=False): 32 | """ ap = voc_ap(rec, prec, [use_07_metric]) 33 | Compute VOC AP given precision and recall. 34 | If use_07_metric is true, uses the 35 | VOC 07 11 point method (default:False). 36 | """ 37 | if use_07_metric: 38 | # 11 point metric 39 | ap = 0. 40 | for t in np.arange(0., 1.1, 0.1): 41 | if np.sum(rec >= t) == 0: 42 | p = 0 43 | else: 44 | p = np.max(prec[rec >= t]) 45 | ap = ap + p / 11. 46 | else: 47 | # correct AP calculation 48 | # first append sentinel values at the end 49 | mrec = np.concatenate(([0.], rec, [1.])) 50 | mpre = np.concatenate(([0.], prec, [0.])) 51 | 52 | # compute the precision envelope 53 | for i in range(mpre.size - 1, 0, -1): 54 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 55 | 56 | # to calculate area under PR curve, look for points 57 | # where X axis (recall) changes value 58 | i = np.where(mrec[1:] != mrec[:-1])[0] 59 | 60 | # and sum (\Delta recall) * prec 61 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 62 | return ap 63 | 64 | def voc_eval(detpath, 65 | annopath, 66 | imagesetfile, 67 | classname, 68 | cachedir, 69 | ovthresh=0.5, 70 | use_07_metric=False): 71 | """rec, prec, ap = voc_eval(detpath, 72 | annopath, 73 | imagesetfile, 74 | classname, 75 | [ovthresh], 76 | [use_07_metric]) 77 | 78 | Top level function that does the PASCAL VOC evaluation. 
79 | 80 | detpath: Path to detections 81 | detpath.format(classname) should produce the detection results file. 82 | annopath: Path to annotations 83 | annopath.format(imagename) should be the xml annotations file. 84 | imagesetfile: Text file containing the list of images, one image per line. 85 | classname: Category name (duh) 86 | cachedir: Directory for caching the annotations 87 | [ovthresh]: Overlap threshold (default = 0.5) 88 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 89 | (default False) 90 | """ 91 | # assumes detections are in detpath.format(classname) 92 | # assumes annotations are in annopath.format(imagename) 93 | # assumes imagesetfile is a text file with each line an image name 94 | # cachedir caches the annotations in a pickle file 95 | 96 | # first load gt 97 | if not os.path.isdir(cachedir): 98 | os.mkdir(cachedir) 99 | cachefile = os.path.join(cachedir, 'annots.pkl') 100 | # read list of images 101 | with open(imagesetfile, 'r') as f: 102 | lines = f.readlines() 103 | imagenames = [x.strip() for x in lines] 104 | 105 | if not os.path.isfile(cachefile): 106 | # load annots 107 | recs = {} 108 | for i, imagename in enumerate(imagenames): 109 | recs[imagename] = parse_rec(annopath.format(imagename)) 110 | if i % 100 == 0: 111 | print 'Reading annotation for {:d}/{:d}'.format( 112 | i + 1, len(imagenames)) 113 | # save 114 | print 'Saving cached annotations to {:s}'.format(cachefile) 115 | with open(cachefile, 'w') as f: 116 | cPickle.dump(recs, f) 117 | else: 118 | # load 119 | with open(cachefile, 'r') as f: 120 | recs = cPickle.load(f) 121 | 122 | # extract gt objects for this class 123 | class_recs = {} 124 | npos = 0 125 | for imagename in imagenames: 126 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 127 | bbox = np.array([x['bbox'] for x in R]) 128 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 129 | det = [False] * len(R) 130 | npos = npos + sum(~difficult) 131 | class_recs[imagename] = {'bbox': bbox, 132 | 'difficult': difficult, 133 | 'det': det} 134 | 135 | # read dets 136 | detfile = detpath.format(classname) 137 | with open(detfile, 'r') as f: 138 | lines = f.readlines() 139 | 140 | splitlines = [x.strip().split(' ') for x in lines] 141 | image_ids = [x[0] for x in splitlines] 142 | confidence = np.array([float(x[1]) for x in splitlines]) 143 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 144 | print BB 145 | 146 | # sort by confidence 147 | sorted_ind = np.argsort(-confidence) 148 | sorted_scores = np.sort(-confidence) 149 | BB = BB[sorted_ind, :] 150 | image_ids = [image_ids[x] for x in sorted_ind] 151 | 152 | # go down dets and mark TPs and FPs 153 | nd = len(image_ids) 154 | tp = np.zeros(nd) 155 | fp = np.zeros(nd) 156 | for d in range(nd): 157 | R = class_recs[image_ids[d]] 158 | bb = BB[d, :].astype(float) 159 | ovmax = -np.inf 160 | BBGT = R['bbox'].astype(float) 161 | 162 | if BBGT.size > 0: 163 | # compute overlaps 164 | # intersection 165 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 166 | iymin = np.maximum(BBGT[:, 1], bb[1]) 167 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 168 | iymax = np.minimum(BBGT[:, 3], bb[3]) 169 | iw = np.maximum(ixmax - ixmin + 1., 0.) 170 | ih = np.maximum(iymax - iymin + 1., 0.) 171 | inters = iw * ih 172 | 173 | # union 174 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 175 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 176 | (BBGT[:, 3] - BBGT[:, 1] + 1.) 
- inters) 177 | 178 | overlaps = inters / uni 179 | ovmax = np.max(overlaps) 180 | jmax = np.argmax(overlaps) 181 | 182 | if ovmax > ovthresh: 183 | if not R['difficult'][jmax]: 184 | if not R['det'][jmax]: 185 | tp[d] = 1. 186 | R['det'][jmax] = 1 187 | else: 188 | fp[d] = 1. 189 | else: 190 | fp[d] = 1. 191 | 192 | # compute precision recall 193 | fp = np.cumsum(fp) 194 | tp = np.cumsum(tp) 195 | rec = tp / float(npos) 196 | # avoid divide by zero in case the first detection matches a difficult 197 | # ground truth 198 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 199 | ap = voc_ap(rec, prec, use_07_metric) 200 | 201 | return rec, prec, ap 202 | -------------------------------------------------------------------------------- /Seminar6/lib/fast_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /Seminar6/lib/fast_rcnn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def bbox_transform(ex_rois, gt_rois): 11 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 12 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 13 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 14 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 15 | 16 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 17 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 18 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 19 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 20 | 21 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 22 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 23 | targets_dw = np.log(gt_widths / ex_widths) 24 | targets_dh = np.log(gt_heights / ex_heights) 25 | 26 | targets = np.vstack( 27 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 28 | return targets 29 | 30 | def bbox_transform_inv(boxes, deltas): 31 | if boxes.shape[0] == 0: 32 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 33 | 34 | boxes = boxes.astype(deltas.dtype, copy=False) 35 | 36 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 37 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 38 | ctr_x = boxes[:, 0] + 0.5 * widths 39 | ctr_y = boxes[:, 1] + 0.5 * heights 40 | 41 | dx = deltas[:, 0::4] 42 | dy = deltas[:, 1::4] 43 | dw = deltas[:, 2::4] 44 | dh = deltas[:, 3::4] 45 | 46 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 47 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 48 | pred_w = np.exp(dw) * widths[:, np.newaxis] 49 | pred_h = np.exp(dh) * heights[:, np.newaxis] 50 | 51 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 52 | # x1 53 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 54 | # y1 55 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 56 | # x2 57 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 58 | # y2 59 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 60 | 61 | return pred_boxes 62 | 63 | def clip_boxes(boxes, im_shape): 64 
| """ 65 | Clip boxes to image boundaries. 66 | """ 67 | 68 | # x1 >= 0 69 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 70 | # y1 >= 0 71 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 72 | # x2 < im_shape[1] 73 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 74 | # y2 < im_shape[0] 75 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 76 | return boxes 77 | -------------------------------------------------------------------------------- /Seminar6/lib/fast_rcnn/config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Fast R-CNN config system. 9 | 10 | This file specifies default config options for Fast R-CNN. You should not 11 | change values in this file. Instead, you should write a config file (in yaml) 12 | and use cfg_from_file(yaml_file) to load it and override the default options. 13 | 14 | Most tools in $ROOT/tools take a --cfg option to specify an override file. 15 | - See tools/{train,test}_net.py for example code that uses cfg_from_file() 16 | - See experiments/cfgs/*.yml for example YAML config override files 17 | """ 18 | 19 | import os 20 | import os.path as osp 21 | import numpy as np 22 | # `pip install easydict` if you don't have it 23 | from easydict import EasyDict as edict 24 | 25 | __C = edict() 26 | # Consumers can get config by: 27 | # from fast_rcnn_config import cfg 28 | cfg = __C 29 | 30 | # 31 | # Training options 32 | # 33 | 34 | __C.TRAIN = edict() 35 | 36 | # Scales to use during training (can list multiple scales) 37 | # Each scale is the pixel size of an image's shortest side 38 | __C.TRAIN.SCALES = (600,) 39 | 40 | # Max pixel size of the longest side of a scaled input image 41 | __C.TRAIN.MAX_SIZE = 1000 42 | 43 | # Images to use per minibatch 44 | __C.TRAIN.IMS_PER_BATCH = 2 45 | 46 | # Minibatch size (number of regions of interest [ROIs]) 47 | __C.TRAIN.BATCH_SIZE = 128 48 | 49 | # Fraction of minibatch that is labeled foreground (i.e. class > 0) 50 | __C.TRAIN.FG_FRACTION = 0.25 51 | 52 | # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 53 | __C.TRAIN.FG_THRESH = 0.5 54 | 55 | # Overlap threshold for a ROI to be considered background (class = 0 if 56 | # overlap in [LO, HI)) 57 | __C.TRAIN.BG_THRESH_HI = 0.5 58 | __C.TRAIN.BG_THRESH_LO = 0.1 59 | 60 | # Use horizontally-flipped images during training? 
61 | __C.TRAIN.USE_FLIPPED = True 62 | 63 | # Train bounding-box regressors 64 | __C.TRAIN.BBOX_REG = True 65 | 66 | # Overlap required between a ROI and ground-truth box in order for that ROI to 67 | # be used as a bounding-box regression training example 68 | __C.TRAIN.BBOX_THRESH = 0.5 69 | 70 | # Iterations between snapshots 71 | __C.TRAIN.SNAPSHOT_ITERS = 10000 72 | 73 | # solver.prototxt specifies the snapshot path prefix; this adds an optional 74 | # infix to yield the path: <prefix>[_<infix>]_iters_XYZ.caffemodel 75 | __C.TRAIN.SNAPSHOT_INFIX = '' 76 | 77 | # Normalize the targets (subtract empirical mean, divide by empirical stddev) 78 | __C.TRAIN.BBOX_NORMALIZE_TARGETS = True 79 | # Deprecated (inside weights) 80 | __C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) 81 | # Normalize the targets using "precomputed" (or made up) means and stdevs 82 | # (BBOX_NORMALIZE_TARGETS must also be True) 83 | __C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = False 84 | __C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 85 | __C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 86 | 87 | # Train using these proposals 88 | __C.TRAIN.PROPOSAL_METHOD = 'selective_search' 89 | 90 | # Make minibatches from images that have similar aspect ratios (i.e. both 91 | # tall and thin or both short and wide) in order to avoid wasting computation 92 | # on zero-padding. 93 | __C.TRAIN.ASPECT_GROUPING = True 94 | 95 | # 96 | # Testing options 97 | # 98 | 99 | __C.TEST = edict() 100 | 101 | # Scales to use during testing (can list multiple scales) 102 | # Each scale is the pixel size of an image's shortest side 103 | __C.TEST.SCALES = (600,) 104 | 105 | # Max pixel size of the longest side of a scaled input image 106 | __C.TEST.MAX_SIZE = 1000 107 | 108 | # Overlap threshold used for non-maximum suppression (suppress boxes with 109 | # IoU >= this threshold) 110 | __C.TEST.NMS = 0.3 111 | 112 | # Experimental: treat the (K+1) units in the cls_score layer as linear 113 | # predictors (trained, e.g., with one-vs-rest SVMs). 114 | __C.TEST.SVM = False 115 | 116 | # Test using bounding-box regressors 117 | __C.TEST.BBOX_REG = True 118 | 119 | # Test using these proposals 120 | __C.TEST.PROPOSAL_METHOD = 'selective_search' 121 | 122 | # 123 | # MISC 124 | # 125 | 126 | # Number of classes in the dataset. 127 | # For Pascal VOC 2007 it is 20 + 1 (background class). 128 | __C.NUM_CLASSES = 21 129 | 130 | # The mapping from image coordinates to feature map coordinates might cause 131 | # some boxes that are distinct in image space to become identical in feature 132 | # coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor 133 | # for identifying duplicate boxes. 134 | # 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16 135 | __C.DEDUP_BOXES = 1./16.
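# --- Illustration (added; not part of the original file): a minimal YAML
# override, assuming a hypothetical file experiments/cfgs/example.yml, loaded
# with cfg_from_file('experiments/cfgs/example.yml'). _merge_a_into_b (defined
# below) merges it into these defaults recursively; every key must already
# exist in the defaults, and scalar types must match.
#
#   TRAIN:
#     BATCH_SIZE: 64
#     FG_FRACTION: 0.5
#   TEST:
#     NMS: 0.4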
136 | 137 | # Pixel mean values (BGR order) as a (1, 1, 3) array 138 | # We use the same pixel mean for all networks even though it's not exactly what 139 | # they were trained with 140 | __C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]]) 141 | 142 | # For reproducibility 143 | __C.RNG_SEED = 3 144 | 145 | # A small number that's used many times 146 | __C.EPS = 1e-14 147 | 148 | # Root directory of project 149 | __C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..')) 150 | 151 | # Data directory 152 | __C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data')) 153 | 154 | # Model directory 155 | __C.MODELS_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'models', 'pascal_voc')) 156 | 157 | # Place outputs under an experiments directory 158 | __C.EXP_DIR = 'default' 159 | 160 | # Use GPU implementation of non-maximum suppression 161 | __C.USE_GPU_NMS = False 162 | 163 | # Default GPU device id 164 | __C.GPU_ID = 0 165 | 166 | 167 | def get_output_dir(imdb, net=None): 168 | """Return the directory where experimental artifacts are placed. 169 | If the directory does not exist, it is created. 170 | 171 | A canonical path is built using the name from an imdb and a network 172 | (if not None). 173 | """ 174 | outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name)) 175 | if net is not None: 176 | outdir = osp.join(outdir, net.name) 177 | if not os.path.exists(outdir): 178 | os.makedirs(outdir) 179 | return outdir 180 | 181 | def _merge_a_into_b(a, b): 182 | """Merge config dictionary a into config dictionary b, clobbering the 183 | options in b whenever they are also specified in a. 184 | """ 185 | if type(a) is not edict: 186 | return 187 | 188 | for k, v in a.iteritems(): 189 | # a must specify keys that are in b 190 | if not b.has_key(k): 191 | raise KeyError('{} is not a valid config key'.format(k)) 192 | 193 | # the types must match, too 194 | old_type = type(b[k]) 195 | if old_type is not type(v): 196 | if isinstance(b[k], np.ndarray): 197 | v = np.array(v, dtype=b[k].dtype) 198 | else: 199 | raise ValueError(('Type mismatch ({} vs. 
{}) ' 200 | 'for config key: {}').format(type(b[k]), 201 | type(v), k)) 202 | 203 | # recursively merge dicts 204 | if type(v) is edict: 205 | try: 206 | _merge_a_into_b(a[k], b[k]) 207 | except: 208 | print('Error under config key: {}'.format(k)) 209 | raise 210 | else: 211 | b[k] = v 212 | 213 | def cfg_from_file(filename): 214 | """Load a config file and merge it into the default options.""" 215 | import yaml 216 | with open(filename, 'r') as f: 217 | yaml_cfg = edict(yaml.load(f)) 218 | 219 | _merge_a_into_b(yaml_cfg, __C) 220 | 221 | def cfg_from_list(cfg_list): 222 | """Set config keys via list (e.g., from command line).""" 223 | from ast import literal_eval 224 | assert len(cfg_list) % 2 == 0 225 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 226 | key_list = k.split('.') 227 | d = __C 228 | for subkey in key_list[:-1]: 229 | assert d.has_key(subkey) 230 | d = d[subkey] 231 | subkey = key_list[-1] 232 | assert d.has_key(subkey) 233 | try: 234 | value = literal_eval(v) 235 | except: 236 | # handle the case when v is a string literal 237 | value = v 238 | assert type(value) == type(d[subkey]), \ 239 | 'type {} does not match original type {}'.format( 240 | type(value), type(d[subkey])) 241 | d[subkey] = value 242 | -------------------------------------------------------------------------------- /Seminar6/lib/fast_rcnn/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from fast_rcnn.config import cfg 9 | from nms.cpu_nms import cpu_nms 10 | 11 | def nms(dets, thresh, force_cpu=False): 12 | """Dispatch NMS. Only the CPU implementation is wired up here; force_cpu is kept for API compatibility.""" 13 | 14 | if dets.shape[0] == 0: 15 | return [] 16 | 17 | return cpu_nms(dets, thresh) 18 | -------------------------------------------------------------------------------- /Seminar6/lib/fast_rcnn/test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Test a Fast R-CNN network on an imdb (image database).""" 9 | 10 | from fast_rcnn.config import cfg, get_output_dir 11 | from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv 12 | import argparse 13 | from utils.timer import Timer 14 | import numpy as np 15 | import cv2 16 | from fast_rcnn.nms_wrapper import nms 17 | import cPickle 18 | from utils.blob import im_list_to_blob 19 | import os 20 | 21 | def _get_image_blob(im): 22 | """Converts an image into a network input.
23 | 24 | Arguments: 25 | im (ndarray): a color image in BGR order 26 | 27 | Returns: 28 | blob (ndarray): a data blob holding an image pyramid 29 | im_scale_factors (list): list of image scales (relative to im) used 30 | in the image pyramid 31 | """ 32 | im_orig = im.astype(np.float32, copy=True) 33 | im_orig -= cfg.PIXEL_MEANS 34 | 35 | im_shape = im_orig.shape 36 | im_size_min = np.min(im_shape[0:2]) 37 | im_size_max = np.max(im_shape[0:2]) 38 | 39 | processed_ims = [] 40 | im_scale_factors = [] 41 | 42 | for target_size in cfg.TEST.SCALES: 43 | im_scale = float(target_size) / float(im_size_min) 44 | # Prevent the biggest axis from being more than MAX_SIZE 45 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 46 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 47 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 48 | interpolation=cv2.INTER_LINEAR) 49 | im_scale_factors.append(im_scale) 50 | processed_ims.append(im) 51 | 52 | # Create a blob to hold the input images 53 | blob = im_list_to_blob(processed_ims) 54 | 55 | return blob, np.array(im_scale_factors) 56 | 57 | def _get_rois_blob(im_rois, im_scale_factors): 58 | """Converts RoIs into network inputs. 59 | 60 | Arguments: 61 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 62 | im_scale_factors (list): scale factors as returned by _get_image_blob 63 | 64 | Returns: 65 | blob (ndarray): R x 5 matrix of RoIs in the image pyramid 66 | """ 67 | rois, levels = _project_im_rois(im_rois, im_scale_factors) 68 | rois_blob = np.hstack((levels, rois)) 69 | return rois_blob.astype(np.float32, copy=False) 70 | 71 | def _project_im_rois(im_rois, scales): 72 | """Project image RoIs into the image pyramid built by _get_image_blob. 73 | 74 | Arguments: 75 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 76 | scales (list): scale factors as returned by _get_image_blob 77 | 78 | Returns: 79 | rois (ndarray): R x 4 matrix of projected RoI coordinates 80 | levels (list): image pyramid levels used by each projected RoI 81 | """ 82 | im_rois = im_rois.astype(np.float, copy=False) 83 | 84 | if len(scales) > 1: 85 | widths = im_rois[:, 2] - im_rois[:, 0] + 1 86 | heights = im_rois[:, 3] - im_rois[:, 1] + 1 87 | 88 | areas = widths * heights 89 | scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2) 90 | diff_areas = np.abs(scaled_areas - 224 * 224) 91 | levels = diff_areas.argmin(axis=1)[:, np.newaxis] 92 | else: 93 | levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) 94 | 95 | rois = im_rois * scales[levels] 96 | 97 | return rois, levels 98 | 99 | def _get_blobs(im, rois): 100 | """Convert an image and RoIs within that image into network inputs.""" 101 | blobs = {'data' : None, 'rois' : None} 102 | blobs['data'], im_scale_factors = _get_image_blob(im) 103 | blobs['rois'] = _get_rois_blob(rois, im_scale_factors) 104 | return blobs, im_scale_factors 105 | 106 | def im_detect(net, im, boxes): 107 | """Detect object classes in an image given object proposals. 
108 | 109 | Arguments: 110 | net (caffe.Net): Fast R-CNN network to use 111 | im (ndarray): color image to test (in BGR order) 112 | boxes (ndarray): R x 4 array of object proposals 113 | 114 | Returns: 115 | scores (ndarray): R x K array of object class scores (K includes 116 | background as object category 0) 117 | boxes (ndarray): R x (4*K) array of predicted bounding boxes 118 | """ 119 | blobs, im_scales = _get_blobs(im, boxes) 120 | 121 | # When mapping from image ROIs to feature map ROIs, there's some aliasing 122 | # (some distinct image ROIs get mapped to the same feature ROI). 123 | # Here, we identify duplicate feature ROIs, so we only compute features 124 | # on the unique subset. 125 | if cfg.DEDUP_BOXES > 0: 126 | v = np.array([1, 1e3, 1e6, 1e9, 1e12]) 127 | hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v) 128 | _, index, inv_index = np.unique(hashes, return_index=True, 129 | return_inverse=True) 130 | blobs['rois'] = blobs['rois'][index, :] 131 | boxes = boxes[index, :] 132 | 133 | # do forward 134 | forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)} 135 | forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False) 136 | blobs_out = net.forward(**forward_kwargs) 137 | 138 | # use softmax estimated probabilities 139 | scores = blobs_out['cls_prob'] 140 | 141 | if cfg.TEST.BBOX_REG: 142 | # Apply bounding-box regression deltas 143 | box_deltas = blobs_out['bbox_pred'] 144 | pred_boxes = bbox_transform_inv(boxes, box_deltas) 145 | pred_boxes = clip_boxes(pred_boxes, im.shape) 146 | else: 147 | # Simply repeat the boxes, once for each class 148 | pred_boxes = np.tile(boxes, (1, scores.shape[1])) 149 | 150 | if cfg.DEDUP_BOXES > 0: 151 | # Map scores and predictions back to the original set of boxes 152 | scores = scores[inv_index, :] 153 | pred_boxes = pred_boxes[inv_index, :] 154 | 155 | return scores, pred_boxes 156 | 157 | def vis_detections(im, class_name, dets, thresh=0.3): 158 | """Visual debugging of detections.""" 159 | import matplotlib.pyplot as plt 160 | im = im[:, :, (2, 1, 0)] 161 | for i in xrange(np.minimum(10, dets.shape[0])): 162 | bbox = dets[i, :4] 163 | score = dets[i, -1] 164 | if score > thresh: 165 | plt.cla() 166 | plt.imshow(im) 167 | plt.gca().add_patch( 168 | plt.Rectangle((bbox[0], bbox[1]), 169 | bbox[2] - bbox[0], 170 | bbox[3] - bbox[1], fill=False, 171 | edgecolor='g', linewidth=3) 172 | ) 173 | plt.title('{} {:.3f}'.format(class_name, score)) 174 | plt.show() 175 | 176 | def apply_nms(all_boxes, thresh): 177 | """Apply non-maximum suppression to all predicted boxes output by the 178 | test_net method. 
179 | """ 180 | num_classes = len(all_boxes) 181 | num_images = len(all_boxes[0]) 182 | nms_boxes = [[[] for _ in xrange(num_images)] 183 | for _ in xrange(num_classes)] 184 | for cls_ind in xrange(num_classes): 185 | for im_ind in xrange(num_images): 186 | dets = all_boxes[cls_ind][im_ind] 187 | if dets == []: 188 | continue 189 | # CPU NMS is much faster than GPU NMS when the number of boxes 190 | # is relative small (e.g., < 10k) 191 | # TODO(rbg): autotune NMS dispatch 192 | keep = nms(dets, thresh, force_cpu=True) 193 | if len(keep) == 0: 194 | continue 195 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 196 | return nms_boxes 197 | 198 | def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False): 199 | """Test a Fast R-CNN network on an image database.""" 200 | num_images = len(imdb.image_index) 201 | # all detections are collected into: 202 | # all_boxes[cls][image] = N x 5 array of detections in 203 | # (x1, y1, x2, y2, score) 204 | all_boxes = [[[] for _ in xrange(num_images)] 205 | for _ in xrange(imdb.num_classes)] 206 | 207 | output_dir = get_output_dir(imdb, net) 208 | 209 | # timers 210 | _t = {'im_detect' : Timer(), 'misc' : Timer()} 211 | 212 | roidb = imdb.roidb 213 | 214 | for i in xrange(num_images): 215 | # filter out any ground truth boxes 216 | 217 | # The roidb may contain ground-truth rois (for example, if the roidb 218 | # comes from the training or val split). We only want to evaluate 219 | # detection on the *non*-ground-truth rois. We select those the rois 220 | # that have the gt_classes field set to 0, which means there's no 221 | # ground truth. 222 | box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] 223 | 224 | im = cv2.imread(imdb.image_path_at(i)) 225 | _t['im_detect'].tic() 226 | scores, boxes = im_detect(net, im, box_proposals) 227 | _t['im_detect'].toc() 228 | 229 | _t['misc'].tic() 230 | # skip j = 0, because it's the background class 231 | for j in xrange(1, imdb.num_classes): 232 | inds = np.where(scores[:, j] > thresh)[0] 233 | cls_scores = scores[inds, j] 234 | cls_boxes = boxes[inds, j*4:(j+1)*4] 235 | cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ 236 | .astype(np.float32, copy=False) 237 | keep = nms(cls_dets, cfg.TEST.NMS) 238 | cls_dets = cls_dets[keep, :] 239 | if vis: 240 | vis_detections(im, imdb.classes[j], cls_dets) 241 | all_boxes[j][i] = cls_dets 242 | 243 | # Limit to max_per_image detections *over all classes* 244 | if max_per_image > 0: 245 | image_scores = np.hstack([all_boxes[j][i][:, -1] 246 | for j in xrange(1, imdb.num_classes)]) 247 | if len(image_scores) > max_per_image: 248 | image_thresh = np.sort(image_scores)[-max_per_image] 249 | for j in xrange(1, imdb.num_classes): 250 | keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] 251 | all_boxes[j][i] = all_boxes[j][i][keep, :] 252 | _t['misc'].toc() 253 | 254 | print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ 255 | .format(i + 1, num_images, _t['im_detect'].average_time, 256 | _t['misc'].average_time) 257 | 258 | det_file = os.path.join(output_dir, 'detections.pkl') 259 | with open(det_file, 'wb') as f: 260 | cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) 261 | 262 | print 'Evaluating detections' 263 | imdb.evaluate_detections(all_boxes, output_dir) 264 | -------------------------------------------------------------------------------- /Seminar6/lib/fast_rcnn/train.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 
3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Train a Fast R-CNN network.""" 9 | 10 | from fast_rcnn.config import cfg 11 | import roi_data_layer.roidb as rdl_roidb 12 | from utils.timer import Timer 13 | import numpy as np 14 | import os 15 | 16 | from custom.solver import Solver 17 | 18 | class SolverWrapper(object): 19 | """A simple wrapper around Caffe's solver. 20 | This wrapper gives us control over the snapshotting process, which we 21 | use to unnormalize the learned bounding-box regression weights. 22 | """ 23 | 24 | def __init__(self, roidb, output_dir): 25 | """Initialize the SolverWrapper.""" 26 | self.output_dir = output_dir 27 | 28 | if cfg.TRAIN.BBOX_REG: 29 | print 'Computing bounding-box regression targets...' 30 | self.bbox_means, self.bbox_stds = \ 31 | rdl_roidb.add_bbox_regression_targets(roidb) 32 | print 'done' 33 | 34 | ################ You MIGHT want to instantiate your custom solver here. 35 | # Don't forget to supply roidb to the ROIPoolingLayer! 36 | # You should have the following line: 37 | # self.solver = Solver() 38 | 39 | def snapshot(self): 40 | """Save the solver state.""" 41 | 42 | infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX 43 | if cfg.TRAIN.SNAPSHOT_INFIX != '' else '') 44 | filename = (self.solver.snapshot_prefix + infix + 45 | '_iter_{:d}'.format(self.solver.iter) + '.pkl') 46 | filename = os.path.join(self.output_dir, filename) 47 | 48 | self.solver.save(str(filename)) 49 | print 'Wrote snapshot to: {:s}'.format(filename) 50 | 51 | return filename 52 | 53 | def train_model(self, max_iters): 54 | """Network training loop.""" 55 | last_snapshot_iter = -1 56 | timer = Timer() 57 | model_paths = [] 58 | while self.solver.iter < max_iters: 59 | # Make one SGD update 60 | timer.tic() 61 | 62 | self.solver.step() 63 | 64 | timer.toc() 65 | if self.solver.iter % (10 * self.solver.display_freq) == 0: 66 | print 'speed: {:.3f}s / iter'.format(timer.average_time) 67 | 68 | if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0: 69 | last_snapshot_iter = self.solver.iter 70 | model_paths.append(self.snapshot()) 71 | 72 | if last_snapshot_iter != self.solver.iter: 73 | model_paths.append(self.snapshot()) 74 | return model_paths 75 | 76 | def get_training_roidb(imdb): 77 | """Returns a roidb (Region of Interest database) for use in training.""" 78 | if cfg.TRAIN.USE_FLIPPED: 79 | print 'Appending horizontally-flipped training examples...' 80 | imdb.append_flipped_images() 81 | print 'done' 82 | 83 | print 'Preparing training data...'
84 | rdl_roidb.prepare_roidb(imdb) 85 | print 'done' 86 | 87 | return imdb.roidb 88 | 89 | def filter_roidb(roidb): 90 | """Remove roidb entries that have no usable RoIs.""" 91 | 92 | def is_valid(entry): 93 | # Valid images have: 94 | # (1) At least one foreground RoI OR 95 | # (2) At least one background RoI 96 | overlaps = entry['max_overlaps'] 97 | # find boxes with sufficient overlap 98 | fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] 99 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 100 | bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & 101 | (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 102 | # image is only valid if such boxes exist 103 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 104 | return valid 105 | 106 | num = len(roidb) 107 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 108 | num_after = len(filtered_roidb) 109 | print 'Filtered {} roidb entries: {} -> {}'.format(num - num_after, 110 | num, num_after) 111 | return filtered_roidb 112 | 113 | def train_net(roidb, output_dir, max_iters=40000): 114 | """Train a Fast R-CNN network.""" 115 | 116 | roidb = filter_roidb(roidb) 117 | sw = SolverWrapper(roidb, output_dir) 118 | 119 | print 'Solving...' 120 | model_paths = sw.train_model(max_iters) 121 | print 'done solving' 122 | return model_paths 123 | -------------------------------------------------------------------------------- /Seminar6/lib/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /Seminar6/lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar6/lib/nms/__init__.py -------------------------------------------------------------------------------- /Seminar6/lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef 
np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /Seminar6/lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /Seminar6/lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /Seminar6/lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top
= max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /Seminar6/lib/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /Seminar6/lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7
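# --- Usage sketch (added; not part of the original sources): the NMS variants
# above all take dets as an (N, 5) float32 array of (x1, y1, x2, y2, score)
# rows and return the indices of the boxes to keep. With the pure-Python
# baseline:
#
#   import numpy as np
#   from nms.py_cpu_nms import py_cpu_nms
#   dets = np.array([[10., 10., 50., 50., 0.9],
#                    [12., 12., 52., 52., 0.8],    # IoU ~0.83 with box 0
#                    [100., 100., 140., 140., 0.7]], dtype=np.float32)
#   keep = py_cpu_nms(dets, 0.3)                   # -> [0, 2]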
| -------------------------------------------------------------------------------- /Seminar6/lib/roi_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # Adapted for Theano usage by Yaroslav Ganin 7 | # -------------------------------------------------------- 8 | 9 | """The data layer used during training to train a Fast R-CNN network. 10 | 11 | RoIDataLayer implements a Caffe Python layer. 12 | """ 13 | 14 | from fast_rcnn.config import cfg 15 | from roi_data_layer.minibatch import get_minibatch 16 | import numpy as np 17 | import yaml 18 | 19 | class RoIDataLayer(object): 20 | """Fast R-CNN data layer used for training.""" 21 | 22 | def __init__(self): 23 | self.top = [] 24 | 25 | def _shuffle_roidb_inds(self): 26 | """Randomly permute the training roidb.""" 27 | if cfg.TRAIN.ASPECT_GROUPING: 28 | widths = np.array([r['width'] for r in self._roidb]) 29 | heights = np.array([r['height'] for r in self._roidb]) 30 | horz = (widths >= heights) 31 | vert = np.logical_not(horz) 32 | horz_inds = np.where(horz)[0] 33 | vert_inds = np.where(vert)[0] 34 | inds = np.hstack(( 35 | np.random.permutation(horz_inds), 36 | np.random.permutation(vert_inds))) 37 | inds = np.reshape(inds, (-1, 2)) 38 | row_perm = np.random.permutation(np.arange(inds.shape[0])) 39 | inds = np.reshape(inds[row_perm, :], (-1,)) 40 | self._perm = inds 41 | else: 42 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 43 | self._cur = 0 44 | 45 | def _get_next_minibatch_inds(self): 46 | """Return the roidb indices for the next minibatch.""" 47 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 48 | self._shuffle_roidb_inds() 49 | 50 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 51 | self._cur += cfg.TRAIN.IMS_PER_BATCH 52 | return db_inds 53 | 54 | def _get_next_minibatch(self): 55 | """Return the blobs to be used for the next minibatch.""" 56 | db_inds = self._get_next_minibatch_inds() 57 | minibatch_db = [self._roidb[i] for i in db_inds] 58 | return get_minibatch(minibatch_db, self._num_classes) 59 | 60 | def set_roidb(self, roidb): 61 | """Set the roidb to be used by this layer during training.""" 62 | self._roidb = roidb 63 | self._shuffle_roidb_inds() 64 | 65 | def setup(self): 66 | """Setup the RoIDataLayer.""" 67 | 68 | top = self.top 69 | 70 | self._num_classes = 21 71 | 72 | self._name_to_top_map = {} 73 | 74 | # data blob: holds a batch of N images, each with 3 channels 75 | idx = 0 76 | top.append(np.zeros((cfg.TRAIN.IMS_PER_BATCH, 3, 77 | max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE), dtype=np.single)) 78 | self._name_to_top_map['data'] = idx 79 | idx += 1 80 | 81 | # rois blob: holds R regions of interest, each is a 5-tuple 82 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 83 | # rectangle (x1, y1, x2, y2) 84 | top.append(np.zeros((1, 5), dtype=np.single)) 85 | self._name_to_top_map['rois'] = idx 86 | idx += 1 87 | 88 | # labels blob: R categorical labels in [0, ..., K] for K foreground 89 | # classes plus background 90 | top.append(np.zeros((1,), dtype=np.single)) 91 | self._name_to_top_map['labels'] = idx 92 | idx += 1 93 | 94 | if cfg.TRAIN.BBOX_REG: 95 | # bbox_targets blob: R bounding-box regression targets with 4 96 | # targets per class 97 | top.append(np.zeros((1, self._num_classes * 4), 
dtype=np.single)) 98 | self._name_to_top_map['bbox_targets'] = idx 99 | idx += 1 100 | 101 | # bbox_inside_weights blob: At most 4 targets per roi are active; 102 | # this binary vector specifies the subset of active targets 103 | top.append(np.zeros((1, self._num_classes * 4), dtype=np.single)) 104 | self._name_to_top_map['bbox_inside_weights'] = idx 105 | idx += 1 106 | 107 | top.append(np.zeros((1, self._num_classes * 4), dtype=np.single)) 108 | self._name_to_top_map['bbox_outside_weights'] = idx 109 | idx += 1 110 | 111 | print 'RoiDataLayer: name_to_top:', self._name_to_top_map 112 | assert len(top) == len(self._name_to_top_map) 113 | 114 | def forward(self): 115 | """Get blobs and copy them into this layer's top blob vector.""" 116 | blobs = self._get_next_minibatch() 117 | 118 | top = self.top 119 | 120 | for blob_name, blob in blobs.iteritems(): 121 | top_ind = self._name_to_top_map[blob_name] 122 | # Reshape net's input blobs 123 | top[top_ind].resize(blob.shape) 124 | # Copy data into net's input blobs 125 | top[top_ind][...] = blob.astype(np.float32, copy=False) 126 | -------------------------------------------------------------------------------- /Seminar6/lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | 10 | import numpy as np 11 | import numpy.random as npr 12 | import cv2 13 | from fast_rcnn.config import cfg 14 | from utils.blob import prep_im_for_blob, im_list_to_blob 15 | 16 | def get_minibatch(roidb, num_classes): 17 | """Given a roidb, construct a minibatch sampled from it.""" 18 | num_images = len(roidb) 19 | # Sample random scales to use for each image in this batch 20 | random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 21 | size=num_images) 22 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 23 | 'num_images ({}) must divide BATCH_SIZE ({})'.
\ 24 | format(num_images, cfg.TRAIN.BATCH_SIZE) 25 | rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images 26 | fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) 27 | 28 | # Get the input image blob, formatted for caffe 29 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 30 | 31 | blobs = {'data': im_blob} 32 | 33 | # Now, build the region of interest and label blobs 34 | rois_blob = np.zeros((0, 5), dtype=np.float32) 35 | labels_blob = np.zeros((0), dtype=np.float32) 36 | bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32) 37 | bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) 38 | # all_overlaps = [] 39 | for im_i in xrange(num_images): 40 | labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \ 41 | = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, 42 | num_classes) 43 | 44 | # Add to RoIs blob 45 | rois = _project_im_rois(im_rois, im_scales[im_i]) 46 | batch_ind = im_i * np.ones((rois.shape[0], 1)) 47 | rois_blob_this_image = np.hstack((batch_ind, rois)) 48 | rois_blob = np.vstack((rois_blob, rois_blob_this_image)) 49 | 50 | # Add to labels, bbox targets, and bbox loss blobs 51 | labels_blob = np.hstack((labels_blob, labels)) 52 | if cfg.TRAIN.BBOX_REG: 53 | bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets)) 54 | bbox_inside_blob = np.vstack((bbox_inside_blob, 55 | bbox_inside_weights)) 56 | # all_overlaps = np.hstack((all_overlaps, overlaps)) 57 | 58 | # For debug visualizations 59 | # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps) 60 | 61 | blobs['rois'] = rois_blob 62 | blobs['labels'] = labels_blob 63 | 64 | if cfg.TRAIN.BBOX_REG: 65 | blobs['bbox_targets'] = bbox_targets_blob 66 | blobs['bbox_inside_weights'] = bbox_inside_blob 67 | blobs['bbox_outside_weights'] = \ 68 | np.array(bbox_inside_blob > 0).astype(np.float32) 69 | 70 | return blobs 71 | 72 | def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): 73 | """Generate a random sample of RoIs comprising foreground and background 74 | examples. 
75 | """ 76 | # label = class RoI has max overlap with 77 | labels = roidb['max_classes'] 78 | overlaps = roidb['max_overlaps'] 79 | rois = roidb['boxes'] 80 | 81 | # Select foreground RoIs as those with >= FG_THRESH overlap 82 | fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] 83 | # Guard against the case when an image has fewer than fg_rois_per_image 84 | # foreground RoIs 85 | fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) 86 | # Sample foreground regions without replacement 87 | if fg_inds.size > 0: 88 | fg_inds = npr.choice( 89 | fg_inds, size=fg_rois_per_this_image, replace=False) 90 | 91 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 92 | bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & 93 | (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 94 | # Compute number of background RoIs to take from this image (guarding 95 | # against there being fewer than desired) 96 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 97 | bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, 98 | bg_inds.size) 99 | # Sample foreground regions without replacement 100 | if bg_inds.size > 0: 101 | bg_inds = npr.choice( 102 | bg_inds, size=bg_rois_per_this_image, replace=False) 103 | 104 | # The indices that we're selecting (both fg and bg) 105 | keep_inds = np.append(fg_inds, bg_inds) 106 | # Select sampled values from various arrays: 107 | labels = labels[keep_inds] 108 | # Clamp labels for the background RoIs to 0 109 | labels[fg_rois_per_this_image:] = 0 110 | overlaps = overlaps[keep_inds] 111 | rois = rois[keep_inds] 112 | 113 | if cfg.TRAIN.BBOX_REG: 114 | bbox_targets, bbox_inside_weights = _get_bbox_regression_labels( 115 | roidb['bbox_targets'][keep_inds, :], num_classes) 116 | else: 117 | bbox_targets, bbox_inside_weights = [], [] 118 | 119 | return labels, overlaps, rois, bbox_targets, bbox_inside_weights 120 | 121 | def _get_image_blob(roidb, scale_inds): 122 | """Builds an input blob from the images in the roidb at the specified 123 | scales. 124 | """ 125 | num_images = len(roidb) 126 | processed_ims = [] 127 | im_scales = [] 128 | for i in xrange(num_images): 129 | im = cv2.imread(roidb[i]['image']) 130 | if roidb[i]['flipped']: 131 | im = im[:, ::-1, :] 132 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 133 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 134 | cfg.TRAIN.MAX_SIZE) 135 | im_scales.append(im_scale) 136 | processed_ims.append(im) 137 | 138 | # Create a blob to hold the input images 139 | blob = im_list_to_blob(processed_ims) 140 | 141 | return blob, im_scales 142 | 143 | def _project_im_rois(im_rois, im_scale_factor): 144 | """Project image RoIs into the rescaled training image.""" 145 | rois = im_rois * im_scale_factor 146 | return rois 147 | 148 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 149 | """Bounding-box regression targets are stored in a compact form in the 150 | roidb. 151 | 152 | This function expands those targets into the 4-of-4*K representation used 153 | by the network (i.e. only one class has non-zero targets). The loss weights 154 | are similarly expanded. 
155 | 156 | Returns: 157 | bbox_target_data (ndarray): N x 4K blob of regression targets 158 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 159 | """ 160 | clss = bbox_target_data[:, 0] 161 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 162 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 163 | inds = np.where(clss > 0)[0] 164 | for ind in inds: 165 | cls = clss[ind] 166 | start = 4 * cls 167 | end = start + 4 168 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 169 | bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS 170 | return bbox_targets, bbox_inside_weights 171 | 172 | def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps): 173 | """Visualize a mini-batch for debugging.""" 174 | import matplotlib.pyplot as plt 175 | for i in xrange(rois_blob.shape[0]): 176 | rois = rois_blob[i, :] 177 | im_ind = rois[0] 178 | roi = rois[1:] 179 | im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() 180 | im += cfg.PIXEL_MEANS 181 | im = im[:, :, (2, 1, 0)] 182 | im = im.astype(np.uint8) 183 | cls = labels_blob[i] 184 | plt.imshow(im) 185 | print 'class: ', cls, ' overlap: ', overlaps[i] 186 | plt.gca().add_patch( 187 | plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], 188 | roi[3] - roi[1], fill=False, 189 | edgecolor='r', linewidth=3) 190 | ) 191 | plt.show() 192 | -------------------------------------------------------------------------------- /Seminar6/lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | 10 | import numpy as np 11 | from fast_rcnn.config import cfg 12 | from fast_rcnn.bbox_transform import bbox_transform 13 | from utils.cython_bbox import bbox_overlaps 14 | import PIL 15 | 16 | def prepare_roidb(imdb): 17 | """Enrich the imdb's roidb by adding some derived quantities that 18 | are useful for training. This function precomputes the maximum 19 | overlap, taken over ground-truth boxes, between each ROI and 20 | each ground-truth box. The class with maximum overlap is also 21 | recorded. 
22 | """ 23 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size 24 | for i in xrange(imdb.num_images)] 25 | roidb = imdb.roidb 26 | for i in xrange(len(imdb.image_index)): 27 | roidb[i]['image'] = imdb.image_path_at(i) 28 | roidb[i]['width'] = sizes[i][0] 29 | roidb[i]['height'] = sizes[i][1] 30 | # need gt_overlaps as a dense array for argmax 31 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 32 | # max overlap with gt over classes (columns) 33 | max_overlaps = gt_overlaps.max(axis=1) 34 | # gt class that had the max overlap 35 | max_classes = gt_overlaps.argmax(axis=1) 36 | roidb[i]['max_classes'] = max_classes 37 | roidb[i]['max_overlaps'] = max_overlaps 38 | # sanity checks 39 | # max overlap of 0 => class should be zero (background) 40 | zero_inds = np.where(max_overlaps == 0)[0] 41 | assert all(max_classes[zero_inds] == 0) 42 | # max overlap > 0 => class should not be zero (must be a fg class) 43 | nonzero_inds = np.where(max_overlaps > 0)[0] 44 | assert all(max_classes[nonzero_inds] != 0) 45 | 46 | def add_bbox_regression_targets(roidb): 47 | """Add information needed to train bounding-box regressors.""" 48 | assert len(roidb) > 0 49 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 50 | 51 | num_images = len(roidb) 52 | # Infer number of classes from the number of columns in gt_overlaps 53 | num_classes = roidb[0]['gt_overlaps'].shape[1] 54 | for im_i in xrange(num_images): 55 | rois = roidb[im_i]['boxes'] 56 | max_overlaps = roidb[im_i]['max_overlaps'] 57 | max_classes = roidb[im_i]['max_classes'] 58 | roidb[im_i]['bbox_targets'] = \ 59 | _compute_targets(rois, max_overlaps, max_classes) 60 | 61 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 62 | # Use fixed / precomputed "means" and "stds" instead of empirical values 63 | means = np.tile( 64 | np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1)) 65 | stds = np.tile( 66 | np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1)) 67 | else: 68 | # Compute values needed for means and stds 69 | # var(x) = E(x^2) - E(x)^2 70 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS 71 | sums = np.zeros((num_classes, 4)) 72 | squared_sums = np.zeros((num_classes, 4)) 73 | for im_i in xrange(num_images): 74 | targets = roidb[im_i]['bbox_targets'] 75 | for cls in xrange(1, num_classes): 76 | cls_inds = np.where(targets[:, 0] == cls)[0] 77 | if cls_inds.size > 0: 78 | class_counts[cls] += cls_inds.size 79 | sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) 80 | squared_sums[cls, :] += \ 81 | (targets[cls_inds, 1:] ** 2).sum(axis=0) 82 | 83 | means = sums / class_counts 84 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 85 | 86 | print 'bbox target means:' 87 | print means 88 | print means[1:, :].mean(axis=0) # ignore bg class 89 | print 'bbox target stdevs:' 90 | print stds 91 | print stds[1:, :].mean(axis=0) # ignore bg class 92 | 93 | # Normalize targets 94 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS: 95 | print "Normalizing targets" 96 | for im_i in xrange(num_images): 97 | targets = roidb[im_i]['bbox_targets'] 98 | for cls in xrange(1, num_classes): 99 | cls_inds = np.where(targets[:, 0] == cls)[0] 100 | roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] 101 | roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] 102 | else: 103 | print "NOT normalizing targets" 104 | 105 | # These values will be needed for making predictions 106 | # (the predicts will need to be unnormalized and uncentered) 107 | return means.ravel(), stds.ravel() 108 | 109 | def _compute_targets(rois, overlaps, 
labels): 110 | """Compute bounding-box regression targets for an image.""" 111 | # Indices of ground-truth ROIs 112 | gt_inds = np.where(overlaps == 1)[0] 113 | if len(gt_inds) == 0: 114 | # Bail if the image has no ground-truth ROIs 115 | return np.zeros((rois.shape[0], 5), dtype=np.float32) 116 | # Indices of examples for which we try to make predictions 117 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] 118 | 119 | # Get IoU overlap between each ex ROI and gt ROI 120 | ex_gt_overlaps = bbox_overlaps( 121 | np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), 122 | np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) 123 | 124 | # Find which gt ROI each ex ROI has max overlap with: 125 | # this will be the ex ROI's gt target 126 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 127 | gt_rois = rois[gt_inds[gt_assignment], :] 128 | ex_rois = rois[ex_inds, :] 129 | 130 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 131 | targets[ex_inds, 0] = labels[ex_inds] 132 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 133 | return targets 134 | -------------------------------------------------------------------------------- /Seminar6/lib/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import subprocess 14 | import numpy as np 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted from 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | # Obtain the numpy include directory. This logic works across numpy versions. 27 | try: 28 | numpy_include = np.get_include() 29 | except AttributeError: 30 | numpy_include = np.get_numpy_include() 31 | 32 | def customize_compiler_for_nvcc(self): 33 | """inject deep into distutils to customize how the dispatch 34 | to gcc/nvcc works. 35 | 36 | If you subclass UnixCCompiler, it's not trivial to get your subclass 37 | injected in, and still have the right customizations (i.e. 38 | distutils.sysconfig.customize_compiler) run on it. So instead of going 39 | the OO route, I have this. Note, it's kind of like a weird functional 40 | subclassing going on.""" 41 | 42 | # tell the compiler it can process .cu 43 | self.src_extensions.append('.cu') 44 | 45 | # save references to the default compiler_so and _compile methods 46 | default_compiler_so = self.compiler_so 47 | super = self._compile 48 | 49 | # now redefine the _compile method. This gets executed for each 50 | # object but distutils doesn't have the ability to change compilers 51 | # based on source extension: we add it.
52 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 53 | if os.path.splitext(src)[1] == '.cu': 54 | # use the cuda for .cu files 55 | self.set_executable('compiler_so', CUDA['nvcc']) 56 | # use only a subset of the extra_postargs, which are 1-1 translated 57 | # from the extra_compile_args in the Extension class 58 | postargs = extra_postargs['nvcc'] 59 | else: 60 | postargs = extra_postargs['gcc'] 61 | 62 | super(obj, src, ext, cc_args, postargs, pp_opts) 63 | # reset the default compiler_so, which we might have changed for cuda 64 | self.compiler_so = default_compiler_so 65 | 66 | # inject our redefined _compile method into the class 67 | self._compile = _compile 68 | 69 | 70 | # run the customize_compiler 71 | class custom_build_ext(build_ext): 72 | def build_extensions(self): 73 | customize_compiler_for_nvcc(self.compiler) 74 | build_ext.build_extensions(self) 75 | 76 | 77 | ext_modules = [ 78 | Extension( 79 | "utils.cython_bbox", 80 | ["utils/bbox.pyx"], 81 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 82 | include_dirs = [numpy_include] 83 | ), 84 | Extension( 85 | "nms.cpu_nms", 86 | ["nms/cpu_nms.pyx"], 87 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 88 | include_dirs = [numpy_include] 89 | ), 90 | ] 91 | 92 | setup( 93 | name='fast_rcnn', 94 | ext_modules=ext_modules, 95 | # inject our custom trigger 96 | cmdclass={'build_ext': custom_build_ext}, 97 | ) 98 | -------------------------------------------------------------------------------- /Seminar6/lib/transform/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar6/lib/transform/__init__.py -------------------------------------------------------------------------------- /Seminar6/lib/transform/torch_image_transform_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # -------------------------------------------------------- 5 | 6 | """ Transform images for compatibility with models trained with 7 | https://github.com/facebook/fb.resnet.torch. 8 | 9 | Usage in model prototxt: 10 | 11 | layer { 12 | name: 'data_xform' 13 | type: 'Python' 14 | bottom: 'data_caffe' 15 | top: 'data' 16 | python_param { 17 | module: 'transform.torch_image_transform_layer' 18 | layer: 'TorchImageTransformLayer' 19 | } 20 | } 21 | """ 22 | 23 | import caffe 24 | from fast_rcnn.config import cfg 25 | import numpy as np 26 | 27 | class TorchImageTransformLayer(caffe.Layer): 28 | def setup(self, bottom, top): 29 | # (1, 3, 1, 1) shaped arrays 30 | self.PIXEL_MEANS = \ 31 | np.array([[[[0.48462227599918]], 32 | [[0.45624044862054]], 33 | [[0.40588363755159]]]]) 34 | self.PIXEL_STDS = \ 35 | np.array([[[[0.22889466674951]], 36 | [[0.22446679341259]], 37 | [[0.22495548344775]]]]) 38 | # The default ("old") pixel means that were already subtracted 39 | channel_swap = (0, 3, 1, 2) 40 | self.OLD_PIXEL_MEANS = \ 41 | cfg.PIXEL_MEANS[np.newaxis, :, :, :].transpose(channel_swap) 42 | 43 | top[0].reshape(*(bottom[0].shape)) 44 | 45 | def forward(self, bottom, top): 46 | ims = bottom[0].data 47 | # Invert the channel means that were already subtracted 48 | ims += self.OLD_PIXEL_MEANS 49 | # 1. Permute BGR to RGB and normalize to [0, 1] 50 | ims = ims[:, [2, 1, 0], :, :] / 255.0 51 | # 2. 
Remove channel means 52 | ims -= self.PIXEL_MEANS 53 | # 3. Standardize channels 54 | ims /= self.PIXEL_STDS 55 | top[0].reshape(*(ims.shape)) 56 | top[0].data[...] = ims 57 | 58 | def backward(self, top, propagate_down, bottom): 59 | """This layer does not propagate gradients.""" 60 | pass 61 | 62 | def reshape(self, bottom, top): 63 | """Reshaping happens during the call to forward.""" 64 | pass 65 | -------------------------------------------------------------------------------- /Seminar6/lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.so 3 | -------------------------------------------------------------------------------- /Seminar6/lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /Seminar6/lib/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /Seminar6/lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | import cv2 12 | 13 | def im_list_to_blob(ims): 14 | """Convert a list of images into a network input. 
15 | 16 | Assumes images are already prepared (means subtracted, BGR order, ...). 17 | """ 18 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 19 | num_images = len(ims) 20 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 21 | dtype=np.float32) 22 | for i in xrange(num_images): 23 | im = ims[i] 24 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 25 | # Move channels (axis 3) to axis 1 26 | # Axis order will become: (batch elem, channel, height, width) 27 | channel_swap = (0, 3, 1, 2) 28 | blob = blob.transpose(channel_swap) 29 | return blob 30 | 31 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 32 | """Mean subtract and scale an image for use in a blob.""" 33 | im = im.astype(np.float32, copy=False) 34 | im -= pixel_means 35 | im_shape = im.shape 36 | im_size_min = np.min(im_shape[0:2]) 37 | im_size_max = np.max(im_shape[0:2]) 38 | im_scale = float(target_size) / float(im_size_min) 39 | # Prevent the biggest axis from being more than MAX_SIZE 40 | if np.round(im_scale * im_size_max) > max_size: 41 | im_scale = float(max_size) / float(im_size_max) 42 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 43 | interpolation=cv2.INTER_LINEAR) 44 | 45 | return im, im_scale 46 | -------------------------------------------------------------------------------- /Seminar6/lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 
18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /Seminar6/notebook/img/rcnn_slide.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar6/notebook/img/rcnn_slide.jpg -------------------------------------------------------------------------------- /Seminar6/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Set up paths for Fast R-CNN.""" 9 | 10 | import os.path as osp 11 | import sys 12 | 13 | def add_path(path): 14 | if path not in sys.path: 15 | sys.path.insert(0, path) 16 | 17 | this_dir = osp.dirname(__file__) 18 | 19 | # Add lib to PYTHONPATH 20 | lib_path = osp.join(this_dir, '..', 'lib') 21 | add_path(lib_path) 22 | 23 | # Add root dir to PYTHONPATH 24 | lib_path = osp.join(this_dir, '..') 25 | add_path(lib_path) 26 | -------------------------------------------------------------------------------- /Seminar6/tools/eval_recall.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import _init_paths 4 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list 5 | from datasets.factory import get_imdb 6 | import argparse 7 | import time, os, sys 8 | import numpy as np 9 | 10 | def parse_args(): 11 | """ 12 | Parse input arguments 13 | """ 14 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 15 | parser.add_argument('--imdb', dest='imdb_name', 16 | help='dataset to test', 17 | default='voc_2007_test', type=str) 18 | parser.add_argument('--method', dest='method', 19 | help='proposal method', 20 | default='selective_search', type=str) 21 | parser.add_argument('--rpn-file', dest='rpn_file', 22 | default=None, type=str) 23 | 24 | if len(sys.argv) == 1: 25 | parser.print_help() 26 | sys.exit(1) 27 | 28 | args = parser.parse_args() 29 | return args 30 | 31 | if __name__ == '__main__': 32 | args = parse_args() 33 | 34 | print('Called with args:') 35 | print(args) 36 | 37 | imdb = get_imdb(args.imdb_name) 38 | imdb.set_proposal_method(args.method) 39 | if args.rpn_file is not None: 40 | imdb.config['rpn_file'] = args.rpn_file 41 | 42 | candidate_boxes = None 43 | if 0: 44 | import scipy.io as sio 45 | filename = 'debug/stage1_rpn_voc_2007_test.mat' 46 | raw_data = sio.loadmat(filename)['aboxes'].ravel() 47 | candidate_boxes = raw_data 48 | 49 | ar, gt_overlaps, recalls, thresholds = \ 50 | imdb.evaluate_recall(candidate_boxes=candidate_boxes) 51 | print 'Method: {}'.format(args.method) 52 | print 'AverageRec: {:.3f}'.format(ar) 53 | 54 | def recall_at(t): 55 | ind = np.where(thresholds > t - 1e-5)[0][0] 56 | assert np.isclose(thresholds[ind], t) 57 | return
recalls[ind] 58 | 59 | print 'Recall@0.5: {:.3f}'.format(recall_at(0.5)) 60 | print 'Recall@0.6: {:.3f}'.format(recall_at(0.6)) 61 | print 'Recall@0.7: {:.3f}'.format(recall_at(0.7)) 62 | print 'Recall@0.8: {:.3f}'.format(recall_at(0.8)) 63 | print 'Recall@0.9: {:.3f}'.format(recall_at(0.9)) 64 | # print again for easy spreadsheet copying 65 | print '{:.3f}'.format(ar) 66 | print '{:.3f}'.format(recall_at(0.5)) 67 | print '{:.3f}'.format(recall_at(0.6)) 68 | print '{:.3f}'.format(recall_at(0.7)) 69 | print '{:.3f}'.format(recall_at(0.8)) 70 | print '{:.3f}'.format(recall_at(0.9)) 71 | -------------------------------------------------------------------------------- /Seminar6/tools/reval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Reval = re-eval. Re-evaluate saved detections.""" 11 | 12 | import _init_paths 13 | from fast_rcnn.test import apply_nms 14 | from fast_rcnn.config import cfg 15 | from datasets.factory import get_imdb 16 | import cPickle 17 | import os, sys, argparse 18 | import numpy as np 19 | 20 | def parse_args(): 21 | """ 22 | Parse input arguments 23 | """ 24 | parser = argparse.ArgumentParser(description='Re-evaluate results') 25 | parser.add_argument('output_dir', nargs=1, help='results directory', 26 | type=str) 27 | parser.add_argument('--imdb', dest='imdb_name', 28 | help='dataset to re-evaluate', 29 | default='voc_2007_test', type=str) 30 | parser.add_argument('--matlab', dest='matlab_eval', 31 | help='use matlab for evaluation', 32 | action='store_true') 33 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 34 | action='store_true') 35 | parser.add_argument('--nms', dest='apply_nms', help='apply nms', 36 | action='store_true') 37 | 38 | if len(sys.argv) == 1: 39 | parser.print_help() 40 | sys.exit(1) 41 | 42 | args = parser.parse_args() 43 | return args 44 | 45 | def from_dets(imdb_name, output_dir, args): 46 | imdb = get_imdb(imdb_name) 47 | imdb.competition_mode(args.comp_mode) 48 | imdb.config['matlab_eval'] = args.matlab_eval 49 | with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f: 50 | dets = cPickle.load(f) 51 | 52 | if args.apply_nms: 53 | print 'Applying NMS to all detections' 54 | nms_dets = apply_nms(dets, cfg.TEST.NMS) 55 | else: 56 | nms_dets = dets 57 | 58 | print 'Evaluating detections' 59 | imdb.evaluate_detections(nms_dets, output_dir) 60 | 61 | if __name__ == '__main__': 62 | args = parse_args() 63 | 64 | output_dir = os.path.abspath(args.output_dir[0]) 65 | imdb_name = args.imdb_name 66 | from_dets(imdb_name, output_dir, args) 67 | -------------------------------------------------------------------------------- /Seminar6/tools/test_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Test a Fast R-CNN network on an image database.""" 11 | 12 | import _init_paths 13 | from fast_rcnn.test import test_net 14 | from 
fast_rcnn.config import cfg, cfg_from_file, cfg_from_list 15 | from datasets.factory import get_imdb 16 | import argparse 17 | import pprint 18 | import time, os, sys 19 | 20 | from custom.tester import Tester 21 | 22 | def parse_args(): 23 | """ 24 | Parse input arguments 25 | """ 26 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 27 | parser.add_argument('--snapshot', dest='snapshot', 28 | help='model to test', 29 | default=None, type=str) 30 | parser.add_argument('--cfg', dest='cfg_file', 31 | help='optional config file', default=None, type=str) 32 | parser.add_argument('--wait', dest='wait', 33 | help='wait until net file exists', 34 | default=True, type=bool) 35 | parser.add_argument('--imdb', dest='imdb_name', 36 | help='dataset to test', 37 | default='voc_2007_test', type=str) 38 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 39 | action='store_true') 40 | parser.add_argument('--set', dest='set_cfgs', 41 | help='set config keys', default=None, 42 | nargs=argparse.REMAINDER) 43 | parser.add_argument('--vis', dest='vis', help='visualize detections', 44 | action='store_true') 45 | parser.add_argument('--num_dets', dest='max_per_image', 46 | help='max number of detections per image', 47 | default=100, type=int) 48 | 49 | if len(sys.argv) == 1: 50 | parser.print_help() 51 | sys.exit(1) 52 | 53 | args = parser.parse_args() 54 | return args 55 | 56 | if __name__ == '__main__': 57 | args = parse_args() 58 | 59 | print('Called with args:') 60 | print(args) 61 | 62 | if args.cfg_file is not None: 63 | cfg_from_file(args.cfg_file) 64 | if args.set_cfgs is not None: 65 | cfg_from_list(args.set_cfgs) 66 | 67 | print('Using config:') 68 | pprint.pprint(cfg) 69 | 70 | tester = Tester(args.snapshot) 71 | tester.name = os.path.splitext(os.path.basename(args.snapshot))[0] 72 | 73 | imdb = get_imdb(args.imdb_name) 74 | imdb.competition_mode(args.comp_mode) 75 | imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD) 76 | 77 | test_net(tester, imdb, max_per_image=args.max_per_image, vis=args.vis) 78 | -------------------------------------------------------------------------------- /Seminar6/tools/train_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Train a Fast R-CNN network on a region of interest database.""" 11 | 12 | import _init_paths 13 | from fast_rcnn.train import get_training_roidb, train_net 14 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir 15 | from datasets.factory import get_imdb 16 | import datasets.imdb 17 | import argparse 18 | import pprint 19 | import numpy as np 20 | import sys 21 | 22 | def parse_args(): 23 | """ 24 | Parse input arguments 25 | """ 26 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network') 27 | parser.add_argument('--iters', dest='max_iters', 28 | help='number of iterations to train', 29 | default=40000, type=int) 30 | parser.add_argument('--cfg', dest='cfg_file', 31 | help='optional config file', 32 | default=None, type=str) 33 | parser.add_argument('--imdb', dest='imdb_name', 34 | help='dataset to train on', 35 | default='voc_2007_trainval', type=str) 36 | parser.add_argument('--rand', dest='randomize', 37 | 
help='randomize (do not use a fixed seed)', 38 | action='store_true') 39 | parser.add_argument('--set', dest='set_cfgs', 40 | help='set config keys', default=None, 41 | nargs=argparse.REMAINDER) 42 | 43 | if len(sys.argv) == 1: 44 | parser.print_help() 45 | sys.exit(1) 46 | 47 | args = parser.parse_args() 48 | return args 49 | 50 | def combined_roidb(imdb_names): 51 | def get_roidb(imdb_name): 52 | imdb = get_imdb(imdb_name) 53 | print 'Loaded dataset `{:s}` for training'.format(imdb.name) 54 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 55 | print 'Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD) 56 | roidb = get_training_roidb(imdb) 57 | return roidb 58 | 59 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 60 | roidb = roidbs[0] 61 | if len(roidbs) > 1: 62 | for r in roidbs[1:]: 63 | roidb.extend(r) 64 | imdb = datasets.imdb.imdb(imdb_names) 65 | else: 66 | imdb = get_imdb(imdb_names) 67 | return imdb, roidb 68 | 69 | if __name__ == '__main__': 70 | args = parse_args() 71 | 72 | print('Called with args:') 73 | print(args) 74 | 75 | if args.cfg_file is not None: 76 | cfg_from_file(args.cfg_file) 77 | if args.set_cfgs is not None: 78 | cfg_from_list(args.set_cfgs) 79 | 80 | print('Using config:') 81 | pprint.pprint(cfg) 82 | 83 | imdb, roidb = combined_roidb(args.imdb_name) 84 | print '{:d} roidb entries'.format(len(roidb)) 85 | 86 | output_dir = get_output_dir(imdb) 87 | print 'Output will be saved to `{:s}`'.format(output_dir) 88 | 89 | train_net(roidb, output_dir, 90 | max_iters=args.max_iters) 91 | -------------------------------------------------------------------------------- /Seminar7/HW_GAN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This homework requires you to write a 2D GAN game. I'll let you get into the topic yourself, without any explanations from my side. You can watch the lecture and seminar, and read papers and tutorials (fun, fun, fun)." 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Homework" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "source": [ 23 | "I want you to implement a simple 2D GAN game. The kind of animation I want to see is like in [this video](https://www.youtube.com/watch?v=KeJINHjyzOU) at 15:30 or [here](https://habrahabr.ru/post/275429/), but in 2D. You can google, search for code on github, whatever, but the network should be based on Theano. \n", 24 | "\n", 25 | "Basically, you will need to come up with a true distribution $P$, say a mixture of gaussians (surprise me), and sample some data from it. Visualize it as a heatmap. To visualize the $G$ density you can fix $N$ noise vectors $\\{z_i\\} \\quad i=1,\\dots, N$ and draw a circle for each $G(z_i)$. It is also fun to visualize the discriminator as a vector field (can be done with `plt.arrow` or `plt.quiver`). Look how it should be in the middle of [this page](http://www.inference.vc/an-alternative-update-rule-for-generative-adversarial-networks/).\n", 26 | "\n", 27 | "Please, make sure your code works if 'Run All' is pressed and it draws some animation.\n", 28 | "\n", 29 | "Good luck!\n",
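30 | "\n", 31 | "Below is a minimal visualization sketch (a non-authoritative starting point, not the required solution; `G_samples` and `D_grad` here are random placeholders for the values you will actually compute with your Theano functions):\n", 32 | "\n", 33 | "```python\n", 34 | "import numpy as np\n", 35 | "import matplotlib.pyplot as plt\n", 36 | "\n", 37 | "def sample_true(n):  # toy P: mixture of two gaussians\n", 38 | "    means = np.array([[-2., 0.], [2., 0.]])\n", 39 | "    return means[np.random.randint(0, 2, n)] + 0.5 * np.random.randn(n, 2)\n", 40 | "\n", 41 | "X = sample_true(10000)\n", 42 | "plt.hist2d(X[:, 0], X[:, 1], bins=100)  # heatmap of the true density P\n", 43 | "\n", 44 | "G_samples = np.random.randn(64, 2)  # placeholder for your G(z_i)\n", 45 | "plt.scatter(G_samples[:, 0], G_samples[:, 1], facecolors='none', edgecolors='r')\n", 46 | "\n", 47 | "xs, ys = np.meshgrid(np.linspace(-4, 4, 20), np.linspace(-4, 4, 20))\n", 48 | "grid = np.vstack([xs.ravel(), ys.ravel()]).T\n", 49 | "D_grad = np.zeros_like(grid)  # placeholder for the discriminator field on the grid\n", 50 | "plt.quiver(grid[:, 0], grid[:, 1], D_grad[:, 0], D_grad[:, 1])\n", 51 | "plt.show()\n", 52 | "```"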
53 | ] 54 | } 55 | ], 56 | "metadata": { 57 | "kernelspec": { 58 | "display_name": "Python 2", 59 | "language": "python", 60 | "name": "python2" 61 | }, 62 | "language_info": { 63 | "codemirror_mode": { 64 | "name": "ipython", 65 | "version": 2 66 | }, 67 | "file_extension": ".py", 68 | "mimetype": "text/x-python", 69 | "name": "python", 70 | "nbconvert_exporter": "python", 71 | "pygments_lexer": "ipython2", 72 | "version": "2.7.11" 73 | } 74 | }, 75 | "nbformat": 4, 76 | "nbformat_minor": 0 77 | } 78 | -------------------------------------------------------------------------------- /Seminar7/HW_textures_style.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Texture synthesis and artistic style transfer\n", 8 | "\n", 9 | "In this homework you are to implement [A Neural algorithm of artistic style](http://arxiv.org/pdf/1508.06576v2.pdf). This is an extension of the [Texture Synthesis Using Convolutional Neural Networks](http://arxiv.org/pdf/1505.07376v3.pdf) method.\n", 10 | "\n", 11 | "The core of the method is VGG and constrained optimization. The constraints are of two types: *content* and *style*. Given a content image **C** and style image **S** we want to generate an image **X** with content from **C** and style (whatever it really means) from **S**. \n", 12 | "\n", 13 | "We want to design a loss function for the optimization process. Considering \\[1\\], \\[2\\], an input image is easily invertible from the outputs at intermediate layers. This explains the idea of making an intermediate representation $F_X$ of **X** close to the **C** representation $F_C$. \n", 14 | "\n", 15 | "$$\n", 16 | " L_{content} = || F_X - F_C || \\rightarrow \\min_X\n", 17 | "$$\n", 18 | "\n", 19 | "Note that the representation $F$ preserves spatial information. Idea: let us dismiss it, so we will know what objects are there in the picture, but will not be able to re-establish their location. The style can be thought of as something independent of content, something we are left with if we leave the content out. L. Gatys suggests dismissing spatial information by computing correlations between the feature maps $F$. If $F$ has dimensions `CxWxH`, then the correlation matrix will be `CxC`, and look, there are no spatial dimensions. So the style term will be responsible for matching these correlation (Gram) matrices. \n", 20 | "\n", 21 | "$$\n", 22 | " L_{style} = || Gram(F_X) - Gram(F_S) || \\rightarrow \\min_X\n", 23 | "$$\n", 24 | "\n", 25 | "And finally we combine the two.\n", 26 | "\n", 27 | "$$\n", 28 | " L = \\alpha L_{content} + \\beta L_{style} \\rightarrow \\min_X\n", 29 | "$$\n", 30 | "\n", 31 | "Read the paper and the code for the details on which layers the features $F$ are taken from." 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "#### A little bit of history behind this texture generation method\n", 39 | "\n", 40 | "Actually, the idea comes from the 1990s, when mathematical models of textures were developed \\[3\\]. They defined a probabilistic model for texture generation. They used the idea that two images are indeed two samples of a particular texture iff their statistics match. The statistics used are histograms of a given texture $I$ filtered with a number of filters: $\\{hist(F_i * I), \\quad i = 1,\\dots, k\\}$. And whatever image has the same statistics is regarded as a sample of texture $I$.
The main drawback was that Gibbs sampling was employed (which is very slow). \[4\] suggested exactly the scheme we use now: starting from a random image, let's adjust its statistics iteratively so that they match the desired ones. \n", 41 | "\n", 42 | "Now, here is what has changed: the filters. \[4\] used a carefully crafted set of filters, and now we use neural-network-based non-linear filters. We still use the idea of matching statistics, but the statistics have improved. \n", 43 | "\n", 44 | "\[1\] *A. Mahendran, A. Vedaldi [Understanding Deep Image Representations by Inverting Them](https://www.robots.ox.ac.uk/~vgg/publications/2015/Mahendran15/mahendran15.pdf)*\n", 45 | "\n", 46 | "\[2\] *A. Dosovitskiy, T. Brox [Inverting Visual Representations with Convolutional Networks](http://arxiv.org/pdf/1506.02753v3.pdf)*\n", 47 | "\n", 48 | "\[3\] *Zhu et al., 1997 [Filters, Random Fields and Maximum Entropy (FRAME):\n", 49 | "Towards a Unified Theory for Texture Modeling](http://www.stat.ucla.edu/~ywu/research/papers/ijcv.pdf)*\n", 50 | "\n", 51 | "\[4\] *Portilla & Simoncelli, 2000 [A Parametric Texture Model Based on Joint Statistics\n", 52 | "of Complex Wavelet Coefficients](http://www.cns.nyu.edu/pub/lcv/portilla99-reprint.pdf)*" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "# Homework" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "To protect you from technical problems, you may use the [complete code for the method](https://github.com/Lasagne/Recipes/tree/master/examples/styletransfer). \n", 67 | "Your task will be to play around with it. " 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "### First part\n", 75 | "**Common mandatory part**:\n", 76 | "- Generate your favourite texture (please, do not use Starry Night). All you need to do is set the content weight to 0. \n", 77 | "- Stylize your favourite photo with your favourite style (hope you use something interesting).\n", 78 | "- Give an explanation for matching Gram matrices. What does it mean to minimize the distance between them in terms of random variables? Assume a true distribution $P$ and a model distribution $Q$. What class does $Q$ belong to when matching Gram matrices? Show that $KL(P || Q)$ is minimized when Gram matrices are matched. In other words, you need to come up with a $Q$ such that the $KL$ divergence is minimized when the model's Gram matrix is equal to the target Gram matrix. If you do not understand the question, please spend more time on it. If you want a hint after all, here is a [Telegram bot for you](https://telegram.me/rdl_hw7_bot) (send /hint to it)." 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### Second part\n", 86 | "We give you **two options** for the second part.\n", 87 | "\n", 88 | "**First one** (if you are lazy or do not have a GPU, do just this):\n", 89 | "- Implement **Mean** and **Covariance** matching functions instead of $Gram$ matching. That is: \n", 90 | " - *Mean* is a vector of size `C` which contains the means over feature maps\n", 91 | " - *Covariance* matrix is a *Gram* matrix of $Feats-mean$\n", 92 | "- What is $Q$ now? \n", 93 | "- Generate a texture and stylize with the $mean$ loss only, then with the $mean$ + $Covariance$ loss. Plot the results side by side (3 textures and 3 stylized). What do you think? Actually, the $Gram$ matrix or $Mean$ or $Mean$ + $Covariance$ matrix can be thought of as texture descriptors.
Does $mean$ encoding have enough parameters to represent textures? \n", 94 | "- ***OR*** come up with your own method to remove spatial information instead of the above.\n", 95 | "- Bonus: you can mix several styles, averaging their representations. It can be fun. Some examples are [here](https://github.com/jcjohnson/neural-style).\n", 96 | "\n", 97 | "**Second one** (hardcore):\n", 98 | "- Substitute Gram matrices with a discriminator, as in a GAN. That is, matching Gram matrices is one way of matching distributions, and a discriminator is designed exactly to match distributions. Probably the $Q$ we have defined is weak or too constrained. A neural-network-based discriminator should be more flexible in this sense.\n", 99 | " - The procedure will be a little bit unusual: we will optimize a NN inside the optimization loop over the image.\n", 100 | " - You need to define a pixel-level discriminator (at each layer you have $WH$ objects, each with $C$ features). Basically, it should decide whether a pixel came from the style image or from the current image $X$. \n", 101 | " - So the process is like this: \n", 102 | " - At each image optimization iteration update D (actually, you do not need to do minibatch updates here; you can simulate fully-connected layers with 1x1 convolutions and softmax with sigmoids). You will need to find a trade-off for how long and how frequent the updates should be. \n", 103 | " - Then propagate the gradient just like in a GAN when optimizing $G$, i.e. swap the labels (another strategy is in [here](https://www.robots.ox.ac.uk/~vgg/rg/papers/Tzeng_ICCV2015.pdf), eq. 4).\n", 104 | " - Let L-BFGS (or whatever, probably Adam will be more stable) update $X$.\n", 105 | " - The discriminator architecture is up to you. It's better to start with logistic regression, which should emulate $Mean$ + $Cov$ matching (shouldn't it?). \n", 106 | " - I tried this myself only without the content loss." 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "**Do everything in this notebook, I need your code as well as the generated images**" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "HINTS: \n", 121 | "\n", 122 | "- In case you do not have a GPU, you need to substitute the line:\n", 123 | " \n", 124 | " `from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer`\n", 125 | "\n", 126 | " with\n", 127 | "\n", 128 | " `from lasagne.layers import Conv2DLayer as ConvLayer`\n", 129 | " \n", 130 | " \n", 131 | "- If you do not have a GPU, resize your images to 256x256 at most. Even at this resolution it may take an hour. You can decrease the number of iterations if it takes too long. A small Gram-matrix reference is sketched below.\n",
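132 | "\n", 133 | "A tiny numpy reference for the Gram computation (a sketch under our own assumptions, not the homework's official code; `feats` is assumed to be a `C x H x W` feature array taken from some VGG layer):\n", 134 | "\n", 135 | "```python\n", 136 | "import numpy as np\n", 137 | "\n", 138 | "def gram_matrix(feats):\n", 139 | "    C = feats.shape[0]\n", 140 | "    F = feats.reshape(C, -1)  # drop the spatial dimensions: C x (H*W)\n", 141 | "    return F.dot(F.T)  # C x C correlations between feature maps\n", 142 | "```"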
" 132 | ] 133 | } 134 | ], 135 | "metadata": { 136 | "kernelspec": { 137 | "display_name": "Python 2", 138 | "language": "python", 139 | "name": "python2" 140 | }, 141 | "language_info": { 142 | "codemirror_mode": { 143 | "name": "ipython", 144 | "version": 2 145 | }, 146 | "file_extension": ".py", 147 | "mimetype": "text/x-python", 148 | "name": "python", 149 | "nbconvert_exporter": "python", 150 | "pygments_lexer": "ipython2", 151 | "version": "2.7.11" 152 | } 153 | }, 154 | "nbformat": 4, 155 | "nbformat_minor": 0 156 | } 157 | -------------------------------------------------------------------------------- /Seminar7/README.md: -------------------------------------------------------------------------------- 1 | More reading: 2 | - [likemo.net](http://likemo.net/) 3 | - Conditional GAN [example](https://github.com/TIXFeniks/Recipes/blob/4b83a1248a9eb73ca70777333f54f2598e762c6b/examples/Generating%20fonts%20with%20adversarial%20networks/Generating%2Bfonts%2Bwith%2Badversarial%2Bnetworks.ipynb) from the zoo 4 | - Some example from [habr](https://habrahabr.ru/post/278425/) 5 | - Fast neural doodle [repo](https://github.com/DmitryUlyanov/fast-neural-doodle) 6 | -------------------------------------------------------------------------------- /Seminar7/sem7.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Seminar 7" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Definition\n", 15 | "$$\n", 16 | "min_G max_D V(D,G) = \\mathbb{E}_{x\\sim P} \\log D(x) + \\mathbb{E}_{z\\sim \\mathcal{N}} \\log(1 - D(G(z)))\n", 17 | "$$" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "Let generator $G$ have parameters $\\theta$ and discriminator $D$ paramenters $\\psi$." 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## Learning" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "### D update\n", 39 | "Theory requires:\n", 40 | "$$\n", 41 | "\\psi_{t+1} \\leftarrow \\operatorname{argmax}_{\\psi} \\mathbb{E}_{x\\sim P} \\log D\\left(x;\\psi\\right) + \\mathbb{E}_{z\\sim \\mathcal{N}} \\log \\left(1 - D\\left(G(z;\\theta_t);\\psi\\right)\\right)\n", 42 | "$$\n", 43 | "\n", 44 | "In practice gradient step only. " 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "### G update, variant 1\n", 52 | "$$\n", 53 | "\\theta_{t+1} \\leftarrow \\theta_t - \\epsilon_t \\frac{\\partial}{\\partial\\theta} \\mathbb{E}_{z\\sim \\mathcal{N}} \\log \\left(1 - D\\left(G(z;\\theta_t);\\psi_{t+1}\\right)\\right)\n", 54 | "$$\n", 55 | "### G update, variant 2\n", 56 | "$$\n", 57 | "\\theta_{t+1} \\leftarrow \\theta_t + \\epsilon_t \\frac{\\partial}{\\partial\\theta} \\mathbb{E}_{z\\sim \\mathcal{N}} \\log D\\left(G(z;\\theta_t);\\psi_{t+1}\\right)$$" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "The first corresponds to definition. What does the second correspond to? 
 \n", 65 | "\n", 66 | "- $$\n", 67 | "\\min_G \\max_D V(D,G) = \\mathbb{E}_{x\\sim P} \\log D(x) - \\mathbb{E}_{z\\sim \\mathcal{N}} \\log(D(G(z)))\n", 68 | "$$\n", 69 | "- $$\n", 70 | "\\max_G \\max_D V(D,G) = \\mathbb{E}_{x\\sim P} \\log D(x) + \\mathbb{E}_{z\\sim \\mathcal{N}} \\log(D(G(z)))\n", 71 | "$$\n" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "[Nice article](http://www.inference.vc/an-alternative-update-rule-for-generative-adversarial-networks/) about GANs (not a tutorial)." 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "# Evaluating generative models" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "[Theis](http://arxiv.org/pdf/1511.01844v2.pdf)" 93 | ] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "Python 2", 99 | "language": "python", 100 | "name": "python2" 101 | }, 102 | "language_info": { 103 | "codemirror_mode": { 104 | "name": "ipython", 105 | "version": 2 106 | }, 107 | "file_extension": ".py", 108 | "mimetype": "text/x-python", 109 | "name": "python", 110 | "nbconvert_exporter": "python", 111 | "pygments_lexer": "ipython2", 112 | "version": "2.7.11" 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 0 117 | } 118 | -------------------------------------------------------------------------------- /Seminar8/Autoencoder_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar8/Autoencoder_structure.png -------------------------------------------------------------------------------- /Seminar8/GS.py: -------------------------------------------------------------------------------- 1 | import lasagne 2 | import theano.tensor as T 3 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 4 | 5 | class GaussianSampleLayer(lasagne.layers.MergeLayer): 6 | def __init__(self, mu, logsigma, **kwargs): 7 | self.rng = RandomStreams(lasagne.random.get_rng().randint(1,2147462579)) 8 | super(GaussianSampleLayer, self).__init__([mu, logsigma], **kwargs) 9 | 10 | def get_output_shape_for(self, input_shapes): 11 | return input_shapes[0] 12 | 13 | def get_output_for(self, inputs, deterministic=False, **kwargs): 14 | mu, logsigma = inputs 15 | shape=(self.input_shapes[0][0] or inputs[0].shape[0], 16 | self.input_shapes[0][1] or inputs[0].shape[1]) 17 | if deterministic: 18 | return mu 19 | return mu + T.exp(logsigma) * self.rng.normal(shape) -------------------------------------------------------------------------------- /Seminar8/README.md: -------------------------------------------------------------------------------- 1 | More materials: 2 | - [VAE explained](http://kvfrans.com/variational-autoencoders-explained/) 3 | - [VAEs for "categorical" variables](http://blog.evjang.com/2016/11/tutorial-categorical-variational.html) 4 | -------------------------------------------------------------------------------- /Seminar8/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar8/__init__.py -------------------------------------------------------------------------------- /Seminar8/lfw_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from scipy.misc import
imread, imresize 4 | import pandas as pd 5 | 6 | def fetch_lfw_dataset(attrs_name = "lfw_attributes.txt", 7 | images_name = "lfw-deepfunneled", 8 | dx=80,dy=80, 9 | dimx=45,dimy=45 10 | ):#sad smile 11 | 12 | #download if not exists 13 | if not os.path.exists(images_name): 14 | print("images not found, downloading...") 15 | os.system("wget http://vis-www.cs.umass.edu/lfw/lfw-deepfunneled.tgz -O tmp.tgz") 16 | print("extracting...") 17 | os.system("tar xvzf tmp.tgz && rm tmp.tgz") 18 | print("done") 19 | assert os.path.exists(images_name) 20 | 21 | if not os.path.exists(attrs_name): 22 | print("attributes not found, downloading...") 23 | os.system("wget http://www.cs.columbia.edu/CAVE/databases/pubfig/download/%s"%attrs_name) 24 | print("done") 25 | 26 | #read attrs 27 | df_attrs = pd.read_csv(attrs_name,sep='\t',skiprows=1,) 28 | df_attrs = pd.DataFrame(df_attrs.iloc[:,:-1].values, columns = df_attrs.columns[1:]) 29 | 30 | 31 | #read photos 32 | photo_ids = [] 33 | for dirpath, dirnames, filenames in os.walk(images_name): 34 | for fname in filenames: 35 | if fname.endswith(".jpg"): 36 | fpath = os.path.join(dirpath,fname) 37 | photo_id = fname[:-4].replace('_',' ').split() 38 | person_id = ' '.join(photo_id[:-1]) 39 | photo_number = int(photo_id[-1]) 40 | photo_ids.append({'person':person_id,'imagenum':photo_number,'photo_path':fpath}) 41 | 42 | photo_ids = pd.DataFrame(photo_ids) 43 | 44 | #mass-merge 45 | #(photos now have same order as attributes) 46 | df = pd.merge(df_attrs,photo_ids,on=('person','imagenum')) 47 | 48 | assert len(df) == len(df_attrs), "lost some data when merging dataframes" 49 | 50 | #image preprocessing 51 | all_photos = df['photo_path'].apply(imread)\ 52 | .apply(lambda img:img[dy:-dy,dx:-dx])\ 53 | .apply(lambda img: imresize(img,[dimx,dimy])) 54 | 55 | all_photos = np.stack(all_photos.values).astype('uint8') 56 | all_attrs = df.drop(["photo_path","person","imagenum"],axis=1) 57 | 58 | return all_photos,all_attrs 59 | 60 | -------------------------------------------------------------------------------- /Seminar8/linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ddtm/dl-course/9b04d2dda741c0786a9de40a7dfce89d06d0487e/Seminar8/linear.png -------------------------------------------------------------------------------- /Seminar9/oracle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | 4 | def APatK(y_true, y_predicted, K=32500): 5 | """Calculates AP@k given true Y and predictions (probabilities). 6 | Sorts answers by y_predicted to obtain ranking""" 7 | 8 | sort_by_ypred = np.argsort(-y_predicted) 9 | 10 | y_true = y_true[sort_by_ypred] 11 | y_predicted = y_predicted[sort_by_ypred] 12 | 13 | countRelevants = 0 14 | listOfPrecisions = [] 15 | 16 | for i in range(min(K,len(y_true))): 17 | currentk = i + 1.0 18 | if y_true[i] != 0: 19 | countRelevants += 1 20 | precisionAtK = countRelevants / currentk 21 | listOfPrecisions.append(precisionAtK) 22 | return np.sum(listOfPrecisions) / min(K, len(y_true)) 23 | 24 | 25 | 26 | 27 | 28 | import sys 29 | import socket 30 | 31 | def score(final_accuracy,final_auc,final_apatk): 32 | 33 | print "\nAUC:" 34 | if final_auc >= 0.99: 35 | print "\tWrite a paper. (great)" 36 | elif final_auc >= 0.97: 37 | print "\tExcellent solution!
 (good)" 38 | elif final_auc >= 0.95: 39 | print "\tGood enough, though you could push a bit harder (ok)" 40 | elif final_auc >= 0.9: 41 | print "\tNot bad, but you can do better! (not ok)" 42 | elif final_auc > 0.8: 43 | print "\tYou are on the right track! (not ok)" 44 | elif final_auc > 0.65: 45 | print "\tTurn up the heat! (not ok)" 46 | else: 47 | print "\tMaybe it is undertrained? Or too small? Or was it sickly as a child? (not ok)" 48 | 49 | 50 | print "\nAccuracy:" 51 | if final_accuracy >= 0.97: 52 | print "\tFreaking awesome! (great)" 53 | elif final_accuracy >= 0.95: 54 | print "\tExcellent result! (good)" 55 | elif final_accuracy >= 0.9: 56 | print "\tAll good (ok)" 57 | else: 58 | print "\tNeeds some work. (not ok)" 59 | 60 | print "\nAverage precision at K:" 61 | if final_apatk > 0.99: 62 | print "\tSubmit it to kaggle! (great) \n\t No, honestly - download avito_test.tsv, submit it and tell us how it went." 63 | elif final_apatk > 0.95: 64 | print "\tExcellent result (good)" 65 | elif final_apatk > 0.92: 66 | print "\tYou beat the baseline (ok)" 67 | else: 68 | print "\tPush a bit harder (not ok)" 69 | 70 | if socket.gethostname().startswith("cv-gpu"): 71 | if final_apatk > 0.92 and final_accuracy > 0.9 and final_auc > 0.95: 72 | warn_them() 73 | else: 74 | print 75 | 76 | def warn_them(): 77 | """warns some users of what is impending""" 78 | sys.stderr.write("""\nWe Are Watching You! 79 | . .. 80 | . ...;c:,::.. ' ':oococ:. .. 81 | .. .':dodxkkxxxxxxxkxddxkkkkkkkkkxkkdl:. 82 | .'':lodxxxdxxxxxddxxddodddxkkxdxxxxxxxxxxxxk:d;,. 83 | .;dxkxxxxxxxxk000Okdooooooloodxxdddxxkkxxddddodkkkkd' . 84 | .:lkkkkkkkkkkkOOOkkxxxxdollllllloooddxxkOOOOkxdddldkkkOxc. 85 | .lkkkOOOkkkkOOOkkxdollcccccccccccccccllddxkOOOOkddddddxxxddd;. 86 | .cxkkOOOOOOOOkkkdolc:::;;;;;;;;;;;;;::::ccclodxkOOxddooodddooodo' 87 | ,dxkkOOOOOOOOkxolc::;;;;;;;;;;;;;;;;;;;;::::cclodxkkxddooolooollooc.. 88 | ,xxkkkOOO0OOkxdol:;;;;;;,,,,;;;;;;;;;;;;;;;;;::ccloxxxxdoooollllollod, 89 | .xxkkkkOOOkxxollc;;;;,,,,,,,,,,,,,;,,;;;;;;;;;;::cclodxxddoollllcclllclc. 90 | ;xkkxkOOkdoolllc;;;,,,,,,,,,,,,,,;;;,,;;;;;;;;;:::clloddddoooolccc::lcclo: 91 | .ddkkxOOxdoolllc:;;;;;;;;;;;;,,,,,;;,,,;;;;;;;::::::ccodlcdddooolccc::cllll.. 92 | 'xdddxxOOkxolclc::;;:coooooollllc:;;,,;;;;::ccoddddddoollc;coxkxoc:::cc:clcco. 93 | oxddddxkxxdoc::::;:lolccccccccloolc;,,;:ccloddxxxkkOOOkdl::lxkkkxoc:;:cccccoo. 94 | xdododdkkxdlc::c::coc::::clooollllc:;;:cllldxxxddddddxkkdodxkkkxxkdl:;::::odc 95 | ,dddoodkkkdlc::cc::lccccldkkOOxdddol:;;:codkOO000OkdooodkkxxkOkxxkkkdlc:::;cd. 96 | :odoodkkxolc:ccc::cllloxkdollloooddoc;;:lxO00OkxxxxkxdddkkkxkkkxkO0Okxolc;;:c . 97 | oddodkOxolc:::c::::cloddo;;:c:;:llc::,,;lxOOko:;:cccdxxkOkkkkkxkkOO00kdllc;:o.. 98 | cddddkxdlc:::c::;;::cc:cc;:clccc::;;,,,;:oxkxc;:cloloxkkkkkxxkxdxollkxl;:cc:c;. 99 | .oddxkkdc::cc::;;;;;;;;;cllllllcc:;,,,,;codxkdllooddxkxddddddxkooc::dOd:;:lolc' 100 | 'dddxxolcllcc:;;,,,,;;;;;;;:;;;;;;,,,,;:loddoccllodddlccclodxkxl:::dOdl::codlc 101 | cxdddooodolo:;;,,,,,,,,,,,,,,;;;,',,;:codol:;::::c::;:clodxxko::cdkolc;ccolc. 102 | cdxxxdkdodl:;,,,,,,,''''',::ll:;,;:llodddoc;;,;;;;::cclloxkkxlcokxolc:lcll:. 103 | ldoddkxodl::;,,,,,,,',;:c::ooccldxkO0K0kxxo:;,;;;:ccllclxkkxlcdkxol::dol: 104 | .dddoddoxc::;;,;;;,,;:cc;;;;;;:lxkkkkkkkkkko;;;;:cloooccxkxxocxkooocc;cd. 105 | ,cldddl:;;:;;;;;;;;:c:;,,,,,,,;;:cllodddxddc:::clooddlcxOxoclxkooool : 106 | 'xdd:;;;::;;;;;;;::;,,,,,,,,;;;;;:cccllllcc:cclooddl:xkxoloxxdodoc 107 | .. cdxoccc::;;;;;;;;;;;;::cccc:ccllloooodolccc:clloodccxxdoodxxdodc .
108 | .;; .::;;;;;;;;;:lxO0kdollodxxdkkOOOkdlcc;:cclodclodoodxdl:,. 109 | .c::;;;;;,;ccodOxlc:codkOOOOO0KK0xc:;;:cllddc,.do;.',. . 110 | .clc::;;;;;;;;;::::::cclcllodxddooc,;;:clldxo:..;, 111 | .;clllcc:::;;,,,,;;;;;;;::cccclodddl;;;:looxkxl. 112 | .c:ccooclcc::;;,,,;;;:::ccllooddddol:::coxxkkkc . . 113 | :l:clodccllcc:;;;;,;;;;:::cccccc:::cclodkkkkx: 114 | ;ll:clddcccloolc:;;,,,,;,,,,;;:::::cldxkkkOkc 115 | .ddcclld,lcclloooolc;;;,,,;;;;:ccclldxxdc;,;. 116 | :docclod,'lccclloddxdollclllloooool:;. 117 | .dxlcclldo :cccllloodxkd. .... 118 | ;dxlcclodx.'lcclllloodxx. 119 | \n""") 120 | sys.stderr.write(""" 121 | ______________________________________ 122 | _\|/^ / Well done, now get off \ \|| / 123 | (_oo / the institute's GPU \ oo / 124 | | \________________________________________/ О_ -- 125 | /|\ ) = 126 | | (. -- 127 | LL 1 1\ 128 | mborisyak@ jheuristic@ 129 | """) 130 | 131 | --------------------------------------------------------------------------------
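/Seminar9/oracle_usage_sketch.py (a hypothetical companion file, not part of the repo; it only sketches how `oracle.APatK` is meant to be called, assuming binary ground-truth labels and predicted probabilities): -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Minimal usage sketch for oracle.APatK (hypothetical example; Python 2, like the rest of the repo). 3 | import numpy as np 4 | from oracle import APatK 5 | 6 | y_true = np.array([1, 0, 1, 1, 0, 0]) # binary relevance labels 7 | y_pred = np.array([0.9, 0.8, 0.7, 0.4, 0.3, 0.1]) # predicted probabilities 8 | 9 | # AP@3: APatK ranks answers by y_pred internally, then averages precision at each hit 10 | print APatK(y_true, y_pred, K=3) # -> 5/9 ~ 0.556 11 | --------------------------------------------------------------------------------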