├── .gitignore ├── README.md ├── additional ├── cnn_quantization │ ├── .gitignore │ ├── README.md │ ├── cnn_quantization.ipynb │ ├── data │ │ └── .gitignore │ └── utils │ │ ├── __init__.py │ │ ├── dataset.py │ │ ├── model.py │ │ └── trainer.py ├── collaborative_filtering │ ├── .gitignore │ ├── README.md │ ├── collaborative_filtering.ipynb │ └── data │ │ └── .gitignore ├── entity_embedding │ ├── .gitignore │ ├── README.md │ ├── data │ │ └── .gitignore │ └── entity_embedding.ipynb ├── prune │ ├── .gitignore │ ├── README.md │ ├── data │ │ └── .gitignore │ └── prune.ipynb └── transformers │ ├── README.md │ └── transformers.ipynb ├── computer_vision ├── generative_models │ ├── .gitignore │ ├── README.md │ ├── cvae.ipynb │ ├── data │ │ └── .gitignore │ ├── dcgan.ipynb │ ├── vae.ipynb │ └── wgan.ipynb ├── image_classification │ ├── binary_label_classification │ │ ├── .gitignore │ │ ├── README.md │ │ ├── data │ │ │ └── .gitignore │ │ └── pneumonia.ipynb │ └── multi_label_classification │ │ ├── .gitignore │ │ ├── README.md │ │ ├── amazon.ipynb │ │ └── data │ │ └── .gitignore ├── instance_segmentation │ ├── .gitignore │ ├── README.md │ ├── data │ │ └── .gitignore │ ├── mask_rcnn.ipynb │ └── mask_rcnn_utils │ │ ├── __init__.py │ │ ├── box_functions.py │ │ ├── classes.py │ │ ├── connect_bboxes_segmask.py │ │ ├── convert_annotations.py │ │ ├── create_labels_funcs.py │ │ ├── display.py │ │ ├── generate_anchor_boxes.py │ │ ├── losses.py │ │ └── utils.py ├── models │ ├── .gitignore │ ├── README.md │ ├── data │ │ ├── .gitignore │ │ ├── DSConvolution.png │ │ ├── DenseNet.png │ │ ├── Inception.png │ │ └── ResNet.png │ ├── dense_net.ipynb │ ├── inception_v1.ipynb │ ├── mobilenet_v1.ipynb │ └── resnet.ipynb ├── neural_style_transfer │ ├── .gitignore │ ├── README.md │ ├── data │ │ ├── content.jpg │ │ └── style.jpg │ └── neural_style_transfer.ipynb ├── object_detection │ ├── .gitignore │ ├── README.md │ ├── data │ │ └── .gitignore │ ├── faster_rcnn.ipynb │ ├── retinanet.ipynb │ ├── src │ │ ├── __init__.py │ │ ├── augmentations.py │ │ ├── data_transformer.py │ │ ├── display.py │ │ ├── evaluation.py │ │ ├── faster_rcnn_utils.py │ │ └── xml2json.py │ ├── ssd.ipynb │ └── yolo.ipynb └── semantic_segmentation │ ├── .gitignore │ ├── README.md │ ├── data │ ├── .gitignore │ ├── fcn_8.png │ └── ternaus_net.png │ ├── fcn.ipynb │ ├── src │ ├── __init__.py │ ├── dataset.py │ └── utils.py │ └── u_net.ipynb └── natural_language_processing ├── sentiment_analysis ├── .gitignore ├── README.md ├── bert.ipynb ├── data │ └── .gitignore └── word_embeddings.ipynb ├── speech_classification ├── .gitignore ├── README.md ├── data │ └── .gitignore └── rnn.ipynb ├── text_synthesis ├── .gitignore ├── README.md ├── data │ └── .gitignore └── text_synthesis.ipynb ├── tokenizers └── bpe.ipynb └── word2vec ├── .gitignore ├── README.md ├── data └── .gitignore └── word2vec.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | **/.venv -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine learning playground 2 | 3 | - Collection of projects used for learning new concepts. 4 | - The models are implemented in PyTorch. 5 | 6 | ## Structure 7 | 8 | ### Computer vision 9 | 10 | - Image classification 11 | - Doing binary/multi-class classification. 12 | - Models 13 | - Implementing SOTA convolutional neural networks. 
14 | - Object detection 15 | - Implementing YOLO, SSD, RetinaNet and Faster R-CNN. 16 | - Semantic segmentation 17 | - Implementing FCN-8 and U-Net. 18 | - Neural style transfer 19 | - Implementing a neural algorithm for artistic style. 20 | - Generative models 21 | - Implementing DCGAN, Wasserstein GAN, VAE and Conditional VAE. 22 | 23 | ### Natural language processing 24 | 25 | - Sentiment analysis 26 | - Doing sentiment analysis using different pre-trained methods: word-embeddings, BERT. 27 | - Text synthesis 28 | - Synthesis text (character level). 29 | - word2vec 30 | - Implementing word2vec. 31 | - Speech classification 32 | - Classifying speech commands. 33 | - Tokenizer 34 | - Implementing BPE tokenizer. 35 | 36 | ### Additional 37 | 38 | - Collaborative filtering 39 | - Implementing collaborative filtering. 40 | - Entity embedding 41 | - Implementing entity embeddings. 42 | - Quantization 43 | - Quantize a cnn model. 44 | - Pruning 45 | - Pruning a cnn model. 46 | - Transformers 47 | - Implementing a Transformer from scratch. 48 | -------------------------------------------------------------------------------- /additional/cnn_quantization/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | utils/__pycache__/ 3 | -------------------------------------------------------------------------------- /additional/cnn_quantization/README.md: -------------------------------------------------------------------------------- 1 | # Quantize MobileNetv2 2 | 3 | Quantize a cnn model. They are tested on the MNIST data set, which can be downloaded [here](https://www.kaggle.com/c/digit-recognizer). Note that this notebook requires PyTorch 1.3. 4 | -------------------------------------------------------------------------------- /additional/cnn_quantization/cnn_quantization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from sklearn.model_selection import train_test_split\n", 11 | "\n", 12 | "import torch\n", 13 | "from torch.utils.data import DataLoader\n", 14 | "import torch.quantization as quantization\n", 15 | "\n", 16 | "from utils.dataset import MNIST\n", 17 | "from utils.trainer import Trainer\n", 18 | "from utils.model import MobileNetv2, ConvBnRelu, ConvBn" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "DATA_PATH = \"data/digit-recognizer/\"\n", 28 | "MODEL_FILE = \"data/model.pth\"\n", 29 | "device = \"cuda\"\n", 30 | "\n", 31 | "seed = 42\n", 32 | "batch_size = 256" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "df = pd.read_csv(DATA_PATH + \"train.csv\")\n", 42 | "y = df[\"label\"].values\n", 43 | "X = df.drop(\"label\", axis=1).values\n", 44 | "\n", 45 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "train_dataset = MNIST(X_train, y_train)\n", 55 | "train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)\n", 56 | "\n", 57 | "test_dataset = MNIST(X_test, y_test)\n", 58 | "test_loader = DataLoader(test_dataset, 
batch_size=batch_size, shuffle=False, num_workers=4)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 5, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "n_epochs = 6\n", 68 | "lr = 0.1\n", 69 | "\n", 70 | "torch.manual_seed(seed)\n", 71 | "model = MobileNetv2().to(device)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 6, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | "Epoch 0: train loss 0.000985, test loss 0.0003, test accuracy 0.9769\n", 84 | "Epoch 5: train loss 3e-06, test loss 0.000111, test accuracy 0.9915\n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "trainer = Trainer(model, train_loader, test_loader, seed, lr=lr, momentum=0.9, weight_decay=4e-5)\n", 90 | "\n", 91 | "for epoch in range(0, n_epochs):\n", 92 | " trainer.run_one_epoch(epoch)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 7, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "trainer.model = trainer.model.to(\"cpu\")\n", 102 | "trainer.device = \"cpu\"" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 8, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "Float accruacy: 0.9915\n", 115 | "CPU times: user 1min 25s, sys: 39.7 s, total: 2min 5s\n", 116 | "Wall time: 31.6 s\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "%%time\n", 122 | "_, acc = trainer.validate()\n", 123 | "print(f\"Float accruacy: {acc}\")" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 9, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "torch.save(model.state_dict(), MODEL_FILE)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 10, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "class QuantMobileNet(MobileNetv2):\n", 142 | " def __init__(self):\n", 143 | " super().__init__()\n", 144 | " self.quant = quantization.QuantStub()\n", 145 | " self.dequant = quantization.DeQuantStub()\n", 146 | "\n", 147 | " def forward(self, x):\n", 148 | " x = self.quant(x)\n", 149 | " x = super().forward(x) \n", 150 | " x = self.dequant(x)\n", 151 | " return x\n", 152 | "\n", 153 | " # Fuse Conv+BN and Conv+BN+Relu modules prior to quantization\n", 154 | " def fuse_model(self):\n", 155 | " for module in self.modules():\n", 156 | " if isinstance(module, ConvBnRelu):\n", 157 | " torch.quantization.fuse_modules(module, ['conv', 'bn', 'act'], inplace=True)\n", 158 | " elif isinstance(module, ConvBn):\n", 159 | " torch.quantization.fuse_modules(module, ['conv', 'bn'], inplace=True)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "# Per-tensor quantization" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 11, 172 | "metadata": { 173 | "scrolled": true 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "# min/max range estimation and per-tensor quantization of weights\n", 178 | "per_tensor_quant_model = QuantMobileNet().to('cpu')\n", 179 | "_ = per_tensor_quant_model.load_state_dict(torch.load(MODEL_FILE))\n", 180 | "per_tensor_quant_model.eval()\n", 181 | "per_tensor_quant_model.fuse_model()" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 12, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "per_tensor_quant_model.qconfig = quantization.default_qconfig\n", 191 | "_ = 
torch.quantization.prepare(per_tensor_quant_model, inplace=True)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 13, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "per_tensor_quant_trainer = Trainer(per_tensor_quant_model, train_loader, test_loader, seed, device=\"cpu\",\n", 201 | " lr=lr, momentum=0.9, weight_decay=4e-5)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 14, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "# Calibrating the model\n", 211 | "_ = per_tensor_quant_trainer.validate()" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 15, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "name": "stderr", 221 | "output_type": "stream", 222 | "text": [ 223 | "/home/erik/anaconda3/lib/python3.7/site-packages/torch/quantization/observer.py:131: UserWarning: must run observer before calling calculate_qparams. Returning default scale and zero point \n", 224 | " Returning default scale and zero point \"\n" 225 | ] 226 | } 227 | ], 228 | "source": [ 229 | "_ = torch.quantization.convert(per_tensor_quant_trainer.model, inplace=True)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 16, 235 | "metadata": {}, 236 | "outputs": [ 237 | { 238 | "name": "stdout", 239 | "output_type": "stream", 240 | "text": [ 241 | "Per-tensor quant accruacy: 0.9916\n", 242 | "CPU times: user 10 s, sys: 1.24 s, total: 11.3 s\n", 243 | "Wall time: 2.92 s\n" 244 | ] 245 | } 246 | ], 247 | "source": [ 248 | "%%time\n", 249 | "_, acc = per_tensor_quant_trainer.validate()\n", 250 | "print(f\"Per-tensor quant accruacy: {acc}\")" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "# Channel-wise quantization" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 17, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "per_channel_quant_model = QuantMobileNet().to('cpu')\n", 267 | "_ = per_channel_quant_model.load_state_dict(torch.load(MODEL_FILE))\n", 268 | "per_channel_quant_model.eval()\n", 269 | "per_channel_quant_model.fuse_model()" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 18, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "# Channel-wise quant\n", 279 | "per_channel_quant_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')\n", 280 | "_ = torch.quantization.prepare(per_channel_quant_model, inplace=True)\n", 281 | "\n", 282 | "per_channel_quant_trainer = Trainer(per_channel_quant_model, train_loader, test_loader, seed, device=\"cpu\",\n", 283 | " lr=lr, momentum=0.9, weight_decay=4e-5)" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 19, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "_ = per_channel_quant_trainer.validate()" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 20, 298 | "metadata": {}, 299 | "outputs": [ 300 | { 301 | "name": "stderr", 302 | "output_type": "stream", 303 | "text": [ 304 | "/home/erik/anaconda3/lib/python3.7/site-packages/torch/quantization/observer.py:592: UserWarning: must run observer before calling calculate_qparams. 
Returning default scale and zero point \n", 305 | " Returning default scale and zero point \"\n" 306 | ] 307 | } 308 | ], 309 | "source": [ 310 | "_ = torch.quantization.convert(per_channel_quant_trainer.model, inplace=True)" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 21, 316 | "metadata": {}, 317 | "outputs": [ 318 | { 319 | "name": "stdout", 320 | "output_type": "stream", 321 | "text": [ 322 | "Per-tensor quant accruacy: 0.9919\n", 323 | "CPU times: user 10.9 s, sys: 1.25 s, total: 12.2 s\n", 324 | "Wall time: 3.2 s\n" 325 | ] 326 | } 327 | ], 328 | "source": [ 329 | "%%time\n", 330 | "_, acc = per_channel_quant_trainer.validate()\n", 331 | "print(f\"Per-tensor quant accruacy: {acc}\")" 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": [ 338 | "# Quantization-aware training" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 22, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "aware_quant_model = QuantMobileNet().to('cpu')\n", 348 | "_ = aware_quant_model.load_state_dict(torch.load(MODEL_FILE))\n", 349 | "aware_quant_model.train()\n", 350 | "aware_quant_model.fuse_model()" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 23, 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [ 359 | "aware_quant_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')\n", 360 | "_ = torch.quantization.prepare_qat(aware_quant_model, inplace=True)\n", 361 | "\n", 362 | "aware_quant_trainer = Trainer(aware_quant_model, train_loader, test_loader, seed, device=\"cpu\",\n", 363 | " lr=lr / 100, momentum=0.9)" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 24, 369 | "metadata": {}, 370 | "outputs": [ 371 | { 372 | "name": "stdout", 373 | "output_type": "stream", 374 | "text": [ 375 | "Epoch 0: train loss 1.7e-05, test loss 0.00011, test accuracy 0.992\n" 376 | ] 377 | } 378 | ], 379 | "source": [ 380 | "aware_quant_trainer.run_one_epoch(0)" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 25, 386 | "metadata": {}, 387 | "outputs": [], 388 | "source": [ 389 | "aware_quant_trainer.model.eval()\n", 390 | "_ = quantization.convert(aware_quant_trainer.model, inplace=True)" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 26, 396 | "metadata": {}, 397 | "outputs": [ 398 | { 399 | "name": "stdout", 400 | "output_type": "stream", 401 | "text": [ 402 | "Aware quant accruacy: 0.9915\n", 403 | "CPU times: user 10.9 s, sys: 880 ms, total: 11.8 s\n", 404 | "Wall time: 3.13 s\n" 405 | ] 406 | } 407 | ], 408 | "source": [ 409 | "%%time\n", 410 | "_, acc = aware_quant_trainer.validate()\n", 411 | "print(f\"Aware quant accruacy: {acc}\")" 412 | ] 413 | } 414 | ], 415 | "metadata": { 416 | "kernelspec": { 417 | "display_name": "Python 3", 418 | "language": "python", 419 | "name": "python3" 420 | }, 421 | "language_info": { 422 | "codemirror_mode": { 423 | "name": "ipython", 424 | "version": 3 425 | }, 426 | "file_extension": ".py", 427 | "mimetype": "text/x-python", 428 | "name": "python", 429 | "nbconvert_exporter": "python", 430 | "pygments_lexer": "ipython3", 431 | "version": "3.7.4" 432 | } 433 | }, 434 | "nbformat": 4, 435 | "nbformat_minor": 2 436 | } 437 | -------------------------------------------------------------------------------- /additional/cnn_quantization/data/.gitignore: -------------------------------------------------------------------------------- 1 | model.pth 
2 | digit-recognizer.zip 3 | digit-recognizer/ -------------------------------------------------------------------------------- /additional/cnn_quantization/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrParosk/ml_playground/1202dd9b4341fc8ad5fbadade45ac93e76217303/additional/cnn_quantization/utils/__init__.py -------------------------------------------------------------------------------- /additional/cnn_quantization/utils/dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | 4 | class MNIST(Dataset): 5 | def __init__(self, X, y): 6 | self.y = torch.from_numpy(y).long() 7 | X = X.reshape(X.shape[0], 1, 28, 28) 8 | X = X / 255.0 9 | self.X = torch.from_numpy(X).float() 10 | 11 | def __len__(self): 12 | return self.X.shape[0] 13 | 14 | def __getitem__(self, idx): 15 | return (self.X[idx,:,:,:], self.y[idx]) 16 | -------------------------------------------------------------------------------- /additional/cnn_quantization/utils/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class ConvBnRelu(nn.Module): 6 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, groups=1): 7 | super().__init__() 8 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, 9 | stride=stride, padding=padding, groups=groups, bias=False) 10 | self.bn = nn.BatchNorm2d(out_channels) 11 | self.act = nn.ReLU() 12 | 13 | def forward(self, x): 14 | x = self.conv(x) 15 | x = self.bn(x) 16 | x = self.act(x) 17 | return x 18 | 19 | class ConvBn(nn.Module): 20 | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, groups=1): 21 | super().__init__() 22 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, 23 | stride=stride, padding=padding, groups=groups, bias=False) 24 | self.bn = nn.BatchNorm2d(out_channels) 25 | 26 | def forward(self, x): 27 | x = self.conv(x) 28 | x = self.bn(x) 29 | return x 30 | 31 | class Block(nn.Module): 32 | def __init__(self, in_channels, out_channels, stride, expansion_factor=6): 33 | super().__init__() 34 | 35 | self.stride = stride 36 | between_channels = out_channels * expansion_factor 37 | 38 | self.bn_layer_1x1_before = ConvBnRelu(in_channels, between_channels, kernel_size=1, stride=1) 39 | 40 | self.bn_layer_3x3 = ConvBnRelu(between_channels, between_channels, 41 | kernel_size=3, stride=stride, padding=1, groups=between_channels) 42 | 43 | self.bn_layer_1x1_after = ConvBn(between_channels, out_channels) 44 | 45 | self.skip_add = nn.quantized.FloatFunctional() 46 | 47 | 48 | def forward(self, x): 49 | h = self.bn_layer_1x1_before(x) 50 | h = self.bn_layer_3x3(h) 51 | h = self.bn_layer_1x1_after(h) 52 | 53 | if self.stride == 1: 54 | return self.skip_add.add(h, x) 55 | else: 56 | return h 57 | 58 | class MobileNetv2(nn.Module): 59 | def __init__(self, num_classes = 10): 60 | super().__init__() 61 | 62 | # (expansion_factor, out_channels, num_blocks, stride) 63 | self.cfg = [ 64 | (6, 32, 2, 2), 65 | (6, 64, 2, 2), 66 | (6, 128, 2, 2), 67 | ] 68 | 69 | self.features = self.make_layers() 70 | self.dropout = nn.Dropout(p=0.5) 71 | self.linear = nn.Linear(256, num_classes) 72 | 73 | def make_layers(self, in_channels=32): 74 | layers = [ConvBnRelu(1, in_channels, stride=1, padding=1)] 75 | 
76 | for expension_factor, out_channels, num_block, stride in self.cfg: 77 | strides = [stride] + [1] * (num_block - 1) 78 | 79 | for s in strides: 80 | layers.append(Block(in_channels, out_channels, stride=s, expansion_factor=expension_factor)) 81 | in_channels = out_channels 82 | 83 | layers.append(ConvBnRelu(128, 256, stride=1, padding=0)) 84 | return nn.Sequential(*layers) 85 | 86 | def forward(self, x): 87 | x = self.features(x) 88 | x = F.adaptive_avg_pool2d(x, 1) 89 | x = x.squeeze(2).squeeze(2) 90 | x = self.dropout(x) 91 | x = self.linear(x) 92 | return x 93 | -------------------------------------------------------------------------------- /additional/cnn_quantization/utils/trainer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | from torch.optim.lr_scheduler import MultiStepLR 6 | 7 | class Trainer: 8 | def __init__(self, model, train_loader, test_loader, seed=42, device="cuda", **kwargs): 9 | self.device = device 10 | self.model = model 11 | # self.optimizer = optim.Adam(model.parameters(), **kwargs) 12 | self.optimizer = optim.SGD(model.parameters(), **kwargs) 13 | self.lr_sceduler = MultiStepLR(self.optimizer, gamma=0.5, milestones=[150, 250]) 14 | 15 | self.loss = nn.CrossEntropyLoss() 16 | 17 | self.train_loader = train_loader 18 | self.test_loader = test_loader 19 | 20 | self.seed = seed 21 | 22 | def train(self): 23 | torch.manual_seed(self.seed) 24 | self.model.train() 25 | 26 | train_loss = 0.0 27 | for _, (x, y) in enumerate(self.train_loader): 28 | x, y = x.to(self.device), y.to(self.device) 29 | self.model.zero_grad() 30 | output = self.model(x) 31 | 32 | batch_loss = self.loss(output, y) 33 | batch_loss.backward() 34 | self.optimizer.step() 35 | train_loss += batch_loss.cpu().detach().numpy() / x.shape[0] 36 | 37 | self.lr_sceduler.step() 38 | train_loss = np.round(train_loss / len(self.train_loader), 6) 39 | 40 | return train_loss 41 | 42 | def validate(self): 43 | self.model.eval() 44 | torch.manual_seed(self.seed) 45 | 46 | test_loss, test_acc = 0.0, 0.0 47 | with torch.no_grad(): 48 | for _, (x, y) in enumerate(self.test_loader): 49 | x, y = x.to(self.device), y.to(self.device) 50 | output = self.model(x) 51 | 52 | batch_loss = self.loss(output, y) 53 | test_loss += batch_loss.cpu().detach().numpy() / x.shape[0] 54 | 55 | y_hat = torch.argmax(output, dim=1) 56 | acc = (y_hat == y).sum().float() / x.shape[0] 57 | acc = acc.cpu().detach().numpy() 58 | test_acc += acc 59 | 60 | test_loss = np.round(test_loss / len(self.test_loader), 6) 61 | test_acc = np.round(test_acc / len(self.test_loader), 4) 62 | return test_loss, test_acc 63 | 64 | def run_one_epoch(self, epoch): 65 | train_loss = self.train() 66 | test_loss, test_acc = self.validate() 67 | self.model.train() 68 | 69 | if epoch % 5 == 0: 70 | print(f"Epoch {epoch}: train loss {train_loss}, test loss {test_loss}, test accuracy {test_acc}") 71 | -------------------------------------------------------------------------------- /additional/collaborative_filtering/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints -------------------------------------------------------------------------------- /additional/collaborative_filtering/README.md: -------------------------------------------------------------------------------- 1 | # Collaborative filtering Collaborative filtering using pytorch. 
The data can be downloaded [here](http://files.grouplens.org/datasets/movielens/ml-latest-small.zip). -------------------------------------------------------------------------------- /additional/collaborative_filtering/data/.gitignore: -------------------------------------------------------------------------------- 1 | links.csv 2 | movies.csv 3 | ratings.csv 4 | README.txt 5 | tags.csv -------------------------------------------------------------------------------- /additional/entity_embedding/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints -------------------------------------------------------------------------------- /additional/entity_embedding/README.md: -------------------------------------------------------------------------------- 1 | # Entity embedding 2 | 3 | Implementing [entity embedding](https://arxiv.org/abs/1604.06737) on the Rossmann Store Sales dataset. The data can be downloaded [here](https://www.kaggle.com/c/rossmann-store-sales). -------------------------------------------------------------------------------- /additional/entity_embedding/data/.gitignore: -------------------------------------------------------------------------------- 1 | store 2 | test 3 | train -------------------------------------------------------------------------------- /additional/prune/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | -------------------------------------------------------------------------------- /additional/prune/README.md: -------------------------------------------------------------------------------- 1 | # Pruning 2 | 3 | Pruning a CNN model. They are tested on the MNIST data set, which can be downloaded [here](https://www.kaggle.com/c/digit-recognizer). Note that this notebook requires PyTorch 1.4 or higher. 4 | 5 | The pruning is based on thresholding, i.e. calculate a threshold value and set all weights lower than the threshold to zero. 
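As a rough, standalone sketch of that idea (not the notebook's exact code), the snippet below zeroes every weight of a single convolution whose magnitude falls below a chosen percentile threshold; the layer shape and the 50% amount are arbitrary placeholders:

```python
import numpy as np
import torch
import torch.nn as nn

conv = nn.Conv2d(8, 16, kernel_size=3)  # stand-in layer, not taken from the notebook
percentile = 50.0                       # hypothetical pruning amount (in %)

with torch.no_grad():
    weights = conv.weight
    # Threshold = the chosen percentile of the absolute weight values.
    threshold = np.percentile(weights.abs().cpu().numpy(), percentile)
    mask = (weights.abs() > threshold).float()
    conv.weight.mul_(mask)              # weights below the threshold become zero

print(f"Sparsity after pruning: {1.0 - mask.mean().item():.2f}")
```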
-------------------------------------------------------------------------------- /additional/prune/data/.gitignore: -------------------------------------------------------------------------------- 1 | train.csv 2 | test.csv 3 | -------------------------------------------------------------------------------- /additional/prune/prune.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import random\n", 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "from sklearn.model_selection import train_test_split\n", 14 | "from copy import deepcopy\n", 15 | "\n", 16 | "import torch\n", 17 | "import torch.nn as nn\n", 18 | "import torch.nn.functional as F\n", 19 | "import torch.optim as optim\n", 20 | "from torch.utils.data import Dataset, DataLoader\n", 21 | "from torch.nn.utils.prune import BasePruningMethod" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "DATA_PATH = \"./data/\"\n", 31 | "seed = 42\n", 32 | "device = \"cuda\"" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "df = pd.read_csv(DATA_PATH + \"train.csv\")" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "y = df[\"label\"].values\n", 51 | "X = df.drop(\"label\", axis=1).values\n", 52 | "X = X.reshape((len(X), 1, 28, 28))\n", 53 | "\n", 54 | "X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=seed)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 5, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "class MNIST(Dataset):\n", 64 | " def __init__(self, X, y):\n", 65 | " self.X = torch.from_numpy(X).float()\n", 66 | " self.y = torch.from_numpy(y).long()\n", 67 | " \n", 68 | " def __len__(self):\n", 69 | " return len(self.y)\n", 70 | " \n", 71 | " def __getitem__(self, idx):\n", 72 | " return self.X[idx, :, :, :], self.y[idx]" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 6, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "batch_size = 128\n", 82 | "num_workers = 4\n", 83 | "\n", 84 | "train_dataset = MNIST(X_train, y_train)\n", 85 | "train_dataloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)\n", 86 | "\n", 87 | "val_dataset = MNIST(X_val, y_val)\n", 88 | "val_dataloader = DataLoader(val_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 7, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "class BNLayer(nn.Module):\n", 98 | " def __init__(self, in_channels, out_channels, kernel_size=3, stride=1):\n", 99 | " super().__init__()\n", 100 | " self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,\n", 101 | " stride=stride, padding=1, bias=False)\n", 102 | " self.bn = nn.BatchNorm2d(out_channels)\n", 103 | " self.act = nn.ReLU()\n", 104 | " \n", 105 | " def forward(self, x):\n", 106 | " x = self.conv(x)\n", 107 | " x = self.bn(x)\n", 108 | " x = self.act(x)\n", 109 | " return x\n", 110 | "\n", 111 | "\n", 112 | "class Block(nn.Module):\n", 113 | " def __init__(self, 
in_channels, out_channels):\n", 114 | " super().__init__()\n", 115 | " self.conv_1 = BNLayer(in_channels, in_channels)\n", 116 | " self.conv_2 = BNLayer(in_channels, out_channels, stride=2)\n", 117 | " \n", 118 | " def forward(self, x):\n", 119 | " x = self.conv_1(x)\n", 120 | " x = self.conv_2(x)\n", 121 | " return x\n", 122 | " \n", 123 | "class Net(nn.Module):\n", 124 | " def __init__(self, channels, num_classes=10):\n", 125 | " super().__init__()\n", 126 | " \n", 127 | " self.conv_blocks = []\n", 128 | " \n", 129 | " self.conv_blocks.append(BNLayer(1, channels[0], kernel_size=5)) \n", 130 | " for i in range(len(channels) - 1):\n", 131 | " self.conv_blocks.append(Block(channels[i], channels[i+1]))\n", 132 | " self.conv_blocks = nn.Sequential(*self.conv_blocks)\n", 133 | " \n", 134 | " self.linear = nn.Linear(32, num_classes)\n", 135 | " \n", 136 | " def forward(self, x):\n", 137 | " x = self.conv_blocks(x)\n", 138 | " x = F.adaptive_avg_pool2d(x, 1)\n", 139 | " x = x.view(len(x), -1)\n", 140 | " x = self.linear(x)\n", 141 | " return x" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 8, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "torch.manual_seed(seed)\n", 151 | "\n", 152 | "channels = [8, 16, 32]\n", 153 | "model = Net(channels).to(device)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 9, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "num_epochs = 41\n", 163 | "lr = 1e-4\n", 164 | "optimizer = optim.Adam(model.parameters(), lr=lr)\n", 165 | "loss_fct = nn.CrossEntropyLoss()" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 10, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "def calculate_accuracy(y_pred, y_true):\n", 175 | " y_pred = F.softmax(y_pred, dim=1)\n", 176 | " y_pred = torch.argmax(y_pred, dim=1)\n", 177 | " correct = y_pred == y_true\n", 178 | " return torch.mean(correct.float()).numpy()\n", 179 | "\n", 180 | "def one_step(model, optimizer):\n", 181 | " train_loss, val_loss = 0.0, 0.0 \n", 182 | " val_acc = 0.0\n", 183 | " \n", 184 | " model.train()\n", 185 | " for _, (x, y) in enumerate(train_dataloader):\n", 186 | " model.zero_grad()\n", 187 | " y, x = y.to(device), x.to(device)\n", 188 | " \n", 189 | " y_hat = model(x)\n", 190 | " batch_loss = loss_fct(y_hat, y)\n", 191 | " batch_loss.backward()\n", 192 | " optimizer.step()\n", 193 | " \n", 194 | " train_loss += batch_loss.detach().cpu().numpy() / len(y)\n", 195 | "\n", 196 | " model.eval()\n", 197 | " for _, (x, y) in enumerate(val_dataloader):\n", 198 | " y, x = y.to(device), x.to(device)\n", 199 | " y_hat = model(x)\n", 200 | " batch_loss = loss_fct(y_hat, y)\n", 201 | " val_loss += batch_loss.detach().cpu().numpy() / len(y)\n", 202 | " val_acc += calculate_accuracy(y_hat.detach().cpu(), y.cpu())\n", 203 | " \n", 204 | " train_loss = np.round(train_loss / len(train_dataloader), 6)\n", 205 | " val_loss = np.round(val_loss / len(val_dataloader), 6)\n", 206 | " val_acc = np.round(val_acc / len(val_dataloader), 6)\n", 207 | " \n", 208 | " return train_loss, val_loss, val_acc" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 11, 214 | "metadata": { 215 | "scrolled": true 216 | }, 217 | "outputs": [ 218 | { 219 | "name": "stdout", 220 | "output_type": "stream", 221 | "text": [ 222 | "-------- Epoch 0 --------\n", 223 | "Train loss: 0.016908\n", 224 | "Val loss: 0.015977\n", 225 | "Val acc: 0.34977\n", 226 | "-------- Epoch 5 --------\n", 227 | 
"Train loss: 0.008611\n", 228 | "Val loss: 0.007999\n", 229 | "Val acc: 0.864741\n", 230 | "-------- Epoch 10 --------\n", 231 | "Train loss: 0.004498\n", 232 | "Val loss: 0.004228\n", 233 | "Val acc: 0.940893\n", 234 | "-------- Epoch 15 --------\n", 235 | "Train loss: 0.00263\n", 236 | "Val loss: 0.002474\n", 237 | "Val acc: 0.954568\n", 238 | "-------- Epoch 20 --------\n", 239 | "Train loss: 0.001761\n", 240 | "Val loss: 0.0017\n", 241 | "Val acc: 0.962561\n", 242 | "-------- Epoch 25 --------\n", 243 | "Train loss: 0.001303\n", 244 | "Val loss: 0.001281\n", 245 | "Val acc: 0.968401\n", 246 | "-------- Epoch 30 --------\n", 247 | "Train loss: 0.001032\n", 248 | "Val loss: 0.001024\n", 249 | "Val acc: 0.97229\n", 250 | "-------- Epoch 35 --------\n", 251 | "Train loss: 0.000861\n", 252 | "Val loss: 0.000884\n", 253 | "Val acc: 0.975289\n", 254 | "-------- Epoch 40 --------\n", 255 | "Train loss: 0.000738\n", 256 | "Val loss: 0.000788\n", 257 | "Val acc: 0.977645\n" 258 | ] 259 | } 260 | ], 261 | "source": [ 262 | "torch.manual_seed(seed)\n", 263 | "np.random.seed(seed)\n", 264 | "random.seed(seed)\n", 265 | "\n", 266 | "optimizer = optim.Adam(model.parameters(), lr=lr)\n", 267 | "\n", 268 | "for epoch in range(num_epochs):\n", 269 | " train_loss, val_loss, val_acc = one_step(model, optimizer)\n", 270 | " \n", 271 | " if epoch % 5 == 0:\n", 272 | " print(f\"-------- Epoch {epoch} --------\")\n", 273 | " print(f\"Train loss: {train_loss}\")\n", 274 | " print(f\"Val loss: {val_loss}\")\n", 275 | " print(f\"Val acc: {val_acc}\")" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 12, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [ 284 | "class ThresholdPruning(BasePruningMethod):\n", 285 | " PRUNING_TYPE = \"unstructured\"\n", 286 | "\n", 287 | " def __init__(self, percentile_threshold, type_):\n", 288 | " \"\"\"\n", 289 | " Two types of pruning is supported:\n", 290 | " global - calculate one threshold value based on the conv layer.\n", 291 | " kernel - calculate threshold values per kernel (i.e. 
per 3x3 kernel)\n", 292 | " \"\"\"\n", 293 | "\n", 294 | " if type(percentile_threshold) == float:\n", 295 | " percentile_threshold = percentile_threshold * 100\n", 296 | " self.percentile_threshold = percentile_threshold\n", 297 | "\n", 298 | " if type_ != \"global\" and type_ != \"kernel\":\n", 299 | " raise ValueError(\"Only global or kernel is supported\")\n", 300 | " self.type_ = type_\n", 301 | "\n", 302 | " def compute_mask(self, t, default_mask):\n", 303 | " if self.type_ == \"global\":\n", 304 | " threshold = np.percentile(np.abs(t.detach().cpu().numpy()), self.percentile_threshold)\n", 305 | " mask = torch.abs(t) > threshold\n", 306 | " else:\n", 307 | " threshold = np.percentile(np.abs(t.detach().cpu().numpy()), self.percentile_threshold, axis=(2, 3))\n", 308 | " threshold = torch.from_numpy(threshold).unsqueeze(-1).unsqueeze(-1).to(device)\n", 309 | " mask = torch.abs(t) > threshold\n", 310 | " return mask\n", 311 | "\n", 312 | " @classmethod\n", 313 | " def apply(cls, module, name, percentile_threshold, type_):\n", 314 | " return super(ThresholdPruning, cls).apply(module, name, percentile_threshold, type_)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 13, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "def prune_conv(model, amount, type_):\n", 324 | " for module in model.modules():\n", 325 | " if type(module) == nn.Conv2d:\n", 326 | " ThresholdPruning.apply(module, \"weight\", amount, type_)" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 14, 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [ 335 | "torch.manual_seed(seed)\n", 336 | "np.random.seed(seed)\n", 337 | "random.seed(seed)\n", 338 | "\n", 339 | "global_prune_amount = [i / 10 for i in range(1, 10)]\n", 340 | "global_acc = []\n", 341 | "\n", 342 | "for p in global_prune_amount:\n", 343 | " global_prune_model = deepcopy(model)\n", 344 | " prune_conv(global_prune_model, p, \"global\")\n", 345 | " prune_global_optimizer = optim.Adam(global_prune_model.parameters(), lr=lr)\n", 346 | "\n", 347 | " for epoch in range(5):\n", 348 | " _, _, val_acc = one_step(global_prune_model, prune_global_optimizer)\n", 349 | " global_acc.append(val_acc)" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 15, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "torch.manual_seed(seed)\n", 359 | "np.random.seed(seed)\n", 360 | "random.seed(seed)\n", 361 | "\n", 362 | "kernel_level_prune_amount = [i / 10 for i in range(1, 10)]\n", 363 | "kernel_level_acc = []\n", 364 | "\n", 365 | "for p in kernel_level_prune_amount:\n", 366 | " kernel_prune_model = deepcopy(model)\n", 367 | " prune_conv(kernel_prune_model, p, \"kernel\")\n", 368 | " prune_kernel_optimizer = optim.Adam(kernel_prune_model.parameters(), lr=lr)\n", 369 | "\n", 370 | " for epoch in range(5):\n", 371 | " _, _, val_acc = one_step(kernel_prune_model, prune_kernel_optimizer)\n", 372 | " kernel_level_acc.append(val_acc)" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 16, 378 | "metadata": {}, 379 | "outputs": [ 380 | { 381 | "data": { 382 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYgAAAFzCAYAAAAkFp78AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de5hddX3v8fc34RKGmxRSTiVkJlooENBEhqA1cvEClCpREQXHCupxAAtWW+3RE3ukcKKteI71eEkZvHAbBMS24qVq5SQqym1ouAhyDUmYYm2IymkIAZJ8zx9rTdiZrCQ7mVmzdzLv1/PsZ6/b3uu71yT7s9f6rfVbkZlIkjTchFYXIElqTwaEJKmSASFJqmRASJIqGRCSpEoGhCSp0k6tLmC07LffftnV1dXqMiRpu3LHHXc8kZmTq+btMAHR1dXFwMBAq8uQpO1KRCzd1DwPMUmSKhkQkqRKBoQkqdIO0wYhSVWee+45BgcHWb16datLaalJkyYxZcoUdt5556ZfY0BI2qENDg6y55570tXVRUS0upyWyExWrFjB4OAg06ZNa/p1HmKStENbvXo1++6777gNB4CIYN99993qvSgDQtIObzyHw5Bt2QYGhCS1wFlnncX111+/2WW6urp44oknmn7Pyy67jPPOO2+kpa1nQEiSKhkQktSgvx+6umDChOK5v3/k73nRRRdxyCGH8LrXvY4zzjiDT3/60xvMv/HGG5k5cyZHHHEE7373u3nmmWfWz7v44ouZNWsWs2bN4uGHHwbgW9/6FkcffTQzZ87kta99Lb/61a9GXmQFA0KSSv390NsLS5dCZvHc2zuykBgYGOAb3/gGixYt4h/+4R826hJo9erVnHXWWVx77bXcc889rFmzhvnz56+fv9dee3Hbbbdx3nnn8YEPfACA2bNnc8stt7Bo0SJOP/10PvWpT217gZthQEhSae5cWLVqw2mrVhXTt9VNN93EnDlz2G233dhzzz15wxvesMH8Bx54gGnTpnHwwQcDcOaZZ/LjH/94/fwzzjhj/fPNN98MFKfunnjiiRxxxBFcfPHF3Hvvvdte4GYYEJJUWrZs66Y3IzNHNL/x7KOh4fPPP5/zzjuPe+65h0suuaS2iwANCEkqTZ26ddObMXv2bL71rW+xevVqVq5cyXe+850N5h9yyCEsWbJkffvClVdeybHHHrt+/rXXXrv++RWveAUATz75JAcccAAAl19++bYXtwVeSS1JpXnzijaHxsNMHR3F9G111FFHccopp/DSl76Uzs5Ouru72XvvvdfPnzRpEl/96lc57bTTWLNmDUcddRTnnHPO+vnPPPMMRx99NOvWreNrX/saABdccAGnnXYaBxxwAC9/+ct59NFHt73AzYgt7d5sL7q7u9P7QUga7he/+AWHHnpo08v39xdtDsuWFXsO8+ZBT8/Iali5ciV77LEHq1at4phjjqGvr4+XvexlI3vTbVC1LSLijszsrlrePQhJatDTM/JAGK63t5f77ruP1atXc+aZZ7YkHLaFASFJNbv66qtbXcI2sZFaklTJgJAkVTIgJEmVDAhJUiUDosFN7+tncKcu1sUEBnfq4qb3jUIvXSNUR8dhksbWkiVLOPzww8dsfRdccMFGHQJuCwOidNP7+pk5v5cpa5cygWTK2qXMnN/b0pDo74cfvqufhUu7WJMTWLi0ix++q9+QkMaJtWvXtnT9BkSpq28uu7NhL127s4quvhH00jVCt/5ZP59/rpcuitDqYimff66XW/+stQnRrns17VqXtjM1/0NavHgxM2fO5NZbb+XDH/4wRx11FC95yUu45JJLAFi4cCHHH388b3/72zniiCNYsmQJhx56KO9973uZPn06J5xwAk8//TQAjzzyCCeddBJHHnkkr3rVq7j//vtHtVYyc4d4HHnkkTkSa4nMooffDR5riRG970g8SmdlTY/S2bKarroqs6Njw5I6OorprdSudan17rvvvuYXrukf0qOPPprTp0/P+++/P2fMmJGLFi3KSy65JC+66KLMzFy9enUeeeSRuXjx4lywYEF2dHTk4sWL17924sSJuWjRoszMPO200/LKK6/MzMxXv/rV+eCDD2Zm5i233JLHH398ZmZ+/OMfz4svvnijOqq2BTCQm/hedQ+i9PjE6t64NjV9LEylugvJTU0fC3PnwpxV/TxKF2uZwKN0MWdV/4i6Qx6tuka7m2aNQzX+Q1q+fDlz5szhqquuYsaMGfzgBz/giiuuYMaMGRx99NGsWLGChx56CIBZs2Yxbdq09a+dNm0aM2bMAODII49kyZIlrFy5kp/97GecdtppzJgxg7PPPptf/vKXI66zkVdSl5b0zmOf+b0bHGZ6ig6W9M5jSotqWrXvVPZYsbR6egvqAXjl0n76eH47dbGUS+mldynAKPdPsBXq6KZZ41CN/5D23ntvDjzwQH76058yffp0MpPPfe5znHjiiRsst3DhQnbfffcNpu26667rhydOnMjTTz/NunXreMELXsCdd9454to2xT2I0uwv9rDo3D4GJ3ayjmBwYieLzu1j9hdb96W3x2fnsWaXjg2mrdmlgz0+O4KuJUfobydWt9X87cTW/lSvo5tmjUM1/kPaZZdd+Kd/+ieuuOIKrr76ak488UTmz5/Pc889B8CDDz7IU0891fT77bXXXkybNo2vf/3rQNFccNddd424zkYGRIPZX+xhypolTMh1TFmzpKXhAEBPDzt9pQ86OyECOjuL8dHuSWwrHLC2+pfUpqaPlXnzim6ZG420m2aNQzX/Q9p999359re/zWc+8xn2339/DjvsMF72spdx+OGHc/bZZ7NmzZqter/+/n6+/OUv89KXvpTp06fzzW9+c1TqXG9TjRPb22OkjdRqUmdnZcN5dna2urL8yblX5WMTO3MtkY9N7MyfnNv6Fuqrrio2TUTxbKP52NuqRurMHfqPZiO16tWuP9X7+5l9+YbXscy+fIR3mx95SfT2Fje+zyyee1tbkprR0wNLlsC6dcVzC/fYW82A0Nbp6YG+DQ970dfaw15AW57G1IYlSVvFs5i09eq4o8pIteFpTG1YkrRV3IPQjqENT2Nqw5LGreJQ+/i2LdvAgNCOoQ3bRtqwJGD8dUkyadIkVqxYMa5DIjNZsWIFkyZN2qrXeYhJO4ahQ16jfbf5EZbU+dN+uvrm8sK1y3h84lSWnDmP2S2saajhfKhtZKjhfKjeHdGUKVMYHBxk+fLlrS6lpSZNmsSUKVt32W/sKKna3d2dAwMDrS5Det7wb2ModiFa2Kjf1QV/uLSfTzCXqSxjGVP578zjZ509LFnSkpLUYhFxR2Z2V83zEJNUlzY8jemVS/u5lA17CL6UXl65dAc/zqRtYkBIdWnD05jatasUtScDQqpLG57G1K5dpag9GRBSXdrwNKborA6nTU3X+GZASHVpx6vO2zC01L48zVWqU7tddd6GpwOrfRkQ0njTbqGltuUhJklSJQNCklTJgJAkVTIgJEmVDAhJUiUDQpJUyYCQJFUyIC
RJlQwISS130/v6Gdypi3UxgcGdurjpfXY/3g68klpSS930vn5mzu9d3w35lLVL2Wd+LzcBs7/oFd+t5B6EpJbq6qu+R0VXn/eoaDUDQlJLvXAT96LY1HSNHQNCUks9PrH6XhSbmq6xY0BIaqklvfN4ig3vUfEUHSzp9R4VrWZASGqp2V/sYdG5fQxO7GQdweDEThad22cDdRuIzKzvzSNOAj4LTAS+lJl/M2x+J/AVYDLwa+AdmTlYzlsL3FMuuiwzT9ncurq7u3NgYGCUP4Ek7dgi4o7M7K6aV9tprhExEfgC8DpgELg9Im7IzPsaFvs0cEVmXh4RrwY+CfxJOe/pzJxRV32SpM2r8xDTLODhzFycmc8C1wBzhi1zGHBjObygYr4kqUXqDIgDgMcaxgfLaY3uAk4th98E7BkR+5bjkyJiICJuiYg3Vq0gInrLZQaWL18+mrVL0rhXZ0BExbThDR4fAo6NiEXAscC/AWvKeVPL42JvB/4uIl680Ztl9mVmd2Z2T548eRRLlyTV2dXGIHBgw/gU4PHGBTLzceDNABGxB3BqZj7ZMI/MXBwRC4GZwCM11itJalDnHsTtwEERMS0idgFOB25oXCAi9ouIoRo+SnFGExGxT0TsOrQM8EqgsXFbklSz2gIiM9cA5wHfB34BXJeZ90bEhRExdMrqccADEfEgsD8wdGXMocBARNxF0Xj9N8POfpIk1azW6yDGktdBSNLW29x1EF5JLUmqZEBIkioZEJKkSgaEJKmSASFJqmRASJIqGRCSpEoGhCSpkgEhSapkQEiSKhkQkqRKBoQkqZIBIUmqZEBIkioZEJKkSgaEJKmSASFJqmRASJIqGRCSpEoGhCSpkgEhSapkQEiSKhkQkqRKBoQkqZIBIUmqZEBIkioZEJKkSgaEJKmSASFJqmRASJIqGRCSVKG/H7q6YMKE4rm/v9UVjb2dWl2AJLWb/n7o7YVVq4rxpUuLcYCentbVNdbcg5CkYebOfT4chqxaVUwfTwwISRpm2bKtm76jMiAkaZipU7du+o7KgJCkYebNg46ODad1dBTTxxMDQpKG6emBvj7o7ISI4rmvb3w1UINnMUlSpZ6e8RcIw7kHIUmqZEBIkioZEJKkSgaEJKmSASFJqmRASJIqGRCSpEoGhCRVsb9vL5STpI3Y3zfgHoQkbcz+vgEDQpI2Zn/fgAEhSRuzv2/AgJCkjdnfN2BASNLG7O8b8CwmSapmf9/uQUiSqhkQkqRKBoQkqZIBIUmqZEBIkioZEJKkSrUGREScFBEPRMTDEfGRivmdEXFjRNwdEQsjYkrDvDMj4qHycWaddUqSNlZbQETEROALwB8BhwFnRMRhwxb7NHBFZr4EuBD4ZPna3wE+DhwNzAI+HhH71FWrJGljde5BzAIezszFmfkscA0wZ9gyhwE3lsMLGuafCPxLZv46M38D/AtwUo21SpKGqTMgDgAeaxgfLKc1ugs4tRx+E7BnROzb5GuJiN6IGIiIgeXLl49a4ZKkegMiKqblsPEPAcdGxCLgWODfgDVNvpbM7MvM7szsnjx58kjrlSQ1qLMvpkHgwIbxKcDjjQtk5uPAmwEiYg/g1Mx8MiIGgeOGvXZhjbVKkoapcw/iduCgiJgWEbsApwM3NC4QEftFxFANHwW+Ug5/HzghIvYpG6dPKKdJksZIbQGRmWuA8yi+2H8BXJeZ90bEhRFxSrnYccADEfEgsD8wr3ztr4GLKELmduDCcpokaYxE5kaH9rdL3d3dOTAw0OoyJGm7EhF3ZGZ31TyvpJYkVTIgJEmVDAhJUqUtBkREnGc3F5I0/jSzB/FfgNsj4rqy872qi9gkSTuYLQZEZn4MOAj4MnAW8FBEfCIiXlxzbZKkFmqqDSKLc2H/vXysAfYBro+IT9VYmySphbbY1UZEvB84E3gC+BLw4cx8rrwC+iHgL+stUZLUCs30xbQf8ObMXNo4MTPXRcTr6ylLktRqzRxi+i6wvpuLiNgzIo4GyMxf1FWYJKm1mgmI+cDKhvGnymmSpB1YMwER2dBhU2auo95uwiVJbaCZgFgcEe+PiJ3Lx58Bi+suTJLUWs0ExDnAH1Lc7W0QOBrorbMoSVLrbfFQUWb+B8XNfiRJ40gz10FMAt4DTAcmDU3PzHfXWJckqcWaOcR0JUV/TCcCP6K4P/R/1lmUJKn1mgmI38/MvwKeyszLgT8Gjqi3LElSqzUTEM+Vz7+NiMOBvYGu2iqSJLWFZq5n6CvvB/FXwA3AHsD/qLUqSVLLNXMW05fKwR8BL6q3HElSu2jmLKbKvYXMvHD0y5EktYtmDjE91TA8CXg9YCd9krSDa+YQ0/9qHI+IT1O0RUiSdmBN3VFumA5si5CkHV4zbRD3AEO9uU4EJgO2P0jSDq6ZNojGu8atAX6VmWtqqkeS1CY2GxDlfae/k5mHj1E9kqQ2sdk2iPLmQHdFxNQxqkeS1CaaOcT0e8C9EXEbDae8ZuYptVUlSWq5ZgLir2uvQpLUdpoJiGXALzNzNUBE7AbsX2tVkqSWa+Y6iK8D6xrG15bTJEk7sGYCYqfMfHZopBzepb6SJEntoJmAWB4R6xukI2IO8ER9JUmS2kEzbRDnAP0R8flyfBB4Z30lSZLaQTOd9T0CvDwi9gAiM70ftSSNA1s8xBQRn4iIF2Tmysz8z4jYJyL+51gUJ0lqnWbaIP4oM387NJKZvwFOrq8kSVI7aCYgJkbErkMj5XUQu25meUnSDqCZRuqrgBsj4qvl+LuAy+srSZLUDppppP5URNwNvBYI4HtAZ92FSZJaq9k7yv07xdXUpwKvwXtSS9IOb5N7EBFxMHA6cAawAriW4jTX48eoNklSC23uENP9wE+AN2TmwwAR8cExqUqS1HKbO8R0KsWhpQURcWlEvIaiDUKSNA5sMiAy8x8z823AIcBC4IPA/hExPyJOGKP6JEktssVG6sx8KjP7M/P1wBTgTuAjtVcmSWqpZs9iAiAzf52Zl2Tmq+sqSJLUHrYqICRJ44cBIUmqZEBIkioZEJKkSgaEJKmSASFJqmRASJIqGRCSpEoGhCSpUq0BEREnRcQDEfFwRGzUPUdETI2IBRGxKCLujoiTy+ldEfF0RNxZPv6+zjolSRtr5paj2yQiJgJfAF4HDAK3R8QNmXlfw2IfA67LzPkRcRjwXaCrnPdIZs6oqz5J0ubVuQcxC3g4Mxdn5rPANcCcYcsksFc5vDfweI31SJK2Qp0BcQDwWMP4YDmt0QXAOyJikGLv4fyGedPKQ08/iohX1VinJKlCnQFRdXOhHDZ+BnBZZk4BTgaujIgJwC+BqZk5E/hz4OqI2GvYa4mI3ogYiIiB5cuXj3L5kjS+1RkQg8CBDeNT2PgQ0nuA6wAy82ZgErBfZj6TmSvK6XcAjwAHD19BZvZlZndmdk+ePLmGjyBJ41edAXE7cFBETIuIXYDTgRuGLbMMeA1ARBxKERDLI2Jy2chNRLwIOAhYXGOtkqRhajuLKTPXRMR5wPeBicBXMvPeiLgQGMjMG4C/AC6NiA9SHH46KzMzIo4BL
oyINcBa4JzM/HVdtUqSNhaZw5sFtk/d3d05MDDQ6jIkabsSEXdkZnfVPK+kliRVMiAkSZUMCElSJQNCklTJgJAkVTIgJEmVDAhJUiUDQpJUyYCQJFUyICRJlQwISVIlA0KSVMmAkCRVMiAkSZUMCElSJQNCklTJgJAkVTIgJEmVDAhJUiUDQpJUyYCQJFUyICRJlQwISVIlA0KSVMmAkCRVMiAkSZUMCElSJQNCklTJgJAkVTIgJEmVDAhJUiUDQpJUyYCQJFUyICRJlQwISVIlA0KSVMmAkCRVMiAkSZUMCElSJQNCklTJgJAkVTIgJEmVDAhJUiUDQpJUyYCQpO1Ifz90dcGECcVzf39969qpvreWJI2m/n7o7YVVq4rxpUuLcYCentFfn3sQkrSdmDv3+XAYsmpVMb0OBoQkbSeWLdu66SNlQEjSdmLq1K2bPlIGhCRtJ+bNg46ODad1dBTT62BASNJ2oqcH+vqgsxMiiue+vnoaqMGzmCRpu9LTU18gDOcehCSpkgEhSapkQEiSKhkQkqRKBoQkqZIBIUmqZEBIkirVGhARcVJEPBARD0fERyrmT42IBRGxKCLujoiTG+Z9tHzdAxFxYp11SpI2VtuFchExEfgC8DpgELg9Im7IzPsaFvsYcF1mzo+Iw4DvAl3l8OnAdOCFwA8j4uDMXFtXvZKkDdW5BzELeDgzF2fms8A1wJxhyySwVzm8N/B4OTwHuCYzn8nMR4GHy/eTJI2ROgPiAOCxhvHBclqjC4B3RMQgxd7D+VvxWiKiNyIGImJg+fLlo1W3JIl6AyIqpuWw8TOAyzJzCnAycGVETGjytWRmX2Z2Z2b35MmTR1ywJOl5dXbWNwgc2DA+hecPIQ15D3ASQGbeHBGTgP2afK0kqUZ17kHcDhwUEdMiYheKRucbhi2zDHgNQEQcCkwClpfLnR4Ru0bENOAg4LYaa5UkDVPbHkRmromI84DvAxOBr2TmvRFxITCQmTcAfwFcGhEfpDiEdFZmJnBvRFwH3AesAf7UM5gkaWxF8X28/evu7s6BgYFWlyFJ25WIuCMzu6vmeSW1JKmSASFJqmRASJIqGRCSpEoGhCSpkgEhSapkQEiSKhkQkqRKBoQkqZIBIUmqZEBIkioZEJKkSgaEJKmSASFJqmRASJIqGRCSpEoGhCSpkgEhSapkQEiSKhkQkqRKBoQkqZIBIUmqZEBIkioZEJKkSgaEJKmSASFJqmRASJIqGRCSpEoGhCSpkgEhSapkQEiSKhkQkrQ96e+Hri6YMKF47u+vbVU71fbOkqTR1d8Pvb2walUxvnRpMQ7Q0zPqq3MPQpK2F3PnPh8OQ1atKqbXwICQpO3FsmVbN32EDAhJ2l5Mnbp100fIgJCk7cW8edDRseG0jo5ieg0MCEnaXvT0QF8fdHZCRPHc11dLAzV4FpMkbV96emoLhOHcg5AkVTIgJEmVDAhJUiUDQpJUyYCQJFUyICRJlQwISVIlA0KSVMmAkCRVMiAkSZUMCElSJQNCklTJgJAkVYrMbHUNoyIilgNLR+nt9gOeGKX3Gi3W1Lx2rMuammNNzRutujozc3LVjB0mIEZTRAxkZner62hkTc1rx7qsqTnW1LyxqMtDTJKkSgaEJKmSAVGtr9UFVLCm5rVjXdbUHGtqXu112QYhSarkHoQkqdK4DoiIOCkiHoiIhyPiIxXzj4mIf42INRHxljap6c8j4r6IuDsiboyIzjao6ZyIuCci7oyImyLisFbX1LDcWyIiI2JMzkJpYludFRHLy211Z0T811bXVC7z1vLf1b0RcXWra4qIzzRsowcj4rdtUNPUiFgQEYvK/38nt0FNneX3wN0RsTAipoxqAZk5Lh/AROAR4EXALsBdwGHDlukCXgJcAbylTWo6Hugoh88Frm2DmvZqGD4F+F6rayqX2xP4MXAL0N0mf7+zgM/XXctW1nQQsAjYpxz/3VbXNGz584GvtLomimP+55bDhwFL2qCmrwNnlsOvBq4czRrG8x7ELODhzFycmc8C1wBzGhfIzCWZeTewro1qWpCZq8rRW4DR/cWwbTX9v4bR3YG6G7a2WFPpIuBTwOqa69nausZSMzW9F/hCZv4GIDP/ow1qanQG8LU2qCmBvcrhvYHH26Cmw4Aby+EFFfNHZDwHxAHAYw3jg+W0Vtramt4D/HOtFTVZU0T8aUQ8QvGF/P5W1xQRM4EDM/PbNdeyVXWVTi0PCVwfEQe2QU0HAwdHxE8j4paIOKkNagKKQyjANOD/tkFNFwDviIhB4LsUezatruku4NRy+E3AnhGx72gVMJ4DIiqmtfqUrqZrioh3AN3AxbVW1GRNmfmFzHwx8N+Aj7WypoiYAHwG+Iua6xiumW31LaArM18C/BC4vA1q2oniMNNxFL/WvxQRL2hxTUNOB67PzLU11gPN1XQGcFlmTgFOBq4s/621sqYPAcdGxCLgWODfgDWjVcB4DohBoPHX2xTq32XckqZqiojXAnOBUzLzmXaoqcE1wBtrrWjLNe0JHA4sjIglwMuBG8agoXqL2yozVzT8zS4Fjmx1TeUy38zM5zLzUeABisBoZU1DTqf+w0vQXE3vAa4DyMybgUkU/SG1rKbMfDwz35yZMym+E8jMJ0etgjobWdr5QfGraTHF7utQA9D0TSx7GWPTSL3FmoCZFA1XB7XLdmqsBXgDMNDqmoYtv5CxaaRuZlv9XsPwm4Bb2qCmk4DLy+H9KA5r7Nvqvx/wB8ASyuu12mA7/TNwVjl8KMWXdW21NVnTfsCEcngecOGo1lD3hm/nB8Vu4oPlF+7cctqFFL/MAY6iSPGngBXAvW1Q0w+BXwF3lo8b2qCmzwL3lvUs2NyX9VjVNGzZMQmIJrfVJ8ttdVe5rQ5pg5oC+N/AfcA9wOmtrqkcvwD4m7H4uzW5nQ4Dflr+7e4ETmiDmt4CPFQu8yVg19Fcv1dSS5Iqjec2CEnSZhgQkqRKBoQkqZIBIUmqZEBIkioZENpqEbG27GXz5xHx9YjoaFEdH2jVusv1X1z2fjoqV7NHxAsj4vomllu5ielvHElPumVvoG1x7+WIuGyselDWphkQ2hZPZ+aMzDwceBY4p9kXRsTEUazjA0DLAgI4G3hZZn54NN4si6tiR/Kl+EaKc/VH1Sj/zbQdMSA0Uj8Bfh+K/qEi4rZy7+KSoS+WiFgZERdGxK3AKyLiqIj4WUTcVS6/Z0RMLH+R3152ZHd2+drjyl+210fE/RHRH4X3Ay8EFkTEgnLZ+RExUP6q/+uhAiPi5PK1N0XE/4mIb5fTd4+Ir5TrXBQRG/WEWa7r4nJv6Z6IeFs5/QaKnmtvHZrW8Jp7IuIF5WtXRMQ7y+lXRsRrN/NZuyLi5+VwR0RcV86/NiJubfx1HxHzyu13S0TsHxF/SNHV+sXl9n9xRLw/nr93yDUVn223iLhmaB3Abg3zhv/NXlNuo3vKbbZrudySiPjb8u94W0QM/Vu4LCL+PiJ+EsX9HF5fTt/UZ4+I+HxZ73eA323qX5/qNVZXKfrYcR7AyvJ5J+CbFPelOJSiI7qd
y3lfBN5ZDifw1nJ4F4ruA44qx/cq36cX+Fg5bVdggKKLgeOAJyn6oZkA3AzMLpdbAuzXUNfvlM8TKa6efglFfzmPAdPKeV8Dvl0OfwJ4Rzn8AoqrUXcf9llPBf6lfM/9gWWU3WUMbYeK7fP3wB9T9Ad1O3BpOf0hYI/NfNYu4Ofl9A8Bl5TDh1N0wNbdsD3fUA5/quG9LqOhSxiKriB2Hfp8FXX+OeV9FsptNXwdQ3+zoW14cDl+BfCBhr/B0BW+72zYtpcB3yv/ZgdR9EgwaTOf/c0N2/mFwG8Zg+5tfGz+4R6EtsVuEXEnxX/uZcCXgddQdDx3eznvNRQ3OgFYC3yjHP4D4JeZeTsU95LIzDXACcA7y9feCuzL8x3G3ZaZg5m5jqKLg65N1PXWiPhXipvfTKc43HIIsDiLTuhgw47fTgA+Uq5zIcUX2NRh7zkb+Fpmrn+GHKEAAAM2SURBVM3MXwE/ouiCZXN+AhxTPuYDR0TEAcCvM3PlFj5r43qvAcjMnwN3N8x7FhjqxvwONr097gb6o+j5t6qHz2OAq8p13D1sHcP/Zo9m5oPl+OXla4d8reH5FQ3Tr8vMdZn5EMWPgkPY9Gc/hue38+PU3723mrBTqwvQdunpzJzROCEigqLDt49WLL86n++uOaju2jmA8zPz+8Pe9zigscfatVT8u42IaRS/uo/KzN9ExGUUX/hVXSY3rvPUzHxgC8tsrR8Df0oRNnMpOuV7C0VwDL1n1WftanK9z2X5E5xNbI/SH1N88Z4C/FVETC/DuNGm+toZ/jfbnGxieGh8U5/95M3UohZxD0Kj5UbgLRHxuwAR8TtRfb/s+4EXRsRR5XJ7RsROwPeBcyNi53L6wRGx+xbW+Z8UXXtDcajqKeDJiNgf+KOG9b2o4cu3sb3g+8D5ZbgN3WRouB8DbyuPnU+m+MK9bXNFZeZjFL1sHpSZi4GbKMJrKCCa+aw3AW8t5x8GHLG5dZbWb48o7lNwYGYuAP6S4hDaHhWfradc/nCKw0xV7ge6htoXgD+h2JMa8raG55sbpp8WERMi4sUUe5MPsOnP/mPg9HI7/x7FrXXVYu5BaFRk5n0R8THgB+WX03MUv6KXDlvu2bJR93MRsRvwNPBaip4ou4B/Lb+wl7Pl+0r0Af8cEb/MzOOjuGnKvRSHM35aru/piHgf8L2IeIINv9wvAv4OuLtc5xLg9cPW8Y8Uh03uoviF+5eZ+e9NbJJbKY6nQxEMn6T40qfJz/pF4PKIuJvikNndFG0xm3MNcGkUDfinA1+OiL0pfrV/JjN/O2z5+cBXy3XcySaCLzNXR8S7gK+XYX47RTvLkF3LxuwJFDfVGfIARZDsD5xTvs+mPvs/UtxT+R6KtqDGAFKL2JurdngRsUdmriy/kL4APJSZn2l1XZsTxRlgO5dfqi+m2EM7OIt7E7eNKG7I1J2ZTwybfhlFg/UWr+tQ+3IPQuPBeyPiTIozqBYBl7S4nmZ0UJzCuzPFHsC57RYO2vG5ByFJqmQjtSSpkgEhSapkQEiSKhkQkqRKBoQkqZIBIUmq9P8Bm05uM8uhTXoAAAAASUVORK5CYII=\n", 383 | "text/plain": [ 384 | "
" 385 | ] 386 | }, 387 | "metadata": { 388 | "needs_background": "light" 389 | }, 390 | "output_type": "display_data" 391 | } 392 | ], 393 | "source": [ 394 | "plt.figure(figsize=(6, 6))\n", 395 | "plt.scatter(global_prune_amount, global_acc, c=\"b\", label=\"global\")\n", 396 | "plt.scatter(kernel_level_prune_amount, kernel_level_acc, c=\"r\", label=\"kernel\")\n", 397 | "plt.xlabel(\"Percentage of weights dropped\")\n", 398 | "plt.ylabel(\"Accruacy\")\n", 399 | "plt.legend()\n", 400 | "plt.show()" 401 | ] 402 | } 403 | ], 404 | "metadata": { 405 | "kernelspec": { 406 | "display_name": "Python 3", 407 | "language": "python", 408 | "name": "python3" 409 | }, 410 | "language_info": { 411 | "codemirror_mode": { 412 | "name": "ipython", 413 | "version": 3 414 | }, 415 | "file_extension": ".py", 416 | "mimetype": "text/x-python", 417 | "name": "python", 418 | "nbconvert_exporter": "python", 419 | "pygments_lexer": "ipython3", 420 | "version": "3.7.4" 421 | } 422 | }, 423 | "nbformat": 4, 424 | "nbformat_minor": 2 425 | } 426 | -------------------------------------------------------------------------------- /additional/transformers/README.md: -------------------------------------------------------------------------------- 1 | # Transformers 2 | 3 | Implementing the "Transformer" architecture described in the paper: [Attention Is All You Need](https://arxiv.org/abs/1706.03762). 4 | -------------------------------------------------------------------------------- /computer_vision/generative_models/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints -------------------------------------------------------------------------------- /computer_vision/generative_models/README.md: -------------------------------------------------------------------------------- 1 | # Generative models 2 | 3 | Implementation of common deep generative models (for images). They are tested on the MNIST data set, which can be downloaded [here](https://www.kaggle.com/oddrationale/mnist-in-csv/). 4 | 5 | The models are: 6 | 7 | - [DCGAN](https://arxiv.org/abs/1511.06434) 8 | - [Wasserstein GAN](https://arxiv.org/abs/1701.07875) 9 | - [VAE](https://arxiv.org/abs/1312.6114) 10 | - [Conditional VAE](https://papers.nips.cc/paper/5775-learning-structured-output-representation-using-deep-conditional-generative-models) -------------------------------------------------------------------------------- /computer_vision/generative_models/data/.gitignore: -------------------------------------------------------------------------------- 1 | mnist_train.csv -------------------------------------------------------------------------------- /computer_vision/image_classification/binary_label_classification/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints -------------------------------------------------------------------------------- /computer_vision/image_classification/binary_label_classification/README.md: -------------------------------------------------------------------------------- 1 | # Pneumonia Classifying pneumonia based on X-ray images. The data can be downloaded [here](https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia). 
-------------------------------------------------------------------------------- /computer_vision/image_classification/binary_label_classification/data/.gitignore: -------------------------------------------------------------------------------- 1 | chest-xray-pneumonia.zip 2 | chest-xray-pneumonia/ -------------------------------------------------------------------------------- /computer_vision/image_classification/multi_label_classification/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | data/train-jpg.tar 3 | data/train-jpg.tar.7z 4 | data/train_v2.csv 5 | -------------------------------------------------------------------------------- /computer_vision/image_classification/multi_label_classification/README.md: -------------------------------------------------------------------------------- 1 | # Understanding the Amazon from Space Classifying satellite images of the Amazon rainforest. Note that this is a multi-class problem, i.e. there may be multiple classes within each image. The data can be downloaded [here](https://www.kaggle.com/c/planet-understanding-the-amazon-from-space). -------------------------------------------------------------------------------- /computer_vision/image_classification/multi_label_classification/data/.gitignore: -------------------------------------------------------------------------------- 1 | models 2 | tmp 3 | train-jpg 4 | test_v2_file_mapping.csv 5 | train_v2.csv 6 | test-jpg -------------------------------------------------------------------------------- /computer_vision/instance_segmentation/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | mask_rcnn_utils/__pycache__/ 3 | -------------------------------------------------------------------------------- /computer_vision/instance_segmentation/README.md: -------------------------------------------------------------------------------- 1 | # Instance segmentation 2 | 3 | Implementation of [Mask R-CNN](https://arxiv.org/abs/1703.06870). 4 | 5 | ## Data 6 | 7 | The model is trained on PASCAL VOC 2012, which can be downloaded [here](https://pjreddie.com/projects/pascal-voc-dataset-mirror/). 8 | 9 | The data needs to be converted, which can be done by running (make sure you are in the root folder of this project): 10 | 11 | ```bash 12 | python3 src/convert_annotations.py 13 | ``` 14 | -------------------------------------------------------------------------------- /computer_vision/instance_segmentation/data/.gitignore: -------------------------------------------------------------------------------- 1 | VOCdevkit/ 2 | annotations.json 3 | -------------------------------------------------------------------------------- /computer_vision/instance_segmentation/mask_rcnn_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import classes -------------------------------------------------------------------------------- /computer_vision/instance_segmentation/mask_rcnn_utils/box_functions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def intersect(box_a: torch.Tensor, box_b: torch.Tensor) -> float: 5 | max_xy = torch.min(box_a[:, None, 2:], box_b[None, :, 2:]) 6 | min_xy = torch.max(box_a[:, None, :2], box_b[None, :, :2]) 7 | inter = torch.clamp((max_xy - min_xy), min=0) 8 | return inter[:, :, 0] * inter[:, :, 1] 9 | 10 | 11 | def box_area(box: torch.Tensor) -> float: 12 | return (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]) 13 | 14 | 15 | def jaccard(box_a: torch.Tensor, box_b: torch.Tensor) -> float: 16 | intersection = intersect(box_a, box_b) 17 | union = box_area(box_a).unsqueeze(1) + box_area(box_b).unsqueeze(0) - intersection 18 | return intersection / union 19 | 20 | 21 | def loc2bbox(anchors, locs): 22 | # Converting anchors and predicted location deltas to "actual" bounding-boxes. 23 | 24 | if anchors.shape[0] == 0: 25 | return torch.zeros((0, 4), dtype=locs.dtype) 26 | 27 | w = anchors[:, 2] - anchors[:, 0] 28 | h = anchors[:, 3] - anchors[:, 1] 29 | ctr_x = anchors[:, 0] + 0.5 * w 30 | ctr_y = anchors[:, 1] + 0.5 * h 31 | 32 | dx = locs[:, 0::4] 33 | dy = locs[:, 1::4] 34 | dw = locs[:, 2::4] 35 | dh = locs[:, 3::4] 36 | 37 | ctr_x = dx * w[:, None] + ctr_x[:, None] 38 | ctr_y = dy * h[:, None] + ctr_y[:, None] 39 | w = torch.exp(dw) * w[:, None] 40 | h = torch.exp(dh) * h[:, None] 41 | 42 | bbox = torch.zeros_like(locs) 43 | bbox[:, 0::4] = ctr_x - 0.5 * w 44 | bbox[:, 1::4] = ctr_y - 0.5 * h 45 | bbox[:, 2::4] = ctr_x + 0.5 * w 46 | bbox[:, 3::4] = ctr_y + 0.5 * h 47 | return bbox 48 | 49 | 50 | def bbox2loc(src_bbox, dst_bbox, eps=1e-6, device="cuda"): 51 | eps = torch.tensor(eps).float().to(device) 52 | 53 | width = src_bbox[:, 2] - src_bbox[:, 0] 54 | height = src_bbox[:, 3] - src_bbox[:, 1] 55 | ctr_x = src_bbox[:, 0] + 0.5 * width 56 | ctr_y = src_bbox[:, 1] + 0.5 * height 57 | 58 | base_width = dst_bbox[:, 2] - dst_bbox[:, 0] 59 | base_height = dst_bbox[:, 3] - dst_bbox[:, 1] 60 | base_ctr_x = dst_bbox[:, 0] + 0.5 * base_width 61 | base_ctr_y = dst_bbox[:, 1] + 0.5 * base_height 62 | 63 | height = torch.max(height, eps) 64 | width = torch.max(width, eps) 65 | 66 | dy = (base_ctr_y - ctr_y) / height 67 | dx = (base_ctr_x - ctr_x) / width 68 | dh = torch.log(base_height / height) 69 | dw = torch.log(base_width / width) 70 | 71 | locs = torch.stack((dx, dy, dw, dh), dim=1) 72 | return locs 73 | -------------------------------------------------------------------------------- /computer_vision/instance_segmentation/mask_rcnn_utils/classes.py: -------------------------------------------------------------------------------- 1 | classes = ( 2 | '__background__', 3 | 'aeroplane', 4 | 'bicycle', 5 | 'bird', 6 | 'boat', 7 | 'bottle', 8 | 'bus', 9 | 'car', 10 | 'cat', 11 | 'chair', 12 | 'cow', 13 | 'diningtable', 14 | 'dog', 15 | 'horse', 16 | 'motorbike', 17 | 'person', 18 | 'pottedplant', 19 | 'sheep', 20 | 'sofa', 21 | 'train', 22 | 'tvmonitor' 23 | ) 24 | 25 | classes_idx = {} 26 | for idx, c in enumerate(classes): 27 | classes_idx[c] = idx 28 | -------------------------------------------------------------------------------- /computer_vision/instance_segmentation/mask_rcnn_utils/connect_bboxes_segmask.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy 
as np 4 | 5 | 6 | def calc_iou(box_a, box_b): 7 | x_a = max(box_a[0], box_b[0]) 8 | y_a = max(box_a[1], box_b[1]) 9 | x_b = min(box_a[2], box_b[2]) 10 | y_b = min(box_a[3], box_b[3]) 11 | inter_area = max(0, x_b - x_a) * max(0, y_b - y_a) 12 | 13 | box_a_area = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1]) 14 | box_b_area = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1]) 15 | 16 | iou = inter_area / float(box_a_area + box_b_area - inter_area) 17 | return iou 18 | 19 | 20 | def get_max_box_idx(seg_bbox, bboxes): 21 | max_iou = 0.0 22 | max_idx = -1 23 | 24 | for i in range(len(bboxes)): 25 | iou = calc_iou(seg_bbox, bboxes[i]) 26 | 27 | if max_iou < iou: 28 | max_iou = iou 29 | max_idx = i 30 | 31 | return max_idx, max_iou 32 | 33 | 34 | def get_bbox_from_mask(object_mask): 35 | # Since there might be disconnects between pixels of the same objects, 36 | # we merge the location of min/max of all boxes to get the largest 37 | 38 | cnts,_ = cv2.findContours(object_mask, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) 39 | 40 | boxes = [] 41 | for c in cnts: 42 | (x, y, w, h) = cv2.boundingRect(c) 43 | boxes.append([x,y, x+w,y+h]) 44 | 45 | boxes = np.asarray(boxes) 46 | left, top = np.min(boxes, axis=0)[:2] 47 | right, bottom = np.max(boxes, axis=0)[2:] 48 | 49 | seg_bbox = [left, top, right, bottom] 50 | return seg_bbox 51 | 52 | 53 | def get_boxes_from_segmentation(seg_img): 54 | assert len(seg_img.shape) == 2, seg_img.shape 55 | 56 | seg_bboxes = [] 57 | for cat in np.unique(seg_img): 58 | # id 0 is background and id 220 seems to be boundries id 59 | if cat == 220 or cat == 0 : 60 | continue 61 | 62 | mask = (seg_img == cat).astype(np.uint8) 63 | object_mask = cv2.bitwise_and(seg_img, seg_img, mask=mask) 64 | 65 | seg_bbox = get_bbox_from_mask(object_mask) 66 | seg_bboxes.append({"bbox": seg_bbox, "id": cat}) 67 | return seg_bboxes 68 | 69 | 70 | def connect_bboxes_segmask_func(bboxes_dict, seg_img): 71 | bboxes = [d["bbox"] for d in bboxes_dict] 72 | seg_bboxes = get_boxes_from_segmentation(seg_img) 73 | 74 | if len(bboxes) != len(seg_bboxes): 75 | # Miss-match between boxes number of seg-boxes and bboxes 76 | return {}, False 77 | 78 | found_idx = set() 79 | for i in range(len(seg_bboxes)): 80 | max_idx, max_iou = get_max_box_idx(seg_bboxes[i]["bbox"], bboxes) 81 | 82 | if max_iou < 0.5: 83 | # The max iou between seg-box and all bboxes is low 84 | return {}, False 85 | 86 | if max_idx in found_idx: 87 | # Already matched the idx with a box 88 | return {}, False 89 | found_idx.add(max_idx) 90 | 91 | bboxes_dict[max_idx]["seg_mask_id"] = int(seg_bboxes[i]["id"]) 92 | 93 | return bboxes_dict, True 94 | 95 | 96 | def add_segmask_ids(annotation_dicts, path): 97 | # Since VOC is not really made for instance-seg there will be a lot of miss-matches 98 | # between segmentations and bboxes. 
99 | # Therefore we will filter away any example which seems to have incorrect seg-ids and bboxes 100 | 101 | new_annos = [] 102 | num_discard = 0 103 | for i in range(len(annotation_dicts)): 104 | seg_file = os.path.join(path, "SegmentationObject", annotation_dicts[i]["seg_file_name"]) 105 | seg_img = cv2.imread(seg_file, cv2.IMREAD_GRAYSCALE) 106 | anno, keep = connect_bboxes_segmask_func(annotation_dicts[i]["annotations"], seg_img) 107 | 108 | if keep: 109 | annotation_dicts[i]["annotations"] = anno 110 | new_annos.append(annotation_dicts[i]) 111 | else: 112 | num_discard += 1 113 | 114 | discard_rate = num_discard / len(annotation_dicts) 115 | print(f"Discard-rate: {discard_rate}") 116 | 117 | return new_annos 118 | -------------------------------------------------------------------------------- /computer_vision/instance_segmentation/mask_rcnn_utils/convert_annotations.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import glob 4 | import xmltodict 5 | 6 | from classes import classes_idx 7 | from connect_bboxes_segmask import add_segmask_ids 8 | 9 | 10 | def convert_bboxes_annotations(path): 11 | anno_files = glob.glob(os.path.join(path, "Annotations", "*.xml")) 12 | 13 | annos = [] 14 | for anno_file in anno_files: 15 | with open(anno_file, "r") as fp: 16 | txt = fp.read() 17 | anno = xmltodict.parse(txt) 18 | 19 | segmented = bool(int(anno["annotation"]["segmented"])) 20 | 21 | if not segmented: 22 | continue 23 | 24 | id_ = anno["annotation"]["filename"].split(".")[0] 25 | 26 | new_anno = { 27 | "file_name": anno["annotation"]["filename"], 28 | "seg_file_name": id_ + ".png", 29 | "id": id_, 30 | "height": anno["annotation"]["size"]["height"], 31 | "width": anno["annotation"]["size"]["width"], 32 | "annotations": [] 33 | } 34 | 35 | try: 36 | for obj in anno["annotation"]["object"]: 37 | # Boxes are xmin, ymin, xmax, ymax 38 | 39 | d = { 40 | "class_name": obj["name"], 41 | "class_idx": classes_idx[obj["name"]], 42 | "bbox": [int(obj["bndbox"]["xmin"]), 43 | int(obj["bndbox"]["ymin"]), 44 | int(obj["bndbox"]["xmax"]), 45 | int(obj["bndbox"]["ymax"]), 46 | ] 47 | } 48 | 49 | new_anno["annotations"].append(d) 50 | annos.append(new_anno) 51 | except Exception as e: 52 | pass 53 | return annos 54 | 55 | 56 | if __name__ == "__main__": 57 | PATH = "data/VOCdevkit/VOC2012" 58 | annos = convert_bboxes_annotations(PATH) 59 | annos = add_segmask_ids(annos, PATH) 60 | 61 | with open("data/annotations.json", "w") as fp: 62 | json.dump(annos, fp) 63 | -------------------------------------------------------------------------------- /computer_vision/instance_segmentation/mask_rcnn_utils/create_labels_funcs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from .utils import random_choice 4 | from .box_functions import jaccard, bbox2loc 5 | 6 | 7 | def sample_labels(labels, n_sample=256, pos_ratio=0.5): 8 | n_pos = pos_ratio * n_sample 9 | 10 | pos_index = torch.where(labels == 1)[0] 11 | if len(pos_index) > n_pos: 12 | disable_index = random_choice(pos_index, size=int(len(pos_index) - n_pos)) 13 | labels[disable_index] = -1 14 | 15 | n_neg = n_sample - torch.sum(labels == 1) 16 | neg_index = torch.where(labels == 0)[0] 17 | if len(neg_index) > n_neg: 18 | disable_index = random_choice(neg_index, size=int(len(neg_index) - n_neg)) 19 | labels[disable_index] = -1 20 | 21 | return labels 22 | 23 | 24 | def create_anchor_labels(anchors, gt, img_size, 
pos_iou_threshold=0.7, neg_iou_threshold=0.3, 25 | n_sample=256, pos_ratio=0.5, device="cuda"): 26 | index_inside = torch.where((anchors[:, 0] >= 0) & 27 | (anchors[:, 1] >= 0) & 28 | (anchors[:, 2] <= img_size) & 29 | (anchors[:, 3] <= img_size))[0] 30 | 31 | labels = -1 * torch.ones((len(index_inside), )).int() 32 | valid_anchor_boxes = anchors[index_inside] 33 | 34 | ious = jaccard(valid_anchor_boxes, gt) 35 | 36 | argmax_ious = ious.argmax(dim=1) 37 | max_ious = ious[torch.arange(len(index_inside)), argmax_ious] 38 | 39 | gt_argmax_ious = ious.argmax(dim=0) 40 | 41 | gt_max_ious = ious[gt_argmax_ious, torch.arange(ious.shape[1])] 42 | gt_argmax_ious = torch.where(ious == gt_max_ious)[0] 43 | 44 | labels[max_ious < neg_iou_threshold] = 0 45 | labels[gt_argmax_ious] = 1 46 | labels[max_ious >= pos_iou_threshold] = 1 47 | 48 | labels = sample_labels(labels, n_sample, pos_ratio) 49 | 50 | locs = bbox2loc(valid_anchor_boxes, gt[argmax_ious]) 51 | 52 | anchor_labels = -1 * torch.ones((len(anchors),)).int() 53 | anchor_labels[index_inside] = labels 54 | anchor_labels = anchor_labels.long().to(device) 55 | 56 | anchor_locations = torch.zeros_like(anchors) 57 | anchor_locations[index_inside, :] = locs 58 | anchor_locations = anchor_locations.to(device) 59 | 60 | return anchor_labels, anchor_locations 61 | 62 | 63 | def create_box_target_labels(rois, gt_boxes, label, device="cuda"): 64 | n_sample = 128 65 | pos_ratio = 0.25 66 | pos_iou_thresh = 0.5 67 | neg_iou_thresh_hi = 0.5 68 | neg_iou_thresh_lo = 0.0 69 | loc_normalize_mean = torch.tensor([0.0, 0.0, 0.0, 0.0]).view((1, 4)).float().to(device) 70 | loc_normalize_std = torch.tensor([1.0, 1.0, 1.0, 1.0]).view((1, 4)).float().to(device) 71 | 72 | # Rois comes from the network, we need to disable the grad tracing, 73 | # since we do some ops which are not differentiable 74 | with torch.no_grad(): 75 | pos_roi_per_image = np.round(n_sample * pos_ratio) 76 | iou = jaccard(rois, gt_boxes) 77 | 78 | gt_assignment = iou.argmax(dim=1) 79 | max_iou = iou.max(axis=1)[0] 80 | 81 | gt_roi_label = label[gt_assignment].long() 82 | 83 | pos_index = torch.where(max_iou >= pos_iou_thresh)[0] 84 | pos_roi_per_this_image = int(min(pos_roi_per_image, len(pos_index))) 85 | 86 | if len(pos_index) > 0: 87 | pos_index = random_choice(pos_index, pos_roi_per_this_image) 88 | 89 | neg_index = torch.where((max_iou < neg_iou_thresh_hi) & (max_iou >= neg_iou_thresh_lo))[0] 90 | neg_roi_per_this_image = n_sample - pos_roi_per_this_image 91 | neg_roi_per_this_image = int(min(neg_roi_per_this_image, len(neg_index))) 92 | 93 | if len(neg_index) > 0: 94 | neg_index = random_choice(neg_index, neg_roi_per_this_image) 95 | 96 | keep_index = torch.cat([pos_index, neg_index], dim=0) 97 | 98 | gt_roi_label = gt_roi_label[keep_index] 99 | gt_roi_label[pos_roi_per_this_image:] = 0 # negative labels becomes background 100 | sample_roi = rois[keep_index] 101 | 102 | gt_roi_loc = bbox2loc(sample_roi, gt_boxes[gt_assignment[keep_index]]) 103 | gt_roi_loc = (gt_roi_loc - loc_normalize_mean) / loc_normalize_std 104 | return sample_roi, gt_roi_loc, gt_roi_label 105 | -------------------------------------------------------------------------------- /computer_vision/instance_segmentation/mask_rcnn_utils/display.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import random 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | def plot_boxes(img, boxes, names, color=(0, 255, 0)): 8 | plot_img = np.copy(img) 9 | 10 | 
box_tickness = 2 11 | text_tickness = 1 12 | 13 | assert len(boxes) == len(names) 14 | 15 | for i in range(len(boxes)): 16 | box = boxes[i] 17 | name = names[i] 18 | 19 | min_ = (box[0], box[1]) 20 | max_ = (box[2], box[3]) 21 | 22 | cv2.rectangle(plot_img, min_, max_ , color, box_tickness) 23 | 24 | center_x = int((box[0] + box[2]) / 2) 25 | center_y = int((box[1] + box[3]) / 2) 26 | cv2.putText(plot_img, name, (center_x, center_y), cv2.FONT_HERSHEY_SIMPLEX, 27 | 0.5, color, text_tickness) 28 | 29 | return plot_img 30 | 31 | 32 | def plot_img_seg(img, seg_img, mask_img, alpha=0.5): 33 | # mask_img specifying where the segmentation is 34 | seg_img_part = mask_img * cv2.addWeighted(img, alpha, seg_img, (1.0 - alpha), 0.0) 35 | orig_img_part = (1 - mask_img) * img 36 | return orig_img_part + seg_img_part 37 | 38 | 39 | def plot_boxes_instance_masks(plot_img, seg_img, bboxes, names, seg_ids, alpha=0.5): 40 | for i in range(len(bboxes)): 41 | range_ = (0, 255) 42 | color = (random.randint(*range_), 43 | random.randint(*range_), 44 | random.randint(*range_) 45 | ) 46 | 47 | plot_img = plot_boxes(plot_img, [bboxes[i]], [names[i]], color=color) 48 | 49 | id_ = seg_ids[i] 50 | mask_img = (seg_img == id_).astype(np.uint8) 51 | 52 | seg_img_id = mask_img * np.array(color, dtype=np.uint8).reshape((1, 1, 3)) 53 | plot_img = plot_img_seg(plot_img, seg_img_id, mask_img) 54 | 55 | return plot_img 56 | 57 | 58 | def plot_image(img): 59 | plt.imshow(img) 60 | plt.show() 61 | -------------------------------------------------------------------------------- /computer_vision/instance_segmentation/mask_rcnn_utils/generate_anchor_boxes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def generate_anchor_base(base_size=16, ratios=[0.5, 1, 2], anchor_scales=[8, 16, 32]): 6 | px = base_size / 2. 7 | py = base_size / 2. 8 | 9 | anchor_base = np.zeros((len(ratios) * len(anchor_scales), 4), 10 | dtype=np.float32) 11 | 12 | for i in range(len(ratios)): 13 | for j in range(len(anchor_scales)): 14 | h = base_size * anchor_scales[j] * np.sqrt(ratios[i]) 15 | w = base_size * anchor_scales[j] * np.sqrt(1. / ratios[i]) 16 | 17 | index = i * len(anchor_scales) + j 18 | anchor_base[index, 0] = px - w / 2. 19 | anchor_base[index, 1] = py - h / 2. 20 | anchor_base[index, 2] = px + w / 2. 21 | anchor_base[index, 3] = py + h / 2. 
22 | return anchor_base 23 | 24 | 25 | def generator_anchors(img_size, sub_sample=16, ratios=[0.7, 1, 1.3], anchor_scales=[4, 8, 16], device="cuda"): 26 | feat_stride = sub_sample 27 | 28 | anchor_base = generate_anchor_base( 29 | base_size=sub_sample, ratios=ratios, anchor_scales=anchor_scales) 30 | 31 | shift_y = np.arange(0, img_size * feat_stride, feat_stride) 32 | shift_x = np.arange(0, img_size * feat_stride, feat_stride) 33 | 34 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 35 | 36 | shift = np.stack((shift_x.ravel(), shift_y.ravel(), 37 | shift_x.ravel(), shift_y.ravel()), axis=1) 38 | 39 | A = anchor_base.shape[0] 40 | K = shift.shape[0] 41 | anchor = anchor_base.reshape((1, A, 4)) + \ 42 | shift.reshape((1, K, 4)).transpose((1, 0, 2)) 43 | anchor = anchor.reshape((K * A, 4)).astype(np.float32) 44 | anchor = torch.from_numpy(anchor).float().to(device) 45 | return anchor 46 | -------------------------------------------------------------------------------- /computer_vision/instance_segmentation/mask_rcnn_utils/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def filter_smooth_l1(target, pred, labels, ignore_idx=-1): 6 | filter_target = target[labels != ignore_idx, :] 7 | filter_pred = pred[labels != ignore_idx, :] 8 | loss = F.smooth_l1_loss(filter_target, filter_pred, reduction="none") 9 | return torch.sum(loss) / len(filter_target) 10 | -------------------------------------------------------------------------------- /computer_vision/instance_segmentation/mask_rcnn_utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def random_choice(arr, size, device="cuda"): 5 | idx = torch.randperm(len(arr), device=device) 6 | return arr[idx][0:size] 7 | 8 | 9 | def normal_init(module, mean, stddev): 10 | module.weight.data.normal_(mean, stddev) 11 | module.bias.data.zero_() 12 | -------------------------------------------------------------------------------- /computer_vision/models/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints -------------------------------------------------------------------------------- /computer_vision/models/README.md: -------------------------------------------------------------------------------- 1 | # CNN models 2 | 3 | Implementation of common state of art CNN models. They are tested on the MNIST data set, which can be downloaded [here](https://www.kaggle.com/c/digit-recognizer). 
4 | 5 | The models are: 6 | 7 | - [ResNet](https://arxiv.org/abs/1512.03385) 8 | - [Inception V1](https://arxiv.org/abs/1409.4842) 9 | - [MobileNet V1](https://arxiv.org/abs/1704.04861) 10 | - [DenseNet](https://arxiv.org/abs/1608.06993) -------------------------------------------------------------------------------- /computer_vision/models/data/.gitignore: -------------------------------------------------------------------------------- 1 | train.csv -------------------------------------------------------------------------------- /computer_vision/models/data/DSConvolution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrParosk/ml_playground/1202dd9b4341fc8ad5fbadade45ac93e76217303/computer_vision/models/data/DSConvolution.png -------------------------------------------------------------------------------- /computer_vision/models/data/DenseNet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrParosk/ml_playground/1202dd9b4341fc8ad5fbadade45ac93e76217303/computer_vision/models/data/DenseNet.png -------------------------------------------------------------------------------- /computer_vision/models/data/Inception.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrParosk/ml_playground/1202dd9b4341fc8ad5fbadade45ac93e76217303/computer_vision/models/data/Inception.png -------------------------------------------------------------------------------- /computer_vision/models/data/ResNet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrParosk/ml_playground/1202dd9b4341fc8ad5fbadade45ac93e76217303/computer_vision/models/data/ResNet.png -------------------------------------------------------------------------------- /computer_vision/models/dense_net.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import torch\n", 12 | "import torch.nn as nn\n", 13 | "import torch.nn.functional as F\n", 14 | "from torch.utils.data import Dataset, DataLoader\n", 15 | "import torch.optim as optim\n", 16 | "from torch.optim.lr_scheduler import StepLR\n", 17 | "from sklearn.model_selection import train_test_split" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "PATH = \"data/\"\n", 27 | "device = \"cuda\"\n", 28 | "seed = 42\n", 29 | "file_name = PATH + \"train.csv\"" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "class MNIST(Dataset):\n", 39 | " def __init__(self, X, y):\n", 40 | " self.y = torch.from_numpy(y).long().to(device)\n", 41 | " X = X.reshape(X.shape[0], 1, 28, 28)\n", 42 | " X = X / 255.0\n", 43 | " self.X = torch.from_numpy(X).float().to(device)\n", 44 | " \n", 45 | " def __len__(self):\n", 46 | " return self.X.shape[0]\n", 47 | " \n", 48 | " def __getitem__(self, idx):\n", 49 | " return (self.X[idx,:,:,:], self.y[idx])" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "df = pd.read_csv(file_name)\n", 59 | "y = 
df[\"label\"].values\n", 60 | "X = df.drop(\"label\", axis=1).values\n", 61 | "\n", 62 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "batch_size = 256\n", 72 | "train_dataset = MNIST(X_train, y_train)\n", 73 | "train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)\n", 74 | "\n", 75 | "test_dataset = MNIST(X_test, y_test)\n", 76 | "test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "# DenseNet" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "![title](data/DenseNet.png)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 6, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "class DenseLayer(nn.Module):\n", 100 | " def __init__(self, in_channels, between_channels, growth_rate):\n", 101 | " super().__init__()\n", 102 | "\n", 103 | " layers = []\n", 104 | " layers.append(nn.BatchNorm2d(in_channels))\n", 105 | " layers.append(nn.ReLU(inplace=True))\n", 106 | " layers.append(nn.Conv2d(in_channels, between_channels, kernel_size=1,\n", 107 | " stride=1, bias=False))\n", 108 | " layers.append(nn.BatchNorm2d(between_channels))\n", 109 | " layers.append(nn.ReLU(inplace=True))\n", 110 | " layers.append(nn.Conv2d(between_channels, growth_rate, kernel_size=3,\n", 111 | " stride=1, padding=1, bias=False))\n", 112 | "\n", 113 | " self.layers = nn.Sequential(*layers)\n", 114 | "\n", 115 | " def forward(self, x):\n", 116 | " out = self.layers(x)\n", 117 | " return torch.cat([x, out], dim=1)\n", 118 | "\n", 119 | "class DenseBlock(nn.Module):\n", 120 | " def __init__(self, in_channels, between_channels, growth_rate, num_layers):\n", 121 | " super().__init__()\n", 122 | "\n", 123 | " block = []\n", 124 | " for i in range(num_layers):\n", 125 | " block.append(DenseLayer(in_channels + i * growth_rate, between_channels, growth_rate))\n", 126 | " self.block = nn.Sequential(*block)\n", 127 | "\n", 128 | " def forward(self, x):\n", 129 | " return self.block(x)\n", 130 | "\n", 131 | "class Transition(nn.Module):\n", 132 | " def __init__(self, in_channels, out_channels):\n", 133 | " super().__init__()\n", 134 | "\n", 135 | " self.bn = nn.BatchNorm2d(in_channels)\n", 136 | " self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1,\n", 137 | " stride=1, bias=False)\n", 138 | " self.pool = nn.AvgPool2d(kernel_size=3, stride=2)\n", 139 | "\n", 140 | " def forward(self, x):\n", 141 | " x = self.bn(x)\n", 142 | " x = self.conv(x)\n", 143 | " x = self.pool(x)\n", 144 | " return x\n", 145 | "\n", 146 | "class DenseNet(nn.Module):\n", 147 | " def __init__(self, growth_rate=4, init_channels=32, num_layers=3):\n", 148 | " super().__init__()\n", 149 | "\n", 150 | " block_output = [init_channels + i * num_layers * growth_rate for i in range(1, 4)]\n", 151 | "\n", 152 | " self.conv_1 = nn.Conv2d(1, init_channels, kernel_size=5,\n", 153 | " stride=1, bias=False)\n", 154 | "\n", 155 | " self.block_1 = DenseBlock(init_channels, 4 * growth_rate, growth_rate, num_layers)\n", 156 | " self.transition_1 = Transition(block_output[0], block_output[0])\n", 157 | "\n", 158 | " self.block_2 = DenseBlock(block_output[0], 4 * growth_rate, growth_rate, num_layers)\n", 159 | " self.transition_2 = 
Transition(block_output[1], block_output[1])\n", 160 | "\n", 161 | " self.block_3 = DenseBlock(block_output[1], 4 * growth_rate, growth_rate, num_layers)\n", 162 | "\n", 163 | " self.out = nn.Linear(block_output[-1], 10)\n", 164 | "\n", 165 | " def forward(self, x):\n", 166 | " x = self.conv_1(x)\n", 167 | "\n", 168 | " x = self.block_1(x)\n", 169 | " x = self.transition_1(x)\n", 170 | "\n", 171 | " x = self.block_2(x)\n", 172 | " x = self.transition_2(x)\n", 173 | "\n", 174 | " x = self.block_3(x)\n", 175 | "\n", 176 | " x = F.adaptive_avg_pool2d(x, 1)\n", 177 | " x = x.view(len(x), -1)\n", 178 | "\n", 179 | " x = self.out(x)\n", 180 | " return F.log_softmax(x, dim=1)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 7, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "n_epochs = 50\n", 190 | "lr = 1e-3\n", 191 | "weight_decay = 1e-5\n", 192 | "\n", 193 | "torch.manual_seed(seed)\n", 194 | "model = DenseNet().to(device)\n", 195 | "\n", 196 | "optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)\n", 197 | "lr_sceduler = StepLR(optimizer, gamma=0.5, step_size=10)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 8, 203 | "metadata": { 204 | "scrolled": false 205 | }, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "Epoch 0: train loss 0.005685, test loss 0.003544, test accuracy 0.7025\n", 212 | "Epoch 5: train loss 0.00033, test loss 0.000652, test accuracy 0.9545\n", 213 | "Epoch 10: train loss 0.000155, test loss 0.000266, test accuracy 0.9804\n", 214 | "Epoch 15: train loss 0.000114, test loss 0.000259, test accuracy 0.9818\n", 215 | "Epoch 20: train loss 6.4e-05, test loss 0.000199, test accuracy 0.9854\n", 216 | "Epoch 25: train loss 4.8e-05, test loss 0.000186, test accuracy 0.9871\n", 217 | "Epoch 30: train loss 3e-05, test loss 0.000197, test accuracy 0.9869\n", 218 | "Epoch 35: train loss 2.4e-05, test loss 0.000197, test accuracy 0.987\n", 219 | "Epoch 40: train loss 1.9e-05, test loss 0.000204, test accuracy 0.9866\n", 220 | "Epoch 45: train loss 1.5e-05, test loss 0.000201, test accuracy 0.987\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "torch.manual_seed(seed)\n", 226 | "for epoch in range(0, n_epochs):\n", 227 | " lr_sceduler.step()\n", 228 | " model.train()\n", 229 | " train_loss = 0\n", 230 | " for _, (x, y) in enumerate(train_loader):\n", 231 | " model.zero_grad()\n", 232 | " output = model(x)\n", 233 | " batch_loss = F.nll_loss(output, y)\n", 234 | "\n", 235 | " batch_loss.backward()\n", 236 | " optimizer.step()\n", 237 | " train_loss += batch_loss.cpu().detach().numpy() / x.shape[0]\n", 238 | " train_loss = np.round(train_loss / len(train_loader), 6)\n", 239 | " \n", 240 | " model.eval()\n", 241 | " test_loss = 0\n", 242 | " test_acc = 0\n", 243 | " for _, (x, y) in enumerate(test_loader):\n", 244 | " output = model(x)\n", 245 | " batch_loss = F.nll_loss(output, y)\n", 246 | " test_loss += batch_loss.cpu().detach().numpy() / x.shape[0]\n", 247 | " \n", 248 | " y_hat = torch.argmax(output, dim=1)\n", 249 | " acc = (y_hat == y).sum().float() / x.shape[0]\n", 250 | " acc = acc.cpu().detach().numpy()\n", 251 | " test_acc += acc\n", 252 | "\n", 253 | " test_loss = np.round(test_loss / len(test_loader), 6)\n", 254 | " test_acc = np.round(test_acc / len(test_loader), 4)\n", 255 | "\n", 256 | " if epoch % 5 == 0:\n", 257 | " print(\"Epoch {}: train loss {}, test loss {}, test accuracy {}\".format(epoch,\n", 258 | " 
train_loss,\n", 259 | " test_loss,\n", 260 | " test_acc))" 261 | ] 262 | } 263 | ], 264 | "metadata": { 265 | "kernelspec": { 266 | "display_name": "Python 3", 267 | "language": "python", 268 | "name": "python3" 269 | }, 270 | "language_info": { 271 | "codemirror_mode": { 272 | "name": "ipython", 273 | "version": 3 274 | }, 275 | "file_extension": ".py", 276 | "mimetype": "text/x-python", 277 | "name": "python", 278 | "nbconvert_exporter": "python", 279 | "pygments_lexer": "ipython3", 280 | "version": "3.6.4" 281 | } 282 | }, 283 | "nbformat": 4, 284 | "nbformat_minor": 2 285 | } 286 | -------------------------------------------------------------------------------- /computer_vision/models/inception_v1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import torch\n", 12 | "import torch.nn as nn\n", 13 | "import torch.nn.functional as F\n", 14 | "from torch.utils.data import Dataset, DataLoader\n", 15 | "import torch.optim as optim\n", 16 | "from torch.optim.lr_scheduler import StepLR\n", 17 | "from sklearn.model_selection import train_test_split" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "PATH = \"data/\"\n", 27 | "device = \"cuda\"\n", 28 | "seed = 42\n", 29 | "file_name = PATH + \"train.csv\"" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "class MNIST(Dataset):\n", 39 | " def __init__(self, X, y):\n", 40 | " self.y = torch.from_numpy(y).long().to(device)\n", 41 | " X = X.reshape(X.shape[0], 1, 28, 28)\n", 42 | " X = X / 255.0\n", 43 | " self.X = torch.from_numpy(X).float().to(device)\n", 44 | "\n", 45 | " def __len__(self):\n", 46 | " return self.X.shape[0]\n", 47 | "\n", 48 | " def __getitem__(self, idx):\n", 49 | " return (self.X[idx,:,:,:], self.y[idx])" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "df = pd.read_csv(file_name)\n", 59 | "y = df[\"label\"].values\n", 60 | "X = df.drop(\"label\", axis=1).values\n", 61 | "\n", 62 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "batch_size = 256\n", 72 | "train_dataset = MNIST(X_train, y_train)\n", 73 | "train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)\n", 74 | "\n", 75 | "test_dataset = MNIST(X_test, y_test)\n", 76 | "test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "# Inception V1" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "![title](data/Inception.png)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 6, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "class BnLayer(nn.Module):\n", 100 | " def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):\n", 101 | " super().__init__()\n", 102 | " self.conv = nn.Conv2d(in_channels, out_channels, 
kernel_size=kernel_size, stride=stride,\n", 103 | " padding=padding, bias=False)\n", 104 | " \n", 105 | " self.bn = nn.BatchNorm2d(out_channels)\n", 106 | "\n", 107 | " def forward(self, x):\n", 108 | " x = self.conv(x)\n", 109 | " x = self.bn(x)\n", 110 | " x = F.relu(x)\n", 111 | " return x" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 7, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "class InceptionBlock(nn.Module):\n", 121 | " def __init__(self, in_channels, channels_1x1, channels_1x1_3x3, channels_3x3,\n", 122 | " channels_1x1_5x5, channels_5x5, channels_1x1_pool):\n", 123 | "\n", 124 | " super().__init__()\n", 125 | "\n", 126 | " # 1x1 conv\n", 127 | " self.b1 = nn.Sequential(BnLayer(in_channels, channels_1x1, kernel_size=1))\n", 128 | "\n", 129 | " # 1x1 conv -> 3x3 conv\n", 130 | " self.b2 = nn.Sequential(\n", 131 | " BnLayer(in_channels, channels_1x1_3x3, kernel_size=1),\n", 132 | " BnLayer(channels_1x1_3x3, channels_3x3, kernel_size=3, padding=1)\n", 133 | " )\n", 134 | "\n", 135 | " # 1x1 conv -> 5x5 conv\n", 136 | " self.b3 = nn.Sequential(\n", 137 | " BnLayer(in_channels, channels_1x1_5x5, kernel_size=1),\n", 138 | " BnLayer(channels_1x1_5x5, channels_5x5, kernel_size=5, padding=2)\n", 139 | " )\n", 140 | "\n", 141 | " # Max pooling -> 1x1 conv\n", 142 | " self.b4 = nn.Sequential(\n", 143 | " nn.MaxPool2d(3, stride=1, padding=1),\n", 144 | " BnLayer(in_channels, channels_1x1_pool, kernel_size=1)\n", 145 | " ) \n", 146 | "\n", 147 | " def forward(self, x):\n", 148 | " x1 = self.b1(x)\n", 149 | " x2 = self.b2(x)\n", 150 | " x3 = self.b3(x)\n", 151 | " x4 = self.b4(x)\n", 152 | " x_cat = torch.cat([x1, x2, x3, x4], dim=1) \n", 153 | " return x_cat" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 8, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "class InceptionNet(nn.Module):\n", 163 | " def __init__(self, num_classes=10):\n", 164 | " super().__init__()\n", 165 | " self.init_layer = BnLayer(1, 16, kernel_size=5, stride=2, padding=2)\n", 166 | "\n", 167 | " self.module_1 = InceptionBlock(16, 8, 4, 8, 4, 8, 8)\n", 168 | " self.module_2 = InceptionBlock(32, 16, 4, 16, 4, 16, 16)\n", 169 | "\n", 170 | " self.dropout = nn.Dropout(p=0.5)\n", 171 | " self.out = nn.Linear(64, num_classes)\n", 172 | "\n", 173 | " def forward(self, x):\n", 174 | " x = self.init_layer(x)\n", 175 | " x = F.max_pool2d(x, 2)\n", 176 | " x = self.module_1(x)\n", 177 | " x = F.max_pool2d(x, 2)\n", 178 | " x = self.module_2(x) \n", 179 | " x = F.adaptive_avg_pool2d(x, 1)\n", 180 | " x = x.view(x.size(0), -1)\n", 181 | "\n", 182 | " x = self.dropout(x)\n", 183 | " x = self.out(x)\n", 184 | " return F.log_softmax(x, dim=-1)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 9, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "n_epochs = 50\n", 194 | "lr = 1e-3\n", 195 | "weight_decay = 1e-5\n", 196 | "\n", 197 | "torch.manual_seed(seed)\n", 198 | "model = InceptionNet().to(device)\n", 199 | "\n", 200 | "optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)\n", 201 | "lr_sceduler = StepLR(optimizer, gamma=0.5, step_size=10)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 10, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "name": "stdout", 211 | "output_type": "stream", 212 | "text": [ 213 | "Epoch 0: train loss 0.007024, test loss 0.004663, test accuracy 0.7737\n", 214 | "Epoch 5: train loss 0.000741, test loss 
0.000503, test accuracy 0.9674\n", 215 | "Epoch 10: train loss 0.000489, test loss 0.000357, test accuracy 0.9771\n", 216 | "Epoch 15: train loss 0.000402, test loss 0.000326, test accuracy 0.9788\n", 217 | "Epoch 20: train loss 0.000364, test loss 0.000304, test accuracy 0.9805\n", 218 | "Epoch 25: train loss 0.000349, test loss 0.000291, test accuracy 0.9807\n", 219 | "Epoch 30: train loss 0.00033, test loss 0.000287, test accuracy 0.9817\n", 220 | "Epoch 35: train loss 0.000306, test loss 0.000285, test accuracy 0.9818\n", 221 | "Epoch 40: train loss 0.000304, test loss 0.000282, test accuracy 0.9814\n", 222 | "Epoch 45: train loss 0.000303, test loss 0.000284, test accuracy 0.9812\n" 223 | ] 224 | } 225 | ], 226 | "source": [ 227 | "torch.manual_seed(seed)\n", 228 | "for epoch in range(0, n_epochs):\n", 229 | " lr_sceduler.step()\n", 230 | " model.train()\n", 231 | " train_loss = 0\n", 232 | " for _, (x, y) in enumerate(train_loader):\n", 233 | " model.zero_grad()\n", 234 | " output = model(x)\n", 235 | " batch_loss = F.nll_loss(output, y)\n", 236 | " \n", 237 | " batch_loss.backward()\n", 238 | " optimizer.step()\n", 239 | " train_loss += batch_loss.cpu().detach().numpy() / x.shape[0]\n", 240 | " \n", 241 | " train_loss = np.round(train_loss / len(train_loader), 6)\n", 242 | " \n", 243 | " model.eval()\n", 244 | " test_loss = 0\n", 245 | " test_acc = 0\n", 246 | " for _, (x, y) in enumerate(test_loader):\n", 247 | " output = model(x)\n", 248 | " batch_loss = F.nll_loss(output, y)\n", 249 | " test_loss += batch_loss.cpu().detach().numpy() / x.shape[0]\n", 250 | " \n", 251 | " y_hat = torch.argmax(output, dim=1)\n", 252 | " acc = (y_hat == y).sum().float() / x.shape[0]\n", 253 | " acc = acc.cpu().detach().numpy()\n", 254 | " test_acc += acc\n", 255 | "\n", 256 | " test_loss = np.round(test_loss / len(test_loader), 6)\n", 257 | " test_acc = np.round(test_acc / len(test_loader), 4)\n", 258 | " \n", 259 | " if epoch % 5 == 0:\n", 260 | " print(\"Epoch {}: train loss {}, test loss {}, test accuracy {}\".format(epoch,\n", 261 | " train_loss,\n", 262 | " test_loss,\n", 263 | " test_acc))" 264 | ] 265 | } 266 | ], 267 | "metadata": { 268 | "kernelspec": { 269 | "display_name": "Python 3", 270 | "language": "python", 271 | "name": "python3" 272 | }, 273 | "language_info": { 274 | "codemirror_mode": { 275 | "name": "ipython", 276 | "version": 3 277 | }, 278 | "file_extension": ".py", 279 | "mimetype": "text/x-python", 280 | "name": "python", 281 | "nbconvert_exporter": "python", 282 | "pygments_lexer": "ipython3", 283 | "version": "3.6.4" 284 | } 285 | }, 286 | "nbformat": 4, 287 | "nbformat_minor": 2 288 | } 289 | -------------------------------------------------------------------------------- /computer_vision/models/mobilenet_v1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import torch\n", 12 | "import torch.nn as nn\n", 13 | "import torch.nn.functional as F\n", 14 | "from torch.utils.data import Dataset, DataLoader\n", 15 | "import torch.optim as optim\n", 16 | "from torch.optim.lr_scheduler import StepLR\n", 17 | "from sklearn.model_selection import train_test_split" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "PATH = \"data/\"\n", 27 | "device = \"cuda\"\n", 28 | 
"seed = 42\n", 29 | "file_name = PATH + \"train.csv\"" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "class MNIST(Dataset):\n", 39 | " def __init__(self, X, y):\n", 40 | " self.y = torch.from_numpy(y).long().to(device)\n", 41 | " X = X.reshape(X.shape[0], 1, 28, 28)\n", 42 | " X = X / 255.0\n", 43 | " self.X = torch.from_numpy(X).float().to(device)\n", 44 | "\n", 45 | " def __len__(self):\n", 46 | " return self.X.shape[0]\n", 47 | "\n", 48 | " def __getitem__(self, idx):\n", 49 | " return (self.X[idx,:,:,:], self.y[idx])" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "df = pd.read_csv(file_name)\n", 59 | "y = df[\"label\"].values\n", 60 | "X = df.drop(\"label\", axis=1).values\n", 61 | "\n", 62 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "batch_size = 256\n", 72 | "train_dataset = MNIST(X_train, y_train)\n", 73 | "train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)\n", 74 | "\n", 75 | "test_dataset = MNIST(X_test, y_test)\n", 76 | "test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "# MobileNet V1" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "![title](data/DSConvolution.png)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 6, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "class BnLayer(nn.Module):\n", 100 | " def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, groups=1):\n", 101 | " super().__init__()\n", 102 | " self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride,\n", 103 | " padding=padding, groups=groups, bias=False)\n", 104 | " \n", 105 | " self.bn = nn.BatchNorm2d(out_channels)\n", 106 | "\n", 107 | " def forward(self, x):\n", 108 | " x = self.conv(x)\n", 109 | " x = self.bn(x)\n", 110 | " x = F.relu(x)\n", 111 | " return x" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 7, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "class Block(nn.Module):\n", 121 | " def __init__(self, in_channels, out_channels, stride=1):\n", 122 | " super().__init__()\n", 123 | "\n", 124 | " # Depthwise convolution\n", 125 | " self.bn_layer_3x3 = BnLayer(in_channels, in_channels, kernel_size=3, stride=stride, groups=in_channels)\n", 126 | "\n", 127 | " # Pointwise convolution\n", 128 | " self.bn_layer_1x1 = BnLayer(in_channels, out_channels, kernel_size=1, stride=1)\n", 129 | "\n", 130 | " def forward(self, x):\n", 131 | " x = self.bn_layer_3x3(x)\n", 132 | " x = self.bn_layer_1x1(x)\n", 133 | " return x" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 8, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "class MobileNet(nn.Module):\n", 143 | " def __init__(self, num_classes=10):\n", 144 | " super().__init__()\n", 145 | " self.init_layer = BnLayer(1, 16, kernel_size=3, stride=2)\n", 146 | " self.block_1 = Block(16, 32, stride=2)\n", 147 | " self.block_2 = Block(32, 64, stride=2)\n", 148 | "\n", 149 | " self.dropout = 
nn.Dropout(p=0.5)\n", 150 | " self.out = nn.Linear(64, num_classes)\n", 151 | "\n", 152 | " def forward(self, x):\n", 153 | " x = self.init_layer(x)\n", 154 | " x = self.block_1(x)\n", 155 | " x = self.block_2(x)\n", 156 | "\n", 157 | " x = F.adaptive_avg_pool2d(x, 1)\n", 158 | " x = x.view(x.size(0), -1)\n", 159 | " x = self.dropout(x)\n", 160 | " x = self.out(x)\n", 161 | " return F.log_softmax(x, dim=-1)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 9, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "n_epochs = 50\n", 171 | "lr = 1e-3\n", 172 | "weight_decay = 1e-5\n", 173 | "\n", 174 | "torch.manual_seed(seed)\n", 175 | "model = MobileNet().to(device)\n", 176 | "\n", 177 | "optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)\n", 178 | "lr_sceduler = StepLR(optimizer, gamma=0.5, step_size=10)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 10, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "name": "stdout", 188 | "output_type": "stream", 189 | "text": [ 190 | "Epoch 0: train loss 0.007893, test loss 0.00666, test accuracy 0.5632\n", 191 | "Epoch 5: train loss 0.001544, test loss 0.001097, test accuracy 0.9318\n", 192 | "Epoch 10: train loss 0.001016, test loss 0.000732, test accuracy 0.9507\n", 193 | "Epoch 15: train loss 0.000912, test loss 0.000645, test accuracy 0.9546\n", 194 | "Epoch 20: train loss 0.000812, test loss 0.0006, test accuracy 0.9591\n", 195 | "Epoch 25: train loss 0.000778, test loss 0.000582, test accuracy 0.9597\n", 196 | "Epoch 30: train loss 0.000754, test loss 0.000568, test accuracy 0.9599\n", 197 | "Epoch 35: train loss 0.000736, test loss 0.000561, test accuracy 0.9603\n", 198 | "Epoch 40: train loss 0.000727, test loss 0.000556, test accuracy 0.9613\n", 199 | "Epoch 45: train loss 0.000714, test loss 0.000551, test accuracy 0.9619\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "torch.manual_seed(seed)\n", 205 | "for epoch in range(0, n_epochs):\n", 206 | " lr_sceduler.step()\n", 207 | " model.train()\n", 208 | " train_loss = 0\n", 209 | " for _, (x, y) in enumerate(train_loader):\n", 210 | " model.zero_grad()\n", 211 | " output = model(x)\n", 212 | " batch_loss = F.nll_loss(output, y)\n", 213 | "\n", 214 | " batch_loss.backward()\n", 215 | " optimizer.step()\n", 216 | " train_loss += batch_loss.cpu().detach().numpy() / x.shape[0]\n", 217 | "\n", 218 | " train_loss = np.round(train_loss / len(train_loader), 6)\n", 219 | "\n", 220 | " model.eval()\n", 221 | " test_loss = 0\n", 222 | " test_acc = 0\n", 223 | " for _, (x, y) in enumerate(test_loader):\n", 224 | " output = model(x)\n", 225 | " batch_loss = F.nll_loss(output, y)\n", 226 | " test_loss += batch_loss.cpu().detach().numpy() / x.shape[0]\n", 227 | "\n", 228 | " y_hat = torch.argmax(output, dim=1)\n", 229 | " acc = (y_hat == y).sum().float() / x.shape[0]\n", 230 | " acc = acc.cpu().detach().numpy()\n", 231 | " test_acc += acc\n", 232 | "\n", 233 | " test_loss = np.round(test_loss / len(test_loader), 6)\n", 234 | " test_acc = np.round(test_acc / len(test_loader), 4)\n", 235 | "\n", 236 | " if epoch % 5 == 0:\n", 237 | " print(\"Epoch {}: train loss {}, test loss {}, test accuracy {}\".format(epoch,\n", 238 | " train_loss,\n", 239 | " test_loss,\n", 240 | " test_acc))" 241 | ] 242 | } 243 | ], 244 | "metadata": { 245 | "kernelspec": { 246 | "display_name": "Python 3", 247 | "language": "python", 248 | "name": "python3" 249 | }, 250 | "language_info": { 251 | "codemirror_mode": { 252 | 
"name": "ipython", 253 | "version": 3 254 | }, 255 | "file_extension": ".py", 256 | "mimetype": "text/x-python", 257 | "name": "python", 258 | "nbconvert_exporter": "python", 259 | "pygments_lexer": "ipython3", 260 | "version": "3.6.4" 261 | } 262 | }, 263 | "nbformat": 4, 264 | "nbformat_minor": 2 265 | } 266 | -------------------------------------------------------------------------------- /computer_vision/models/resnet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import torch\n", 12 | "import torch.nn as nn\n", 13 | "import torch.nn.functional as F\n", 14 | "from torch.utils.data import Dataset, DataLoader\n", 15 | "import torch.optim as optim\n", 16 | "from torch.optim.lr_scheduler import StepLR\n", 17 | "from sklearn.model_selection import train_test_split" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "PATH = \"data/\"\n", 27 | "device = \"cuda\"\n", 28 | "seed = 42\n", 29 | "file_name = PATH + \"train.csv\"" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "class MNIST(Dataset):\n", 39 | " def __init__(self, X, y):\n", 40 | " self.y = torch.from_numpy(y).long().to(device)\n", 41 | " X = X.reshape(X.shape[0], 1, 28, 28)\n", 42 | " X = X / 255.0\n", 43 | " self.X = torch.from_numpy(X).float().to(device)\n", 44 | "\n", 45 | " def __len__(self):\n", 46 | " return self.X.shape[0]\n", 47 | "\n", 48 | " def __getitem__(self, idx):\n", 49 | " return (self.X[idx,:,:,:], self.y[idx])" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "df = pd.read_csv(file_name)\n", 59 | "y = df[\"label\"].values\n", 60 | "X = df.drop(\"label\", axis=1).values\n", 61 | "\n", 62 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "batch_size = 256\n", 72 | "train_dataset = MNIST(X_train, y_train)\n", 73 | "train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)\n", 74 | "\n", 75 | "test_dataset = MNIST(X_test, y_test)\n", 76 | "test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "# ResNet" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "![title](data/ResNet.png)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "Note that this is an implemenation of the \"basic block\" (i.e. the left one). The \"bottleneck block\" (i.e. right one) is mostly used for deeper ResNet models (ResNet50, ResNet101 etc)." 
98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 6, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "class BnLayer(nn.Module):\n", 107 | " def __init__(self, in_channels, out_channels, stride=2, kernel_size=3, padding=1):\n", 108 | " super().__init__()\n", 109 | " self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride,\n", 110 | " padding=padding, bias=False)\n", 111 | "\n", 112 | " self.bn = nn.BatchNorm2d(out_channels)\n", 113 | "\n", 114 | " def forward(self, x):\n", 115 | " x = self.conv(x)\n", 116 | " x = self.bn(x)\n", 117 | " return x" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 7, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "class ResNetBlock(nn.Module):\n", 127 | " def __init__(self, in_channels, out_channels, kernel_size=3, stride=2):\n", 128 | " super().__init__()\n", 129 | " self.bn_layer_1 = BnLayer(in_channels, out_channels, kernel_size=kernel_size, stride=stride)\n", 130 | " self.bn_layer_2 = BnLayer(out_channels, out_channels, kernel_size=kernel_size, stride=1)\n", 131 | " self.bn_shortcut = BnLayer(in_channels, out_channels, kernel_size=1, stride=stride, padding=0) \n", 132 | "\n", 133 | " def forward(self, x):\n", 134 | " shortcut = x\n", 135 | " x = self.bn_layer_1(x)\n", 136 | " x = F.relu(x)\n", 137 | " x = self.bn_layer_2(x)\n", 138 | " shortcut = self.bn_shortcut(shortcut)\n", 139 | " x = F.relu(x + shortcut)\n", 140 | " return x" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 8, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "class ResNet(nn.Module):\n", 150 | " def __init__(self, layers, num_classes=10):\n", 151 | " super().__init__()\n", 152 | " self.init_layer = BnLayer(1, layers[0], kernel_size=3, stride=2)\n", 153 | " \n", 154 | " self.res_layers = nn.ModuleList([ResNetBlock(layers[i], layers[i + 1])\n", 155 | " for i in range(len(layers) - 1)])\n", 156 | "\n", 157 | " self.dropout = nn.Dropout(p=0.5)\n", 158 | " self.out = nn.Linear(layers[-1], num_classes)\n", 159 | "\n", 160 | " def forward(self, x):\n", 161 | " x = F.relu(self.init_layer(x))\n", 162 | "\n", 163 | " for res in self.res_layers:\n", 164 | " x = res(x)\n", 165 | "\n", 166 | " x = F.adaptive_avg_pool2d(x, 1)\n", 167 | " x = x.view(x.size(0), -1)\n", 168 | " x = self.dropout(x)\n", 169 | " x = self.out(x)\n", 170 | " return F.log_softmax(x, dim=-1)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 9, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "n_epochs = 50\n", 180 | "lr = 1e-3\n", 181 | "weight_decay = 1e-5\n", 182 | "layers = [8, 16, 32, 64]\n", 183 | "\n", 184 | "torch.manual_seed(seed)\n", 185 | "model = ResNet(layers).to(device)\n", 186 | "\n", 187 | "optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)\n", 188 | "lr_sceduler = StepLR(optimizer, gamma=0.5, step_size=10)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 10, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "name": "stdout", 198 | "output_type": "stream", 199 | "text": [ 200 | "Epoch 0: train loss 0.003825, test loss 0.000979, test accuracy 0.9524\n", 201 | "Epoch 5: train loss 0.000235, test loss 0.000246, test accuracy 0.9809\n", 202 | "Epoch 10: train loss 0.000105, test loss 0.000225, test accuracy 0.9838\n", 203 | "Epoch 15: train loss 5.9e-05, test loss 0.00025, test accuracy 0.9846\n", 204 | "Epoch 20: train loss 3.5e-05, test loss 
0.000254, test accuracy 0.9857\n", 205 | "Epoch 25: train loss 3e-05, test loss 0.000254, test accuracy 0.9866\n", 206 | "Epoch 30: train loss 4.9e-05, test loss 0.000265, test accuracy 0.9859\n", 207 | "Epoch 35: train loss 2.5e-05, test loss 0.000283, test accuracy 0.9844\n", 208 | "Epoch 40: train loss 2.6e-05, test loss 0.000278, test accuracy 0.9855\n", 209 | "Epoch 45: train loss 2e-05, test loss 0.000292, test accuracy 0.9864\n" 210 | ] 211 | } 212 | ], 213 | "source": [ 214 | "torch.manual_seed(seed)\n", 215 | "for epoch in range(0, n_epochs):\n", 216 | " model.train()\n", 217 | " train_loss = 0\n", 218 | " for _, (x, y) in enumerate(train_loader):\n", 219 | " model.zero_grad()\n", 220 | " output = model(x)\n", 221 | " batch_loss = F.nll_loss(output, y)\n", 222 | " \n", 223 | " batch_loss.backward()\n", 224 | " optimizer.step()\n", 225 | " train_loss += batch_loss.cpu().detach().numpy() / x.shape[0]\n", 226 | "\n", 227 | " train_loss = np.round(train_loss / len(train_loader), 6)\n", 228 | "\n", 229 | " model.eval()\n", 230 | " test_loss = 0\n", 231 | " test_acc = 0\n", 232 | " for _, (x, y) in enumerate(test_loader):\n", 233 | " output = model(x)\n", 234 | " batch_loss = F.nll_loss(output, y)\n", 235 | " test_loss += batch_loss.cpu().detach().numpy() / x.shape[0]\n", 236 | "\n", 237 | " y_hat = torch.argmax(output, dim=1)\n", 238 | " acc = (y_hat == y).sum().float() / x.shape[0]\n", 239 | " acc = acc.cpu().detach().numpy()\n", 240 | " test_acc += acc\n", 241 | "\n", 242 | " test_loss = np.round(test_loss / len(test_loader), 6)\n", 243 | " test_acc = np.round(test_acc / len(test_loader), 4)\n", 244 | "\n", 245 | " if epoch % 5 == 0:\n", 246 | " print(\"Epoch {}: train loss {}, test loss {}, test accuracy {}\".format(epoch,\n", 247 | " train_loss,\n", 248 | " test_loss,\n", 249 | " test_acc))" 250 | ] 251 | } 252 | ], 253 | "metadata": { 254 | "kernelspec": { 255 | "display_name": "Python 3", 256 | "language": "python", 257 | "name": "python3" 258 | }, 259 | "language_info": { 260 | "codemirror_mode": { 261 | "name": "ipython", 262 | "version": 3 263 | }, 264 | "file_extension": ".py", 265 | "mimetype": "text/x-python", 266 | "name": "python", 267 | "nbconvert_exporter": "python", 268 | "pygments_lexer": "ipython3", 269 | "version": "3.6.4" 270 | } 271 | }, 272 | "nbformat": 4, 273 | "nbformat_minor": 2 274 | } 275 | -------------------------------------------------------------------------------- /computer_vision/neural_style_transfer/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints -------------------------------------------------------------------------------- /computer_vision/neural_style_transfer/README.md: -------------------------------------------------------------------------------- 1 | # Neural style transfer Implementation of neural style transfer according to the paper [A Neural Algorithm of Artistic Style](https://arxiv.org/abs/1508.06576). 
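
To make the cited objective concrete, below is a minimal sketch of the two losses from the paper: a content loss on VGG feature maps and a style loss on their Gram matrices. The normalization constant and the choice and weighting of layers are illustrative assumptions and may differ from `neural_style_transfer.ipynb`.

```python
# Minimal sketch of the losses from Gatys et al.; not copied from the notebook.
import torch
import torch.nn.functional as F


def gram_matrix(features: torch.Tensor) -> torch.Tensor:
    # features: (batch, channels, height, width) activations from some VGG layer
    b, c, h, w = features.shape
    flat = features.view(b, c, h * w)
    # Channel-to-channel correlations; 1 / (c * h * w) is one common normalization choice
    return flat @ flat.transpose(1, 2) / (c * h * w)


def content_loss(generated_feat: torch.Tensor, content_feat: torch.Tensor) -> torch.Tensor:
    return F.mse_loss(generated_feat, content_feat)


def style_loss(generated_feats, style_feats) -> torch.Tensor:
    # Summed over whichever layers are used as "style" layers
    return sum(F.mse_loss(gram_matrix(g), gram_matrix(s)) for g, s in zip(generated_feats, style_feats))


# The total objective optimizes the pixels of the generated image:
#   total = alpha * content_loss + beta * style_loss
# where alpha / beta controls the content/style trade-off.
```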
-------------------------------------------------------------------------------- /computer_vision/neural_style_transfer/data/content.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrParosk/ml_playground/1202dd9b4341fc8ad5fbadade45ac93e76217303/computer_vision/neural_style_transfer/data/content.jpg -------------------------------------------------------------------------------- /computer_vision/neural_style_transfer/data/style.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrParosk/ml_playground/1202dd9b4341fc8ad5fbadade45ac93e76217303/computer_vision/neural_style_transfer/data/style.jpg -------------------------------------------------------------------------------- /computer_vision/object_detection/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | src/__pycache__/ 3 | src/.vscode/ 4 | .vscode/ 5 | -------------------------------------------------------------------------------- /computer_vision/object_detection/README.md: -------------------------------------------------------------------------------- 1 | # Object detection 2 | 3 | Implementation of four object detection methods: 4 | 5 | - [YOLO (v1)](https://arxiv.org/abs/1506.02640). 6 | - [SSD](https://arxiv.org/abs/1512.02325) (with [Focal loss](https://arxiv.org/abs/1708.02002)). 7 | - [RetinaNet](https://arxiv.org/abs/1708.02002). 8 | - [Faster-RCNN](https://arxiv.org/abs/1506.01497). 9 | 10 | They are trained on PASCAL VOC 2012, which can be found [here](https://pjreddie.com/projects/pascal-voc-dataset-mirror/). 11 | 12 | The notebooks use JSON annotations, so the XML files need to be converted to JSON format. This can be done with xml2json.py, e.g. 
13 | 14 | ```bash 15 | python src/xml2json.py ./data/VOCdevkit 2012 16 | ``` 17 | -------------------------------------------------------------------------------- /computer_vision/object_detection/data/.gitignore: -------------------------------------------------------------------------------- 1 | VOCdevkit/ 2 | pascal_train2012.json 3 | pascal_val2012.json -------------------------------------------------------------------------------- /computer_vision/object_detection/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrParosk/ml_playground/1202dd9b4341fc8ad5fbadade45ac93e76217303/computer_vision/object_detection/src/__init__.py -------------------------------------------------------------------------------- /computer_vision/object_detection/src/augmentations.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | class RandomHorizontalFlip: 5 | def __init__(self, p=0.5): 6 | self.p = p 7 | 8 | def __call__(self, img, bboxes): 9 | if random.random() < self.p: 10 | img_center = img.shape[0] / 2 11 | img = img[:,::-1,:] 12 | img = np.ascontiguousarray(img) 13 | 14 | bboxes[:, 0] += 2*(img_center - bboxes[:,0]) 15 | return img, bboxes 16 | 17 | class RandomContrast: 18 | def __init__(self, lower=0.5, upper=1.5, p=0.5): 19 | self.lower = lower 20 | self.upper = upper 21 | self.p = p 22 | 23 | def __call__(self, img): 24 | if random.random() < self.p: 25 | alpha = random.uniform(self.lower, self.upper) 26 | img *= alpha 27 | return img 28 | -------------------------------------------------------------------------------- /computer_vision/object_detection/src/data_transformer.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pandas as pd 3 | from collections import namedtuple 4 | import torch 5 | 6 | ImageEntry = namedtuple("ImageEntry", ["filename", "width", "height", 7 | "classnames", "class_id", 8 | "bounding_boxes" 9 | ]) 10 | 11 | 12 | def load_pascal(json_path): 13 | json_data = json.load(open(json_path)) 14 | 15 | images_df = pd.DataFrame(json_data["images"]) 16 | anno_df = pd.DataFrame(json_data["annotations"]) 17 | 18 | anno_df = anno_df[["image_id", "bbox", "category_id"]] 19 | anno_df = anno_df.rename(columns={"image_id": "id"}) 20 | 21 | id_classname = {} 22 | for row in json_data["categories"]: 23 | id_classname[row["id"]] = row["name"] 24 | 25 | anno_df["classname"] = anno_df.apply(lambda x: id_classname[x["category_id"]], axis=1) 26 | df = anno_df.merge(images_df, on="id") 27 | 28 | grouped_data = [] 29 | grouped = df.groupby("file_name") 30 | for name, group in grouped: 31 | val = ImageEntry(filename=name, width=group["width"].values[0], height=group["height"].values[0], 32 | classnames=list(group["classname"].values), class_id=list(group["category_id"].values - 1), 33 | bounding_boxes=list(group["bbox"].values)) 34 | grouped_data.append(val) 35 | return id_classname, grouped_data 36 | 37 | 38 | def rescale_bounding_boxes(data_list, target_size): 39 | """ 40 | Rescaling the bounding boxes according to the new image size (target_size). 
41 | """ 42 | 43 | for i in range(len(data_list)): 44 | d = data_list[i] 45 | x_scale = target_size / d.width 46 | y_scale = target_size / d.height 47 | 48 | new_boxes = [] 49 | for box in d.bounding_boxes: 50 | (x, y, d_x, d_y) = box 51 | 52 | x = int(round(x * x_scale)) 53 | y = int(round(y * y_scale)) 54 | d_x = int(round(d_x * x_scale)) 55 | d_y = int(round(d_y * y_scale)) 56 | 57 | new_boxes.append([x, y, d_x, d_y]) 58 | 59 | data_list[i] = data_list[i]._replace(bounding_boxes=new_boxes) 60 | return data_list 61 | 62 | 63 | def convert_to_center(data_list): 64 | """ 65 | Converting [bx, by, w, h] to [cx, cy, w, h]. 66 | """ 67 | 68 | for i in range(len(data_list)): 69 | d = data_list[i] 70 | 71 | new_boxes = [] 72 | for box in d.bounding_boxes: 73 | cx = box[0] + box[2]/2 74 | cy = box[1] + box[3]/2 75 | new_boxes.append([cx, cy, box[2], box[3]]) 76 | data_list[i] = data_list[i]._replace(bounding_boxes=new_boxes) 77 | return data_list 78 | 79 | 80 | def invert_transformation(bb_hat, anchors): 81 | """ 82 | Invert the transform from "loc_transformation". 83 | """ 84 | 85 | return torch.stack([anchors[:, 0] + bb_hat[:, 0] * anchors[:, 2], 86 | anchors[:, 1] + bb_hat[:, 1] * anchors[:, 3], 87 | anchors[:, 2] * torch.exp(bb_hat[:, 2]), 88 | anchors[:, 3] * torch.exp(bb_hat[:, 3]) 89 | ], dim=1) 90 | -------------------------------------------------------------------------------- /computer_vision/object_detection/src/display.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def read_img(img_str, target_size): 6 | img = cv2.imread(img_str, cv2.IMREAD_UNCHANGED) 7 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 8 | img = cv2.resize(img, (target_size, target_size)) 9 | return img 10 | 11 | 12 | def draw_boxes(img, boxes): 13 | for box in boxes: 14 | cv2.rectangle(img, (int(box[0] - box[2]/2), int(box[1] - box[3]/2)), 15 | (int(box[0] + box[2]/2), int(box[1] + box[3]/2)), 16 | (0, 0, 255), 2) 17 | 18 | return img 19 | 20 | 21 | def draw_grid(img, pixel_step): 22 | x = pixel_step 23 | y = pixel_step 24 | 25 | while x < img.shape[1]: 26 | cv2.line(img, (x, 0), (x, img.shape[0]), color=(255, 255, 255)) 27 | x += pixel_step 28 | 29 | while y < img.shape[0]: 30 | cv2.line(img, (0, y), (img.shape[1], y), color=(255, 255, 255)) 31 | y += pixel_step 32 | 33 | return img 34 | 35 | 36 | def draw_text(img, texts, locations): 37 | for text, loc in zip(texts, locations): 38 | cv2.putText(img, text, (int(loc[0]), int(loc[1])), cv2.FONT_HERSHEY_COMPLEX, 39 | 0.5, (255, 0, 0), 1) 40 | return img 41 | -------------------------------------------------------------------------------- /computer_vision/object_detection/src/evaluation.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple, defaultdict 2 | import torch 3 | import numpy as np 4 | from src.data_transformer import invert_transformation 5 | 6 | 7 | PredBoundingBox = namedtuple("PredBoundingBox", ["probability", "class_id", 8 | "classname", "bounding_box" 9 | ]) 10 | 11 | 12 | class MAP: 13 | def __init__(self, model, dataset, jaccard_threshold, anchors): 14 | self.jaccard_threshold = jaccard_threshold 15 | self.model = model 16 | self.eps = np.finfo(np.float32).eps 17 | self.anchors = anchors 18 | self.dataset = dataset 19 | 20 | @staticmethod 21 | def voc_ap(rec, prec): 22 | """Compute VOC AP given precision and recall with the VOC-07 11-point method.""" 23 | 24 | ap = 0.0 25 | for t in np.arange(0.0, 
1.1, 0.1): 26 | if np.sum(rec >= t) == 0: 27 | p = 0.0 28 | else: 29 | p = np.max(prec[rec >= t]) 30 | ap = ap + p / 11.0 31 | return ap 32 | 33 | def __call__(self): 34 | self.model.eval() 35 | aps = defaultdict(list) 36 | 37 | for i in range(len(self.dataset)): 38 | (x, bb_true, class_true) = self.dataset[i] 39 | class_true = class_true.squeeze(0) - 1 # -1 to convert it from 1-21 to 0-20 40 | 41 | x = x[None, :, :, :] 42 | class_hat, bb_hat = self.model(x) 43 | class_hat = class_hat[0, :, 1:].sigmoid() 44 | 45 | bb_hat = invert_transformation(bb_hat.squeeze(0), self.anchors) 46 | jacard_values = jaccard(bb_hat.squeeze(0), bb_true.squeeze(0)) 47 | 48 | for j in range(len(class_true)): 49 | overlap = (jacard_values[:, j] > self.jaccard_threshold).nonzero() 50 | class_true_j = int(class_true[j].detach().cpu().numpy()) 51 | 52 | if len(overlap) > 0: 53 | class_hat_j = class_hat[overlap[:,0], :] 54 | prob, class_id = class_hat_j.max(1) 55 | prob, sort_index = torch.sort(prob, descending=True) 56 | class_id = class_id[sort_index].detach().cpu().numpy() 57 | 58 | tp = np.zeros_like(class_id) 59 | fp = np.zeros_like(class_id) 60 | 61 | found = False 62 | for d in range(len(class_id)): 63 | if found or class_id[d] != class_true[j]: 64 | fp[d] = 1.0 65 | else: 66 | tp[d] = 1.0 67 | found = True 68 | 69 | fp = np.cumsum(fp) 70 | tp = np.cumsum(tp) 71 | 72 | rec = tp 73 | prec = tp / np.maximum(tp + fp, self.eps) 74 | 75 | temp_ap = MAP.voc_ap(rec, prec) 76 | aps[class_true_j].append(temp_ap) 77 | else: 78 | aps[class_true_j].append(0) 79 | 80 | res_list = [] 81 | for _, list_value in aps.items(): 82 | res_list.append(sum(list_value) / len(list_value)) 83 | 84 | return res_list, sum(res_list) / len(res_list) 85 | 86 | 87 | def center_to_minmax(box: torch.Tensor) -> float: 88 | """ 89 | Converting (cx, cy, w, h) to (x1, y1, x2, y2) 90 | """ 91 | 92 | xmin = box[:, 0] - 0.5 * box[:, 2] 93 | xmax = box[:, 0] + 0.5 * box[:, 2] 94 | 95 | ymin = box[:, 1] - 0.5 * box[:, 3] 96 | ymax = box[:, 1] + 0.5 * box[:, 3] 97 | return torch.stack([xmin, ymin, xmax, ymax], dim=1) 98 | 99 | 100 | def intersect(box_a: torch.Tensor, box_b: torch.Tensor) -> float: 101 | # Coverting (cx, cy, w, h) to (x1, y1, x2, y2) since its easier to extract min/max coordinates 102 | temp_box_a, temp_box_b = center_to_minmax(box_a), center_to_minmax(box_b) 103 | 104 | max_xy = torch.min(temp_box_a[:, None, 2:], temp_box_b[None, :, 2:]) 105 | min_xy = torch.max(temp_box_a[:, None, :2], temp_box_b[None, :, :2]) 106 | inter = torch.clamp((max_xy - min_xy), min=0) 107 | return inter[:, :, 0] * inter[:, :, 1] 108 | 109 | 110 | def box_area(box: torch.Tensor) -> float: 111 | return box[:, 2] * box[:, 3] 112 | 113 | 114 | def jaccard(box_a: torch.Tensor, box_b: torch.Tensor) -> float: 115 | intersection = intersect(box_a, box_b) 116 | union = box_area(box_a).unsqueeze(1) + box_area(box_b).unsqueeze(0) - intersection 117 | return intersection / union 118 | 119 | 120 | def non_max_suppression(bounding_boxes: list, iou_threshold: float = 0.5) -> list: 121 | filtered_bb = [] 122 | 123 | while len(bounding_boxes) != 0: 124 | best_bb = bounding_boxes.pop(0) 125 | filtered_bb.append(best_bb) 126 | 127 | remove_items = [] 128 | for bb in bounding_boxes: 129 | iou = jaccard(torch.tensor(best_bb.bounding_box).unsqueeze(0), 130 | torch.tensor(bb.bounding_box).unsqueeze(0)) 131 | 132 | if iou > iou_threshold: 133 | remove_items.append(bb) 134 | bounding_boxes = [bb for bb in bounding_boxes if bb not in remove_items] 135 | return filtered_bb 136 | 
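To make the helpers above concrete, here is a small usage sketch (not part of the repository): a few hand-made `PredBoundingBox` predictions, in the `(cx, cy, w, h)` box format that `jaccard` expects, are filtered with `non_max_suppression`. The class ids, class names and coordinates are made up for illustration, and the list is assumed to be sorted by descending probability, since `non_max_suppression` pops candidates from the front of the list.

```python
from src.evaluation import PredBoundingBox, non_max_suppression

# Two heavily overlapping "car" predictions and one unrelated "dog" prediction,
# each box given as (cx, cy, w, h).
predictions = [
    PredBoundingBox(probability=0.9, class_id=6, classname="car",
                    bounding_box=[50.0, 50.0, 40.0, 40.0]),
    PredBoundingBox(probability=0.8, class_id=6, classname="car",
                    bounding_box=[52.0, 51.0, 40.0, 40.0]),
    PredBoundingBox(probability=0.7, class_id=11, classname="dog",
                    bounding_box=[150.0, 150.0, 30.0, 60.0]),
]

kept = non_max_suppression(predictions, iou_threshold=0.5)
print([bb.classname for bb in kept])  # ['car', 'dog'] - the weaker car box is suppressed
```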
-------------------------------------------------------------------------------- /computer_vision/object_detection/src/faster_rcnn_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def generator_anchors_old(img_size, sub_sample=16, ratios=[0.7, 1, 1.3], anchor_scales=[4, 8, 16], device="cuda"): 6 | # Subsample-factor: how much have have the feature-map decreased from input to last layer 7 | # in the backbone. I.e. if input image is 224 and the output from the backbone is 7, 8 | # sub-sample factor is 32. 9 | 10 | feature_size = img_size // sub_sample 11 | center_x_array = np.arange( 12 | sub_sample, (feature_size + 1) * sub_sample, sub_sample) 13 | center_y_array = np.arange( 14 | sub_sample, (feature_size + 1) * sub_sample, sub_sample) 15 | 16 | num_combinations = len(ratios) * len(anchor_scales) 17 | anchors = np.zeros((feature_size * feature_size * num_combinations, 4)) 18 | 19 | index = 0 20 | for x in range(len(center_x_array)): 21 | for y in range(len(center_y_array)): 22 | ctr_x = center_x_array[x] - sub_sample / 2.0 23 | ctr_y = center_y_array[y] - sub_sample / 2.0 24 | 25 | for i in range(len(ratios)): 26 | for j in range(len(anchor_scales)): 27 | h = sub_sample * anchor_scales[j] * np.sqrt(ratios[i]) 28 | w = sub_sample * \ 29 | anchor_scales[j] * np.sqrt(1.0 / ratios[i]) 30 | anchors[index, 0] = ctr_x - w / 2.0 31 | anchors[index, 1] = ctr_y - h / 2.0 32 | anchors[index, 2] = ctr_x + w / 2.0 33 | anchors[index, 3] = ctr_y + h / 2.0 34 | index += 1 35 | 36 | anchors = torch.from_numpy(anchors).float().to(device) 37 | return anchors 38 | 39 | 40 | def generate_anchor_base(base_size=16, ratios=[0.5, 1, 2], anchor_scales=[8, 16, 32]): 41 | px = base_size / 2. 42 | py = base_size / 2. 43 | 44 | anchor_base = np.zeros((len(ratios) * len(anchor_scales), 4), 45 | dtype=np.float32) 46 | 47 | for i in range(len(ratios)): 48 | for j in range(len(anchor_scales)): 49 | h = base_size * anchor_scales[j] * np.sqrt(ratios[i]) 50 | w = base_size * anchor_scales[j] * np.sqrt(1. / ratios[i]) 51 | 52 | index = i * len(anchor_scales) + j 53 | anchor_base[index, 0] = px - w / 2. 54 | anchor_base[index, 1] = py - h / 2. 55 | anchor_base[index, 2] = px + w / 2. 56 | anchor_base[index, 3] = py + h / 2. 57 | return anchor_base 58 | 59 | 60 | def generator_anchors(img_size, sub_sample=16, ratios=[0.7, 1, 1.3], anchor_scales=[4, 8, 16], device="cuda"): 61 | feat_stride = sub_sample # img_size // sub_sample 62 | 63 | anchor_base = generate_anchor_base( 64 | base_size=sub_sample, ratios=ratios, anchor_scales=anchor_scales) 65 | 66 | shift_y = np.arange(0, img_size * feat_stride, feat_stride) 67 | shift_x = np.arange(0, img_size * feat_stride, feat_stride) 68 | 69 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 70 | 71 | shift = np.stack((shift_x.ravel(), shift_y.ravel(), 72 | shift_x.ravel(), shift_y.ravel()), axis=1) 73 | 74 | A = anchor_base.shape[0] 75 | K = shift.shape[0] 76 | anchor = anchor_base.reshape((1, A, 4)) + \ 77 | shift.reshape((1, K, 4)).transpose((1, 0, 2)) 78 | anchor = anchor.reshape((K * A, 4)).astype(np.float32) 79 | anchor = torch.from_numpy(anchor).float().to(device) 80 | # print(anchor.shape) 81 | return anchor 82 | 83 | 84 | def loc2bbox(anchors, locs): 85 | # Converting anchors and predicted location deltas to "actual" bounding-boxes. 
86 | 87 | if anchors.shape[0] == 0: 88 | return torch.zeros((0, 4), dtype=locs.dtype) 89 | 90 | w = anchors[:, 2] - anchors[:, 0] 91 | h = anchors[:, 3] - anchors[:, 1] 92 | ctr_x = anchors[:, 0] + 0.5 * w 93 | ctr_y = anchors[:, 1] + 0.5 * h 94 | 95 | dx = locs[:, 0::4] 96 | dy = locs[:, 1::4] 97 | dw = locs[:, 2::4] 98 | dh = locs[:, 3::4] 99 | 100 | ctr_x = dx * w[:, None] + ctr_x[:, None] 101 | ctr_y = dy * h[:, None] + ctr_y[:, None] 102 | w = torch.exp(dw) * w[:, None] 103 | h = torch.exp(dh) * h[:, None] 104 | 105 | bbox = torch.zeros_like(locs) 106 | bbox[:, 0::4] = ctr_x - 0.5 * w 107 | bbox[:, 1::4] = ctr_y - 0.5 * h 108 | bbox[:, 2::4] = ctr_x + 0.5 * w 109 | bbox[:, 3::4] = ctr_y + 0.5 * h 110 | return bbox 111 | 112 | 113 | def bbox2loc(src_bbox, dst_bbox, eps=1e-6, device="cuda"): 114 | eps = torch.tensor(eps).float().to(device) 115 | 116 | width = src_bbox[:, 2] - src_bbox[:, 0] 117 | height = src_bbox[:, 3] - src_bbox[:, 1] 118 | ctr_x = src_bbox[:, 0] + 0.5 * width 119 | ctr_y = src_bbox[:, 1] + 0.5 * height 120 | 121 | base_width = dst_bbox[:, 2] - dst_bbox[:, 0] 122 | base_height = dst_bbox[:, 3] - dst_bbox[:, 1] 123 | base_ctr_x = dst_bbox[:, 0] + 0.5 * base_width 124 | base_ctr_y = dst_bbox[:, 1] + 0.5 * base_height 125 | 126 | height = torch.max(height, eps) 127 | width = torch.max(width, eps) 128 | 129 | dy = (base_ctr_y - ctr_y) / height 130 | dx = (base_ctr_x - ctr_x) / width 131 | dh = torch.log(base_height / height) 132 | dw = torch.log(base_width / width) 133 | 134 | locs = torch.stack((dx, dy, dw, dh), dim=1) 135 | return locs 136 | 137 | 138 | def random_choice(arr, size, device="cuda"): 139 | idx = torch.randperm(len(arr), device=device) 140 | return arr[idx][0:size] 141 | 142 | 143 | def normal_init(module, mean, stddev): 144 | module.weight.data.normal_(mean, stddev) 145 | module.bias.data.zero_() 146 | -------------------------------------------------------------------------------- /computer_vision/object_detection/src/xml2json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import xml.etree.ElementTree as ET 5 | import numpy as np 6 | import cv2 7 | 8 | 9 | def _isArrayLike(obj): 10 | return hasattr(obj, '__iter__') and hasattr(obj, '__len__') 11 | 12 | 13 | class voc2coco: 14 | def __init__(self, devkit_path=None, year=None): 15 | self.classes = ( 16 | '__background__', # always index 0 17 | 'aeroplane', 18 | 'bicycle', 19 | 'bird', 20 | 'boat', 21 | 'bottle', 22 | 'bus', 23 | 'car', 24 | 'cat', 25 | 'chair', 26 | 'cow', 27 | 'diningtable', 28 | 'dog', 29 | 'horse', 30 | 'motorbike', 31 | 'person', 32 | 'pottedplant', 33 | 'sheep', 34 | 'sofa', 35 | 'train', 36 | 'tvmonitor') 37 | 38 | self.num_classes = len(self.classes) 39 | assert 'VOCdevkit' in devkit_path, 'VOCdevkit path does not exist: {}'.format( 40 | devkit_path) 41 | self.data_path = os.path.join(devkit_path, 'VOC' + year) 42 | self.annotaions_path = os.path.join(self.data_path, 'Annotations') 43 | self.image_set_path = os.path.join(self.data_path, 'ImageSets') 44 | self.year = year 45 | self.categories_to_ids_map = self._get_categories_to_ids_map() 46 | self.categories_msg = self._categories_msg_generator() 47 | 48 | def _load_annotation(self, ids=[]): 49 | """ 50 | Load annotations by ids 51 | :param ids (int array) : get amms for idss 52 | :return image_msg 53 | return annotation_msg 54 | """ 55 | ids = ids if _isArrayLike(ids) else [ids] 56 | image_msg = [] 57 | annotation_msg = [] 58 | annotation_id = 1 
59 | for index in ids: 60 | filename = '{:0>6}'.format(index) 61 | json_file = os.path.join(self.data_path, 'Segmentation_json', 62 | filename + '.json') 63 | #Labelme label file .json 64 | if os.path.exists(json_file): 65 | img_file = os.path.join(self.data_path, 'JPEGImages', 66 | filename + '.jpg') 67 | im = cv2.imread(img_file) 68 | width = im.shape[1] 69 | height = im.shape[0] 70 | seg_data = json.load(open(json_file, 'r')) 71 | assert type(seg_data) == type( 72 | dict()), 'annotation file format {} not supported'.format( 73 | type(seg_data)) 74 | for shape in seg_data['shapes']: 75 | seg_msg = [] 76 | for point in shape['points']: 77 | seg_msg += point 78 | one_ann_msg = { 79 | "segmentation": [seg_msg], 80 | "area": self._area_computer(shape['points']), 81 | "iscrowd": 0, 82 | "image_id": int(index), 83 | "bbox": self._points_to_mbr(shape['points']), 84 | "category_id": 85 | self.categories_to_ids_map[shape['label']], 86 | "id": annotation_id, 87 | "ignore": 0 88 | } 89 | annotation_msg.append(one_ann_msg) 90 | annotation_id += 1 91 | #LabelImg label file .xml 92 | else: 93 | xml_file = os.path.join(self.annotaions_path, 94 | filename + '.xml') 95 | tree = ET.parse(xml_file) 96 | size = tree.find('size') 97 | objs = tree.findall('object') 98 | width = size.find('width').text 99 | height = size.find('height').text 100 | for obj in objs: 101 | bndbox = obj.find('bndbox') 102 | [xmin, xmax, ymin, ymax] = [ 103 | int(bndbox.find('xmin').text) - 1, 104 | int(bndbox.find('xmax').text) - 1, 105 | int(bndbox.find('ymin').text) - 1, 106 | int(bndbox.find('ymax').text) -1 107 | ] 108 | if xmin < 0: 109 | xmin = 0 110 | if ymin < 0: 111 | ymin = 0 112 | bbox = [xmin, xmax, ymin, ymax] 113 | one_ann_msg = { 114 | "segmentation": self._bbox_to_mask(bbox), 115 | "area": self._bbox_area_computer(bbox), 116 | "iscrowd": 0, 117 | "image_id": int(index), 118 | "bbox": [xmin, ymin, xmax - xmin, ymax - ymin], 119 | "category_id": self.categories_to_ids_map[obj.find('name').text], 120 | "id": annotation_id, 121 | "ignore": 0 122 | } 123 | annotation_msg.append(one_ann_msg) 124 | annotation_id += 1 125 | one_image_msg = { 126 | "file_name": filename + ".jpg", 127 | "height": int(height), 128 | "width": int(width), 129 | "id": int(index) 130 | } 131 | image_msg.append(one_image_msg) 132 | return image_msg, annotation_msg 133 | 134 | def _bbox_to_mask(self, bbox): 135 | """" 136 | Generate mask by bbox 137 | :param bbox e.g. [xmin,xmax,ymin,ymax] 138 | :return mask [points] 139 | """ 140 | assert len(bbox) == 4, 'Wrong bndbox!' 
141 | mask = [ 142 | bbox[0], bbox[2], bbox[0], bbox[3], bbox[1], bbox[3], bbox[1], 143 | bbox[2] 144 | ] 145 | return [mask] 146 | 147 | def _bbox_area_computer(self, bbox): 148 | """ 149 | Area computer 150 | """ 151 | width = bbox[1] - bbox[0] 152 | height = bbox[3] - bbox[2] 153 | return width * height 154 | 155 | def _save_json_file(self, filename=None, data=None): 156 | """ 157 | Save result in json 158 | :param filename (str) : name of json file 159 | param data : coco format data 160 | :return 161 | """ 162 | json_path = os.path.join(self.data_path, 'cocoformatJson') 163 | assert filename is not None, 'lack filename' 164 | if os.path.exists(json_path) == False: 165 | os.mkdir(json_path) 166 | if not filename.endswith('.json'): 167 | filename += '.json' 168 | assert type(data) == type( 169 | dict()), 'data format {} not supported'.format(type(data)) 170 | with open(os.path.join(json_path, filename), 'w') as f: 171 | f.write(json.dumps(data)) 172 | 173 | def _get_categories_to_ids_map(self): 174 | """ 175 | Generate categories to ids map 176 | """ 177 | return dict(zip(self.classes, range(self.num_classes))) 178 | 179 | def _get_all_indexs(self): 180 | """ 181 | Get all images and annotations indexs 182 | :param 183 | :return ids (str array) 184 | """ 185 | ids = [] 186 | for root, dirs, files in os.walk(self.annotaions_path, topdown=False): 187 | for f in files: 188 | if str(f).endswith('.xml'): 189 | id = int(str(f).strip('.xml')) 190 | ids.append(id) 191 | assert ids is not None, 'There is none xml file in {}'.format( 192 | self.annotaions_path) 193 | return ids 194 | 195 | def _get_indexs_by_image_set(self, image_set=None): 196 | """ 197 | Get images and nnotations indexs in image_set 198 | """ 199 | if image_set is None: 200 | return self._get_all_indexs() 201 | else: 202 | image_set_path = os.path.join(self.image_set_path, 'Main', 203 | image_set + '.txt') 204 | assert os.path.exists( 205 | image_set_path), 'Path does not exist: {}'.format( 206 | image_set_path) 207 | with open(image_set_path) as f: 208 | ids = [x.strip() for x in f.readlines()] 209 | return ids 210 | 211 | def _points_to_mbr(self, points): 212 | """ 213 | Transfer points to min bounding rectangle 214 | :param: points (a list of lists) 215 | :return: [x,y,width,height] 216 | """ 217 | assert _isArrayLike(points), 'Points should be array like!' 218 | x = [point[0] for point in points] 219 | y = [point[1] for point in points] 220 | assert len(x) == len(y), 'Wrong point quantity' 221 | xmin, xmax, ymin, ymax = min(x), max(x), min(y), max(y) 222 | height = ymax - ymin 223 | width = xmax - xmin 224 | return [xmin, ymin, width, height] 225 | 226 | def _categories_msg_generator(self): 227 | categories_msg = [] 228 | for category in self.classes: 229 | if category == '__background__': 230 | continue 231 | one_categories_msg = { 232 | "supercategory": "none", 233 | "id": self.categories_to_ids_map[category], 234 | "name": category 235 | } 236 | categories_msg.append(one_categories_msg) 237 | return categories_msg 238 | 239 | def _area_computer(self, points): 240 | """ 241 | :param: one shape's points (int array array) 242 | :return: shape's area 243 | """ 244 | assert _isArrayLike(points), 'Points should be array like!' 
245 | tmp_contour = [] 246 | for point in points: 247 | tmp_contour.append([point]) 248 | contour = np.array(tmp_contour, dtype=np.int32) 249 | area = cv2.contourArea(contour) 250 | return area 251 | 252 | def voc_to_coco_converter(self): 253 | """ 254 | Convert voc dataset to coco dataset 255 | """ 256 | img_sets = ['train', 'val', 'trainval'] 257 | 258 | for img_set in img_sets: 259 | ids = self._get_indexs_by_image_set(img_set) 260 | img_msg, ann_msg = self._load_annotation(ids) 261 | result_json = { 262 | "images": img_msg, 263 | "type": "instances", 264 | "annotations": ann_msg, 265 | "categories": self.categories_msg 266 | } 267 | 268 | print(len(ids)) 269 | self._save_json_file('voc_' + self.year + '_' + img_set, 270 | result_json) 271 | 272 | 273 | if __name__ == "__main__": 274 | if len(sys.argv) <= 2: 275 | print('2 arguments are needed') 276 | print('Usage: python {0} $VOCdevkitPATH $year'.format(sys.argv[0])) 277 | exit(1) 278 | 279 | devkit_path = sys.argv[1] 280 | year = sys.argv[2] 281 | 282 | converter = voc2coco(devkit_path, year) 283 | converter.voc_to_coco_converter() -------------------------------------------------------------------------------- /computer_vision/semantic_segmentation/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | src/__pycache__ -------------------------------------------------------------------------------- /computer_vision/semantic_segmentation/README.md: -------------------------------------------------------------------------------- 1 | # Semantic segmentation 2 | 3 | Implementation of two end-to-end semantic segmentation methods: 4 | 5 | - [FCN-8](https://arxiv.org/abs/1411.4038). 6 | - [U-Net](https://arxiv.org/abs/1505.04597). 7 | 8 | They are trained on PASCAL VOC 2012, which can be found [here](https://pjreddie.com/projects/pascal-voc-dataset-mirror/). 9 | 10 | ## FCN-8 Architecture 11 | 12 | ![fcn](./data/fcn_8.png) 13 | 14 | ## U-Net Architecture 15 | 16 | - Note that since I use a pre-trained VGG-net as the encoder, the network is more like the [TernausNet](https://arxiv.org/abs/1801.05746).
17 | 18 | ![unet](./data/ternaus_net.png) 19 | -------------------------------------------------------------------------------- /computer_vision/semantic_segmentation/data/.gitignore: -------------------------------------------------------------------------------- 1 | VOCdevkit/ -------------------------------------------------------------------------------- /computer_vision/semantic_segmentation/data/fcn_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrParosk/ml_playground/1202dd9b4341fc8ad5fbadade45ac93e76217303/computer_vision/semantic_segmentation/data/fcn_8.png -------------------------------------------------------------------------------- /computer_vision/semantic_segmentation/data/ternaus_net.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrParosk/ml_playground/1202dd9b4341fc8ad5fbadade45ac93e76217303/computer_vision/semantic_segmentation/data/ternaus_net.png -------------------------------------------------------------------------------- /computer_vision/semantic_segmentation/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrParosk/ml_playground/1202dd9b4341fc8ad5fbadade45ac93e76217303/computer_vision/semantic_segmentation/src/__init__.py -------------------------------------------------------------------------------- /computer_vision/semantic_segmentation/src/dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import glob 3 | import random 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | import torchvision.transforms.functional as TVF 9 | 10 | # Color corresponding to class, i.e. 
COLOR_2_INDEX[i] = CLASS_NAMES[i] 11 | COLOR_2_INDEX = np.asarray([ 12 | [0, 0, 0], 13 | [128, 0, 0], 14 | [0, 128, 0], 15 | [128, 128, 0], 16 | [0, 0, 128], 17 | [128, 0, 128], 18 | [0, 128, 128], 19 | [128, 128, 128], 20 | [64, 0, 0], 21 | [192, 0, 0], 22 | [64, 128, 0], 23 | [192, 128, 0], 24 | [64, 0, 128], 25 | [192, 0, 128], 26 | [64, 128, 128], 27 | [192, 128, 128], 28 | [0, 64, 0], 29 | [128, 64, 0], 30 | [0, 192, 0], 31 | [128, 192, 0], 32 | [0, 64, 128], 33 | ]) 34 | 35 | CLASS_NAMES = ["background", "aeroplane", "bicycle", "bird", "boat", 36 | "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", 37 | "dog", "horse", "motorbike", "person", "potted-plant", 38 | "sheep", "sofa", "train", "tv/monitor"] 39 | 40 | 41 | class PascalVoc(Dataset): 42 | def __init__(self, path, img_size, device="cuda"): 43 | self.seg_folder = "SegmentationClass/" 44 | self.img_folder = "JPEGImages/" 45 | self.path = path 46 | self.device = device 47 | 48 | self.segmentation_imgs = glob.glob(path + self.seg_folder + "*") 49 | self.img_size = img_size 50 | 51 | def __len__(self): 52 | return len(self.segmentation_imgs) 53 | 54 | def get_paths(self, idx): 55 | mask_path = self.segmentation_imgs[idx] 56 | 57 | file_name = mask_path.split("\\")[1] 58 | img_path = self.path + self.img_folder + file_name 59 | img_path = img_path.split(".")[0] + ".jpg" 60 | 61 | return (img_path, mask_path) 62 | 63 | def load_imgs(self, idx): 64 | img_path, mask_path = self.get_paths(idx) 65 | 66 | img = Image.open(img_path) 67 | img = img.resize((self.img_size, self.img_size)) 68 | 69 | mask_img = Image.open(mask_path).convert("RGB") 70 | mask_img = mask_img.resize((self.img_size, self.img_size)) 71 | 72 | return (img, mask_img) 73 | 74 | @staticmethod 75 | def create_label_mask(mask_img): 76 | mask = np.array(mask_img).astype(int) 77 | label_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.int16) 78 | 79 | for idx, label in enumerate(COLOR_2_INDEX): 80 | label_mask[np.where(np.all(mask == label, axis=-1))[:2]] = idx 81 | 82 | label_mask = label_mask.astype(int) 83 | return label_mask 84 | 85 | def __getitem__(self, idx): 86 | img, mask_img = self.load_imgs(idx) 87 | 88 | if random.random() > 0.5: 89 | img = TVF.hflip(img) 90 | mask_img = TVF.hflip(mask_img) 91 | 92 | mask_img = PascalVoc.create_label_mask(mask_img) 93 | mask_img = torch.from_numpy(mask_img).long() 94 | 95 | img = TVF.to_tensor(img) 96 | img = TVF.normalize(img, 97 | mean=(0.485, 0.456, 0.406), 98 | std=(0.229, 0.224, 0.225) 99 | ) 100 | 101 | img = img.to(self.device) 102 | mask_img = mask_img.to(self.device) 103 | 104 | return (img, mask_img) 105 | -------------------------------------------------------------------------------- /computer_vision/semantic_segmentation/src/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def batch_mean_iou(predicted_batch_segmentation, gt_batch_segmentation): 5 | assert(len(predicted_batch_segmentation.shape) == 3) 6 | 7 | iou_values = [] 8 | for i in range(predicted_batch_segmentation.shape[0]): 9 | iou = mean_iou(predicted_batch_segmentation[i, :, :], 10 | gt_batch_segmentation[i, :, :]) 11 | 12 | iou_values.append(iou) 13 | 14 | return np.mean(iou_values) 15 | 16 | 17 | def mean_iou(predicted_segmentation, gt_segmentation): 18 | classes, num_classes = union_classes(predicted_segmentation, gt_segmentation) 19 | _, n_classes_gt = extract_classes(gt_segmentation) 20 | eval_mask, gt_mask = extract_both_masks(predicted_segmentation, 
gt_segmentation, classes, num_classes) 21 | 22 | iou_list = list([0]) * num_classes 23 | 24 | for i, _ in enumerate(classes): 25 | curr_eval_mask = eval_mask[i, :, :] 26 | curr_gt_mask = gt_mask[i, :, :] 27 | 28 | if (np.sum(curr_eval_mask) == 0) or (np.sum(curr_gt_mask) == 0): 29 | continue 30 | 31 | intersect = np.sum(np.logical_and(curr_eval_mask, curr_gt_mask)) 32 | union = np.sum(np.logical_or(curr_eval_mask, curr_gt_mask)) 33 | iou_list[i] = intersect / union 34 | 35 | mean_iou_value = np.sum(iou_list) / n_classes_gt 36 | return mean_iou_value 37 | 38 | 39 | def extract_both_masks(predicted_segmentation, gt_segmentation, classes, num_classes): 40 | predicted_mask = extract_masks(predicted_segmentation, classes, num_classes) 41 | gt_mask = extract_masks(gt_segmentation, classes, num_classes) 42 | return predicted_mask, gt_mask 43 | 44 | 45 | def extract_classes(segmentation): 46 | classes = np.unique(segmentation) 47 | num_classes = len(classes) 48 | 49 | return classes, num_classes 50 | 51 | 52 | def union_classes(predicted_segmentation, gt_segmentation): 53 | predicted_classes, _ = extract_classes(predicted_segmentation) 54 | gt_classes, _ = extract_classes(gt_segmentation) 55 | 56 | classes = np.union1d(predicted_classes, gt_classes) 57 | num_classes = len(classes) 58 | 59 | return classes, num_classes 60 | 61 | 62 | def extract_masks(segmentation, classes, num_classes): 63 | h, w = segmentation_size(segmentation) 64 | masks = np.zeros((num_classes, h, w)) 65 | 66 | for i, c in enumerate(classes): 67 | masks[i, :, :] = segmentation == c 68 | 69 | return masks 70 | 71 | 72 | def segmentation_size(segmentation): 73 | height = segmentation.shape[0] 74 | width = segmentation.shape[1] 75 | 76 | return height, width 77 | 78 | 79 | if __name__ == "__main__": 80 | # Test cases 81 | 82 | segm = np.array([[1,0,0,0,0], [0,0,0,0,0]]) 83 | gt = np.array([[0,0,0,0,0], [0,0,0,0,0]]) 84 | res = mean_iou(segm, gt) 85 | assert(np.allclose(res, 0.9)) 86 | 87 | segm = np.array([[0,0,0,0,0], [0,0,0,0,0]]) 88 | gt = np.array([[1,2,0,0,0], [0,0,0,0,0]]) 89 | res = mean_iou(segm, gt) 90 | assert(np.allclose(res, np.mean([8.0/10.0, 0, 0]))) 91 | 92 | np.random.seed(42) 93 | segm = np.random.randint(2, size=(2, 100, 100)) 94 | gt = np.random.randint(2, size=(2, 100, 100)) 95 | _ = batch_mean_iou(segm, gt) 96 | -------------------------------------------------------------------------------- /natural_language_processing/sentiment_analysis/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | -------------------------------------------------------------------------------- /natural_language_processing/sentiment_analysis/README.md: -------------------------------------------------------------------------------- 1 | # Sentiment-analysis using different pre-trained methods 2 | 3 | Doing sentiment analysis on the Amazon Fine Food Reviews dataset. The data can be found [here](https://www.kaggle.com/snap/amazon-fine-food-reviews). The data is converted into a binary classification problem (negative/positive review). 4 | 5 | The following pre-trained strategies were tested: 6 | 7 | - [Word embeddings](https://nlp.stanford.edu/pubs/glove.pdf). 8 | - [BERT](https://arxiv.org/abs/1810.04805). 
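As the README above notes, the reviews are turned into a binary classification problem before training. Both notebooks below do this by dropping reviews with score 3 (treated as neutral) and labelling the remaining reviews by whether the score is above 3. A minimal pandas sketch of that preprocessing, assuming the `data/Reviews.csv` file with the `Text` and `Score` columns used in the notebooks:

```python
import pandas as pd

df = pd.read_csv("data/Reviews.csv")
df = df[df["Score"] != 3]                    # score 3 is treated as neutral and removed
df["label"] = (df["Score"] > 3).astype(int)  # 1 = positive (scores 4-5), 0 = negative (scores 1-2)
print(df["label"].value_counts())
```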
9 | -------------------------------------------------------------------------------- /natural_language_processing/sentiment_analysis/bert.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "from sklearn.model_selection import train_test_split\n", 12 | "from transformers import BertTokenizer, BertModel\n", 13 | "import torch\n", 14 | "import torch.nn as nn\n", 15 | "from torch.utils.data import Dataset, DataLoader\n", 16 | "import torch.optim as optim\n", 17 | "from bs4 import BeautifulSoup " 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "DATA_PATH = \"data/\"\n", 27 | "pretrained_type = 'bert-base-uncased'\n", 28 | "seed = 42\n", 29 | "\n", 30 | "test_size = 0.1\n", 31 | "device = \"cuda\"" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "df = pd.read_csv(DATA_PATH + \"Reviews.csv\")" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 4, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "max_length_sequence = 256" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 5, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "text = df[\"Text\"].values.tolist()\n", 59 | "labels = df[\"Score\"].values.astype(int)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 6, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# Since we would like to do positive / negative sentiment prediction we will remove review value 3 since\n", 69 | "# it can be viewed as neutral\n", 70 | "\n", 71 | "text = [text[i] for i in range(len(text)) if labels[i] != 3]\n", 72 | "labels = np.array([labels[i] for i in range(len(labels)) if labels[i] != 3])\n", 73 | "labels = (labels > 3).astype(int) # Binary classification" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 7, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "# Sub-sampling since it would take too long to train otherwise on my computer\n", 83 | "num_samples = 10000\n", 84 | "\n", 85 | "np.random.seed(seed)\n", 86 | "idx = np.random.choice(np.arange(len(text)), size=num_samples, replace=False)\n", 87 | "text = [text[i] for i in idx]\n", 88 | "labels = labels[idx]" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 8, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "text_train, text_val, labels_train, labels_val = train_test_split(text, labels, test_size=test_size, random_state=seed)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 9, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "class Reviews(Dataset):\n", 107 | " def __init__(self, text, labels):\n", 108 | " self.text = text\n", 109 | " self.labels = torch.tensor(labels, dtype=torch.float)\n", 110 | " self.len = len(text)\n", 111 | " self.tokenizer = BertTokenizer.from_pretrained(pretrained_type)\n", 112 | "\n", 113 | " def __len__(self):\n", 114 | " return self.len\n", 115 | "\n", 116 | " def __getitem__(self, idx):\n", 117 | " txt = BeautifulSoup(self.text[idx]).get_text().lower() \n", 118 | " tokens = self.tokenizer.encode(txt, add_special_tokens=True, 
max_length=max_length_sequence)\n", 119 | " tokens = torch.tensor(tokens, dtype=torch.long)\n", 120 | " label = self.labels[idx]\n", 121 | " return tokens, label\n", 122 | "\n", 123 | "def collate_fn(batch):\n", 124 | " labels = torch.tensor([b[1] for b in batch])\n", 125 | "\n", 126 | " lengths = [len(b[0]) for b in batch]\n", 127 | " max_length = min([max(lengths), max_length_sequence])\n", 128 | "\n", 129 | " attention_mask = torch.zeros((len(batch), max_length), dtype=torch.int)\n", 130 | " idx_tensor = torch.zeros((len(batch), max_length), dtype=torch.long)\n", 131 | "\n", 132 | " for i in range(len(batch)):\n", 133 | " batch_len = lengths[i]\n", 134 | " batch_len = min([max_length_sequence, batch_len])\n", 135 | "\n", 136 | " attention_mask[i, 0:batch_len] = 1\n", 137 | " idx_tensor[i, 0:batch_len] = batch[i][0][0:batch_len]\n", 138 | "\n", 139 | " return idx_tensor, attention_mask, labels" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 10, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "num_workers = 4\n", 149 | "batch_size = 7\n", 150 | "\n", 151 | "train_dataset = Reviews(text_train, labels_train)\n", 152 | "train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,\n", 153 | " num_workers=num_workers, collate_fn=collate_fn)\n", 154 | "\n", 155 | "val_dataset = Reviews(text_val, labels_val)\n", 156 | "val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,\n", 157 | " num_workers=num_workers, collate_fn=collate_fn)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 11, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "class SentimentClassifier(nn.Module):\n", 167 | " def __init__(self, encoder_dim=768):\n", 168 | " super().__init__()\n", 169 | " self.bert_model = BertModel.from_pretrained(pretrained_type)\n", 170 | " self.linear = nn.Linear(encoder_dim, 1)\n", 171 | "\n", 172 | " def forward(self, x, attention_mask):\n", 173 | " output = self.bert_model(x, attention_mask=attention_mask)[1]\n", 174 | " output = self.linear(output)\n", 175 | " return output[:, 0]" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 12, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "torch.manual_seed(seed)\n", 185 | "model = SentimentClassifier()\n", 186 | "model = model.to(device)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 13, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "epochs = 1\n", 196 | "lr = 1e-5\n", 197 | "\n", 198 | "optimizer = optim.Adam(model.parameters(), lr=lr)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 14, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "loss_fct = nn.BCEWithLogitsLoss()" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 15, 213 | "metadata": { 214 | "scrolled": false 215 | }, 216 | "outputs": [ 217 | { 218 | "name": "stdout", 219 | "output_type": "stream", 220 | "text": [ 221 | "----------- Epoch 0 -----------\n", 222 | "Train loss: 0.176774\n", 223 | "Validation loss: 0.139315\n", 224 | "Validation accuracy: 0.944888\n" 225 | ] 226 | } 227 | ], 228 | "source": [ 229 | "torch.manual_seed(seed)\n", 230 | "for epoch in range(epochs):\n", 231 | " train_loss, val_loss, val_acc = 0.0, 0.0, 0.0\n", 232 | "\n", 233 | " model.train()\n", 234 | " for _, (idx_tensor, attention_mask, labels) in enumerate(train_dataloader):\n", 235 | " idx_tensor, 
attention_mask, labels = idx_tensor.to(device), attention_mask.to(device), labels.to(device)\n", 236 | " optimizer.zero_grad()\n", 237 | "\n", 238 | " output = model(idx_tensor, attention_mask)\n", 239 | "\n", 240 | " batch_loss = loss_fct(output, labels) \n", 241 | " batch_loss.backward()\n", 242 | " optimizer.step()\n", 243 | "\n", 244 | " train_loss += batch_loss.detach().cpu().numpy()\n", 245 | "\n", 246 | " model.eval()\n", 247 | " with torch.no_grad():\n", 248 | " for _, (idx_tensor, attention_mask, labels) in enumerate(val_dataloader):\n", 249 | " idx_tensor, attention_mask, labels = idx_tensor.to(device), attention_mask.to(device), labels.to(device)\n", 250 | "\n", 251 | " output = model(idx_tensor, attention_mask)\n", 252 | " batch_loss = loss_fct(output, labels)\n", 253 | " val_loss += batch_loss.cpu().numpy()\n", 254 | "\n", 255 | " y_hat = (torch.sigmoid(output) > 0.5).long()\n", 256 | " batch_acc = (y_hat == labels).float().mean()\n", 257 | " val_acc += batch_acc.cpu().numpy()\n", 258 | "\n", 259 | " train_loss = np.round(train_loss / len(train_dataloader), 6)\n", 260 | " val_loss = np.round(val_loss / len(val_dataloader), 6)\n", 261 | " val_acc = np.round(val_acc / len(val_dataloader), 6)\n", 262 | "\n", 263 | " print(f\"----------- Epoch {epoch} -----------\")\n", 264 | " print(f\"Train loss: {train_loss}\")\n", 265 | " print(f\"Validation loss: {val_loss}\")\n", 266 | " print(f\"Validation accuracy: {val_acc}\")" 267 | ] 268 | } 269 | ], 270 | "metadata": { 271 | "kernelspec": { 272 | "display_name": "Python 3", 273 | "language": "python", 274 | "name": "python3" 275 | }, 276 | "language_info": { 277 | "codemirror_mode": { 278 | "name": "ipython", 279 | "version": 3 280 | }, 281 | "file_extension": ".py", 282 | "mimetype": "text/x-python", 283 | "name": "python", 284 | "nbconvert_exporter": "python", 285 | "pygments_lexer": "ipython3", 286 | "version": "3.7.5" 287 | } 288 | }, 289 | "nbformat": 4, 290 | "nbformat_minor": 2 291 | } 292 | -------------------------------------------------------------------------------- /natural_language_processing/sentiment_analysis/data/.gitignore: -------------------------------------------------------------------------------- 1 | Reviews.csv 2 | amazon-fine-food-reviews.zip 3 | -------------------------------------------------------------------------------- /natural_language_processing/sentiment_analysis/word_embeddings.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import spacy\n", 10 | "from bs4 import BeautifulSoup\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import train_test_split\n", 14 | "import torch\n", 15 | "import torch.nn as nn\n", 16 | "from torch.utils.data import Dataset, DataLoader\n", 17 | "import torch.optim as optim" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "DATA_PATH = \"data/\"\n", 27 | "seed = 42\n", 28 | "device = \"cuda\"\n", 29 | "\n", 30 | "embedding_dim = 300\n", 31 | "test_size = 0.1\n", 32 | "max_length_sequence = 128" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "df = pd.read_csv(DATA_PATH + \"Reviews.csv\")" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | 
"metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "text = df[\"Text\"].values.tolist()\n", 51 | "labels = df[\"Score\"].values.astype(int)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 5, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "# Since we would like to do positive / negative sentiment prediction we will remove review value 3 since\n", 61 | "# it can be viewed as neutral\n", 62 | "\n", 63 | "text = [text[i] for i in range(len(text)) if labels[i] != 3]\n", 64 | "labels = np.array([labels[i] for i in range(len(labels)) if labels[i] != 3])\n", 65 | "labels = (labels > 3).astype(int) # Binary classification" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 6, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "# Sub-sampling since it would take too long to train otherwise on my computer\n", 75 | "num_samples = 10000\n", 76 | "\n", 77 | "np.random.seed(seed)\n", 78 | "idx = np.random.choice(np.arange(len(text)), size=num_samples, replace=False)\n", 79 | "text = [text[i] for i in idx]\n", 80 | "labels = labels[idx]" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 7, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "text_train, text_val, labels_train, labels_val = train_test_split(text, labels, test_size=test_size, random_state=seed)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 8, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "class Reviews(Dataset):\n", 99 | " def __init__(self, text, labels):\n", 100 | " self.len = len(text)\n", 101 | " self.tokenizer = spacy.load(\"en_core_web_md\")\n", 102 | " self.vectors = self.text_to_vectors(text)\n", 103 | " self.labels = torch.tensor(labels, dtype=torch.float)\n", 104 | "\n", 105 | " def text_to_vectors(self, text):\n", 106 | " vector_list = []\n", 107 | " \n", 108 | " for txt in text:\n", 109 | " txt = BeautifulSoup(txt).get_text().lower() \n", 110 | " tokens = self.tokenizer(txt)\n", 111 | "\n", 112 | " vectors = []\n", 113 | " for token in tokens:\n", 114 | " if not token.is_oov:\n", 115 | " vectors.append(token.vector)\n", 116 | "\n", 117 | " vectors = np.vstack(vectors)\n", 118 | " vectors = torch.from_numpy(vectors).float()\n", 119 | " vector_list.append(vectors)\n", 120 | " return vector_list\n", 121 | "\n", 122 | " def __len__(self):\n", 123 | " return self.len\n", 124 | "\n", 125 | " def __getitem__(self, idx):\n", 126 | " vector = self.vectors[idx]\n", 127 | " label = self.labels[idx]\n", 128 | " return vector, label\n", 129 | "\n", 130 | "\n", 131 | "def collate_fn(batch):\n", 132 | " labels = torch.tensor([b[1] for b in batch])\n", 133 | "\n", 134 | " lengths = [len(b[0]) for b in batch]\n", 135 | " max_length = min([max(lengths), max_length_sequence])\n", 136 | "\n", 137 | " vector_tensor = torch.zeros((len(batch), max_length, embedding_dim))\n", 138 | " mask = torch.zeros((len(batch), max_length, 1), dtype=torch.int)\n", 139 | "\n", 140 | " for i in range(len(batch)):\n", 141 | " batch_len = lengths[i]\n", 142 | " batch_len = min([max_length_sequence, batch_len])\n", 143 | " \n", 144 | " mask[i, (max_length - batch_len):, :] = 1\n", 145 | " vector_tensor[i, (max_length - batch_len):, :] = batch[i][0][0:batch_len, :]\n", 146 | "\n", 147 | " return vector_tensor, mask, labels" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 9, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "num_workers = 4\n", 157 | "batch_size = 128\n", 
158 | "\n", 159 | "train_dataset = Reviews(text_train, labels_train)\n", 160 | "train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,\n", 161 | " num_workers=num_workers, collate_fn=collate_fn)\n", 162 | "\n", 163 | "val_dataset = Reviews(text_val, labels_val)\n", 164 | "val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,\n", 165 | " num_workers=num_workers, collate_fn=collate_fn)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 10, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "class Model(nn.Module):\n", 175 | " def __init__(self, hidden_dim=128, p=0.2):\n", 176 | " super().__init__()\n", 177 | " \n", 178 | " self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, batch_first=True)\n", 179 | " self.dropout = nn.Dropout(p=p)\n", 180 | " self.linear = nn.Linear(3 * hidden_dim, 1)\n", 181 | "\n", 182 | " def forward(self, x, mask):\n", 183 | " output, _ = self.lstm(x)\n", 184 | " mask = mask.repeat(1, 1, output.shape[-1])\n", 185 | " \n", 186 | " # Concat the last hidden output, mean & max over all hidden outputs.\n", 187 | " output = torch.cat([\n", 188 | " torch.sum(mask * output, dim=1) / torch.sum(mask, dim=1),\n", 189 | " torch.max(mask * output, dim=1)[0], # Assuming 0 won't be max\n", 190 | " output[:, -1, :]\n", 191 | " ], dim=1)\n", 192 | " \n", 193 | " output = self.dropout(output)\n", 194 | " output = self.linear(output)\n", 195 | " return output[:, 0]" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 11, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "torch.manual_seed(seed)\n", 205 | "model = Model()\n", 206 | "model = model.to(device)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 12, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "epochs = 5\n", 216 | "lr = 1e-3\n", 217 | "wd = 1e-4\n", 218 | "\n", 219 | "optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 13, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "loss_fct = nn.BCEWithLogitsLoss()" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 14, 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "name": "stdout", 238 | "output_type": "stream", 239 | "text": [ 240 | "----------- Epoch 0 -----------\n", 241 | "Train loss: 0.428948\n", 242 | "Validation loss: 0.387902\n", 243 | "Validation accuracy: 0.837515\n", 244 | "----------- Epoch 1 -----------\n", 245 | "Train loss: 0.32849\n", 246 | "Validation loss: 0.312759\n", 247 | "Validation accuracy: 0.857948\n", 248 | "----------- Epoch 2 -----------\n", 249 | "Train loss: 0.271902\n", 250 | "Validation loss: 0.269043\n", 251 | "Validation accuracy: 0.887019\n", 252 | "----------- Epoch 3 -----------\n", 253 | "Train loss: 0.235976\n", 254 | "Validation loss: 0.260857\n", 255 | "Validation accuracy: 0.878681\n", 256 | "----------- Epoch 4 -----------\n", 257 | "Train loss: 0.222214\n", 258 | "Validation loss: 0.236063\n", 259 | "Validation accuracy: 0.896484\n" 260 | ] 261 | } 262 | ], 263 | "source": [ 264 | "torch.manual_seed(seed)\n", 265 | "for epoch in range(epochs):\n", 266 | " train_loss, val_loss, val_acc = 0.0, 0.0, 0.0\n", 267 | "\n", 268 | " model.train()\n", 269 | " for _, (vectors_tensor, mask, labels) in enumerate(train_dataloader):\n", 270 | " vectors_tensor, mask, labels = vectors_tensor.to(device), 
mask.to(device), labels.to(device)\n", 271 | " optimizer.zero_grad()\n", 272 | "\n", 273 | " output = model(vectors_tensor, mask)\n", 274 | "\n", 275 | " batch_loss = loss_fct(output, labels) \n", 276 | " batch_loss.backward()\n", 277 | " optimizer.step()\n", 278 | "\n", 279 | " train_loss += batch_loss.detach().cpu().numpy()\n", 280 | "\n", 281 | " model.eval()\n", 282 | " with torch.no_grad():\n", 283 | " for _, (vectors_tensor, mask, labels) in enumerate(val_dataloader):\n", 284 | " vectors_tensor, mask, labels = vectors_tensor.to(device), mask.to(device), labels.to(device)\n", 285 | "\n", 286 | " output = model(vectors_tensor, mask)\n", 287 | " batch_loss = loss_fct(output, labels)\n", 288 | " val_loss += batch_loss.cpu().numpy()\n", 289 | "\n", 290 | " y_hat = (torch.sigmoid(output) > 0.5).long()\n", 291 | " batch_acc = (y_hat == labels).float().mean()\n", 292 | " val_acc += batch_acc.cpu().numpy()\n", 293 | "\n", 294 | " train_loss = np.round(train_loss / len(train_dataloader), 6)\n", 295 | " val_loss = np.round(val_loss / len(val_dataloader), 6)\n", 296 | " val_acc = np.round(val_acc / len(val_dataloader), 6)\n", 297 | "\n", 298 | " print(f\"----------- Epoch {epoch} -----------\")\n", 299 | " print(f\"Train loss: {train_loss}\")\n", 300 | " print(f\"Validation loss: {val_loss}\")\n", 301 | " print(f\"Validation accuracy: {val_acc}\")" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": {}, 308 | "outputs": [], 309 | "source": [] 310 | } 311 | ], 312 | "metadata": { 313 | "kernelspec": { 314 | "display_name": "Python 3", 315 | "language": "python", 316 | "name": "python3" 317 | }, 318 | "language_info": { 319 | "codemirror_mode": { 320 | "name": "ipython", 321 | "version": 3 322 | }, 323 | "file_extension": ".py", 324 | "mimetype": "text/x-python", 325 | "name": "python", 326 | "nbconvert_exporter": "python", 327 | "pygments_lexer": "ipython3", 328 | "version": "3.7.4" 329 | } 330 | }, 331 | "nbformat": 4, 332 | "nbformat_minor": 2 333 | } 334 | -------------------------------------------------------------------------------- /natural_language_processing/speech_classification/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints -------------------------------------------------------------------------------- /natural_language_processing/speech_classification/README.md: -------------------------------------------------------------------------------- 1 | # Speech classification 2 | 3 | Training a model to classify one second speech commands (30 classes). Models tested: 4 | 5 | - RNN (GRU). 6 | 7 | The data can be downloaded [here](https://www.kaggle.com/c/tensorflow-speech-recognition-challenge/). -------------------------------------------------------------------------------- /natural_language_processing/speech_classification/data/.gitignore: -------------------------------------------------------------------------------- 1 | train.7z 2 | train/ -------------------------------------------------------------------------------- /natural_language_processing/text_synthesis/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints -------------------------------------------------------------------------------- /natural_language_processing/text_synthesis/README.md: -------------------------------------------------------------------------------- 1 | # Text synthesis Synthesizing text (Nietzsche). 
The data can be downloaded [here](https://s3.amazonaws.com/text-datasets/nietzsche.txt). -------------------------------------------------------------------------------- /natural_language_processing/text_synthesis/data/.gitignore: -------------------------------------------------------------------------------- 1 | nietzsche.txt -------------------------------------------------------------------------------- /natural_language_processing/text_synthesis/text_synthesis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import random\n", 11 | "import torch\n", 12 | "import torch.optim as optim\n", 13 | "from torch.utils.data import Dataset, DataLoader\n", 14 | "import torch.nn as nn\n", 15 | "import torch.nn.functional as F" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "PATH = \"data/\"\n", 25 | "device = \"cuda\"\n", 26 | "file_name = f\"{PATH}nietzsche.txt\"\n", 27 | "seed = 42" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "class CharDataset(Dataset):\n", 37 | " def __init__(self, file_name, seq_length):\n", 38 | " with open(file_name, \"r\") as file:\n", 39 | " self.text = file.read()\n", 40 | " \n", 41 | " self.vocab = sorted(list(set(self.text)))\n", 42 | " self.vocab_size = len(self.vocab)\n", 43 | " \n", 44 | " self.vocab_indices = {c: i for i, c in enumerate(self.vocab)}\n", 45 | " self.indices_vocab = {i: c for i, c in enumerate(self.vocab)}\n", 46 | " self.idx = [self.vocab_indices[c] for c in self.text]\n", 47 | " \n", 48 | " x = np.stack([self.idx[i] for i in range(len(self.idx) - 1)])\n", 49 | " y = np.stack([self.idx[i+1] for i in range(len(self.idx) - 1)])\n", 50 | " self.x = torch.from_numpy(x).long().to(device)\n", 51 | " self.y = torch.from_numpy(y).long().to(device)\n", 52 | " \n", 53 | " self.seq_length = seq_length\n", 54 | "\n", 55 | " def __len__(self):\n", 56 | " return int(len(self.text) / self.seq_length)\n", 57 | "\n", 58 | " def random_sequence(self):\n", 59 | " start_index = random.randint(0, len(self.x) - self.seq_length)\n", 60 | " end_index = start_index + self.seq_length\n", 61 | " return self.x[start_index:end_index], self.y[start_index:end_index]\n", 62 | "\n", 63 | " def __getitem__(self, idx):\n", 64 | " return self.random_sequence()" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "class RNN(nn.Module):\n", 74 | " def __init__(self, input_size, hidden_size, output_size):\n", 75 | " super().__init__()\n", 76 | " self.input_size = input_size\n", 77 | " self.hidden_size = hidden_size\n", 78 | " self.output_size = output_size\n", 79 | " self.n_layers = 1\n", 80 | " \n", 81 | " self.encoder = nn.Embedding(self.input_size, self.hidden_size)\n", 82 | " self.gru = nn.GRU(hidden_size, hidden_size, self.n_layers)\n", 83 | " self.decoder = nn.Linear(self.hidden_size, self.output_size)\n", 84 | " \n", 85 | " def forward(self, input, hidden):\n", 86 | " bs = input.shape[0]\n", 87 | " input = self.encoder(input.view(1, -1))\n", 88 | " output, hidden = self.gru(input.view(1, bs, -1), hidden)\n", 89 | " output = self.decoder(output.view(bs, -1))\n", 90 | " output = F.log_softmax(output, dim=1)\n", 91 | 
" return output, hidden\n", 92 | "\n", 93 | " def init_hidden(self, bs):\n", 94 | " return torch.tensor(torch.zeros(self.n_layers, bs, self.hidden_size)).to(device)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 5, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "batch_size = 256\n", 104 | "seq_length = 64\n", 105 | "char_dataset = CharDataset(file_name, seq_length)\n", 106 | "char_loader = DataLoader(char_dataset, batch_size=batch_size, shuffle=True, num_workers=0)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 6, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "n_hidden = 128\n", 116 | "n_factor = 50\n", 117 | "\n", 118 | "n_epochs = 100\n", 119 | "lr = 1e-2" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 7, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "torch.manual_seed(seed)\n", 129 | "vocab_size = char_dataset.vocab_size\n", 130 | "model = RNN(vocab_size, n_hidden, vocab_size).to(device)\n", 131 | "optimizer = optim.Adam(model.parameters(), lr=lr)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 8, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "Loss at epoch 0: 2.51995\n", 144 | "Loss at epoch 5: 1.60903\n", 145 | "Loss at epoch 10: 1.51938\n", 146 | "-----------------------------\n", 147 | "Decreasing learning rate to: 0.005\n", 148 | "-----------------------------\n", 149 | "Loss at epoch 15: 1.46629\n", 150 | "Loss at epoch 20: 1.44726\n", 151 | "-----------------------------\n", 152 | "Decreasing learning rate to: 0.0025\n", 153 | "-----------------------------\n", 154 | "Loss at epoch 25: 1.42147\n", 155 | "Loss at epoch 30: 1.41101\n", 156 | "-----------------------------\n", 157 | "Decreasing learning rate to: 0.00125\n", 158 | "-----------------------------\n", 159 | "Loss at epoch 35: 1.39633\n", 160 | "Loss at epoch 40: 1.39206\n", 161 | "-----------------------------\n", 162 | "Decreasing learning rate to: 0.000625\n", 163 | "-----------------------------\n", 164 | "Loss at epoch 45: 1.38521\n", 165 | "Loss at epoch 50: 1.37702\n", 166 | "-----------------------------\n", 167 | "Decreasing learning rate to: 0.0003125\n", 168 | "-----------------------------\n", 169 | "Loss at epoch 55: 1.37642\n", 170 | "Loss at epoch 60: 1.3739\n", 171 | "-----------------------------\n", 172 | "Decreasing learning rate to: 0.00015625\n", 173 | "-----------------------------\n", 174 | "Loss at epoch 65: 1.37081\n", 175 | "Loss at epoch 70: 1.37656\n", 176 | "-----------------------------\n", 177 | "Decreasing learning rate to: 7.8125e-05\n", 178 | "-----------------------------\n", 179 | "Loss at epoch 75: 1.37143\n", 180 | "Loss at epoch 80: 1.37181\n", 181 | "-----------------------------\n", 182 | "Decreasing learning rate to: 3.90625e-05\n", 183 | "-----------------------------\n", 184 | "Loss at epoch 85: 1.37572\n", 185 | "Loss at epoch 90: 1.37408\n", 186 | "-----------------------------\n", 187 | "Decreasing learning rate to: 1.953125e-05\n", 188 | "-----------------------------\n", 189 | "Loss at epoch 95: 1.37005\n" 190 | ] 191 | } 192 | ], 193 | "source": [ 194 | "loss_list = []\n", 195 | "torch.manual_seed(seed)\n", 196 | "for epoch in range(0, n_epochs):\n", 197 | " batch_loss = 0\n", 198 | " for _, (x, y) in enumerate(char_loader):\n", 199 | " hidden = model.init_hidden(x.shape[0])\n", 200 | " model.zero_grad()\n", 201 | " 
loss = 0\n", 202 | "\n", 203 | " for c in range(seq_length):\n", 204 | " output, hidden = model(x[:, c], hidden)\n", 205 | " loss += F.nll_loss(output, y[:, c])\n", 206 | " \n", 207 | " loss.backward()\n", 208 | " optimizer.step()\n", 209 | " batch_loss += loss\n", 210 | " \n", 211 | " batch_loss = np.round(batch_loss.item() / (seq_length * len(char_loader)), 5)\n", 212 | " loss_list.append(batch_loss)\n", 213 | " \n", 214 | " if epoch % 5 == 0:\n", 215 | " print(\"Loss at epoch {}: {}\".format(epoch, batch_loss))\n", 216 | "\n", 217 | " if epoch % 10 == 0 and epoch != 0:\n", 218 | " for param_group in optimizer.param_groups:\n", 219 | " param_group['lr'] = param_group[\"lr\"]*0.5\n", 220 | " print(\"-----------------------------\")\n", 221 | " print(\"Decreasing learning rate to: {}\".format(param_group[\"lr\"]))\n", 222 | " print(\"-----------------------------\")" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 9, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "def string_2_tensor(string):\n", 232 | " tensor = torch.zeros(len(string)).long()\n", 233 | " for c in range(len(string)):\n", 234 | " tensor[c] = char_dataset.vocab_indices[string[c]]\n", 235 | " return tensor" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 10, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "def generate_text(start_string=\"T\", predict_len=100, temperature=0.8):\n", 245 | " \n", 246 | " hidden = model.init_hidden(bs=1)\n", 247 | " start_input = string_2_tensor(start_string)\n", 248 | " predicted = start_string\n", 249 | "\n", 250 | " # Use start string to warm up the hidden state\n", 251 | " for p in range(len(start_string) - 1):\n", 252 | " _, hidden = model(start_input[p], hidden)\n", 253 | " inp = start_input[-1].view(1).to(device)\n", 254 | "\n", 255 | " for p in range(predict_len):\n", 256 | " output, hidden = model(inp, hidden)\n", 257 | "\n", 258 | " # Sample from the network as a multinomial distribution\n", 259 | " output_dist = output.data.view(-1).div(temperature).exp()\n", 260 | " top_i = torch.multinomial(output_dist, 1)[0]\n", 261 | "\n", 262 | " # Add predicted character to string and use as next input\n", 263 | " predicted_char = char_dataset.indices_vocab[top_i.item()]\n", 264 | " predicted += predicted_char\n", 265 | " inp = string_2_tensor(predicted_char).to(device)\n", 266 | " return predicted" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 11, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "name": "stdout", 276 | "output_type": "stream", 277 | "text": [ 278 | "The longer, simply the art probably have continger to his low in the present of the latter the bad th\n" 279 | ] 280 | } 281 | ], 282 | "source": [ 283 | "print(generate_text())" 284 | ] 285 | } 286 | ], 287 | "metadata": { 288 | "kernelspec": { 289 | "display_name": "Python 3", 290 | "language": "python", 291 | "name": "python3" 292 | }, 293 | "language_info": { 294 | "codemirror_mode": { 295 | "name": "ipython", 296 | "version": 3 297 | }, 298 | "file_extension": ".py", 299 | "mimetype": "text/x-python", 300 | "name": "python", 301 | "nbconvert_exporter": "python", 302 | "pygments_lexer": "ipython3", 303 | "version": "3.6.4" 304 | } 305 | }, 306 | "nbformat": 4, 307 | "nbformat_minor": 2 308 | } 309 | -------------------------------------------------------------------------------- /natural_language_processing/tokenizers/bpe.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from array import array\n", 10 | "from collections import Counter" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "s = \"hello world!!!? (안녕하세요!) lol123 😉 fffffffff\"" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "[104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100, 33, 33, 33, 63, 32, 40, 236, 149, 136, 235, 133, 149, 237, 149, 152, 236, 132, 184, 236, 154, 148, 33, 41, 32, 108, 111, 108, 49, 50, 51, 32, 240, 159, 152, 137, 32, 102, 102, 102, 102, 102, 102, 102, 102, 102]\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "byte_list = list(s.encode(\"utf-8\"))\n", 37 | "print(byte_list)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "def create_pairs(l):\n", 47 | " # Creating paris of consecutive elements\n", 48 | " pairs = []\n", 49 | " for a, b in zip(l, l[1:]):\n", 50 | " pairs.append((a, b))\n", 51 | " return pairs\n", 52 | "\n", 53 | "assert create_pairs([1, 2, 3, 4]) == [(1, 2), (2, 3), (3, 4)]" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 5, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "name": "stdout", 63 | "output_type": "stream", 64 | "text": [ 65 | "[(104, 101), (101, 108), (108, 108), (108, 111), (111, 32), (32, 119), (119, 111), (111, 114), (114, 108), (108, 100), (100, 33), (33, 33), (33, 33), (33, 63), (63, 32), (32, 40), (40, 236), (236, 149), (149, 136), (136, 235), (235, 133), (133, 149), (149, 237), (237, 149), (149, 152), (152, 236), (236, 132), (132, 184), (184, 236), (236, 154), (154, 148), (148, 33), (33, 41), (41, 32), (32, 108), (108, 111), (111, 108), (108, 49), (49, 50), (50, 51), (51, 32), (32, 240), (240, 159), (159, 152), (152, 137), (137, 32), (32, 102), (102, 102), (102, 102), (102, 102), (102, 102), (102, 102), (102, 102), (102, 102), (102, 102)]\n" 66 | ] 67 | } 68 | ], 69 | "source": [ 70 | "byte_pairs = create_pairs(byte_list)\n", 71 | "print(byte_pairs)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 6, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "def replace(byte_list, idx, pair_to_replace):\n", 81 | " # replaces all consecutive pair that are equivalent to pair_to_replace with the value idx\n", 82 | " ptr = 0\n", 83 | " new_tokens = []\n", 84 | " while ptr < len(byte_list):\n", 85 | " current_pair = tuple(byte_list[ptr:(ptr+2)])\n", 86 | " if current_pair == pair_to_replace:\n", 87 | " new_tokens.append(idx)\n", 88 | " ptr += 2\n", 89 | " else:\n", 90 | " new_tokens.append(byte_list[ptr])\n", 91 | " ptr += 1\n", 92 | "\n", 93 | " return new_tokens\n", 94 | "\n", 95 | "\n", 96 | "def merge(byte_list, idx, merges):\n", 97 | " # Create one merge, i.e. 
take the most common pair and replace those paris with idx\n", 98 | " byte_pairs = create_pairs(byte_list)\n", 99 | " most_common_pair, _ = Counter(byte_pairs).most_common(n=1)[0]\n", 100 | " merges[most_common_pair] = idx\n", 101 | "\n", 102 | " return replace(byte_list, idx, most_common_pair)\n", 103 | "\n", 104 | "\n", 105 | "assert replace([3, 2, 5, 9, 3, 2], 10, (3, 2)) == [10, 5, 9, 10]\n" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 7, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "num_merges = 5\n", 115 | "idx = 256 # utf-8 has ids until 255 (1 byte), so starting after that\n", 116 | "new_tokens = byte_list[:]\n", 117 | "merges = {}\n", 118 | "\n", 119 | "for _ in range(num_merges):\n", 120 | " new_tokens = merge(new_tokens, idx, merges)\n", 121 | " idx += 1" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 8, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "Length before merge: 56\n", 134 | "Length after merge: 46\n" 135 | ] 136 | } 137 | ], 138 | "source": [ 139 | "print(f\"Length before merge: {len(byte_list)}\")\n", 140 | "print(f\"Length after merge: {len(new_tokens)}\")" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 9, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "{(102, 102): 256, (256, 256): 257, (108, 111): 258, (33, 33): 259, (104, 101): 260}\n" 153 | ] 154 | } 155 | ], 156 | "source": [ 157 | "print(merges)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 10, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "46\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "def encode(byte_list, merges):\n", 175 | " new_tokens = byte_list[:]\n", 176 | "\n", 177 | " # This works since the elements was added to merges\n", 178 | " # from most to least common\n", 179 | " # Also, since python 3.7 dicts are ordered\n", 180 | " for m_pair, idx in merges.items():\n", 181 | " new_tokens = replace(new_tokens, idx, m_pair)\n", 182 | "\n", 183 | " return new_tokens\n", 184 | "\n", 185 | "print(len(encode(byte_list, merges)))" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 11, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "def decode(encoded_list, merges):\n", 195 | " old_tokens = encoded_list[:]\n", 196 | " for m_pair, idx in reversed(merges.items()):\n", 197 | " new_tokens = []\n", 198 | " for ot in old_tokens:\n", 199 | " if ot == idx:\n", 200 | " new_tokens.append(m_pair[0])\n", 201 | " new_tokens.append(m_pair[1])\n", 202 | " else:\n", 203 | " new_tokens.append(ot)\n", 204 | "\n", 205 | " old_tokens = new_tokens[:]\n", 206 | " return old_tokens\n", 207 | "\n", 208 | "\n", 209 | "# [6, 5, 9] -> [6, 5, 1, 8]; 9 -> (1,8) \n", 210 | "# [6, 5, 9] -> [6, 5, 1, 1, 2]; 8 -> (1, 2)\n", 211 | "assert decode([6, 5, 9], {(1, 2): 8, (1, 8): 9}) == [6, 5, 1, 1, 2]" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 12, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "encoded_list = encode(byte_list, merges)\n", 221 | "decoded_list = decode(encoded_list, merges)\n", 222 | "\n", 223 | "assert len(decoded_list) == len(byte_list)" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 13, 229 | "metadata": {}, 230 | 
"outputs": [ 231 | { 232 | "data": { 233 | "text/plain": [ 234 | "'hello world!!!? (안녕하세요!) lol123 😉 fffffffff'" 235 | ] 236 | }, 237 | "execution_count": 13, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "# Should get same string as original\n", 244 | "arr = array('B', decoded_list)\n", 245 | "arr.tobytes().decode(\"utf-8\")" 246 | ] 247 | } 248 | ], 249 | "metadata": { 250 | "kernelspec": { 251 | "display_name": ".venv", 252 | "language": "python", 253 | "name": "python3" 254 | }, 255 | "language_info": { 256 | "codemirror_mode": { 257 | "name": "ipython", 258 | "version": 3 259 | }, 260 | "file_extension": ".py", 261 | "mimetype": "text/x-python", 262 | "name": "python", 263 | "nbconvert_exporter": "python", 264 | "pygments_lexer": "ipython3", 265 | "version": "3.12.1" 266 | } 267 | }, 268 | "nbformat": 4, 269 | "nbformat_minor": 2 270 | } 271 | -------------------------------------------------------------------------------- /natural_language_processing/word2vec/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints -------------------------------------------------------------------------------- /natural_language_processing/word2vec/README.md: -------------------------------------------------------------------------------- 1 | # Word2vec Training a [word2vec](https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf) model to extract word embeddings. The data can be downloaded [here](http://mattmahoney.net/dc/textdata.html) (text8.zip). -------------------------------------------------------------------------------- /natural_language_processing/word2vec/data/.gitignore: -------------------------------------------------------------------------------- 1 | text8.txt 2 | text8.zip -------------------------------------------------------------------------------- /natural_language_processing/word2vec/word2vec.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from collections import Counter\n", 10 | "import numpy as np\n", 11 | "import math\n", 12 | "import random\n", 13 | "from sklearn.metrics.pairwise import cosine_similarity" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import torch\n", 23 | "import torch.nn as nn\n", 24 | "from torch.utils.data import Dataset, DataLoader\n", 25 | "import torch.optim as optim\n", 26 | "import torch.nn.functional as F" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "data_path = \"data/text8.txt\"" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "device = \"cuda\"\n", 45 | "context_width = 3\n", 46 | "seed = 42" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "with open(data_path) as file:\n", 56 | " data = file.read()\n", 57 | "\n", 58 | "data = data.split(\" \")\n", 59 | "data = data[0:500000] # Using a smaller dataset" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | 
"source": [ 68 | "freq_threshold = 5\n", 69 | "\n", 70 | "vocab = dict()\n", 71 | "count = 0\n", 72 | "\n", 73 | "freq = Counter(data)\n", 74 | "for key, value in freq.items():\n", 75 | " if value >= freq_threshold:\n", 76 | " vocab[key] = count\n", 77 | " count += 1\n", 78 | " \n", 79 | "inverse_vocab = {value: key for key, value in vocab.items()}" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "data = list(filter(lambda x: x in vocab.keys(), data))\n", 89 | "freq = Counter(data)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "normed_freq = {key: freq[key]/len(data) for (key, value) in vocab.items()}" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "def prob_dropping(frequency, t=5e-4):\n", 108 | " return 1.0 - math.sqrt(t/frequency)\n", 109 | "\n", 110 | "prob_drop_word = [prob_dropping(normed_freq[word]) for word in data]\n", 111 | "prob_drop_word = [prob_word if prob_word > 0 else 0 for prob_word in prob_drop_word]" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "random.seed(seed)\n", 121 | "data = filter(lambda x: random.choices([False, True], weights=[x[1], 1-x[1]])[0], zip(data, prob_drop_word))\n", 122 | "data = [pair[0] for pair in data]" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "class TextDataSet(Dataset):\n", 132 | " def __init__(self, data, vocab, context_width):\n", 133 | " self.data = data\n", 134 | " self.context_width = context_width\n", 135 | " self.vocab = vocab\n", 136 | " \n", 137 | " def get_context(self, idx): \n", 138 | " first_index = max(0, idx - self.context_width)\n", 139 | " last_index = min(len(self.data), idx + self.context_width + 1)\n", 140 | "\n", 141 | " context = self.data[first_index:idx] + self.data[idx+1:last_index]\n", 142 | " context = [self.vocab[c] for c in context]\n", 143 | " \n", 144 | " return context\n", 145 | " \n", 146 | " def __len__(self):\n", 147 | " return len(self.data)\n", 148 | " \n", 149 | " def __getitem__(self, idx):\n", 150 | " y = self.get_context(idx)\n", 151 | " \n", 152 | " x = self.data[idx]\n", 153 | " x = self.vocab[x]\n", 154 | " x = [x for _ in range(len(y))]\n", 155 | " \n", 156 | " x = np.array(x)\n", 157 | " y = np.array(y)\n", 158 | " \n", 159 | " return x, y" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "def collate_fn(batch): \n", 169 | " x = [z[0] for z in batch]\n", 170 | " y = [z[1] for z in batch]\n", 171 | "\n", 172 | " x = np.concatenate(x)\n", 173 | " x = torch.from_numpy(x).long().to(device)\n", 174 | " \n", 175 | " y = np.concatenate(y)\n", 176 | " y = torch.from_numpy(y).long().to(device)\n", 177 | "\n", 178 | " return x, y" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "batch_size = 256\n", 188 | "dataset = TextDataSet(data, vocab, context_width)\n", 189 | "train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0, collate_fn=collate_fn)" 190 | ] 191 | }, 192 | { 193 | 
"cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "class Word2Vec(nn.Module):\n", 199 | " def __init__(self, vocab_size, embedding_size):\n", 200 | " super().__init__()\n", 201 | " self.embedding_layer = nn.Embedding(vocab_size, embedding_size)\n", 202 | " self.output_layer = nn.Linear(embedding_size, vocab_size)\n", 203 | " \n", 204 | " def forward(self, x):\n", 205 | " x = self.embedding_layer(x)\n", 206 | " x = self.output_layer(x)\n", 207 | " x = F.log_softmax(x, dim=1)\n", 208 | " return x" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "torch.manual_seed(seed)\n", 218 | "embedding_size = 50\n", 219 | "model = Word2Vec(len(vocab), embedding_size).to(device)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "num_epochs = 2\n", 229 | "lr = 0.1\n", 230 | "\n", 231 | "optimizer = optim.SGD(model.parameters(), lr=lr)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "def get_closest_words(model, idx, num_closest=5):\n", 241 | " weights = model.embedding_layer.weight.detach().cpu().numpy()\n", 242 | "\n", 243 | " word_vector = weights[idx]\n", 244 | " word_vector = word_vector.reshape((1, word_vector.shape[0]))\n", 245 | "\n", 246 | " distance = cosine_similarity(word_vector, weights)\n", 247 | "\n", 248 | " arg_distance = distance.argsort()\n", 249 | " closes_arg = arg_distance[0, arg_distance.shape[1] - num_closest - 1: (arg_distance.shape[1] - 1)]\n", 250 | "\n", 251 | " return_list = []\n", 252 | "\n", 253 | " for i in reversed(closes_arg):\n", 254 | " return_list.append((inverse_vocab[i], distance[0, i]))\n", 255 | " \n", 256 | " return return_list" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "example_word = \"men\"" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "torch.manual_seed(seed)\n", 275 | "for epoch in range(1, num_epochs+1):\n", 276 | " print(f\"----- Epoch {epoch} -----\")\n", 277 | " print(get_closest_words(model, vocab[example_word]))\n", 278 | " \n", 279 | " loss = 0.0\n", 280 | " for i, (x, y) in enumerate(train_loader):\n", 281 | " model.zero_grad()\n", 282 | " output = model(x)\n", 283 | " batch_loss = F.nll_loss(output, y)\n", 284 | " \n", 285 | " batch_loss.backward()\n", 286 | " optimizer.step()\n", 287 | " loss += batch_loss.item()\n", 288 | " \n", 289 | " print(\"Loss : {}\".format(loss/len(train_loader)))" 290 | ] 291 | } 292 | ], 293 | "metadata": { 294 | "kernelspec": { 295 | "display_name": "Python 3", 296 | "language": "python", 297 | "name": "python3" 298 | }, 299 | "language_info": { 300 | "codemirror_mode": { 301 | "name": "ipython", 302 | "version": 3 303 | }, 304 | "file_extension": ".py", 305 | "mimetype": "text/x-python", 306 | "name": "python", 307 | "nbconvert_exporter": "python", 308 | "pygments_lexer": "ipython3", 309 | "version": "3.6.4" 310 | } 311 | }, 312 | "nbformat": 4, 313 | "nbformat_minor": 2 314 | } 315 | --------------------------------------------------------------------------------