├── utils.py
├── core
│   ├── __init__.py
│   ├── metrics.py
│   ├── schedule.py
│   ├── iterators.py
│   ├── text.py
│   ├── loop.py
│   ├── callbacks.py
│   └── utils.py
├── rnn
│   ├── history.csv
│   ├── rnn_classify_fastai.py
│   ├── lr.ipynb
│   ├── .ipynb_checkpoints
│   │   ├── lr-checkpoint.ipynb
│   │   └── rnn-checkpoint.ipynb
│   ├── rnn_custom.py
│   ├── rules.py
│   ├── rnn_oop.py
│   ├── rnn.ipynb
│   ├── rnn_plain.py
│   └── rnn_classify.py
├── ssd
│   ├── augmentation.py
│   ├── main.py
│   ├── models.py
│   ├── loss.py
│   ├── dataset.py
│   ├── utils.py
│   └── plots.py
├── text_classification
│   ├── script.py
│   ├── itos.pkl
│   ├── itos.pickle
│   ├── utils.py
│   ├── classify.py
│   ├── main_lyrics.py
│   ├── rules.py
│   ├── tokenizer.py
│   ├── main_imdb.py
│   ├── lyrics.ipynb
│   ├── .ipynb_checkpoints
│   │   └── lyrics-checkpoint.ipynb
│   └── dataset.py
├── README.md
├── trivial.onnx
├── assets
│   ├── loop.gif
│   ├── linear_interp.png
│   └── training_loop.png
├── coreml_export
│   ├── model.onnx
│   ├── model.mlmodel
│   ├── model_valid_loss_0.0604.weights
│   ├── model_valid_loss_0.0987.weights
│   ├── model_valid_loss_1.2028.weights
│   ├── history.csv
│   ├── misc.py
│   ├── model.py
│   └── main.py
├── style_images
│   ├── dancing.jpg
│   └── picasso.jpg
├── progress_bar.py
├── artists.txt
├── adamw.ipynb
├── ssd.ipynb
├── demo.py
├── cifar.py
├── history.csv
├── cnn_fastai.py
├── textsplit.py
├── cnn_dataset.py
├── .ipynb_checkpoints
│   └── rnn-checkpoint.ipynb
├── ssd.py
├── cnn.py
├── gan.py
├── azlyrics.py
└── ios.py

/utils.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/core/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/rnn/history.csv:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/ssd/augmentation.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/rnn/rnn_classify_fastai.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/text_classification/script.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# pytorch_playground
PyTorch experiments, demos and tutorials
--------------------------------------------------------------------------------
/trivial.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devforfu/pytorch_playground/HEAD/trivial.onnx
--------------------------------------------------------------------------------
/assets/loop.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devforfu/pytorch_playground/HEAD/assets/loop.gif
--------------------------------------------------------------------------------
/assets/linear_interp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devforfu/pytorch_playground/HEAD/assets/linear_interp.png
--------------------------------------------------------------------------------
/assets/training_loop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devforfu/pytorch_playground/HEAD/assets/training_loop.png
--------------------------------------------------------------------------------
/coreml_export/model.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devforfu/pytorch_playground/HEAD/coreml_export/model.onnx
--------------------------------------------------------------------------------
/style_images/dancing.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devforfu/pytorch_playground/HEAD/style_images/dancing.jpg
--------------------------------------------------------------------------------
/style_images/picasso.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devforfu/pytorch_playground/HEAD/style_images/picasso.jpg
--------------------------------------------------------------------------------
/coreml_export/model.mlmodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devforfu/pytorch_playground/HEAD/coreml_export/model.mlmodel
--------------------------------------------------------------------------------
/text_classification/itos.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devforfu/pytorch_playground/HEAD/text_classification/itos.pkl
--------------------------------------------------------------------------------
/text_classification/itos.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devforfu/pytorch_playground/HEAD/text_classification/itos.pickle
--------------------------------------------------------------------------------
/progress_bar.py:
--------------------------------------------------------------------------------
from tqdm import tqdm


def main():
    # the progress bar class has to be imported from the package: the bare
    # `tqdm` module object is not callable
    tqdm()


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/coreml_export/model_valid_loss_0.0604.weights:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devforfu/pytorch_playground/HEAD/coreml_export/model_valid_loss_0.0604.weights
--------------------------------------------------------------------------------
/coreml_export/model_valid_loss_0.0987.weights:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devforfu/pytorch_playground/HEAD/coreml_export/model_valid_loss_0.0987.weights
--------------------------------------------------------------------------------
/coreml_export/model_valid_loss_1.2028.weights:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devforfu/pytorch_playground/HEAD/coreml_export/model_valid_loss_1.2028.weights
--------------------------------------------------------------------------------
/core/metrics.py:
--------------------------------------------------------------------------------
import torch


def accuracy(y_pred, y_true):
    match = y_pred.argmax(dim=1) == y_true
    acc = match.type(torch.float).mean()
    return acc.item()
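

if __name__ == '__main__':
    # Editor's sketch (not part of the original file): `accuracy` expects raw
    # class scores of shape (batch, n_classes) and integer labels of shape
    # (batch,); argmax over dim=1 picks the predicted class.
    scores = torch.tensor([[0.1, 0.9], [0.8, 0.2]])
    labels = torch.tensor([1, 1])
    print(accuracy(scores, labels))  # 0.5 -- one of two predictions correct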
--------------------------------------------------------------------------------
/text_classification/utils.py:
--------------------------------------------------------------------------------
from pathlib import Path


def is_empty(folder: Path):
    non_empty_dirs = {str(p.parent) for p in folder.rglob('*') if p.is_file()}
    return not non_empty_dirs
--------------------------------------------------------------------------------
/coreml_export/history.csv:
--------------------------------------------------------------------------------
Epoch 0 | train_loss: 0.2037 - train_accuracy: 0.9466 - valid_loss: 0.0987 - valid_accuracy: 0.9695
Epoch 1 | train_loss: 0.0700 - train_accuracy: 0.9810 - valid_loss: 0.0604 - valid_accuracy: 0.9816
--------------------------------------------------------------------------------
/artists.txt:
--------------------------------------------------------------------------------
ABBA
ACDC
Amy Winehouse
Black Sabbath
Bob Dylan
Bob Marley
Bon Jovi
Celine Dion
Creedence Clearwater Revival
Deep Purple
Dio
Eagles
Foreigner
Fools Garden
Grateful Dead
Imagine Dragons
King Crimson
Kylie Minogue
Lil Peep
Linkin Park
Nazareth
Nick Cave The Bad Seeds
Queen
Rainbow
U2
Van Halen
Who
Xzibit
--------------------------------------------------------------------------------
/coreml_export/misc.py:
--------------------------------------------------------------------------------
from multiprocessing import cpu_count

from torch.utils.data import DataLoader


def compute_stats(dataset):
    # the batch size is chosen so that the loader yields roughly 1000
    # batches; the dataset-wide mean/std are estimated by averaging the
    # per-batch statistics
    n = len(dataset) // 1000
    loader = DataLoader(
        dataset,
        batch_size=n,
        num_workers=cpu_count())
    mean, std, total = 0., 0., 0
    for batch, _ in iter(loader):
        image = batch.squeeze()
        mean += image.mean().item()
        std += image.std().item()
        total += 1
    mean /= total
    std /= total
    print(mean, std)
--------------------------------------------------------------------------------
/adamw.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
--------------------------------------------------------------------------------
/ssd.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
26 | "name": "python", 27 | "nbconvert_exporter": "python", 28 | "pygments_lexer": "ipython2", 29 | "version": "2.7.6" 30 | } 31 | }, 32 | "nbformat": 4, 33 | "nbformat_minor": 0 34 | } 35 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Net(nn.Module): 7 | 8 | def __init__(self): 9 | super().__init__() 10 | self.conv1 = nn.Conv2d(1, 6, 5) 11 | self.conv2 = nn.Conv2d(6, 16, 5) 12 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 13 | self.fc2 = nn.Linear(120, 84) 14 | self.fc3 = nn.Linear(84, 10) 15 | 16 | def forward(self, x): 17 | x = self.conv1(x) 18 | x = F.relu(x) 19 | x = F.max_pool2d(x, (2, 2)) 20 | 21 | x = self.conv2(x) 22 | x = F.relu(x) 23 | x = F.max_pool2d(x, (2, 2)) 24 | 25 | x = x.view(-1, self.num_flat_features(x)) 26 | x = F.relu(self.fc1(x)) 27 | x = F.relu(self.fc2(x)) 28 | x = self.fc3(x) 29 | return x 30 | 31 | def num_flat_features(self, x): 32 | size = x.size()[1:] 33 | num_features = 1 34 | for s in size: 35 | num_features *= s 36 | return num_features 37 | 38 | -------------------------------------------------------------------------------- /cifar.py: -------------------------------------------------------------------------------- 1 | from os.path import expanduser 2 | 3 | import torch 4 | import torchvision 5 | import torchvision.transforms as transforms 6 | 7 | 8 | PATH = '~/data/cifar10' 9 | 10 | transform = transforms.Compose( 11 | [transforms.ToTensor(), 12 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 13 | 14 | trainset = torchvision.datasets.CIFAR10( 15 | root=expanduser(PATH), 16 | train=True, 17 | download=True, 18 | transform=transform) 19 | 20 | trainloader = torch.utils.data.DataLoader( 21 | trainset, 22 | batch_size=4, 23 | shuffle=True, 24 | num_workers=2) 25 | 26 | testset = torchvision.datasets.CIFAR10( 27 | root=expanduser(PATH), 28 | train=False, 29 | download=True, 30 | transform=transform) 31 | 32 | testloader = torch.utils.data.DataLoader( 33 | testset, 34 | batch_size=4, 35 | shuffle=False, 36 | num_workers=2) 37 | 38 | classes = ('plane', 'car', 'bird', 'cat', 39 | 'deer', 'dog', 'frog', 'horse', 40 | 'ship', 'truck') 41 | 42 | 43 | -------------------------------------------------------------------------------- /core/schedule.py: -------------------------------------------------------------------------------- 1 | import math 2 | from torch.optim.lr_scheduler import _LRScheduler 3 | 4 | 5 | class CosineAnnealingLR(_LRScheduler): 6 | """ 7 | A scheduler implementing cosine annealing with restarts and an increasing 8 | period of the decay. 
9 | """ 10 | def __init__(self, optimizer, t_max=200, eta_min=0.0005, 11 | cycle_mult=2, last_epoch=-1): 12 | 13 | self.t_max = t_max 14 | self.eta_min = eta_min 15 | self.cycle_mult = cycle_mult 16 | self.cycle_epoch = last_epoch 17 | super().__init__(optimizer, last_epoch) 18 | 19 | def get_lr(self): 20 | self.cycle_epoch += 1 21 | 22 | t_max = self.t_max 23 | eta_min = self.eta_min 24 | t = self.cycle_epoch % t_max 25 | 26 | learning_rates = [] 27 | for lr in self.base_lrs: 28 | delta = lr - eta_min 29 | new_lr = eta_min + delta*(1 + math.cos(math.pi * t/t_max)) / 2 30 | learning_rates.append(new_lr) 31 | 32 | if t == 0: 33 | self.cycle_epoch = 0 34 | self.t_max *= self.cycle_mult 35 | 36 | return learning_rates 37 | -------------------------------------------------------------------------------- /history.csv: -------------------------------------------------------------------------------- 1 | Epoch 0 | train_loss: 6.7079 - train_accuracy: 0.0490 - valid_loss: 6.6563 - valid_accuracy: 0.0496 2 | Epoch 1 | train_loss: 5.7009 - train_accuracy: 0.1391 - valid_loss: 5.6151 - valid_accuracy: 0.1417 3 | Epoch 2 | train_loss: 5.3068 - train_accuracy: 0.1724 - valid_loss: 5.2317 - valid_accuracy: 0.1753 4 | Epoch 3 | train_loss: 5.1402 - train_accuracy: 0.1867 - valid_loss: 5.0796 - valid_accuracy: 0.1894 5 | Epoch 4 | train_loss: 5.0377 - train_accuracy: 0.1954 - valid_loss: 4.9831 - valid_accuracy: 0.1982 6 | Epoch 5 | train_loss: 4.9522 - train_accuracy: 0.2032 - valid_loss: 4.9313 - valid_accuracy: 0.2044 7 | Epoch 6 | train_loss: 4.8743 - train_accuracy: 0.2094 - valid_loss: 4.8900 - valid_accuracy: 0.2088 8 | Epoch 7 | train_loss: 4.8108 - train_accuracy: 0.2167 - valid_loss: 4.7980 - valid_accuracy: 0.2182 9 | Epoch 8 | train_loss: 4.7780 - train_accuracy: 0.2197 - valid_loss: 4.7561 - valid_accuracy: 0.2220 10 | Epoch 9 | train_loss: 4.7674 - train_accuracy: 0.2220 - valid_loss: 4.7305 - valid_accuracy: 0.2245 11 | Epoch 10 | train_loss: 4.7342 - train_accuracy: 0.2242 - valid_loss: 4.7257 - valid_accuracy: 0.2253 12 | -------------------------------------------------------------------------------- /rnn/lr.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%load_ext autoreload" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%autoreload 2" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "%matplotlib inline" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import torch\n", 37 | "from torch import nn\n", 38 | "from torch import optim\n", 39 | "from torch.nn import functional as F \n", 40 | "from torch.optim.lr_scheduler import _LRScheduler" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [] 49 | } 50 | ], 51 | "metadata": { 52 | "kernelspec": { 53 | "display_name": "Python 3", 54 | "language": "python", 55 | "name": "python3" 56 | }, 57 | "language_info": { 58 | "codemirror_mode": { 59 | "name": "ipython", 60 | "version": 3 61 | }, 62 | "file_extension": ".py", 63 | "mimetype": "text/x-python", 64 | "name": "python", 65 | "nbconvert_exporter": "python", 66 | 
"pygments_lexer": "ipython3", 67 | "version": "3.7.0" 68 | } 69 | }, 70 | "nbformat": 4, 71 | "nbformat_minor": 2 72 | } 73 | -------------------------------------------------------------------------------- /rnn/.ipynb_checkpoints/lr-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%load_ext autoreload" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%autoreload 2" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "%matplotlib inline" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import torch\n", 37 | "from torch import nn\n", 38 | "from torch import optim\n", 39 | "from torch.nn import functional as F \n", 40 | "from torch.optim.lr_scheduler import _LRScheduler" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [] 49 | } 50 | ], 51 | "metadata": { 52 | "kernelspec": { 53 | "display_name": "Python 3", 54 | "language": "python", 55 | "name": "python3" 56 | }, 57 | "language_info": { 58 | "codemirror_mode": { 59 | "name": "ipython", 60 | "version": 3 61 | }, 62 | "file_extension": ".py", 63 | "mimetype": "text/x-python", 64 | "name": "python", 65 | "nbconvert_exporter": "python", 66 | "pygments_lexer": "ipython3", 67 | "version": "3.7.0" 68 | } 69 | }, 70 | "nbformat": 4, 71 | "nbformat_minor": 2 72 | } 73 | -------------------------------------------------------------------------------- /text_classification/classify.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from fastai.train import TrainingPhase, annealing_cos 4 | from fastai.text import TextDataset, TextLMDataBunch, RNNLearner 5 | from fastai.callbacks import GeneralScheduler 6 | from fastai.callbacks.tracker import EarlyStopping, SaveModel 7 | 8 | 9 | DATA_ROOT = Path.home()/'data' 10 | IMDB_PATH = DATA_ROOT/'aclImdb' 11 | LM_PATH = IMDB_PATH/'lm' 12 | 13 | 14 | def main(): 15 | train_ds = TextDataset.from_folder(IMDB_PATH, name='train', shuffle=True) 16 | valid_ds = TextDataset.from_folder(IMDB_PATH, name='test') 17 | lm_data = [train_ds, valid_ds] 18 | lm_bunch = TextLMDataBunch.create(lm_data, path=LM_PATH) 19 | 20 | learner = RNNLearner.language_model(lm_bunch) 21 | 22 | n = sum(len(ds) for ds in lm_data) 23 | num_epochs, phases = create_phases(3, n) 24 | 25 | callbacks = [ 26 | EarlyStopping(learner, patience=2), 27 | SaveModel(learner), 28 | GeneralScheduler(learner, phases) 29 | ] 30 | 31 | learner.fit(num_epochs, ) 32 | 33 | 34 | def create_phases(n_cycles, n_items, cycle_mult=2, cycle_len=1, mom=0.8, 35 | lr=1e-3): 36 | 37 | phases = [ 38 | TrainingPhase( 39 | n_items*(cycle_len * cycle_mult**i), 40 | lrs=lr, moms=mom, lr_anneal=annealing_cos 41 | ) for i in range(n_cycles)] 42 | 43 | if cycle_mult == 1: 44 | total_epochs = n_cycles * cycle_len 45 | else: 46 | total_epochs = int( 47 | cycle_len*(1 - cycle_mult**n_cycles)/(1 - cycle_mult)) 48 | 49 | return total_epochs, phases 50 | 51 | 52 | 53 | if __name__ == '__main__': 54 | main() 55 | 
--------------------------------------------------------------------------------
/ssd/main.py:
--------------------------------------------------------------------------------
import math
from pathlib import Path

import torch
from torch import optim

from misc import to_np, make_grid, hw2corners, t
from plots import VOCPlotter
from models import SSD
from loss import ssd_loss, BinaryCrossEntropyLoss
from dataset import VOCDataset, VOCDataLoader
from core.loop import Loop
from core.schedule import CosineAnnealingLR
from core.callbacks import Logger


ROOT = Path.home().joinpath('data', 'voc2007')
TRAIN_JSON = ROOT / 'pascal_train2007.json'
TRAIN_JPEG = ROOT.joinpath('VOCdevkit', 'VOC2007', 'JPEGImages')
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'


def main():
    bs = 64
    n_anchors = 4
    dataset = VOCDataset(TRAIN_JSON, TRAIN_JPEG, device=DEVICE)
    loader = VOCDataLoader(dataset, batch_size=bs, num_workers=0)
    # plotter = VOCPlotter(id2cat=dataset.id2cat, figsize=(12, 10))
    #
    # for images, (boxes, classes) in iter(loader):
    #     with plotter:
    #         plotter.plot_boxes(*to_np(images, boxes, classes))
    #         break  # a single batch to verify everything works

    n_classes = len(dataset.id2cat)
    cycle_len = math.ceil(len(dataset)/bs)
    model = SSD(n_classes=n_classes, bias=-3.)
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    scheduler = CosineAnnealingLR(optimizer, t_max=cycle_len)
    loop = Loop(model, optimizer, scheduler, device=DEVICE)

    anchors, grid_sizes = [
        x.to(DEVICE) for x in (
            t(make_grid(n_anchors), requires_grad=False).float(),
            t([1/n_anchors], requires_grad=False).unsqueeze(1))]

    bce_loss = BinaryCrossEntropyLoss(n_classes)
    loss_fn = lambda x, y: ssd_loss(
        x, y, anchors, grid_sizes, bce_loss, n_classes)

    loop.run(
        train_data=loader,
        epochs=100,
        loss_fn=loss_fn,
        callbacks=[Logger()]
    )


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/text_classification/main_lyrics.py:
--------------------------------------------------------------------------------
from dataclasses import dataclass
from pathlib import Path
import pickle
from random import shuffle
import shutil

from fastai.text import TextDataset
from torch.utils.data import Dataset


DATA_ROOT = Path.home() / 'data'
LYRICS_PATH = DATA_ROOT / 'azlyrics' / 'many'


def main():
    meta = prepare_lyrics(LYRICS_PATH, LYRICS_PATH.parent/'prepared')
    dataset = TextDataset.from_folder(meta.folder)
    print(f'Dataset size: {len(dataset)}')


def prepare_lyrics(src, dst, test_size: float=0.2) -> 'LyricsData':
    meta = dst/'meta.pickle'

    if meta.exists():
        with meta.open('rb') as file:
            return pickle.load(file)

    classes, songs = [], {}

    for subdir in src.glob('*'):
        author = subdir.stem
        classes.append(author)
        author_songs = {}
        with (subdir/'songs.csv').open() as file:
            for line in file:
                index, _, name = line.partition(',')
                author_songs[int(index)] = name.strip()
        songs[author] = author_songs

        files = list(subdir.glob('*.txt'))
        shuffle(files)
        sz = int(len(files) * (1 - test_size))
        train, test = files[:sz], files[sz:]

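        # copy this author's shards into <dst>/train/<author> and
        # <dst>/test/<author>, the per-class folder layout that
        # TextDataset.from_folder expects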
        train_dir = dst/'train'/author
        train_dir.mkdir(parents=True, exist_ok=True)
        for txt_file in train:
            shutil.copy(txt_file, train_dir/txt_file.name)

        test_dir = dst/'test'/author
        test_dir.mkdir(parents=True, exist_ok=True)
        for txt_file in test:
            shutil.copy(txt_file, test_dir/txt_file.name)

    data = LyricsData(dst, classes, songs)

    with meta.open('wb') as file:
        pickle.dump(data, file)

    return data


@dataclass
class LyricsData:
    folder: str
    classes: list
    songs: dict


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/rnn/rnn_custom.py:
--------------------------------------------------------------------------------
"""
Training an RNN on lyrics downloaded from the AZLyrics website.
"""
from os.path import expanduser, join

import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from torchtext.data import Field

from rnn_plain import RNN, generate_text, pretty_print
from core.text import TextDataset
from core.loop import Loop, Stepper
from core.iterators import SequenceIterator
from core.schedule import CosineAnnealingLR
from core.callbacks import EarlyStopping, Checkpoint, Logger, History


ROOT = expanduser(join('~', 'data', 'azlyrics', 'lyrics'))
TRAIN_DIR = join(ROOT, 'train')
VALID_DIR = join(ROOT, 'valid')


DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'


def main():
    bs, bptt = 32, 16
    field = Field(lower=True, tokenize=list)
    dataset = TextDataset(field, keep_new_lines=True, min_freq=5)
    factory = lambda seq: SequenceIterator(seq, bptt, bs)
    dataset.build(train=TRAIN_DIR, valid=VALID_DIR, iterator_factory=factory)

    model = RNN(dataset.vocab_size,
                n_factors=50,
                batch_size=128,
                n_hidden=256,
                n_recurrent=3,
                architecture=nn.LSTM)
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    cycle_length = dataset['train'].total_iters
    scheduler = CosineAnnealingLR(optimizer, t_max=cycle_length/2, eta_min=1e-5)
    callbacks = [EarlyStopping(patience=50), Logger(), History(), Checkpoint()]
    loop = Loop(Stepper(model, optimizer, scheduler, F.nll_loss))

    loop.run(train_data=dataset['train'],
             valid_data=dataset['valid'],
             callbacks=callbacks,
             epochs=500)

    best_model = loop['Checkpoint'].best_model
    model.load_state_dict(torch.load(best_model))
    text = generate_text(model, field, seed='Deep song')
    pretty_print(text)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/cnn_fastai.py:
--------------------------------------------------------------------------------
from fastai.conv_learner import *


PATH = '/home/ck/data/cifar10/'


stats = (np.array([0.4914, 0.48216, 0.44653]),
         np.array([0.24703, 0.24349, 0.26159]))


def get_data(sz, bs):
    tfms = tfms_from_stats(stats, sz, aug_tfms=[RandomFlip()], pad=sz//8)
    return ImageClassifierData.from_paths(PATH, val_name='valid', tfms=tfms, bs=bs)


class ConvLayer(nn.Module):

    def __init__(self, ni, nf, stride=2, kernel_size=3):
        super().__init__()
        self.conv = nn.Conv2d(
            ni, nf, kernel_size=kernel_size,
            stride=stride,
            bias=False, padding=1)
        self.bn = nn.BatchNorm2d(nf)
        self.relu = nn.LeakyReLU(inplace=True)

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))


class ResNetLayer(ConvLayer):

    def forward(self, x):
        return x + super().forward(x)


class FastAIResNet(nn.Module):

    def __init__(self, layers, num_of_classes):
        super().__init__()
        self.conv = nn.Conv2d(3, 10, kernel_size=5, stride=1, padding=2)
        self.blocks = nn.ModuleList([
            nn.Sequential(
                ConvLayer(x, y),
                ResNetLayer(y, y, stride=1),
                ResNetLayer(y, y, stride=1))
            for x, y in pairs(layers)])
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(layers[-1], num_of_classes)

    def forward(self, x):
        x = self.conv(x)
        for block in self.blocks:
            x = block(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


def pairs(xs):
    current, *rest = xs
    for item in rest:
        yield current, item
        current = item


def main():
    bs = 256
    lr = 1e-2
    data = get_data(32, bs)
    net = FastAIResNet([10, 20, 40, 80, 160], 10)
    learn = ConvLearner.from_model_data(net, data)
    learn.fit(lr, 2, cycle_len=1, wds=1e-5)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/rnn/rules.py:
--------------------------------------------------------------------------------
"""
String transformation rules taken from here:
https://github.com/fastai/fastai/blob/master/fastai/text/transform.py

The IMDB dataset examples contain a lot of junk symbols and tags, so
additional preprocessing is helpful before feeding them into the tokenizer.
"""
import re
import html


def spec_add_spaces(t: str) -> str:
    """Add spaces around / and # in `t`."""

    return re.sub(r'([/#])', r' \1 ', t)


def rm_useless_spaces(t: str) -> str:
    """Remove multiple spaces in `t`."""

    return re.sub(' {2,}', ' ', t)


def replace_char_repetitions(t: str, token: str='xxrep') -> str:
    """Replace repetitions at the character level in `t`."""

    def replace(m) -> str:
        c, cc = m.groups()
        return f' {token} {len(cc)+1} {c} '

    re_rep = re.compile(r'(\S)(\1{3,})')
    return re_rep.sub(replace, t)


def replace_word_repetitions(t: str, token: str='xxwrep') -> str:
    """Replace word repetitions in `t`."""

    def replace(m) -> str:
        c, cc = m.groups()
        return f' {token} {len(cc.split())+1} {c} '

    re_wrep = re.compile(r'(\b\w+\W+)(\1{3,})')
    return re_wrep.sub(replace, t)


def replace_capitalized(t: str, token: str='xxup') -> str:
    """Replace words in all caps in `t`."""

    res = []
    for s in re.findall(r'\w+|\W+', t):
        res += (
            [f' {token} ', s.lower()]
            if s.isupper() and (len(s) > 2)
            else [s.lower()])
    return ''.join(res)


def fix_html(x: str, unknown_token: str='xxunk') -> str:
    """List of replacements from html strings in `x`."""

    re1 = re.compile(r'  +')
    x = x.replace('#39;', "'").replace('amp;', '&').replace('#146;', "'").replace(
        'nbsp;', ' ').replace('#36;', '$').replace('\\n', "\n").replace('quot;', "'").replace(
        '<br />', "\n").replace('\\"', '"').replace('<unk>', unknown_token).replace(' @.@ ', '.').replace(
        ' @-@ ', '-').replace('\\', ' \\ ')
    return re1.sub(' ', html.unescape(x))
', "\n").replace('\\"', '"').replace('', unknown_token).replace(' @.@ ','.').replace( 65 | ' @-@ ','-').replace('\\', ' \\ ') 66 | return re1.sub(' ', html.unescape(x)) 67 | 68 | 69 | # Note that the order of rules matters 70 | default_rules = ( 71 | fix_html, 72 | replace_char_repetitions, 73 | replace_word_repetitions, 74 | replace_capitalized, 75 | spec_add_spaces, 76 | rm_useless_spaces, 77 | ) -------------------------------------------------------------------------------- /text_classification/rules.py: -------------------------------------------------------------------------------- 1 | """ 2 | String transformation rules taken from here: 3 | https://github.com/fastai/fastai/blob/master/fastai/text/transform.py 4 | 5 | The IMDB dataset examples contain a lot of junk symbols and tags, so additional 6 | preprocessing is helpful before feeding them into tokenizer. 7 | """ 8 | import re 9 | import html 10 | 11 | 12 | def spec_add_spaces(t: str) -> str: 13 | """Add spaces around / and # in `t`.""" 14 | 15 | return re.sub(r'([/#])', r' \1 ', t) 16 | 17 | 18 | def rm_useless_spaces(t: str) -> str: 19 | """Remove multiple spaces in `t`.""" 20 | 21 | return re.sub(' {2,}', ' ', t) 22 | 23 | 24 | def replace_char_repetitions(t: str, token: str='xxrep') -> str: 25 | """"Replace repetitions at the character level in `t`.""" 26 | 27 | def replace(m) -> str: 28 | c,cc = m.groups() 29 | return f' {token} {len(cc)+1} {c} ' 30 | 31 | re_rep = re.compile(r'(\S)(\1{3,})') 32 | return re_rep.sub(replace, t) 33 | 34 | 35 | def replace_word_repetitions(t: str, token: str='xxwrep') -> str: 36 | """Replace word repetitions in `t`.""" 37 | 38 | def replace(m) -> str: 39 | c,cc = m.groups() 40 | return f' {token} {len(cc.split())+1} {c} ' 41 | 42 | re_wrep = re.compile(r'(\b\w+\W+)(\1{3,})') 43 | return re_wrep.sub(replace, t) 44 | 45 | 46 | def replace_capitalized(t: str, token: str='xxup') -> str: 47 | """Replace words in all caps in `t`.""" 48 | 49 | res = [] 50 | for s in re.findall(r'\w+|\W+', t): 51 | res += ( 52 | [f' {token} ',s.lower()] 53 | if s.isupper() and (len(s) > 2) 54 | else [s.lower()]) 55 | return ''.join(res) 56 | 57 | 58 | def fix_html(x: str, unknown_token: str='xxunk') -> str: 59 | """List of replacements from html strings in `x`.""" 60 | 61 | re1 = re.compile(r' +') 62 | x = x.replace('#39;', "'").replace('amp;', '&').replace( 63 | '#146;', "'").replace('nbsp;', ' ').replace('#36;', '$').replace( 64 | '\\n', "\n").replace('quot;', "'").replace('
', "\n").replace( 65 | '\\"', '"').replace('', unknown_token).replace( 66 | ' @.@ ', '.').replace(' @-@ ', '-').replace('\\', ' \\ ') 67 | return re1.sub(' ', html.unescape(x)) 68 | 69 | 70 | # Note that the order of rules matters 71 | default_rules = ( 72 | fix_html, 73 | replace_char_repetitions, 74 | replace_word_repetitions, 75 | replace_capitalized, 76 | spec_add_spaces, 77 | rm_useless_spaces, 78 | ) -------------------------------------------------------------------------------- /ssd/models.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torchvision.models import resnet34 3 | 4 | 5 | class StdConv(nn.Module): 6 | """ 7 | The convolution, batch normalization, and dropout layers gathered into 8 | single module. 9 | """ 10 | def __init__(self, ni, nf, stride=2, kernel=3, padding=1, dropout=0.1): 11 | super().__init__() 12 | self.conv = nn.Conv2d(ni, nf, kernel, stride=stride, padding=padding) 13 | self.bn = nn.BatchNorm2d(nf) 14 | self.dropout = nn.Dropout(dropout) 15 | self.relu = nn.LeakyReLU() 16 | 17 | def forward(self, x): 18 | x = self.conv(x) 19 | x = self.bn(x) 20 | x = self.relu(x) 21 | x = self.dropout(x) 22 | return x 23 | 24 | 25 | class SSDConv(nn.Module): 26 | """ 27 | A building block required to construct a Single-Shot Detector, two parallel 28 | convolutions predicting bounding boxes and classes of the objects. 29 | """ 30 | def __init__(self, ni, n_classes, kernel=3, padding=1, k=1, bias=0): 31 | super().__init__() 32 | self.b_conv = nn.Conv2d(ni, 4 * k, kernel_size=kernel, padding=padding) 33 | self.c_conv = nn.Conv2d( 34 | ni, (n_classes + 1)*k, kernel_size=kernel, padding=padding) 35 | self.init(bias) 36 | 37 | def forward(self, x): 38 | return self.b_conv(x), self.c_conv(x) 39 | 40 | def init(self, bias): 41 | self.c_conv.bias.data.zero_().add_(bias) 42 | 43 | 44 | class SSD(nn.Module): 45 | """ 46 | Single-Shot Detector model. 47 | """ 48 | def __init__(self, n_classes, dropout=0.25, bias=0, k=1, 49 | backbone=resnet34, pretrained=True, flatten=True): 50 | 51 | super().__init__() 52 | model = backbone(pretrained=pretrained) 53 | children = list(model.children()) 54 | 55 | self.k = k 56 | self.flatten = flatten 57 | self.backbone = nn.Sequential(*children[:-2]) 58 | self.relu = nn.LeakyReLU() 59 | self.dropout = nn.Dropout(dropout) 60 | self.conv1 = StdConv(512, 256, stride=1) 61 | self.conv2 = StdConv(256, 256) 62 | self.out = SSDConv(256, n_classes, k=k, bias=bias) 63 | 64 | def forward(self, x): 65 | x = self.backbone(x) 66 | x = self.relu(x) 67 | x = self.dropout(x) 68 | x = self.conv1(x) 69 | x = self.conv2(x) 70 | x = self.out(x) 71 | if self.flatten: 72 | x = [flatten_conv(obj, self.k) for obj in x] 73 | return x 74 | 75 | 76 | def flatten_conv(x, k): 77 | bs, nf, gx, gy = x.size() 78 | x = x.permute(0,2,3,1).contiguous() 79 | return x.view(bs, -1, nf//k) 80 | -------------------------------------------------------------------------------- /textsplit.py: -------------------------------------------------------------------------------- 1 | """ 2 | Splits folders with songs into training and validation subsets. 
3 | """ 4 | import argparse 5 | import json 6 | from pathlib import Path 7 | import random 8 | 9 | import pandas as pd 10 | 11 | 12 | def main(): 13 | args = parse_args() 14 | 15 | args.output.mkdir(parents=True, exist_ok=True) 16 | 17 | meta = [] 18 | 19 | for subfolder in args.input.iterdir(): 20 | print(f'Splitting folder for artist \'{subfolder.stem}\'') 21 | files = [file for file in subfolder.iterdir() if file.suffix == '.txt'] 22 | n_train = int(len(files) * args.train_size) 23 | random.shuffle(files) 24 | train_files, valid_files = files[:n_train], files[n_train:] 25 | print(f'Training: {len(train_files)}, validation: {len(valid_files)}') 26 | train_ids = {file.stem for file in train_files} 27 | 28 | split = [ 29 | ('train', train_files), 30 | ('valid', valid_files)] 31 | 32 | for line in (subfolder/'songs.csv').open(): 33 | index, _, song = line.partition(',') 34 | meta.append({ 35 | 'id': int(index), 36 | 'artist': subfolder.stem, 37 | 'song': song.strip(), 38 | 'valid': index not in train_ids 39 | }) 40 | 41 | for sub, files in split: 42 | new_dir = args.output/sub/subfolder.stem 43 | new_dir.mkdir(parents=True, exist_ok=True) 44 | for old_file in files: 45 | new_file = new_dir/old_file.name 46 | new_file.open('w').write(old_file.open().read()) 47 | 48 | with (args.output/'songs.json').open('w') as file: 49 | json.dump(json.loads( 50 | pd.DataFrame(meta).to_json(orient='records')), 51 | file, indent=2) 52 | 53 | print('Files copied into folder ', args.output) 54 | 55 | 56 | def parse_args(): 57 | parser = argparse.ArgumentParser() 58 | parser.add_argument( 59 | '-i', '--input', 60 | required=True, 61 | help='path to folders with labelled texts' 62 | ) 63 | parser.add_argument( 64 | '-o', '--output', 65 | default=Path.home(), 66 | help='path to save separated files' 67 | ) 68 | parser.add_argument( 69 | '-ts', '--train-size', 70 | default=0.8, type=float, 71 | help='amount of texts (per category) to keep for training' 72 | ) 73 | parser.add_argument( 74 | '-rs', '--random-state', 75 | default=1, type=int, 76 | help='random state to use when taking training subset' 77 | ) 78 | 79 | args = parser.parse_args() 80 | args.input = Path(args.input) 81 | args.output = Path(args.output) 82 | random.seed(args.random_state) 83 | 84 | return args 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /ssd/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | from misc import hw2corners, jaccard 8 | 9 | 10 | class BinaryCrossEntropyLoss(nn.Module): 11 | 12 | def __init__(self, num_classes): 13 | super().__init__() 14 | self.num_classes = num_classes 15 | 16 | def forward(self, predictions, target): 17 | one_hot = self.one_hot_embedding(target) 18 | t_target = one_hot[:, :-1].contiguous() 19 | t_input = predictions[:, :-1] 20 | xe = F.binary_cross_entropy_with_logits( 21 | t_input, t_target, reduction='sum') 22 | return xe / self.num_classes 23 | 24 | def one_hot_embedding(self, labels): 25 | device = labels.device 26 | matrix = torch.eye(self.num_classes + 1)[labels.data.cpu()] 27 | return matrix.to(device) 28 | 29 | 30 | def ssd_loss(y_pred, y_true, anchors, grid_sizes, loss_f, n_classes, size=224): 31 | 32 | def get_relevant(boxes, classes): 33 | """Drops samples with boxes of zero width.""" 34 | 35 | boxes = boxes.view(-1, 4).float() / size 36 | index = 
        keep = index.nonzero()[:, 0]
        return boxes[keep], classes[keep]


    def activations_to_boxes(activations):
        """Converts activation values of top layers into bounding boxes."""

        tanh = torch.tanh(activations)
        centers = (tanh[:, :2]/2 * grid_sizes) + anchors[:, :2]
        hw = (tanh[:, 2:]/2 + 1) * anchors[:, 2:]
        return hw2corners(centers, hw)


    def map_to_ground_truth(overlaps):
        """Converts an array with Jaccard metrics into predictions."""

        prior_overlap, prior_index = overlaps.max(1)
        gt_overlap, gt_index = overlaps.max(0)
        gt_overlap[prior_index] = 1.99
        for i, index in enumerate(prior_index):
            gt_index[index] = i
        return gt_overlap, gt_index


    anchor_corners = hw2corners(anchors[:, :2], anchors[:, 2:])

    box_loss, class_loss = 0, 0
    for pred_bb, pred_cls, true_bb, true_cls in zip(*y_pred, *y_true):
        true_bb, true_cls = get_relevant(true_bb, true_cls)
        activ_bb = activations_to_boxes(pred_bb)
        overlaps = jaccard(true_bb.data, anchor_corners.data)
        gt_overlap, gt_index = map_to_ground_truth(overlaps)
        gt_class = true_cls[gt_index]
        pos = gt_overlap > 0.4
        pos_index = torch.nonzero(pos)[:, 0]
        # anchors that match no object get the extra "background" class index
        gt_class[~pos] = n_classes
        gt_bb = true_bb[gt_index]
        box_loss += (activ_bb[pos_index] - gt_bb[pos_index]).abs().mean()
        class_loss += loss_f(pred_cls, gt_class)

    return box_loss + class_loss
--------------------------------------------------------------------------------
/core/iterators.py:
--------------------------------------------------------------------------------
import numpy as np


class SequenceIterator:
    """
    A simple iterator converting an array of encoded characters into a group
    of batches reshaped into a format appropriate for the RNN training
    process.
    """
    def __init__(self, seq, bptt=10, batch_size=64, random_length=True,
                 flatten_target=True):

        # Converting dataset into batches:
        # 1) truncate text length to evenly fit into number of batches
        # 2) reshape the text into N (# of batches) * M (batch size)
        # 3) transpose to convert into "long" format with fixed number of cols

        n_batches = seq.size(0) // batch_size
        truncated = seq[:n_batches * batch_size]
        batches = truncated.view(batch_size, -1).t().contiguous()

        self.bptt = bptt
        self.batch_size = batch_size
        self.random_length = random_length
        self.flatten_target = flatten_target
        self.batches = batches
        self.curr_line = 0
        self.curr_iter = 0
        self.total_lines = batches.size(0)
        self.total_iters = self.total_lines // self.bptt - 1

    @property
    def completed(self):
        if self.curr_line >= self.total_lines - 1:
            return True
        if self.curr_iter >= self.total_iters:
            return True
        return False

    def __iter__(self):
        self.curr_line = self.curr_iter = 0
        return self

    def __next__(self):
        return self.next()

    def next(self):
        if self.completed:
            raise StopIteration()
        seq_len = self.get_sequence_length()
        batch = self.get_batch(seq_len)
        self.curr_line += seq_len
        self.curr_iter += 1
        return batch

    def get_sequence_length(self):
        """
        Returns a length of sequence taken from the dataset to form a batch.

        By default, this value is based on the bptt parameter, but it is
        randomized during the training process to pick sequences of
        characters with slightly different lengths.
        """
        if not self.random_length:
            return self.bptt
        bptt = self.bptt
        if np.random.random() >= 0.95:
            bptt /= 2
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        return seq_len

    def get_batch(self, seq_len):
        """
        Picks training and target batches from the source depending on current
        iteration number.
        """
        i, source = self.curr_line, self.batches
        seq_len = min(seq_len, self.total_lines - 1 - i)
        X = source[i:i + seq_len].contiguous()
        y = source[(i + 1):(i + 1) + seq_len].contiguous()
        if self.flatten_target:
            y = y.view(-1)
        return X, y
--------------------------------------------------------------------------------
/text_classification/tokenizer.py:
--------------------------------------------------------------------------------
from collections import Counter, defaultdict
from pathlib import Path
import pickle
from multiprocessing import Pool, cpu_count

import numpy as np
import spacy
from spacy.symbols import ORTH

from rules import default_rules


BOS, FLD, UNK, PAD = SPECIAL_TOKENS = 'xxbos', 'xxfld', 'xxunk', 'xxpad'


class SpacyTokenizer:
    """A thin wrapper on top of Spacy tokenization tools."""

    def __init__(self, lang='en', rules=default_rules,
                 special_tokens=SPECIAL_TOKENS):
        tokenizer = spacy.load(lang).tokenizer
        if special_tokens:
            for token in special_tokens:
                tokenizer.add_special_case(token, [{ORTH: token}])

        self.rules = rules or []
        self.tokenizer = tokenizer

    def tokenize(self, text: str):
        """Converts a single string into a list of tokens."""

        for rule in self.rules:
            text = rule(text)
        return [t.text for t in self.tokenizer(text)]


class Vocab:

    def __init__(self, itos):
        self.itos = itos
        self.stoi = defaultdict(int, {v: k for k, v in enumerate(itos)})
        self.size = len(itos)

    def __eq__(self, other):
        if not isinstance(other, Vocab):
            raise TypeError(
                'can only compare with another Vocab instance, '
                'got %s' % type(other))
        return self.itos == other.itos

    def save(self, path: Path):
        with path.open('wb') as file:
            pickle.dump(self.itos, file)

    @staticmethod
    def load(path: Path) -> 'Vocab':
        with path.open('rb') as file:
            itos = pickle.load(file)
        return Vocab(itos)

    @staticmethod
    def make_vocab(tokens, min_freq: int=3, max_vocab: int=60000,
                   pad=PAD, unknown=UNK) -> 'Vocab':
        freq = Counter(token for sentence in tokens for token in sentence)
        most_common = freq.most_common(max_vocab)
        itos = [token for token, count in most_common if count > min_freq]
        itos.insert(0, pad)
        if unknown in itos:
            itos.remove(unknown)
        itos.insert(0, unknown)
        return Vocab(itos)

    def numericalize(self, texts):
        return [
            np.array([self.stoi[token] for token in text], dtype=np.int64)
            for text in texts]

    def textify_all(self, samples):
        return [self.textify(sample) for sample in samples]

    def textify(self, tokens):
        return ' '.join([self.itos[number] for number in tokens])


def tokenize_in_parallel(texts):
    n_workers = cpu_count()
    parts = split_into(texts, len(texts)//n_workers + 1)
    with Pool(n_workers) as pool:
        results = pool.map(tokenize, parts)
    return sum(results, [])


def tokenize(texts):
    tokenizer = SpacyTokenizer()
    return [tokenizer.tokenize(text) for text in texts]


def split_into(arr, n):
    return [arr[i:i + n] for i in range(0, len(arr), n)]
--------------------------------------------------------------------------------
/core/text.py:
--------------------------------------------------------------------------------
from pathlib import Path

from torchtext.data import Field


class Dataset:

    def __init__(self, field: Field, min_freq: int=1):
        self.field = field
        self.min_freq = min_freq
        self.subsets = {}
        self.vocab_size = None

    def build(self, train: str, valid: str, iterator_factory):
        content_per_file = {}
        for name, path in [('train', train), ('valid', valid)]:
            file_content = []
            new_line = False
            with open(path) as file:
                for line in file:
                    if line.endswith('\n'):
                        new_line = True
                    if line == '\n':
                        continue
                    file_content += self.field.preprocess(line)
                    if new_line:
                        file_content.append(' ')
                        new_line = False
            content_per_file[name] = file_content

        train_text = content_per_file['train']
        self.field.build_vocab(train_text, min_freq=self.min_freq)
        self.vocab_size = len(self.field.vocab.itos)

        for name, content in content_per_file.items():
            sequence = self.field.numericalize(content)
            iterator = iterator_factory(sequence.view(-1))
            self.subsets[name] = iterator

    def __getitem__(self, item):
        if item not in self.subsets:
            raise ValueError(f'Unexpected dataset name: {item}')
        return self.subsets[item]


class TextDataset:

    def __init__(self, field: Field, min_freq: int=1, append_eos: bool=True,
                 keep_new_lines=False, search_pattern='*.txt'):

        self.field = field
        self.min_freq = min_freq
        self.append_eos = append_eos
        self.keep_new_lines = keep_new_lines
        self.search_pattern = search_pattern
        self.vocab_size = None
        self.subsets = {}

    def build(self, train: str, iterator_factory, valid: str=None,
              test: str=None):

        directories = [('train', train), ('valid', valid), ('test', test)]
        content_per_folder = {}

        for name, folder in directories:
            if folder is None:
                continue
            content = []
            for filename in Path(folder).glob(self.search_pattern):
                new_line = False
                with open(filename) as file:
                    for line in file:
                        if line.endswith('\n'):
                            new_line = True
                        if line == '\n':
                            continue
                        content += self.field.preprocess(line)
                        if new_line:
                            char = '\n' if self.keep_new_lines else ' '
                            content.append(char)
                            new_line = False
                if self.append_eos:
                    content.append([''])
            content_per_folder[name] = content

        train_text = content_per_folder['train']
        self.field.build_vocab(train_text, min_freq=self.min_freq)
        self.vocab_size = len(self.field.vocab.itos)

        for name, content in content_per_folder.items():
            sequence = self.field.numericalize(content)
            iterator = iterator_factory(sequence.view(-1))
            self.subsets[name] = iterator

    def __getitem__(self, item):
        if item not in self.subsets:
            raise ValueError(f'Unexpected dataset name: {item}')
        return self.subsets[item]
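

# Editor's sketch (not part of the original file), mirroring how rnn_oop.py
# wires this class up: a character-level Field, a TextDataset built from
# train/valid folders of .txt files, and a SequenceIterator factory that
# yields (seq_len x batch_size) input batches with flattened targets.
#
#     from torchtext.data import Field
#     from core.iterators import SequenceIterator
#
#     field = Field(lower=True, tokenize=list)
#     dataset = TextDataset(field, min_freq=5)
#     factory = lambda seq: SequenceIterator(seq, bptt=8, batch_size=64)
#     dataset.build(train='data/trn', valid='data/val',
#                   iterator_factory=factory)
#     X, y = next(iter(dataset['train']))  # X: (seq_len, 64), y: flattened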
--------------------------------------------------------------------------------
/coreml_export/model.py:
--------------------------------------------------------------------------------
from torch import nn
from torch.nn import functional as F


class Flatten(nn.Module):
    """
    Pseudo-layer converting convolution output into a flat format compatible
    with linear layers.
    """
    def forward(self, x):
        return x.view(x.size(0), -1)


class LinearConv3x3(nn.Module):
    """
    A convolution with a 3x3 kernel and linear activations.

    Params:
        ni: Number of input channels.
        nf: Number of output channels.
        stride: Convolution stride.
        padding: Convolution padding.

    """
    def __init__(self, ni, nf, stride=1, padding=1):
        super().__init__()
        self.conv = nn.Conv2d(ni, nf, 3, stride, padding, bias=False)
        self.bn = nn.BatchNorm2d(nf)

    def forward(self, x):
        return self.bn(self.conv(x))


class IdentityBlock(nn.Module):
    """
    Basic building block for small ResNet models.

    The block consists of two convolutions with a shortcut connection between
    input and output. Note that this type of block is a simple version
    usually used for "shallow" ResNets. Deeper networks use a bottleneck
    design for performance considerations.

    Params:
        ni: Number of input channels.
        nf: Number of output channels. If None, then the `ni` value is used.
            Otherwise, the block includes a downsampling convolution to
            convert the input tensor into a shape compatible with the output
            before applying the addition.
        stride: The stride value of the first and downsampling convolutions.

    """
    def __init__(self, ni, nf=None, stride=1):
        super().__init__()

        nf = ni if nf is None else nf

        self.conv1 = LinearConv3x3(ni, nf, stride=stride)
        self.conv2 = LinearConv3x3(nf, nf)
        if ni != nf:
            self.downsample = nn.Sequential(
                nn.Conv2d(ni, nf, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(nf))

    def forward(self, x):
        out = self.conv1(x)
        out = F.leaky_relu(out)
        out = self.conv2(out)
        out += self.shortcut(x)
        out = F.leaky_relu(out)
        return out

    def shortcut(self, x):
        if hasattr(self, 'downsample'):
            return self.downsample(x)
        return x


class ResNet(nn.Module):
    """
    Custom ResNet classification architecture with identity blocks.
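
    Assumes single-channel input images (the stem convolution takes one
    channel); in this repository it is trained on 28x28 EMNIST digits in
    main.py.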
81 | """ 82 | def __init__(self, num_of_classes): 83 | super().__init__() 84 | self.conv = LinearConv3x3(1, 10, padding=2) 85 | self.blocks = nn.ModuleList([ 86 | IdentityBlock(10, 20, stride=2), 87 | IdentityBlock(20, 40, stride=2), 88 | IdentityBlock(40, 80, stride=2) 89 | ]) 90 | self.pool = nn.AvgPool2d(4) 91 | self.flatten = Flatten() 92 | self.fc = nn.Linear(80, num_of_classes) 93 | init(self) 94 | 95 | def forward(self, x): 96 | x = F.leaky_relu(self.conv(x)) 97 | for block in self.blocks: 98 | x = block(x) 99 | x = self.pool(x) 100 | x = self.flatten(x) 101 | x = self.fc(x) 102 | return x 103 | 104 | 105 | def init(m): 106 | if hasattr(m, 'children'): 107 | for child in m.children(): 108 | init(child) 109 | if isinstance(m, nn.Conv2d): 110 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 111 | elif isinstance(m, nn.BatchNorm2d): 112 | nn.init.constant_(m.weight, 1) 113 | nn.init.constant_(m.bias, 0) 114 | -------------------------------------------------------------------------------- /rnn/rnn_oop.py: -------------------------------------------------------------------------------- 1 | """ 2 | The RNN training implementation using object-oriented classes hierarchy. 3 | """ 4 | import textwrap 5 | from os.path import expanduser, join 6 | 7 | import torch 8 | from torch import nn 9 | from torch import optim 10 | from torch.nn import functional as F 11 | from torchtext.data import Field 12 | 13 | from core.text import TextDataset 14 | from core.loop import Loop, Stepper 15 | from core.iterators import SequenceIterator 16 | from core.schedule import CosineAnnealingLR 17 | from core.callbacks import EarlyStopping, Checkpoint, Logger, History 18 | 19 | 20 | ROOT = expanduser(join('~', 'data', 'fastai', 'nietzsche')) 21 | TRAIN_DIR = join(ROOT, 'trn') 22 | VALID_DIR = join(ROOT, 'val') 23 | 24 | 25 | DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu' 26 | 27 | 28 | class RNN(nn.Module): 29 | 30 | def __init__(self, vocab_size, n_factors, batch_size, n_hidden, 31 | n_recurrent=1, architecture=nn.RNN, dropout=0.5, 32 | device=DEVICE): 33 | 34 | self.vocab_size = vocab_size 35 | self.n_hidden = n_hidden 36 | self.n_recurrent = n_recurrent 37 | self.device = device 38 | 39 | super().__init__() 40 | self.embed = nn.Embedding(vocab_size, n_factors) 41 | self.rnn = architecture( 42 | n_factors, n_hidden, 43 | dropout=dropout, num_layers=n_recurrent) 44 | self.out = nn.Linear(n_hidden, vocab_size) 45 | self.hidden_state = self.init_hidden(batch_size).to(device) 46 | self.batch_size = batch_size 47 | self.to(device) 48 | 49 | def forward(self, batch): 50 | bs = batch.size(1) 51 | if bs != self.batch_size: 52 | self.hidden_state = self.init_hidden(bs) 53 | self.batch_size = bs 54 | embeddings = self.embed(batch) 55 | rnn_outputs, h = self.rnn(embeddings, self.hidden_state) 56 | self.hidden_state = truncate_history(h) 57 | linear = self.out(rnn_outputs) 58 | return F.log_softmax(linear, dim=-1).view(-1, self.vocab_size) 59 | 60 | def init_hidden(self, batch_size): 61 | if type(self.rnn) == nn.LSTM: 62 | # an LSTM cell requires two hidden states 63 | h = torch.zeros(2, self.n_recurrent, batch_size, self.n_hidden) 64 | else: 65 | h = torch.zeros(self.n_recurrent, batch_size, self.n_hidden) 66 | return h.to(self.device) 67 | 68 | 69 | def truncate_history(v): 70 | if type(v) == torch.Tensor: 71 | return v.detach() 72 | else: 73 | return tuple(truncate_history(x) for x in v) 74 | 75 | 76 | def generate_text(model, field, seed, n=500): 77 | string = seed 78 | for i in range(n): 79 | indexes = 
        predictions = model(indexes.transpose(0, 1))
        last_output = predictions[-1]
        [most_probable] = torch.multinomial(last_output.exp(), 1)
        char = field.vocab.itos[most_probable]
        seed = seed[1:] + char
        string += char
    return string


def pretty_print(text, width=80):
    print('\n'.join(textwrap.wrap(text, width=width)))


def main():
    bs = 64
    bptt = 8
    n_factors = 50
    n_hidden = 512
    n_recurrent = 2
    n_epochs = 100

    field = Field(lower=True, tokenize=list)
    dataset = TextDataset(field, min_freq=5)
    factory = lambda seq: SequenceIterator(seq, bptt, bs)
    dataset.build(train=TRAIN_DIR, valid=VALID_DIR, iterator_factory=factory)

    model = RNN(dataset.vocab_size,
                n_factors=n_factors,
                batch_size=bs,
                n_hidden=n_hidden,
                n_recurrent=n_recurrent,
                architecture=nn.LSTM)
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    cycle_length = dataset['train'].total_iters
    scheduler = CosineAnnealingLR(optimizer, t_max=cycle_length, eta_min=1e-5)
    loop = Loop(model, optimizer, scheduler, device=DEVICE)

    loop.run(train_data=dataset['train'],
             valid_data=dataset['valid'],
             epochs=n_epochs,
             callbacks=[
                 EarlyStopping(patience=50),
                 Logger(),
                 History(),
                 Checkpoint()])

    best_model = loop['Checkpoint'].best_model
    model.load_state_dict(torch.load(best_model))
    text = generate_text(model, field, seed='For thos')
    pretty_print(text)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/text_classification/main_imdb.py:
--------------------------------------------------------------------------------
from pathlib import Path

import fire
from fastai.data import DataBunch
from fastai.text import RNNLearner
from fastai.train import TrainingPhase, annealing_cos
from fastai.callbacks import GeneralScheduler
from fastai.callbacks.tracker import EarlyStopping, SaveModel
from fastai.text.data import TextLMDataBunch
from torch.nn import functional as F

from dataset import IMDB
from tokenizer import Vocab, tokenize_in_parallel
from utils import is_empty


DATA_ROOT = Path.home() / 'data'
IMDB_PATH = DATA_ROOT / 'aclImdb'
LM_PATH = IMDB_PATH / 'lm'
TRAIN_PATH = DATA_ROOT / 'train'
TEST_PATH = DATA_ROOT / 'test'


def main():
    fire.Fire(train_lm)


def train_lm(n_cycles: int=3, cycle_len: int=1, cycle_mult: int=2,
             momentum: float=0.8, bptt: int=40, lr: float=1e-3,
             wd: float=1e-7):

    datasets = create_or_restore(DATA_ROOT)
    lm_data = [
        fastai_patch(ds) for ds in (
            datasets['train_unsup'], datasets['test_unsup'])]
    bunch = TextLMDataBunch.create(lm_data, path=LM_PATH, bptt=bptt)

    n = sum(len(ds) for ds in lm_data)
    phases = [
        TrainingPhase(
            n*(cycle_len * cycle_mult**i),
            lrs=lr, moms=momentum, lr_anneal=annealing_cos
        ) for i in range(n_cycles)]
    learner = RNNLearner.language_model(bunch, bptt)
    cbs = [
        EarlyStopping(learner, patience=2),
        GeneralScheduler(learner, phases),
        SaveModel(learner)]

    if cycle_mult == 1:
        total_epochs = n_cycles * cycle_len
    else:
        total_epochs = int(
            cycle_len*(1 - cycle_mult**n_cycles)/(1 - cycle_mult))

print(f'Total number of epochs: {total_epochs:d}')
56 | try:
57 | learner.fit(total_epochs, wd=wd, callbacks=cbs)
58 | except RuntimeError as e:
59 | print(f'Model training error: {e}')
60 | finally:
61 | folder = learner.path/learner.model_dir
62 | print(f'Saving latest model state into {folder}')
63 | learner.save('lm_final')
64 | learner.save_encoder('lm_final_enc')
65 |
66 |
67 | def fastai_patch(ds):
68 | """Adds properties to the dataset that are required for compatibility with
69 | the fastai library.
70 | """
71 | ds.__dict__['ids'] = ds.train_data if ds.train else ds.test_data
72 | ds.__dict__['vocab_size'] = ds.vocab.size
73 | ds.__dict__['loss_func'] = F.cross_entropy
74 | return ds
75 |
76 |
77 | def create_or_restore(path: Path):
78 | """Prepares IMDB datasets from raw files, or loads previously saved objects
79 | into memory.
80 | """
81 | datasets_dir = path / 'aclImdb' / 'datasets'
82 |
83 | if datasets_dir.exists() and not is_empty(datasets_dir):
84 | print('Loading data from %s' % datasets_dir)
85 | datasets = {}
86 | for filename in datasets_dir.glob('*.pickle'):
87 | datasets[filename.stem] = IMDB.load(filename)
88 |
89 | else:
90 | print('Creating folder %s' % datasets_dir)
91 |
92 | datasets_dir.mkdir(parents=True, exist_ok=True)
93 |
94 | print('Tokenizing supervised data (sentiment classification)')
95 |
96 | train_sup = IMDB(
97 | path, supervised=True, train=True,
98 | tokenizer=tokenize_in_parallel,
99 | make_vocab=Vocab.make_vocab)
100 |
101 | test_sup = IMDB(
102 | path, supervised=True, train=False,
103 | tokenizer=tokenize_in_parallel,
104 | vocab=train_sup.vocab)
105 |
106 | print('Tokenizing unsupervised data (language model)')
107 |
108 | train_unsup = IMDB(
109 | path, supervised=False, train=True,
110 | tokenizer=tokenize_in_parallel,
111 | make_vocab=Vocab.make_vocab)
112 |
113 | test_unsup = IMDB(
114 | path, supervised=False, train=False,
115 | tokenizer=tokenize_in_parallel,
116 | vocab=train_unsup.vocab)
117 |
118 | datasets = {
119 | 'train_sup': train_sup,
120 | 'test_sup': test_sup,
121 | 'train_unsup': train_unsup,
122 | 'test_unsup': test_unsup
123 | }
124 |
125 | for name, dataset in datasets.items():
126 | print(f'Saving dataset {name}')
127 | dataset.save(datasets_dir / f'{name}.pickle')
128 |
129 | for name, dataset in datasets.items():
130 | print(f'{name} vocab size: {dataset.vocab.size}')
131 |
132 | return datasets
133 |
134 |
135 | if __name__ == '__main__':
136 | main()
137 | -------------------------------------------------------------------------------- /coreml_export/main.py: -------------------------------------------------------------------------------- 1 | """
2 | An example of converting a PyTorch model into Core ML format using ONNX as an
3 | intermediate format
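(the flow implemented in export_to_core_ml below: train the model in PyTorch,
serialize it with torch.onnx.export, then convert the saved ONNX file into an
.mlmodel with onnx_coreml.convert).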
4 | """ 5 | import math 6 | from pathlib import Path 7 | 8 | from onnx_coreml import convert 9 | import matplotlib.pyplot as plt 10 | from multiprocessing import cpu_count 11 | 12 | import torch 13 | from torch import optim 14 | from torchvision import transforms 15 | from torch.nn import functional as F 16 | from torch.utils.data import DataLoader 17 | from torchvision.datasets import EMNIST 18 | from torch.optim.lr_scheduler import CosineAnnealingLR 19 | 20 | from core.loop import Loop 21 | from core.metrics import accuracy 22 | from core.callbacks import default_callbacks 23 | 24 | from model import ResNet 25 | 26 | 27 | DATA_ROOT = Path.home() / 'data' / 'emnist' 28 | DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 29 | STATS = [0.17325], [0.33163] 30 | 31 | 32 | def main(): 33 | data_transforms = { 34 | 'train': transforms.Compose([ 35 | transforms.RandomRotation(4), 36 | transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)), 37 | transforms.ToTensor(), 38 | transforms.Normalize(*STATS) 39 | ]), 40 | 'valid': transforms.Compose([ 41 | transforms.ToTensor(), 42 | transforms.Normalize(*STATS) 43 | ]) 44 | } 45 | 46 | n_epochs = 3 47 | batch_size = 4096 48 | num_workers = 0 # cpu_count() 49 | 50 | datasets = load_dataset(data_transforms, batch_size, num_workers) 51 | n_samples = len(datasets['train']['loader']) 52 | n_batches = math.ceil(n_samples / batch_size) 53 | 54 | model = ResNet(10) 55 | opt = optim.Adam(model.parameters(), lr=1e-2) 56 | sched = CosineAnnealingLR(opt, T_max=n_batches/4, eta_min=1e-5) 57 | loop = Loop(model, opt, sched, device=DEVICE) 58 | 59 | # loop.run(train_data=datasets['train']['loader'], 60 | # valid_data=datasets['valid']['loader'], 61 | # loss_fn=F.cross_entropy, 62 | # metrics=[accuracy], 63 | # callbacks=default_callbacks(), 64 | # epochs=n_epochs) 65 | 66 | # file_name = loop['Checkpoint'].best_model 67 | dataset = datasets['valid']['loader'] 68 | # validate_model(model, file_name, dataset, DEVICE) 69 | export_to_core_ml(model) 70 | 71 | 72 | def load_dataset(data_transforms, batch_size=1024, num_workers=0, 73 | root=DATA_ROOT, split='digits'): 74 | 75 | datasets = {} 76 | for name in ('train', 'valid'): 77 | is_training = name == 'train' 78 | dataset = EMNIST( 79 | root=root, split=split, train=is_training, download=True, 80 | transform=data_transforms[name]) 81 | loader = DataLoader( 82 | dataset, batch_size=batch_size, num_workers=num_workers) 83 | datasets[name] = {'dataset': dataset, 'loader': loader} 84 | return datasets 85 | 86 | 87 | def show_predictions(images, suptitle='', titles=None, dims=(4, 4), 88 | figsize=(12, 12), stats=STATS): 89 | 90 | f, ax = plt.subplots(*dims, figsize=figsize) 91 | titles = titles or [] 92 | f.suptitle(suptitle) 93 | mean, std = stats or [0, 1] 94 | images *= std 95 | images += mean 96 | for i, (img, ax) in enumerate(zip(images, ax.flat)): 97 | ax.imshow(img.reshape(28, 28)) 98 | if i < len(titles): 99 | ax.set_title(titles[i]) 100 | plt.show() 101 | 102 | 103 | def validate_model(model, model_file, dataset, device): 104 | weights = torch.load(model_file) 105 | model.load_state_dict(weights) 106 | x, y = [t.to(device) for t in random_sample(dataset)] 107 | y_pred = model(x) 108 | valid_acc = accuracy(y_pred, y) 109 | title = f'Validation accuracy: {valid_acc:2.2%}' 110 | titles = [str(x) for x in to_np(y_pred.argmax(dim=1))] 111 | images = to_np(x.permute(0, 3, 2, 1)) 112 | show_predictions(images, title, titles) 113 | 114 | 115 | def random_sample(dataset, n=16): 116 | loader = 
DataLoader(dataset, batch_size=n, shuffle=True)
117 | return next(iter(loader))
118 |
119 |
120 | def export_to_core_ml(model):
121 | model.eval()
122 | device = model.fc.weight.device
123 | dummy_input = torch.randn(16, 1, 28, 28, requires_grad=True).to(device)
124 | torch.onnx.export(model, dummy_input, 'model.onnx', export_params=True)
125 | core_ml_model = convert('model.onnx')
126 | core_ml_model.save('model.mlmodel')
127 |
128 |
129 | def to_np(*tensors):
130 |
131 | def convert_to_numpy(obj):
132 | return obj.detach().cpu().numpy()
133 |
134 | if len(tensors) == 1:
135 | return convert_to_numpy(tensors[0])
136 | return [convert_to_numpy(tensor) for tensor in tensors]
137 |
138 |
139 | if __name__ == '__main__':
140 | main()
141 | -------------------------------------------------------------------------------- /ssd/dataset.py: -------------------------------------------------------------------------------- 1 | import json
2 | from itertools import chain, islice
3 | from concurrent.futures import ThreadPoolExecutor
4 |
5 | import numpy as np
6 | import pandas as pd
7 |
8 | import torch
9 | from torch.utils.data import Dataset
10 | from torch.utils.data import BatchSampler, SequentialSampler, RandomSampler
11 | from torchvision import transforms
12 |
13 | from utils import from_voc, read_sample, pad, t
14 |
15 |
16 | class VOCDataset(Dataset):
17 |
18 | def __init__(self, json_path, images_path, size=224, augmentations=None,
19 | device=None):
20 |
21 | self.json_path = json_path
22 | self.images_path = images_path
23 | self.size = size
24 | self.device = device or 'cpu'
25 | self.transform = build_transform(augmentations)
26 | self.id2cat = None
27 | self.cat2id = None
28 | self._dataset = None
29 | self.init()
30 |
31 | def init(self):
32 | with open(self.json_path) as file:
33 | content = json.load(file)
34 |
35 | annotations_df = pd.DataFrame(content['annotations']).set_index('id')
36 | images_df = pd.DataFrame(content['images']).set_index('id')
37 | categories_df = pd.DataFrame(content['categories']).set_index('id')
38 | columns = ['area', 'segmentation', 'height', 'width', 'supercategory']
39 |
40 | df = pd.DataFrame(
41 | annotations_df.
42 | join(images_df, on='image_id').
43 | join(categories_df, on='category_id').
44 | drop(columns=columns)
45 | ).rename(columns={'name': 'category_name'})
46 |
47 | dataset = df.loc[df.ignore != 1].reset_index(drop=True)
48 | dataset['bbox'] = dataset.bbox.map(from_voc)
49 | categories = df.category_name.unique()
50 | id2cat = {i: c for i, c in enumerate(categories, 1)}
51 | cat2id = {c: i for i, c in enumerate(categories, 1)}
52 |
53 | samples = []
54 | for file_name, group in dataset.groupby('file_name'):
55 | boxes = list(chain.from_iterable(group.bbox))
56 | classes = [cat2id[name] for name in group.category_name]
57 | samples.append((file_name, boxes, classes))
58 |
59 | df = pd.DataFrame(samples, columns=['file_name', 'boxes', 'classes'])
60 | df['boxes'] = df.boxes.map(np.array)
61 | df['classes'] = df.classes.map(np.array)
62 |
63 | self.id2cat = id2cat
64 | self.cat2id = cat2id
65 | self._dataset = df
66 |
67 | def __getitem__(self, index):
68 | """
69 | Note that index could be a single integer, or a batch of indexes.
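
        For example (illustrative), ds[[0, 1, 2]] collates three samples
        into (images, (padded_boxes, padded_classes)) tensors.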
70 | """ 71 | batch = self._dataset.loc[index] 72 | images, boxes = [], [] 73 | for sample in batch.itertuples(): 74 | path = self.images_path / sample.file_name 75 | np_image, box = read_sample(path, sample.boxes, size=self.size) 76 | images.append(self.transform(np_image)) 77 | boxes.append(box) 78 | 79 | boxes = pad(boxes) 80 | classes = pad(batch.classes.values) 81 | tensors = torch.stack(images), t(boxes), t(classes) 82 | x, y1, y2 = [tensor.to(self.device) for tensor in tensors] 83 | return x, (y1, y2) 84 | 85 | def __len__(self): 86 | return len(self._dataset) 87 | 88 | 89 | class VOCDataLoader: 90 | 91 | def __init__(self, dataset, batch_size=1, shuffle=False, drop_last=False, 92 | num_workers=0): 93 | 94 | self.dataset = dataset 95 | self.batch_size = batch_size 96 | self.shuffle = shuffle 97 | self.drop_last = drop_last 98 | self.num_workers = num_workers 99 | self.batch_sampler = self._get_sampler() 100 | 101 | def __len__(self): 102 | return len(self.batch_sampler) 103 | 104 | def __iter__(self): 105 | iterator = iter(self.batch_sampler) 106 | if self.num_workers > 0: 107 | n = self.num_workers * 10 108 | chunks = islice(iterator, 0, n) 109 | with ThreadPoolExecutor(self.num_workers) as executor: 110 | yield from executor.map(self._get_batch, chunks) 111 | else: 112 | for indexes in iterator: 113 | yield self._get_batch(indexes) 114 | 115 | def _get_sampler(self): 116 | sampler_class = RandomSampler if self.shuffle else SequentialSampler 117 | sampler = sampler_class(self.dataset) 118 | batch_sampler = BatchSampler(sampler, self.batch_size, self.drop_last) 119 | return batch_sampler 120 | 121 | def _get_batch(self, indexes): 122 | return self.dataset[indexes] 123 | 124 | 125 | 126 | def build_transform(augmentations=None): 127 | transforms_list = augmentations or [] 128 | transforms_list.append(transforms.ToTensor()) 129 | return transforms.Compose(transforms_list) 130 | -------------------------------------------------------------------------------- /text_classification/lyrics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 64, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pathlib import Path\n", 10 | "import warnings\n", 11 | "import pickle\n", 12 | "import types\n", 13 | "warnings.simplefilter('ignore', category=RuntimeWarning)" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 117, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import torch\n", 23 | "import numpy as np\n", 24 | "from fastai.text import RNNLearner\n", 25 | "from fastai.text import TextDataset, TextClasDataBunch, convert_weights, Vocab\n", 26 | "from main_imdb import create_or_restore" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 26, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "DATA_ROOT = Path.home() / 'data'\n", 36 | "IMDB_PATH = DATA_ROOT / 'aclImdb'\n", 37 | "LM_PATH = IMDB_PATH / 'lm'\n", 38 | "TRAIN_PATH = DATA_ROOT / 'train'\n", 39 | "TEST_PATH = DATA_ROOT / 'test'\n", 40 | "\n", 41 | "LYRICS_PATH = DATA_ROOT / 'azlyrics' / 'prepared'\n", 42 | "CLASSIFY_PATH = IMDB_PATH / 'cls'" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 72, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "Loading data from /home/ck/data/aclImdb/datasets\n", 55 | "train_sup vocab size: 33069\n", 56 | "test_unsup vocab 
size: 54255\n",
57 | "test_sup vocab size: 33069\n",
58 | "train_unsup vocab size: 54255\n"
59 | ]
60 | }
61 | ],
62 | "source": [
63 | "imdb_datasets = create_or_restore(DATA_ROOT)\n",
64 | "train_itos = imdb_datasets['train_unsup'].vocab.itos\n",
65 | "\n",
66 | "with open('itos.pkl', 'wb') as file:\n",
67 | " pickle.dump(train_itos, file)\n",
68 | "\n",
69 | "vocab = Vocab(Path.cwd())\n",
70 | "train_ds = TextDataset.from_folder(LYRICS_PATH, vocab=vocab, name='train')\n",
71 | "valid_ds = TextDataset.from_folder(LYRICS_PATH, vocab=train_ds.vocab, name='test')\n",
72 | "bunch = TextClasDataBunch.create([train_ds, valid_ds], path=CLASSIFY_PATH)"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 73,
78 | "metadata": {},
79 | "outputs": [],
80 | "source": [
81 | "learner = RNNLearner.classifier(bunch, path=LM_PATH)"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 74,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "learner.load_encoder('lm_final_enc')"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 119,
96 | "metadata": {},
97 | "outputs": [
98 | {
99 | "name": "stdout",
100 | "output_type": "stream",
101 | "text": [
102 | "epoch train loss valid loss accuracy\n",
103 | "0 1.665988 1.859067 0.366013\n",
104 | "1 1.564737 1.876938 0.356209\n",
105 | "2 1.524006 1.877863 0.349673\n",
106 | "3 1.535829 1.894559 0.310458\n",
107 | "4 1.556463 1.926206 0.316993\n",
108 | "5 1.530345 1.904533 0.316993\n",
109 | "6 1.473818 1.867618 0.349673\n",
110 | "7 1.471133 1.892054 0.362745\n",
111 | "8 1.496333 1.935839 0.330065\n",
112 | "9 1.453613 1.837729 0.411765\n"
113 | ]
114 | }
115 | ],
116 | "source": [
117 | "learner.fit(10)"
118 | ]
119 | },
120 | {
121 | "cell_type": "code",
122 | "execution_count": 127,
123 | "metadata": {},
124 | "outputs": [],
125 | "source": [
126 | "_, preds = learner.get_preds()"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 128,
132 | "metadata": {},
133 | "outputs": [
134 | {
135 | "data": {
136 | "text/plain": [
137 | "0.14052287581699346"
138 | ]
139 | },
140 | "execution_count": 128,
141 | "metadata": {},
142 | "output_type": "execute_result"
143 | }
144 | ],
145 | "source": [
146 | "y_hat = preds.cpu().numpy()\n",
147 | "y_true = valid_ds.labels\n",
148 | "np.mean(y_hat == y_true)"
149 | ]
150 | }
151 | ],
152 | "metadata": {
153 | "kernelspec": {
154 | "display_name": "Python 3",
155 | "language": "python",
156 | "name": "python3"
157 | },
158 | "language_info": {
159 | "codemirror_mode": {
160 | "name": "ipython",
161 | "version": 3
162 | },
163 | "file_extension": ".py",
164 | "mimetype": "text/x-python",
165 | "name": "python",
166 | "nbconvert_exporter": "python",
167 | "pygments_lexer": "ipython3",
168 | "version": "3.7.0"
169 | }
170 | },
171 | "nbformat": 4,
172 | "nbformat_minor": 2
173 | }
174 | --------------------------------------------------------------------------------
"language_info": { 159 | "codemirror_mode": { 160 | "name": "ipython", 161 | "version": 3 162 | }, 163 | "file_extension": ".py", 164 | "mimetype": "text/x-python", 165 | "name": "python", 166 | "nbconvert_exporter": "python", 167 | "pygments_lexer": "ipython3", 168 | "version": "3.7.0" 169 | } 170 | }, 171 | "nbformat": 4, 172 | "nbformat_minor": 2 173 | } 174 | -------------------------------------------------------------------------------- /text_classification/dataset.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import pickle 3 | import tarfile 4 | from typing import Callable 5 | 6 | import requests 7 | from torch.utils.data import Dataset 8 | 9 | from utils import is_empty 10 | 11 | 12 | class IMDB(Dataset): 13 | """Represents the IMDB movie reviews dataset. 14 | 15 | The dataset contains 50000 supervised, and 50000 unsupervised movie reviews 16 | with positive and negative sentiment ratings. The supervised subset of data 17 | is separated into two equally sized sets, with 12500 instances per class. 18 | 19 | The two flags, `supervised` and `train` define which subset of the data 20 | we're going to load. There are four possible cases: 21 | 22 | +-------+------------+--------+-------+---------+ 23 | | Train | Supervised | Folder | Size | Labels? | 24 | +-------+------------+--------+-------+---------+ 25 | | True | True | train | 25000 | Yes | 26 | | False | True | test | 25000 | Yes | 27 | | True | False | train | 75000 | No | 28 | | False | False | test | 25000 | No | 29 | +-------+------------+--------+-------+---------+ 30 | """ 31 | url = 'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz' 32 | archive_size = 84125825 33 | classes = ('pos', 'neg', 'unsup') 34 | 35 | def __init__(self, root: Path, train: bool=True, supervised: bool=False, 36 | tokenizer=None, vocab=None, make_vocab: Callable=None, 37 | download: bool=True): 38 | """ 39 | Args: 40 | root: Path to the folder with train and tests subfolders. 41 | supervised: If True, then the data from supervised subset is loaded. 42 | train: If True, then the data from training subset is loaded. 43 | vocab: Dataset vocab used to convert tokens into digits. 44 | make_vocab: Callable creating vocab from tokens. Note that this 45 | parameter should be provided in case if `vocab` doesn't present. 46 | 47 | """ 48 | assert vocab or make_vocab, 'Nor vocabulary, not function provided' 49 | 50 | self.root = root 51 | self.train = train 52 | self.supervised = supervised 53 | 54 | subfolder = root / 'aclImdb' / ('train' if train else 'test') 55 | if is_empty(subfolder): 56 | if not download: 57 | raise FileNotFoundError( 58 | 'Required files not found! 
Check if folder with IMDB data exists')
59 | self.download(root)
60 |
61 | if tokenizer is None:
62 | tokenizer = identity
63 |
64 | if supervised:
65 | texts, labels = [], []
66 | for index, label in enumerate(self.classes):
67 | if label == 'unsup':
68 | continue
69 | for filename in (subfolder/label).glob('*.txt'):
70 | texts.append(filename.open('r').read())
71 | labels.append(index)
72 | if train:
73 | self.train_labels = labels
74 | else:
75 | self.test_labels = labels
76 |
77 | else:
78 | texts = []
79 | for label in self.classes:
80 | files_folder = subfolder/label
81 | for filename in files_folder.glob('*.txt'):
82 | texts.append(filename.open('r').read())
83 |
84 | tokens = tokenizer(texts)
85 | if make_vocab:
86 | vocab = make_vocab(tokens)
87 | num_tokens = vocab.numericalize(tokens)
88 |
89 | self.vocab = vocab
90 | if train:
91 | self.train_data = num_tokens
92 | else:
93 | self.test_data = num_tokens
94 |
95 | def __getitem__(self, index):
96 | if self.train and self.supervised:
97 | return self.train_data[index], self.train_labels[index]
98 | elif self.train and not self.supervised:
99 | return self.train_data[index]
100 | elif not self.train and self.supervised:
101 | return self.test_data[index], self.test_labels[index]
102 | else:
103 | return self.test_data[index]
104 |
105 | def __len__(self):
106 | return len(self.train_data if self.train else self.test_data)
107 |
108 | def save(self, path: Path):
109 | with path.open('wb') as file:
110 | pickle.dump(self, file)
111 |
112 | @property
113 | def ids(self):
114 | if self.train:
115 | return self.train_data
116 | return self.test_data
117 |
118 | @property
119 | def vocab_size(self):
120 | return self.vocab.size
121 |
122 | @staticmethod
123 | def load(path: Path):
124 | with path.open('rb') as file:
125 | dataset = pickle.load(file)
126 | return dataset
127 |
128 | @staticmethod
129 | def download(path: Path):
130 | archive = path / 'imdb.tar.gz'
131 |
132 | if not archive.exists():
133 | req = requests.get(IMDB.url)
134 | req.raise_for_status()
135 | assert len(req.content) == IMDB.archive_size, 'Downloading failure!'
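            # A hedged aside: the call above buffers the whole ~84 MB archive in
            # memory; a streaming variant using the same requests API would be:
            #
            #     with requests.get(IMDB.url, stream=True) as r:
            #         r.raise_for_status()
            #         with archive.open('wb') as file:
            #             for chunk in r.iter_content(chunk_size=1 << 20):
            #                 file.write(chunk)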
136 | with archive.open('wb') as file: 137 | file.write(req.content) 138 | 139 | with tarfile.open(archive) as arch: 140 | arch.extractall(path) 141 | 142 | 143 | 144 | def identity(x): 145 | return x 146 | -------------------------------------------------------------------------------- /cnn_dataset.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from multiprocessing import cpu_count 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | import torch 8 | from torch import nn 9 | from torch import optim 10 | from torch.nn import functional as F 11 | from torch.utils.data import DataLoader 12 | from torchvision import transforms, utils 13 | from torchvision.datasets import ImageFolder 14 | 15 | from core.loop import Loop 16 | from core.schedule import CosineAnnealingLR 17 | from core.metrics import accuracy 18 | from core.callbacks import ( 19 | Logger, History, EarlyStopping, CSVLogger, Checkpoint) 20 | 21 | 22 | DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 23 | MEAN = np.array([0.4914, 0.48216, 0.44653]) 24 | STD = np.array([0.24703, 0.24349, 0.26159]) 25 | 26 | 27 | def conv3x3(ni, nf, stride=1, padding=1): 28 | return nn.Conv2d(ni, nf, kernel_size=3, stride=stride, padding=padding, 29 | bias=False) 30 | 31 | 32 | class IdentityBlock(nn.Module): 33 | 34 | def __init__(self, ni, nf=None, stride=1): 35 | super().__init__() 36 | 37 | nf = ni if nf is None else nf 38 | self.conv1 = conv3x3(ni, nf, stride=stride) 39 | self.bn1 = nn.BatchNorm2d(nf) 40 | self.conv2 = conv3x3(nf, nf) 41 | self.bn2 = nn.BatchNorm2d(nf) 42 | 43 | if ni != nf: 44 | self.downsample = nn.Sequential( 45 | nn.Conv2d(ni, nf, kernel_size=1, stride=stride, bias=False), 46 | nn.BatchNorm2d(nf)) 47 | 48 | def forward(self, x): 49 | shortcut = x 50 | 51 | out = self.conv1(x) 52 | out = self.bn1(out) 53 | out = F.leaky_relu(out) 54 | 55 | out = self.conv2(out) 56 | out = self.bn2(out) 57 | 58 | if hasattr(self, 'downsample'): 59 | shortcut = self.downsample(x) 60 | 61 | out += shortcut 62 | out = F.leaky_relu(out) 63 | 64 | return out 65 | 66 | 67 | class CustomResNet(nn.Module): 68 | 69 | def __init__(self): 70 | super().__init__() 71 | self.conv = nn.Conv2d(3, 10, kernel_size=5, stride=1, padding=2) 72 | self.block1 = IdentityBlock(10, 20, stride=2) 73 | self.block2 = IdentityBlock(20, 40, stride=2) 74 | self.block3 = IdentityBlock(40, 80, stride=2) 75 | self.block4 = IdentityBlock(80, 160, stride=2) 76 | self.avgpool = nn.AdaptiveAvgPool2d(1) 77 | self.fc = nn.Linear(160, 10) 78 | self.init() 79 | 80 | def forward(self, x): 81 | x = self.conv(x) 82 | x = self.block1(x) 83 | x = self.block2(x) 84 | x = self.block3(x) 85 | x = self.block4(x) 86 | x = self.avgpool(x) 87 | x = x.view(x.size(0), -1) 88 | x = self.fc(x) 89 | return x 90 | 91 | def init(self): 92 | for m in self.modules(): 93 | if isinstance(m, nn.Conv2d): 94 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 95 | elif isinstance(m, nn.BatchNorm2d): 96 | nn.init.constant_(m.weight, 1) 97 | nn.init.constant_(m.bias, 0) 98 | 99 | 100 | def pairs(xs): 101 | current, *rest = xs 102 | for item in rest: 103 | yield current, item 104 | current = item 105 | 106 | 107 | def imshow(image, title=None): 108 | img = image.numpy().transpose((1, 2, 0)) 109 | img = STD*img + MEAN 110 | img = np.clip(img, 0, 1) 111 | plt.imshow(img) 112 | if title is not None: 113 | plt.title(title) 114 | plt.pause(0.001) 115 | 116 | 117 | 118 | def main(): 119 | root = Path.home() / 
'data' / 'cifar10' 120 | 121 | data_transforms = { 122 | 'train': transforms.Compose([ 123 | transforms.RandomCrop(32, padding=4), 124 | transforms.RandomHorizontalFlip(), 125 | transforms.ToTensor(), 126 | transforms.Normalize(mean=MEAN, std=STD) 127 | ]), 128 | 'valid': transforms.Compose([ 129 | transforms.ToTensor(), 130 | transforms.Normalize(mean=MEAN, std=STD) 131 | ]) 132 | } 133 | 134 | datasets, loaders, dataset_sizes = {}, {}, {} 135 | for name in ('train', 'valid'): 136 | dataset = ImageFolder(root/name, data_transforms[name]) 137 | training = name == 'train' 138 | datasets[name] = dataset 139 | loaders[name] = DataLoader( 140 | dataset=dataset, batch_size=256, 141 | shuffle=training, num_workers=cpu_count()) 142 | dataset_sizes[name] = len(dataset) 143 | 144 | n = len(datasets['train']) 145 | 146 | model = CustomResNet() 147 | optimizer = optim.Adam(model.parameters(), lr=1e-2, weight_decay=1e-5) 148 | schedule = CosineAnnealingLR(optimizer, t_max=n, eta_min=1e-5, cycle_mult=2) 149 | loop = Loop(model, optimizer, schedule, device=DEVICE) 150 | 151 | callbacks = [ 152 | History(), CSVLogger(), Logger(), 153 | EarlyStopping(patience=50), Checkpoint()] 154 | 155 | loop.run( 156 | train_data=loaders['train'], 157 | valid_data=loaders['valid'], 158 | callbacks=callbacks, 159 | loss_fn=F.cross_entropy, 160 | metrics=[accuracy], 161 | epochs=150) 162 | 163 | dataset = datasets['valid'] 164 | loader = DataLoader(dataset=dataset, batch_size=8, shuffle=True) 165 | x, y = next(iter(loader)) 166 | state = torch.load(loop['Checkpoint'].best_model) 167 | model.load_state_dict(state) 168 | predictions = model(x.cuda()) 169 | labels = predictions.argmax(dim=1) 170 | verbose = [dataset.classes[name] for name in labels] 171 | imshow(utils.make_grid(x), title=verbose) 172 | 173 | 174 | if __name__ == '__main__': 175 | main() 176 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/rnn-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from os.path import join, expanduser, exists\n", 10 | "from urllib.error import URLError\n", 11 | "from urllib.request import urlopen" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import torch\n", 30 | "from torch import nn\n", 31 | "from torch import optim\n", 32 | "from torch.nn import functional as F\n", 33 | "from torchtext import vocab, data" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "PATH = expanduser(join('~', 'data', 'fastai', 'nietzsche', 'nietzsche.txt'))" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "def set_random_seed(state=1):\n", 52 | " gens = (np.random.seed, torch.manual_seed, torch.cuda.manual_seed)\n", 53 | " for set_state in gens:\n", 54 | " set_state(state)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "RANDOM_STATE = 1\n", 64 | "set_random_seed(RANDOM_STATE)" 65 | ] 66 
| }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "## Dataset Downloading" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "def download(url, download_path, expected_size):\n", 81 | " if exists(download_path):\n", 82 | " print('The file was already downloaded')\n", 83 | " return\n", 84 | " \n", 85 | " try:\n", 86 | " r = urlopen(url)\n", 87 | " except URLError as e:\n", 88 | " print(f'Cannot download the data. Error: {e}')\n", 89 | " return\n", 90 | " \n", 91 | " if r.status != 200:\n", 92 | " print(f'HTTP Error: {r.status}')\n", 93 | " return\n", 94 | " \n", 95 | " data = r.read()\n", 96 | " if len(data) != expected_size:\n", 97 | " print(f'Invalid downloaded array size: {len(data)}')\n", 98 | " return\n", 99 | " \n", 100 | " text = data.decode(encoding='utf-8')\n", 101 | " with open(download_path, 'w') as file:\n", 102 | " file.write(text)\n", 103 | " \n", 104 | " print(f'Downloaded: {download_path}')" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "URL = 'https://s3.amazonaws.com/text-datasets/nietzsche.txt'" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "download(URL, PATH, 600901)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "def split(path, train_size=0.8):\n", 132 | " with open(path) as file:\n", 133 | " content = file.read()\n", 134 | " n = int(len(content) * train_size)\n", 135 | " return content[:n], content[n:]" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "train_text, valid_text = split(PATH)\n", 145 | "print(len(train_text))\n", 146 | "print(len(valid_text))" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "text = train_text + valid_text\n", 156 | "chars = sorted(list(set(text)))\n", 157 | "vocab_size = len(chars) + 1\n", 158 | "print(f'Vocab size: {vocab_size}')" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "chars.insert(0, '\\0')" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "char_to_index = {c: i for i, c in enumerate(chars)}\n", 177 | "index_to_char = {i: c for i, c in enumerate(chars)}\n", 178 | "indicies = [char_to_index[char] for char in text]" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "## Dataset Preparation" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "cs = 8" 195 | ] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 3", 201 | "language": "python", 202 | "name": "python3" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 3 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | 
"pygments_lexer": "ipython3", 214 | "version": "3.6.4" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 2 219 | } 220 | -------------------------------------------------------------------------------- /ssd/utils.py: -------------------------------------------------------------------------------- 1 | from os.path import exists, isdir 2 | from urllib.request import urlopen 3 | from collections import defaultdict 4 | 5 | import torch 6 | import cv2 as cv 7 | import numpy as np 8 | 9 | 10 | def parse_annotations(arr): 11 | """ 12 | Parses Pascal VOC dataset annotations into format suitable for next 13 | processing steps. 14 | """ 15 | parsed = defaultdict(list) 16 | for annot in arr: 17 | if annot['ignore']: 18 | continue 19 | bbox_raw = annot['bbox'] 20 | bbox_hw = from_voc(bbox_raw) 21 | parsed[annot['image_id']].append((bbox_hw, annot['category_id'])) 22 | return dict(parsed) 23 | 24 | 25 | def from_voc(bbox): 26 | """ 27 | Converts Pascal bounding box from VOC into NumPy format. 28 | 29 | The original bounding boxes are represented as (x, y, w, h) tuples. The 30 | function converts these bounding boxes into (top, left, bottom, right) 31 | tuples, and switches x/y coordinates to make a converted array indexes 32 | consistent with Numpy. 33 | """ 34 | x, y, w, h = bbox 35 | new_box = [y, x, y + h - 1, x + w - 1] 36 | return new_box 37 | 38 | 39 | def to_voc(bbox): 40 | """ 41 | Converts NumPy bounding boxes back into VOC format. 42 | 43 | The function performs an inverse transformation of the transformation 44 | performed with `from_voc` function. 45 | """ 46 | top, left, bottom, right = bbox 47 | new_box = [left, top, right - left + 1, bottom - top + 1] 48 | return new_box 49 | 50 | 51 | def open_image(path): 52 | """ 53 | Opens an image using OpenCV given the file path. 54 | 55 | Args: 56 | path: A local file path or URL of the image. 57 | 58 | Return: 59 | image: The image in RGB format normalized to range between 0.0 - 1.0 60 | 61 | """ 62 | flags = cv.IMREAD_UNCHANGED + cv.IMREAD_ANYDEPTH + cv.IMREAD_ANYCOLOR 63 | is_url = str(path).startswith('http') 64 | if not exists(path) and not is_url: 65 | raise OSError(f'No such file or directory: {path}') 66 | elif isdir(path) and not is_url: 67 | raise OSError(f'Is a directory: {path}') 68 | else: 69 | try: 70 | if is_url: 71 | r = urlopen(str(path)) 72 | arr = np.asarray(bytearray(r.read()), dtype='uint8') 73 | image = cv.imdecode(arr, flags) 74 | else: 75 | image = cv.imread(str(path), flags) 76 | image = image.astype(np.float32)/255 77 | if image is None: 78 | raise OSError(f'File is not recognized by OpenCV: {path}') 79 | except Exception as e: 80 | raise OSError(f'Error handling image at: {path}') from e 81 | return cv.cvtColor(image, cv.COLOR_BGR2RGB) 82 | 83 | 84 | def read_sample(path, boxes, size=None): 85 | """ 86 | Args: 87 | path: A local file path or URL of the image. 88 | boxes: An array with bounding boxes. 89 | size: An optional tuple or integer with the size used to rescale the 90 | read image. The image is rescaled without keeping aspect ratio. 
91 | 92 | """ 93 | image = open_image(path) 94 | old_size = image.shape[:2] 95 | if size is not None: 96 | size = (size, size) if isinstance(size, int) else tuple(size) 97 | image = cv.resize(image, size) 98 | new_size = image.shape[:2] 99 | if old_size != new_size: 100 | old_boxes = np.array(boxes) 101 | new_boxes = np.zeros_like(old_boxes) 102 | for i, box in enumerate(old_boxes.reshape(-1, 4)): 103 | box = resize_box(box, old_size, new_size) 104 | new_boxes[i*4:(i + 1)*4] = box 105 | boxes = new_boxes 106 | return image, boxes 107 | 108 | 109 | def resize_box(box, old_size, new_size): 110 | y1, x1, y2, x2 = box 111 | old_h, old_w = old_size 112 | new_h, new_w = new_size 113 | h_ratio = new_h / float(old_h) 114 | w_ratio = new_w / float(old_w) 115 | new_box = [y1*h_ratio, x1*w_ratio, y2*h_ratio, x2*w_ratio] 116 | return new_box 117 | 118 | 119 | def pad(arr, pad_value=0): 120 | longest = len(max(arr, key=len)) 121 | padded = np.zeros((len(arr), longest), dtype=arr[0].dtype) 122 | for row, vec in enumerate(arr): 123 | n = len(vec) 124 | for i in range(longest): 125 | col = longest - i - 1 126 | padded[row, col] = pad_value if i >= n else vec[n - i - 1] 127 | return padded 128 | 129 | 130 | def valid_box(vec): 131 | return np.count_nonzero(vec) >= 2 132 | 133 | 134 | def t(obj, **kwargs): 135 | return torch.tensor(obj, **kwargs) 136 | 137 | 138 | def to_np(*tensors): 139 | return [tensor.cpu().numpy() for tensor in tensors] 140 | 141 | 142 | def hw2corners(centers, hw): 143 | """Converts an array of rectangles from (cx, cy, height, width) 144 | representations into (top, left, bottom, right) corners representation. 145 | """ 146 | return torch.cat([centers - hw/2, centers + hw/2], dim=1) 147 | 148 | 149 | def jaccard(a, b): 150 | intersection = intersect(a, b) 151 | union = area(a).unsqueeze(1) + area(b).unsqueeze(0) - intersection 152 | return intersection / union 153 | 154 | 155 | def intersect(a, b): 156 | bottom_right = torch.min(a[:, None, 2:], b[None, :, 2:]) 157 | top_left = torch.max(a[:, None, :2], b[None, :, :2]) 158 | inter = torch.clamp((bottom_right - top_left), min=0) 159 | return torch.prod(inter, dim=2) 160 | 161 | 162 | def area(box): 163 | h = box[:, 2] - box[:, 0] 164 | w = box[:, 3] - box[:, 1] 165 | return h * w 166 | 167 | 168 | def make_grid(anchors=4, k=1): 169 | offset = 1/(anchors*2) 170 | points = np.linspace(offset, 1 - offset, anchors) 171 | anchors_x = np.repeat(points, anchors) 172 | anchors_y = np.tile(points, anchors) 173 | centers = np.stack([anchors_x, anchors_y], axis=1) 174 | sizes = np.array([(1/anchors, 1/anchors) for _ in range(anchors*anchors)]) 175 | grid = np.c_[np.tile(centers, (k, 1)), np.tile(sizes, (k, 1))] 176 | return grid -------------------------------------------------------------------------------- /rnn/rnn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from os.path import join, expanduser, exists\n", 10 | "from urllib.error import URLError\n", 11 | "from urllib.request import urlopen" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import torch\n", 30 | "from torch import nn\n", 31 | "from torch import optim\n", 32 
| "from torch.nn import functional as F\n", 33 | "from torchtext import vocab, data" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 4, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "PATH = expanduser(join('~', 'data', 'fastai', 'nietzsche', 'nietzsche.txt'))" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 5, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "def set_random_seed(state=1):\n", 52 | " gens = (np.random.seed, torch.manual_seed, torch.cuda.manual_seed)\n", 53 | " for set_state in gens:\n", 54 | " set_state(state)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 6, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "RANDOM_STATE = 1\n", 64 | "set_random_seed(RANDOM_STATE)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "## Dataset Downloading" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 7, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "def download(url, download_path, expected_size):\n", 81 | " if exists(download_path):\n", 82 | " print('The file was already downloaded')\n", 83 | " return\n", 84 | " \n", 85 | " try:\n", 86 | " r = urlopen(url)\n", 87 | " except URLError as e:\n", 88 | " print(f'Cannot download the data. Error: {e}')\n", 89 | " return\n", 90 | " \n", 91 | " if r.status != 200:\n", 92 | " print(f'HTTP Error: {r.status}')\n", 93 | " return\n", 94 | " \n", 95 | " data = r.read()\n", 96 | " if len(data) != expected_size:\n", 97 | " print(f'Invalid downloaded array size: {len(data)}')\n", 98 | " return\n", 99 | " \n", 100 | " text = data.decode(encoding='utf-8')\n", 101 | " with open(download_path, 'w') as file:\n", 102 | " file.write(text)\n", 103 | " \n", 104 | " print(f'Downloaded: {download_path}')" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 8, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "URL = 'https://s3.amazonaws.com/text-datasets/nietzsche.txt'" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 9, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "name": "stdout", 123 | "output_type": "stream", 124 | "text": [ 125 | "The file was already downloaded\n" 126 | ] 127 | } 128 | ], 129 | "source": [ 130 | "download(URL, PATH, 600901)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 10, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "def split(path, train_size=0.8):\n", 140 | " with open(path) as file:\n", 141 | " content = file.read()\n", 142 | " n = int(len(content) * train_size)\n", 143 | " return content[:n], content[n:]" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 11, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "480714\n", 156 | "120179\n" 157 | ] 158 | } 159 | ], 160 | "source": [ 161 | "train_text, valid_text = split(PATH)\n", 162 | "print(len(train_text))\n", 163 | "print(len(valid_text))" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 12, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "Vocab size: 85\n" 176 | ] 177 | } 178 | ], 179 | "source": [ 180 | "text = train_text + valid_text\n", 181 | "chars = sorted(list(set(text)))\n", 182 | "vocab_size = len(chars) + 1\n", 183 | "print(f'Vocab size: 
{vocab_size}')"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": 13,
189 | "metadata": {},
190 | "outputs": [],
191 | "source": [
192 | "chars.insert(0, '\\0')"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": 14,
198 | "metadata": {},
199 | "outputs": [],
200 | "source": [
201 | "char_to_index = {c: i for i, c in enumerate(chars)}\n",
202 | "index_to_char = {i: c for i, c in enumerate(chars)}\n",
203 | "train_indicies = [char_to_index[char] for char in train_text]\n",
204 | "valid_indicies = [char_to_index[char] for char in valid_text]"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {},
210 | "source": [
211 | "## Dataset Preparation"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": null,
217 | "metadata": {},
218 | "outputs": [],
219 | "source": []
220 | }
221 | ],
222 | "metadata": {
223 | "kernelspec": {
224 | "display_name": "Python 3",
225 | "language": "python",
226 | "name": "python3"
227 | },
228 | "language_info": {
229 | "codemirror_mode": {
230 | "name": "ipython",
231 | "version": 3
232 | },
233 | "file_extension": ".py",
234 | "mimetype": "text/x-python",
235 | "name": "python",
236 | "nbconvert_exporter": "python",
237 | "pygments_lexer": "ipython3",
238 | "version": "3.6.4"
239 | }
240 | },
241 | "nbformat": 4,
242 | "nbformat_minor": 2
243 | }
244 | -------------------------------------------------------------------------------- /ssd.py: -------------------------------------------------------------------------------- 1 | import os
2 | import json
3 | import urllib.request
4 | from pathlib import Path
5 | from collections import defaultdict
6 |
7 | import cv2
8 | import numpy as np
9 | import matplotlib.pyplot as plt
10 | from PIL import ImageDraw, ImageFont
11 | from matplotlib import patches, patheffects
12 |
13 | import torch
14 | from torch import optim
15 |
16 | from fastai.conv_learner import ConvLearner, resnet34
17 | from fastai.conv_learner import tfms_from_model, CropType
18 | from fastai.dataset import ImageClassifierData, to_np
19 |
20 |
21 | PATH = Path.home()/'data'/'voc2007'
22 | IMAGES, ANNOTATIONS, CATEGORIES = 'images', 'annotations', 'categories'
23 | FILE_NAME, ID, BBOX = 'file_name', 'id', 'bbox'
24 | IMG_ID, CAT_ID = 'image_id', 'category_id'
25 |
26 |
27 | def parse_annotations(arr):
28 | parsed = defaultdict(list)
29 | for annot in arr:
30 | if annot['ignore']:
31 | continue
32 | bbox_raw = annot['bbox']
33 | bbox_hw = from_voc(bbox_raw)
34 | parsed[annot['image_id']].append((bbox_hw, annot['category_id']))
35 | return dict(parsed)
36 |
37 |
38 | def from_voc(bbox):
39 | """
40 | Converts Pascal bounding box from (x, y, w, h) into
41 | (top, left, bottom, right) format, and switches x/y coordinates to
42 | make the converted array's indexes consistent with Numpy.
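    E.g. (illustrative): the VOC box (x=155, y=96, w=196, h=174) becomes
    [96, 155, 269, 350] in (top, left, bottom, right) order.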
43 | """ 44 | x, y, w, h = bbox 45 | new_box = [y, x, y + h - 1, x + w - 1] 46 | return np.array(new_box) 47 | 48 | 49 | def to_voc(bbox): 50 | top, left, bottom, right = bbox 51 | new_box = [left, top, right - left + 1, bottom - top + 1] 52 | return np.array(new_box) 53 | 54 | 55 | def show_img(im, figsize=None, ax=None): 56 | if not ax: 57 | fig, ax = plt.subplots(figsize=figsize) 58 | ax.imshow(im) 59 | ax.set_xticks(np.linspace(0, 224, 8)) 60 | ax.set_yticks(np.linspace(0, 224, 8)) 61 | ax.grid() 62 | ax.set_xticklabels([]) 63 | ax.set_yticklabels([]) 64 | return ax 65 | 66 | 67 | def draw_outline(obj, lw): 68 | effects = [ 69 | patheffects.Stroke(linewidth=lw, foreground='black'), 70 | patheffects.Normal()] 71 | obj.set_path_effects(effects) 72 | 73 | 74 | def draw_rect(ax, bbox, color='white'): 75 | rect = patches.Rectangle( 76 | bbox[:2], *bbox[-2:], 77 | fill=False, edgecolor=color, lw=2) 78 | patch = ax.add_patch(rect) 79 | draw_outline(patch, 4) 80 | 81 | 82 | def draw_text(ax, xy, text, size=14, color='white'): 83 | text = ax.text( 84 | *xy, text, 85 | va='top', color=color, 86 | fontsize=size, weight='bold') 87 | draw_outline(text, 1) 88 | 89 | 90 | def draw_image(img, ann, categories): 91 | ax = show_img(img, figsize=(8, 6)) 92 | for bbox, cat in ann: 93 | bbox = to_voc(bbox) 94 | draw_rect(ax, bbox) 95 | draw_text(ax, bbox[:2], categories[cat], size=16) 96 | 97 | 98 | class Drawer: 99 | 100 | def __init__(self, root, annotations, files, categories): 101 | self.root = root 102 | self.annotations = annotations 103 | self.files = files 104 | self.categories = categories 105 | 106 | def draw(self, index): 107 | annotation = self.annotations[index] 108 | image = open_image(self.root / self.files[index]) 109 | draw_image(image, annotation, self.categories) 110 | plt.pause(0.001) 111 | 112 | 113 | def open_image(fn): 114 | """ Opens an image using OpenCV given the file path. 
115 |
116 | Arguments:
117 | fn: the file path of the image
118 |
119 | Returns:
120 | The image in RGB format as a numpy array of floats normalized
121 | to the range 0.0 - 1.0
122 |
123 | """
124 | flags = cv2.IMREAD_UNCHANGED+cv2.IMREAD_ANYDEPTH+cv2.IMREAD_ANYCOLOR
125 | if not os.path.exists(fn) and not str(fn).startswith("http"):
126 | raise OSError('No such file or directory: {}'.format(fn))
127 | elif os.path.isdir(fn) and not str(fn).startswith("http"):
128 | raise OSError('Is a directory: {}'.format(fn))
129 | else:
130 | try:
131 | if str(fn).startswith("http"):
132 | req = urllib.request.urlopen(str(fn))
133 | image = np.asarray(bytearray(req.read()), dtype="uint8")
134 | im = cv2.imdecode(image, flags).astype(np.float32)/255
135 | else:
136 | im = cv2.imread(str(fn), flags).astype(np.float32)/255
137 | if im is None:
138 | raise OSError(f'File not recognized by opencv: {fn}')
139 | return cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
140 | except Exception as e:
141 | raise OSError('Error handling image at: {}'.format(fn)) from e
142 |
143 |
144 | def main():
145 | with open(PATH/'pascal_train2007.json') as file:
146 | train_json = json.load(file)
147 |
148 | categories = {obj[ID]: obj['name'] for obj in train_json[CATEGORIES]}
149 | train_files = {obj[ID]: obj[FILE_NAME] for obj in train_json[IMAGES]}
150 | train_indexes = [obj[ID] for obj in train_json[IMAGES]]
151 | train_annotations = parse_annotations(train_json[ANNOTATIONS])
152 |
153 | JPEGS = 'VOCdevkit/VOC2007/JPEGImages'
154 | drawer = Drawer(PATH / JPEGS, train_annotations, train_files, categories)
155 | # drawer.draw(12)
156 |
157 | factory = resnet34
158 | batch_size = 64
159 | CSV = PATH / 'tmp' / 'mc.csv'
160 |
161 | transforms = tfms_from_model(factory, 224, crop_type=CropType.NO)
162 | data = ImageClassifierData.from_csv(
163 | PATH, JPEGS, CSV, tfms=transforms, bs=batch_size)
164 |
165 | learner = ConvLearner.pretrained(factory, data)
166 | learner.opt_fn = optim.Adam
167 |
168 | lr = 2e-2
169 | learner.fit(lr, 1, cycle_len=3, use_clr=(32, 5))
170 |
171 | # lrs = np.array([lr/100, lr/10, lr])
172 | # learner.freeze_to(-2)
173 | # learner.fit(lrs/10, 1, cycle_len=5, use_clr=(32, 5))
174 | #
175 | y = learner.predict()
176 | x, _ = next(iter(data.val_dl))
177 | x = to_np(x)
178 | images = data.val_ds.denorm(x)
179 |
180 | fig, axes = plt.subplots(3, 4, figsize=(12, 12))
181 | for i, ax in enumerate(axes.flat):
182 | image = images[i]
183 | [non_zero] = np.nonzero(y[i] > 0.4)
184 | classes = '\n'.join([data.classes[index] for index in non_zero])
185 | ax = show_img(image, ax=ax)
186 | draw_text(ax, (0, 0), classes)
187 | plt.tight_layout()
188 | plt.pause(0.001)
189 |
190 |
191 | if __name__ == '__main__':
192 | main()
193 | -------------------------------------------------------------------------------- /cnn.py: -------------------------------------------------------------------------------- 1 | import cv2 as cv
2 | import numpy as np
3 | import torch
4 | from torch import nn
5 | from torch import optim
6 | from torch.nn import functional as F
7 | from torchvision import transforms
8 |
9 | from core.loop import Loop
10 | from core.callbacks import Logger
11 | from core.schedule import CosineAnnealingLR
12 | from core.utils import LabelledImagesDataset
13 |
14 |
15 | class ConvNet(nn.Module):
16 |
17 | def __init__(self, layers, outputs):
18 | super().__init__()
19 | n = len(layers) - 1
20 | self.layers = nn.ModuleList([
21 | nn.Conv2d(layers[i], layers[i + 1], kernel_size=3, stride=2)
22 | for i in range(n)
23 | ])
24 | 
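        # Shape sketch (editor's note): for a 32x32 input, each stride-2,
        # kernel-3, unpadded conv maps 32 -> 15 -> 7 -> 3, and the adaptive
        # max-pool below collapses whatever spatial size is left to 1x1, so
        # the linear head is independent of the input resolution.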
self.pool = nn.AdaptiveMaxPool2d(1) 25 | self.out = nn.Linear(layers[-1], outputs) 26 | 27 | def forward(self, x): 28 | for l in self.layers: 29 | x = F.relu(l(x)) 30 | x = self.pool(x) 31 | x = torch.squeeze(x) 32 | x = self.out(x) 33 | return F.log_softmax(x, dim=-1) 34 | 35 | 36 | def conv3x3(ni, nf, stride=1, padding=1): 37 | return nn.Conv2d(ni, nf, kernel_size=3, stride=stride, padding=padding, 38 | bias=False) 39 | 40 | 41 | class IdentityBlock(nn.Module): 42 | 43 | def __init__(self, ni, nf=None, stride=1): 44 | super().__init__() 45 | nf = ni if nf is None else nf 46 | self.conv1 = conv3x3(ni, nf, stride=stride) 47 | self.bn1 = nn.BatchNorm2d(nf) 48 | self.conv2 = conv3x3(nf, nf) 49 | self.bn2 = nn.BatchNorm2d(nf) 50 | if ni != nf: 51 | self.downsample = nn.Sequential( 52 | nn.Conv2d(ni, nf, kernel_size=1, stride=stride, bias=False), 53 | nn.BatchNorm2d(nf) 54 | ) 55 | 56 | def forward(self, x): 57 | shortcut = x 58 | 59 | out = self.conv1(x) 60 | out = self.bn1(out) 61 | out = F.leaky_relu(out) 62 | 63 | out = self.conv2(out) 64 | out = self.bn2(out) 65 | 66 | if hasattr(self, 'downsample'): 67 | shortcut = self.downsample(shortcut) 68 | 69 | out += shortcut 70 | out = F.leaky_relu(out) 71 | return out 72 | 73 | 74 | class Downsample(nn.Module): 75 | 76 | def __init__(self, ni, nf, stride): 77 | super().__init__() 78 | self.conv = nn.Conv2d(ni, nf, kernel_size=1, stride=stride, 79 | bias=False) 80 | self.bn = nn.BatchNorm2d(nf) 81 | 82 | def forward(self, x): 83 | return self.bn(self.conv(x)) 84 | 85 | 86 | class ResNet(nn.Module): 87 | 88 | def __init__(self): 89 | super().__init__() 90 | self.conv = nn.Conv2d(3, 16, kernel_size=5, stride=1, padding=2) 91 | 92 | self.block1 = nn.Sequential( 93 | IdentityBlock(16), 94 | IdentityBlock(16), 95 | IdentityBlock(16) 96 | ) 97 | 98 | self.block2 = nn.Sequential( 99 | IdentityBlock(16, 32, stride=2), 100 | IdentityBlock(32), 101 | IdentityBlock(32), 102 | IdentityBlock(32) 103 | ) 104 | 105 | self.block3 = nn.Sequential( 106 | IdentityBlock(32, 64, stride=2), 107 | IdentityBlock(64), 108 | IdentityBlock(64), 109 | IdentityBlock(64), 110 | IdentityBlock(64) 111 | ) 112 | 113 | self.fc = nn.Linear(64, 10) 114 | 115 | def forward(self, x): 116 | x = self.conv(x) 117 | x = self.block1(x) 118 | x = self.block2(x) 119 | x = self.block3(x) 120 | x = F.adaptive_avg_pool2d(x, 1) 121 | x = x.view(x.size(0), -1) 122 | x = self.fc(x) 123 | return F.log_softmax(x, dim=-1) 124 | 125 | 126 | class ConvLayer(nn.Module): 127 | 128 | def __init__(self, ni, nf, stride=2, kernel_size=3): 129 | super().__init__() 130 | self.conv = nn.Conv2d( 131 | in_channels=ni, out_channels=nf, 132 | kernel_size=kernel_size, stride=stride, 133 | bias=False, padding=1) 134 | self.bn = nn.BatchNorm2d(nf) 135 | 136 | def forward(self, x): 137 | return F.leaky_relu(self.bn(self.conv(x))) 138 | 139 | 140 | class ResNetLayer(ConvLayer): 141 | 142 | def forward(self, x): 143 | return x + super().forward(x) 144 | 145 | 146 | class SimpleResNet(nn.Module): 147 | 148 | def __init__(self, layers, num_of_classes): 149 | super().__init__() 150 | self.conv = nn.Conv2d(3, 10, kernel_size=5, stride=1, padding=2) 151 | self.layers1 = nn.ModuleList([ 152 | ConvLayer(x, y) for (x, y) in pairs(layers) 153 | ]) 154 | self.layers2 = nn.ModuleList([ 155 | ResNetLayer(x, x, 1) for x in layers[1:] 156 | ]) 157 | self.layers3 = nn.ModuleList([ 158 | ResNetLayer(x, x, 1) for x in layers[1:] 159 | ]) 160 | self.fc = nn.Linear(layers[-1], num_of_classes) 161 | 162 | def forward(self, x): 163 | x = 
self.conv(x) 164 | for l1, l2, l3 in zip(self.layers1, self.layers2, self.layers3): 165 | x = l3(l2(l1(x))) 166 | x = F.adaptive_max_pool2d(x, 1) 167 | x = x.view(x.size(0), -1) 168 | x = self.fc(x) 169 | return F.log_softmax(x, dim=-1) 170 | 171 | 172 | def pairs(xs): 173 | current, *rest = xs 174 | for item in rest: 175 | yield current, item 176 | current = item 177 | 178 | 179 | def imread(filename): 180 | img = cv.imread(str(filename)) 181 | converted = cv.cvtColor(img, cv.COLOR_BGR2RGB) 182 | return converted.transpose(2, 0, 1) 183 | 184 | 185 | def as_tensor(x, y): 186 | return torch.FloatTensor(x).cuda(), torch.LongTensor(y).cuda() 187 | 188 | 189 | def to_xy(x, y): 190 | images = np.stack([imread(filename) for filename in x]) 191 | classes = np.argmax(y, axis=1) 192 | return images, classes 193 | 194 | 195 | def main(): 196 | path = '/home/ck/data/cifar10/train' 197 | 198 | dataset = LabelledImagesDataset( 199 | labels_from='folders', root=path, batch_size=2048, one_hot=False, 200 | transforms=[to_xy, as_tensor]) 201 | 202 | train_data = iter(dataset) 203 | n = len(train_data) 204 | 205 | model = SimpleResNet([10, 20, 40, 80, 160], 10).cuda() 206 | optimizer = optim.Adam(model.parameters(), lr=1e-2) 207 | schedule = CosineAnnealingLR(optimizer, t_max=n/2, eta_min=1e-5) 208 | loop = Loop(model, optimizer, schedule) 209 | 210 | loop.run(train_data=train_data, callbacks=[Logger()]) 211 | 212 | 213 | if __name__ == '__main__': 214 | main() 215 | -------------------------------------------------------------------------------- /gan.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | from pathlib import Path 5 | from multiprocessing import cpu_count 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.parallel 9 | import torch.backends.cudnn as cudnn 10 | import torch.optim as optim 11 | import torch.utils.data 12 | import torchvision.datasets as dset 13 | import torchvision.transforms as transforms 14 | import torchvision.utils as vutils 15 | import numpy as np 16 | import matplotlib.pyplot as plt 17 | import matplotlib.animation as animation 18 | from IPython.display import HTML 19 | 20 | 21 | RANDOM_STATE = 1 22 | 23 | random.seed(RANDOM_STATE) 24 | torch.manual_seed(RANDOM_STATE) 25 | data_root = Path.home()/'data'/'celeba' 26 | workers = cpu_count() 27 | 28 | 29 | def main(): 30 | batch_size = 128 31 | image_size = 64 32 | nz = 100 33 | nc = 3 34 | n_gen_feat = 64 35 | n_dis_feat = 64 36 | epochs = 5 37 | lr = 1e-5 38 | beta1 = 0.5 39 | device = torch.device('cuda:0') 40 | 41 | dataset = dset.ImageFolder( 42 | root=str(data_root), 43 | transform=transforms.Compose([ 44 | transforms.Resize(image_size), 45 | transforms.CenterCrop(image_size), 46 | transforms.ToTensor(), 47 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 48 | ])) 49 | 50 | data_loader = torch.utils.data.DataLoader( 51 | dataset, batch_size=batch_size, shuffle=True, num_workers=workers) 52 | 53 | # real_batch = next(iter(data_loader)) 54 | # plt.figure(figsize=(8, 8)) 55 | # plt.axis('off') 56 | # plt.imshow( 57 | # np.transpose( 58 | # vutils.make_grid( 59 | # real_batch[0].to(device)[:64], 60 | # padding=2, normalize=True).cpu(), 61 | # (1, 2, 0))) 62 | # plt.pause(0.0001) 63 | 64 | crit = nn.BCELoss() 65 | fixed_noise = torch.randn(64, nz, 1, 1, device=device) 66 | real_label = 1 67 | fake_label = 0 68 | 69 | net_g = Generator(nz, n_gen_feat, nc).to(device) 70 | net_d = Discriminator(n_dis_feat, nc).to(device) 
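
    # The updates below follow the standard DCGAN recipe: the discriminator is
    # trained on a real batch (labels filled with real_label) and on a detached
    # fake batch (labels filled with fake_label), and the generator is then
    # updated to push the discriminator's output on the same fake batch towards
    # the real label. Detaching the fake batch in the discriminator step keeps
    # the generator's graph out of the discriminator's backward pass.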
71 | 72 | opt_g = optim.Adam(net_g.parameters(), lr=lr, betas=(beta1, 0.999)) 73 | opt_d = optim.Adam(net_d.parameters(), lr=lr, betas=(beta1, 0.999)) 74 | 75 | g_losses, d_losses = [], [] 76 | img_list = [] 77 | iters = 0 78 | 79 | print('Starting training loop...') 80 | for epoch in range(epochs): 81 | for i, data in enumerate(data_loader, 0): 82 | net_d.zero_grad() 83 | real = data[0].to(device) 84 | b_size = real.size(0) 85 | label = torch.full((b_size,), real_label, device=device) 86 | output = net_d(real).view(-1) 87 | err_d_real = crit(output, label) 88 | err_d_real.backward() 89 | d_x = output.mean().item() 90 | 91 | noise = torch.randn(b_size, nz, 1, 1, device=device) 92 | fake = net_g(noise) 93 | label.fill_(fake_label) 94 | output = net_d(fake.detach()).view(-1) 95 | err_d_fake = crit(output, label) 96 | err_d_fake.backward() 97 | d_g_z1 = output.mean().item() 98 | err_d = err_d_real + err_d_fake 99 | opt_d.step() 100 | 101 | net_g.zero_grad() 102 | label.fill_(real_label) 103 | output = net_d(fake).view(-1) 104 | err_g = crit(output, label) 105 | err_g.backward() 106 | d_g_z2 = output.mean().item() 107 | opt_g.step() 108 | 109 | if i % 50 == 0: 110 | print('[%d/%d][%d/%d]\t' 111 | 'Loss_D: %.4f\t' 112 | 'Loss_G: %.4f\t' 113 | 'D(x): %.4f\t' 114 | 'D(G(z)): %.4f / %.4f' 115 | % (epoch, epochs, i, len(data_loader), 116 | err_d.item(), err_g.item(), d_x, d_g_z1, d_g_z2)) 117 | 118 | g_losses.append(err_g.item()) 119 | d_losses.append(err_d.item()) 120 | 121 | if (iters % 500 == 0) or ( 122 | (epoch == epochs - 1) and (i == len(data_loader) - 1)): 123 | with torch.no_grad(): 124 | fake = net_g(fixed_noise).detach().cpu() 125 | img_list.append(vutils.make_grid( 126 | fake, padding=2, normalize=True)) 127 | 128 | iters += 1 129 | 130 | 131 | class GenBlock(nn.Module): 132 | 133 | def __init__(self, ni, no, kernel, stride, pad, bias=False): 134 | super().__init__() 135 | self.conv = nn.ConvTranspose2d(ni, no, kernel, stride, pad, bias=bias) 136 | self.bn = nn.BatchNorm2d(no) 137 | self.relu = nn.ReLU(True) 138 | 139 | def forward(self, x): 140 | return self.relu(self.bn(self.conv(x))) 141 | 142 | 143 | class Generator(nn.Module): 144 | 145 | def __init__(self, nz, nf, nc): 146 | super().__init__() 147 | self.main = nn.Sequential( 148 | GenBlock(nz, nf * 8, 4, 1, 0), 149 | GenBlock(nf * 8, nf * 4, 4, 2, 1), 150 | GenBlock(nf * 4, nf * 2, 4, 2, 1), 151 | GenBlock(nf * 2, nf, 4, 2, 1), 152 | nn.ConvTranspose2d(nf, nc, 4, 2, 1, bias=False), 153 | nn.Tanh() 154 | ) 155 | self.apply(init_weights) 156 | 157 | def forward(self, x): 158 | return self.main(x) 159 | 160 | 161 | class ConvBlock(nn.Module): 162 | 163 | def __init__(self, ni, no, kernel, stride, pad, alpha=0.2, bias=False): 164 | super().__init__() 165 | self.conv = nn.Conv2d(ni, no, kernel, stride, pad, bias=bias) 166 | self.bn = nn.BatchNorm2d(no) 167 | self.leaky_relu = nn.LeakyReLU(alpha, True) 168 | 169 | def forward(self, x): 170 | return self.leaky_relu(self.bn(self.conv(x))) 171 | 172 | 173 | class Discriminator(nn.Module): 174 | 175 | def __init__(self, nf, nc): 176 | super().__init__() 177 | self.main = nn.Sequential( 178 | nn.Conv2d(nc, nf, 4, 2, 1, bias=False), 179 | nn.LeakyReLU(0.2, inplace=True), 180 | ConvBlock(nf, nf * 2, 4, 2, 1), 181 | ConvBlock(nf * 2, nf * 4, 4, 2, 1), 182 | ConvBlock(nf * 4, nf * 8, 4, 2, 1), 183 | nn.Conv2d(nf * 8, 1, 4, 1, 0), 184 | nn.Sigmoid() 185 | ) 186 | self.apply(init_weights) 187 | 188 | def forward(self, x): 189 | return self.main(x) 190 | 191 | 192 | def init_weights(m): 193 | 
class_name = m.__class__.__name__
194 |     if class_name in ('ConvBlock', 'GenBlock'):
195 |         for child in m.children():
196 |             init_weights(child)
197 |     elif class_name.find('Conv') != -1:
198 |         nn.init.normal_(m.weight.data, 0.0, 0.02)
199 |     elif class_name.find('BatchNorm') != -1:
200 |         nn.init.normal_(m.weight.data, 1.0, 0.02)
201 |         nn.init.constant_(m.bias.data, 0)
202 | 
203 | 
204 | if __name__ == '__main__':
205 |     main()
206 | 
207 | 
--------------------------------------------------------------------------------
/core/loop.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | 
3 | import torch
4 | from torch.nn import functional as F
5 | 
6 | from .callbacks import CallbackGroup
7 | 
8 | 
9 | class Loop:
10 |     """
11 |     Simple training loop implementation.
12 | 
13 |     The loop contains two phases: training and validation. Each phase is
14 |     computed on a separate dataset and tracks its own parameters, such as
15 |     the average loss and batch number.
16 | 
17 |     Parameters:
18 |         model: The model to be optimized.
19 | 
20 |         alpha: The weight used to exponentially smooth the batch metrics,
21 |             interpolating between the previous average and the new value:
22 | 
23 |                 new_loss = old_loss*alpha + (1 - alpha)*new_loss
24 | 
25 |     """
26 |     def __init__(self, model, optimizer, schedule, alpha: float=0.98,
27 |                  move_to_device=True, device=None):
28 | 
29 |         if move_to_device:
30 |             device = torch.device(device or 'cpu')
31 |             model = model.to(device)
32 |             self.device = device
33 | 
34 |         self.model = model
35 |         self.optimizer = optimizer
36 |         self.schedule = schedule
37 |         self.alpha = alpha
38 |         self.move_to_device = move_to_device
39 |         self.stop = False
40 |         self.callbacks = None
41 |         self.stepper = None
42 | 
43 |     def run(self, train_data, valid_data=None, loss_fn=F.nll_loss,
44 |             epochs: int=100, callbacks=None, metrics=None):
45 | 
46 |         phases = [Phase(name='train', dataset=train_data)]
47 |         if valid_data is not None:
48 |             phases.append(Phase(name='valid', dataset=valid_data))
49 | 
50 |         cb = CallbackGroup(callbacks)
51 |         cb.set_loop(self)
52 |         cb.training_start()
53 |         self.callbacks = cb
54 |         self.stepper = self.make_stepper(loss_fn, metrics)
55 | 
56 |         for epoch in range(epochs):
57 |             if self.stop:
58 |                 break
59 |             metrics = {}
60 |             cb.epoch_start(epoch)
61 |             for phase in phases:
62 |                 # the 'train' phase updates the weights, 'valid' only evaluates
63 |                 is_training = phase.name == 'train'
64 |                 for batch in phase.dataset:
65 |                     x, y = self._place_and_unwrap_if_needed(batch)
66 |                     phase.batch_num += 1
67 |                     cb.batch_start(epoch, phase)
68 |                     batch_metrics = self.stepper.step(x, y, is_training)
69 |                     self.update_metrics(phase, batch_metrics)
70 |                     cb.batch_end(epoch, phase)
71 |                 metrics.update({
72 |                     f'{phase.name}_{k}': v
73 |                     for k, v in phase.metrics.items()})
74 |             cb.epoch_end(epoch, metrics)
75 |         cb.training_end()
76 | 
77 |     def make_stepper(self, loss_fn, metrics=None, stepper=None):
78 |         stepper_cls = stepper or Stepper
79 |         inst = stepper_cls(
80 |             self.model, self.optimizer, self.schedule, loss_fn, metrics)
81 |         return inst
82 | 
83 |     def save_model(self, path):
84 |         self.stepper.save_model(path)
85 | 
86 |     def update_metrics(self, phase, batch_metrics):
87 |         a = self.alpha
88 |         updated = {}
89 |         for name, new_value in batch_metrics.items():
90 |             old_value = phase.rolling_metrics[name]
91 |             avg_value = a*old_value + (1 - a)*new_value
92 |             debias_value = avg_value/(1 - a**phase.batch_num)
93 |             updated[name] = debias_value
94 |             phase.rolling_metrics[name] = avg_value
95 |         phase.metrics = updated
96 | 
97 |
@property
98 |     def lr_schedule(self):
99 |         return self.stepper.learning_rates
100 | 
101 |     def _place_and_unwrap_if_needed(self, batch):
102 |         x, *y = batch
103 |         if self.move_to_device:
104 |             x = x.to(self.device)
105 |             y = [tensor.to(self.device) for tensor in y]
106 |         # a batch may carry one or several target tensors; unwrap a
107 |         # singleton so the loss function receives a tensor, not a list
108 |         if len(y) == 1:
109 |             [y] = y
110 |         return x, y
111 | 
112 |     def __getitem__(self, item):
113 |         return self.callbacks[item]
114 | 
115 | 
116 | class Phase:
117 |     """
118 |     Model training loop phase.
119 | 
120 |     Each training loop iteration is separated into (at least) two phases:
121 |     training and validation. The instances of this class track the metrics
122 |     and counters related to the specific phase, and keep a reference to the
123 |     subset of data used during that phase.
124 |     """
125 |     def __init__(self, name: str, dataset):
126 |         self.name = name
127 |         self.dataset = dataset
128 |         self.batch_num = 0
129 |         self.rolling_metrics = defaultdict(lambda: 0)
130 |         self.metrics = None
131 | 
132 |     def __repr__(self):
133 |         if self.metrics is None:
134 |             return f'<Phase: {self.name}>'
135 |         metrics = ', '.join([
136 |             f'{key}={value:2.4f}'
137 |             for key, value in self.metrics.items()])
138 |         return f'<Phase: {self.name} ({metrics})>'
139 | 
140 | 
141 | class Stepper:
142 |     """
143 |     A thin wrapper encapsulating the model, its optimizer, a scheduler, and a
144 |     loss function into a single object.
145 | 
146 |     The stepper instance is invoked during each training iteration and returns
147 |     the metrics (including the loss) computed on the batch.
148 |     """
149 |     def __init__(self, model, optimizer, schedule, loss, metrics=None):
150 |         if schedule.last_epoch == -1:
151 |             schedule.step()
152 |         self.model = model
153 |         self.optimizer = optimizer
154 |         self.schedule = schedule
155 |         self.loss = loss
156 |         self.metrics = metrics
157 |         self.learning_rates = []
158 | 
159 |     def step(self, x, y, train: bool=True):
160 |         """
161 |         Performs a single training step.
162 | 
163 |         Args:
164 |             x: Features tensor.
165 |             y: Target tensor.
166 |             train: If False, then the gradient is not computed, and the model's
167 |                 parameters are not updated.
168 | 
169 |         Returns:
170 |             metrics: A dict with the batch loss and any additional metrics.
171 | 
172 |         """
173 |         metrics = {}
174 |         self.model.train(train)
175 | 
176 |         with torch.set_grad_enabled(train):
177 |             out = self.model(x)
178 |             loss = self.loss(out, y)
179 |             metrics['loss'] = loss.item()
180 | 
181 |             if self.metrics is not None:
182 |                 for metric in self.metrics:
183 |                     metrics[metric.__name__] = metric(out.cpu(), y.cpu())
184 | 
185 |             if train:
186 |                 self.optimizer.zero_grad()
187 |                 loss.backward()
188 |                 self.optimizer.step()
189 |                 self.schedule.step()
190 |                 lrs = self.schedule.get_lr()
191 |                 self.learning_rates.append(lrs)
192 | 
193 |         return metrics
194 | 
195 |     def save_model(self, path: str):
196 |         """
197 |         Saves model state into file.
198 |         """
199 |         torch.save(self.model.state_dict(), path)
200 | 
--------------------------------------------------------------------------------
/core/callbacks.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from os.path import join
4 | 
5 | 
6 | class Callback:
7 |     """
8 |     Base class for all training loop callbacks.
9 | 
10 |     The callback is a class that has a set of methods invoked within training
11 |     loop iterations. The class can adjust the model's properties, save state,
12 |     log output, or perform any other tuning on a periodic basis.
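
    A minimal custom callback (a sketch; `epoch_end` receives the metrics
    dictionary assembled by the loop, with keys like 'train_loss'):

        class PrintLoss(Callback):
            def epoch_end(self, epoch, metrics):
                print(epoch, metrics.get('train_loss'))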
13 | """ 14 | def training_start(self): 15 | pass 16 | 17 | def training_end(self): 18 | pass 19 | 20 | def epoch_start(self, epoch): 21 | pass 22 | 23 | def epoch_end(self, epoch, metrics): 24 | pass 25 | 26 | def batch_start(self, epoch, phase): 27 | pass 28 | 29 | def batch_end(self, epoch, phase): 30 | pass 31 | 32 | 33 | class CallbackGroup(Callback): 34 | """ 35 | Wraps a collection of callbacks into single instance which delegates 36 | appropriate methods calls to the elements of collection. 37 | """ 38 | def __init__(self, callbacks=None): 39 | callbacks = callbacks or [] 40 | self.callbacks = callbacks 41 | self._callbacks = {type(cb).__name__: cb for cb in self.callbacks} 42 | 43 | def training_start(self): 44 | for cb in self.callbacks: cb.training_start() 45 | 46 | def training_end(self): 47 | for cb in self.callbacks: cb.training_end() 48 | 49 | def epoch_start(self, epoch): 50 | for cb in self.callbacks: cb.epoch_start(epoch) 51 | 52 | def epoch_end(self, epoch, metrics): 53 | for cb in self.callbacks: cb.epoch_end(epoch, metrics) 54 | 55 | def batch_start(self, epoch, phase): 56 | for cb in self.callbacks: cb.batch_start(epoch, phase) 57 | 58 | def batch_end(self, epoch, phase): 59 | for cb in self.callbacks: cb.batch_start(epoch, phase) 60 | 61 | def set_loop(self, loop): 62 | for cb in self.callbacks: cb.loop = loop 63 | 64 | def __getitem__(self, item): 65 | if item not in self._callbacks: 66 | raise KeyError(f'unknown callback: {item}') 67 | return self._callbacks[item] 68 | 69 | 70 | class Logger(Callback): 71 | """ 72 | Writes performance metrics collected during the training process into list 73 | of streams. 74 | 75 | Parameters: 76 | streams: A list of file-like objects with `write()` method. 77 | 78 | """ 79 | def __init__(self, streams=None, log_every=1): 80 | self.streams = streams or [sys.stdout] 81 | self.log_every = log_every 82 | self.epoch_history = {} 83 | self.curr_epoch = 0 84 | 85 | def epoch_end(self, epoch, metrics): 86 | stats = [f'{name}: {value:2.4f}' for name, value in metrics.items()] 87 | metrics = ' - '.join(stats) 88 | string = f'Epoch {epoch:4d} | {metrics}\n' 89 | for stream in self.streams: 90 | stream.write(string) 91 | stream.flush() 92 | 93 | 94 | class CSVLogger(Logger): 95 | """ 96 | A wrapper build on top of stdout logging callback which opens a CSV file 97 | to write metrics. 98 | 99 | Parameters: 100 | filename: A name of CSV file to store training loss history. 101 | 102 | """ 103 | def __init__(self, filename='history.csv'): 104 | super().__init__() 105 | self.filename = filename 106 | self.file = None 107 | 108 | def training_start(self): 109 | self.file = open(self.filename, 'w') 110 | self.streams = [self.file] 111 | 112 | def training_end(self): 113 | if self.file: 114 | self.file.close() 115 | 116 | 117 | class History(Callback): 118 | 119 | def __init__(self): 120 | self.history = [] 121 | 122 | def epoch_end(self, epoch, metrics): 123 | self.history.append(metrics) 124 | 125 | def training_end(self): 126 | history = [] 127 | for i, record in enumerate(self.history): 128 | item = record.copy() 129 | item['epoch'] = i 130 | history.append(item) 131 | self.history = history 132 | 133 | 134 | class ImprovementTracker(Callback): 135 | """ 136 | Tracks a specific metric during training process and reports when the 137 | metric does not improve after the predefined number of iterations. 
138 | """ 139 | # def __init__(self, patience=1, phase='valid', metric='valid_avg_loss', 140 | # better=min): 141 | 142 | def __init__(self, patience=1, metric='valid_loss', better=min): 143 | self.patience = patience 144 | # self.phase = phase 145 | self.metric = metric 146 | self.better = better 147 | self.no_improvement = None 148 | self.best_value = None 149 | self.stagnation = None 150 | self.loop = None 151 | 152 | def training_start(self): 153 | self.no_improvement = 0 154 | self.stagnation = False 155 | 156 | def epoch_end(self, epoch, metrics): 157 | value = metrics[self.metric] 158 | best_value = self.best_value or value 159 | improved = self.better(best_value, value) == value 160 | if not improved: 161 | self.no_improvement += 1 162 | else: 163 | self.best_value = value 164 | self.no_improvement = 0 165 | if self.no_improvement >= self.patience: 166 | self.stagnation = True 167 | 168 | @property 169 | def improved(self): 170 | return self.no_improvement == 0 171 | 172 | 173 | class EarlyStopping(ImprovementTracker): 174 | """ 175 | Stops observed training loop if the tracked performance metrics does not 176 | improve during predefined number of iterations. 177 | """ 178 | 179 | def epoch_end(self, epoch, metrics): 180 | super().epoch_end(epoch, metrics) 181 | if self.stagnation: 182 | self.loop.stop = True 183 | 184 | 185 | class Checkpoint(ImprovementTracker): 186 | """ 187 | Saves model attached to the loop each time when tracked performance metric 188 | is improved, or on each iteration if required. 189 | """ 190 | def __init__(self, folder=None, save_best_only=True, 191 | filename='model_{metric}_{value:2.4f}.weights', 192 | **kwargs): 193 | 194 | super().__init__(**kwargs) 195 | self.folder = folder or os.getcwd() 196 | self.save_best_only = save_best_only 197 | self.filename = filename 198 | self.best_model = None 199 | 200 | @property 201 | def need_to_save(self): 202 | if not self.save_best_only: 203 | return True 204 | return self.improved 205 | 206 | def get_name(self): 207 | return self.filename.format(metric=self.metric, value=self.best_value) 208 | 209 | def epoch_end(self, epoch, metrics): 210 | super().epoch_end(epoch, metrics) 211 | if self.need_to_save: 212 | best_model = join(self.folder, self.get_name()) 213 | self.loop.save_model(best_model) 214 | self.best_model = best_model 215 | 216 | 217 | def default_callbacks(workdir=None): 218 | """Returns a list with commonly used callbacks.""" 219 | 220 | workdir = workdir or os.getcwd() 221 | return [ 222 | History(), 223 | Logger(), 224 | CSVLogger(filename=join(workdir, 'history.csv')), 225 | Checkpoint(folder=workdir) 226 | ] -------------------------------------------------------------------------------- /azlyrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | An AZLyrics parsing tool. 3 | 4 | The tool downloads lyrics from the website and saves each song into separate 5 | file. It also creates a CSV file with song titles. 6 | 7 | Note that the downloaded texts can be used only for educational and personal 8 | purposes. 
Please visit the website to get familiar with the license and privacy
9 | policy: https://www.azlyrics.com
10 | """
11 | import os
12 | import time
13 | import argparse
14 | import configparser
15 | from pathlib import Path
16 | from urllib.parse import urljoin
17 | from string import ascii_letters, digits
18 | 
19 | import requests
20 | import numpy as np
21 | from bs4 import BeautifulSoup
22 | 
23 | 
24 | # noinspection PyBroadException
25 | class AZLyricsParser:
26 |     """
27 |     A simple scraper to parse content of the AZLyrics site.
28 | 
29 |     The scraper tries to gather texts without increasing the server's load too
30 |     much, and makes an effort to avoid being blacklisted by inserting random
31 |     long delays between requests.
32 | 
33 |     Parameters:
34 |         throttling: Mean value of the normal distribution used to generate
35 |             random delays between HTTP requests.
36 |         proxy: Optional dictionary with SOCKS proxy definition.
37 | 
38 |     """
39 |     base_url = 'https://www.azlyrics.com'
40 | 
41 |     def __init__(self, throttling=0.5, proxy=None):
42 |         self.throttling = throttling
43 |         self.proxy = proxy
44 | 
45 |     def build_songs_list(self, artist: str):
46 |         name = normalize(artist)
47 |         first_letter = name[0]
48 |         albums_url = f'{self.base_url}/{first_letter}/{name}.html'
49 |         try:
50 |             r = requests.get(albums_url, proxies=self.proxy)
51 |         except:
52 |             print('Error: Cannot build songs list! Connection rejected')
53 |             return
54 | 
55 |         page_content = r.text
56 |         tree = BeautifulSoup(page_content, 'html.parser')
57 |         albums = tree.find('div', id='listAlbum')
58 |         if albums is None:
59 |             print('Albums were not found')
60 |             return
61 | 
62 |         songs = []
63 |         for tag in albums.find_all('a'):
64 |             link = tag.attrs.get('href')
65 |             if not link:
66 |                 continue
67 |             songs.append((tag.text, urljoin(self.base_url, link)))
68 | 
69 |         return songs
70 | 
71 |     def parse_songs(self, songs: list):
72 |         print(f'Number of songs to parse: {len(songs):d}')
73 |         texts = []
74 |         for i, (title, url) in enumerate(songs):
75 |             print(f'Parsing song: {url}')
76 |             wait_time = 10
77 |             while True:
78 |                 try:
79 |                     text = self.parse_song(url)
80 |                     break
81 |                 except:
82 |                     print('Cannot parse song!
Connection rejected')
83 |                     print(f'Trying again after a period of delay equal '
84 |                           f'to {wait_time:d} seconds')
85 |                     time.sleep(wait_time)
86 |                     wait_time *= 2
87 |             texts.append((title, text))
88 |             wait_time = max(0.0, np.random.normal(self.throttling, 3))
89 |             print(f'Waiting for {wait_time:2.2f} seconds...')
90 |             time.sleep(wait_time)
91 |         return texts
92 | 
93 |     def parse_song(self, url):
94 |         r = requests.get(url, proxies=self.proxy)
95 |         page_content = r.text
96 |         tree = BeautifulSoup(page_content, 'html.parser')
97 |         lyrics = tree.find('div', {'id': None, 'class': None})
98 |         if lyrics is None:
99 |             return None
100 |         text = lyrics.text.strip().replace('\r\n', '\n')
101 |         return text
102 | 
103 | 
104 | def normalize(string, domain=ascii_letters+digits):
105 |     return ''.join([char for char in string if char in domain]).lower()
106 | 
107 | 
108 | def parse_args():
109 |     parser = argparse.ArgumentParser()
110 |     parser.add_argument(
111 |         '-a', '--artist',
112 |         default='Black Sabbath',
113 |         help='an artist whose songs to parse; is used only if -f is missing'
114 |     )
115 |     parser.add_argument(
116 |         '-p', '--proxy',
117 |         default=None,
118 |         help='proxy configuration (if required)'
119 |     )
120 |     parser.add_argument(
121 |         '-o', '--output',
122 |         default=None,
123 |         help='path to folder with downloaded songs'
124 |     )
125 |     parser.add_argument(
126 |         '-t', '--throttling',
127 |         default=10.0,
128 |         type=float,
129 |         help='base throttling value used to define delay between requests'
130 |     )
131 |     parser.add_argument(
132 |         '-f', '--file',
133 |         default=None,
134 |         help='path to file with artist names'
135 |     )
136 |     parser.add_argument(
137 |         '--force-reload',
138 |         action='store_true',
139 |         help='load songs texts even if folder with artist name already exists'
140 |     )
141 | 
142 |     args = parser.parse_args()
143 | 
144 |     if args.proxy is not None:
145 |         conf = configparser.ConfigParser()
146 |         conf.read(args.proxy)
147 |         proxy = dict(conf['proxy'])
148 |         url = 'socks5://{username}:{password}@{host}:{port}'.format(**proxy)
149 |         args.proxy = {'http': url, 'https': url}
150 | 
151 |     args.output = Path(args.output or '~/data/azlyrics').expanduser()
152 | 
153 |     if args.file is None:
154 |         artists = [args.artist]
155 | 
156 |     else:
157 |         path = Path(args.file)
158 |         if not path.exists():
159 |             parser.error(f'File does not exist: {args.file}')
160 | 
161 |         artists = {line.strip() for line in Path(args.file).open()}
162 | 
163 |     if not args.force_reload:
164 |         for dirname in args.output.iterdir():
165 |             artist = dirname.stem
166 |             if artist in artists:
167 |                 print(f'Artist folder already exists: {artist}')
168 |                 artists.remove(artist)
169 | 
170 |     args.artists = sorted(artists)
171 | 
172 |     return args
173 | 
174 | 
175 | def main():
176 |     print('Instantiating lyrics parser')
177 | 
178 |     args = parse_args()
179 |     parser = AZLyricsParser(throttling=args.throttling, proxy=args.proxy)
180 |     artists = args.artists
181 | 
182 |     for i, artist in enumerate(artists, 1):
183 |         print(f'Building list of songs URLs for artist {artist}',
184 |               f'({i} of {len(artists)})')
185 | 
186 |         songs = parser.build_songs_list(artist)
187 |         if not songs:
188 |             print('Songs not found.
Skipping...')
189 |             continue
190 | 
191 |         print(f'Parsing collected songs ({len(songs)} total)')
192 |         folder = args.output/artist
193 |         texts = parser.parse_songs(songs)
194 |         if not folder.exists():
195 |             folder.mkdir(parents=True, exist_ok=True)
196 | 
197 |         index_path = Path(folder).joinpath('songs.csv')
198 |         with index_path.open('w') as index_file:
199 |             for j, (title, text) in enumerate(texts):
200 |                 index_file.write(f'{j},{title}\n')
201 |                 with (folder/f'{j}.txt').open('w') as text_file:
202 |                     text_file.write(text + '\n')
203 | 
204 |         print(f'Completed! Index path: {index_path}')
205 | 
206 | 
207 | if __name__ == '__main__':
208 |     main()
209 | 
--------------------------------------------------------------------------------
/ssd/plots.py:
--------------------------------------------------------------------------------
1 | """
2 | Helper tools to show images from the VOC dataset.
3 | """
4 | import numpy as np
5 | import matplotlib.cm as cmx
6 | import matplotlib.pyplot as plt
7 | import matplotlib.colors as mcolors
8 | from matplotlib import patches, patheffects
9 | 
10 | from misc import open_image, to_voc, valid_box
11 | 
12 | 
13 | 
14 | class VOCPlotter:
15 |     """A helper class to visualize samples from VOC dataset.
16 | 
17 |     The dataset is expected to contain images, bounding boxes, and object
18 |     classes. Then the class can be used to show these images and their
19 |     annotations.
20 |     """
21 |     def __init__(self, id2cat=None, **fig_kwargs):
22 |         self.id2cat = id2cat
23 |         self.fig = None
24 |         self.fig_kwargs = fig_kwargs
25 | 
26 |     def __enter__(self):
27 |         return self
28 | 
29 |     def __exit__(self, exc_type, exc_val, exc_tb):
30 |         plt.pause(0.001)
31 |         if self.fig is not None:
32 |             plt.close(self.fig)
33 |             self.fig = None
34 | 
35 |     def plot_boxes(self, images, boxes, classes, dims=(3, 4)):
36 |         if self.fig is not None:
37 |             plt.close(self.fig)
38 | 
39 |         fig, axes = plt.subplots(*dims, **self.fig_kwargs)
40 |         n_colors = min(12, len(np.unique(classes)))
41 |         cmap = get_cmap(n_colors)
42 |         colors_list = [cmap(float(x)) for x in range(n_colors)]
43 | 
44 |         for i, ax in enumerate(axes.flat):
45 |             ax.axis('off')
46 |             image = images[i]
47 |             image_classes = [c for c in classes[i] if c > 0]
48 |             image_boxes = [b for b in boxes[i].reshape(-1, 4) if valid_box(b)]
49 |             if image.shape[0] == 3 and len(image.shape) == 3:
50 |                 image = image.transpose(1, 2, 0)
51 |             self.plot_image(image, ax=ax)
52 |             for j, (box, target) in enumerate(zip(image_boxes, image_classes)):
53 |                 if box[2] <= 0:
54 |                     continue
55 |                 box = to_voc(box)
56 |                 color = colors_list[j % n_colors]
57 |                 if self.id2cat:
58 |                     target = self.id2cat.get(target, target)
59 |                 add_rect(ax, box, color=color)
60 |                 add_text(ax, box[:2], f'{j}: {target}', color=color)
61 | 
62 |         self.fig = fig
63 | 
64 |     def plot_image(self, image, grid=False, n_cells=8, ax=None):
65 |         if not ax:
66 |             if self.fig is not None:
67 |                 plt.close(self.fig)
68 |             fig, ax = plt.subplots(**self.fig_kwargs)
69 |             self.fig = fig
70 | 
71 |         ax.imshow(image)
72 |         if grid:
73 |             width, height = image.shape[:2]
74 |             ax.set_xticks(np.linspace(0, width, n_cells))
75 |             ax.set_yticks(np.linspace(0, height, n_cells))
76 |             ax.grid()
77 |         ax.set_xticklabels([])
78 |         ax.set_yticklabels([])
79 |         return ax
80 | 
81 | 
82 | class ImagePlotter:
83 |     """Helper class to visualize VOC dataset images and bounding boxes."""
84 | 
85 |     def __init__(self, root, annotations, files, categories, **fig_kwargs):
86 |         self.root = root
87 |         self.annotations = annotations
88 |         self.files = files
89 |         self.categories =
categories
90 |         self.fig_kwargs = fig_kwargs
91 | 
92 |     def show(self, index, ax=None):
93 |         """
94 |         Plots an image and bounding box with a specific ID value from the VOC
95 |         dataset and shows matplotlib interface with the image.
96 |         """
97 |         self.draw(index, ax)
98 |         plt.show()
99 | 
100 |     def draw(self, index, ax=None):
101 |         """
102 |         Plots an image and bounding box with a specific ID value from the VOC
103 |         dataset, and returns the axes object.
104 |         """
105 |         annotation = self.annotations[index]
106 |         image = open_image(self.root / self.files[index])
107 |         ax = self.draw_image(image, ax=ax)
108 |         for bbox, category in annotation:
109 |             bbox = to_voc(bbox)
110 |             classes = self.categories[category]
111 |             add_rect(ax, bbox)
112 |             add_text(ax, bbox[:2], classes, size=16)
113 |         return ax
114 | 
115 |     def show_images(self, images, targets, class_names, dims=(3, 4),
116 |                     figsize=(12, 12), grid=True):
117 | 
118 |         fig, axes = plt.subplots(*dims, figsize=figsize)
119 |         for i, ax in enumerate(axes.flat):
120 |             image = images[i]
121 |             [non_zero] = np.nonzero(targets[i] > 0.4)
122 |             self.draw_image(image, ax=ax, grid=grid)
123 |             if len(non_zero) > 0:
124 |                 classes = '\n'.join([class_names[index] for index in non_zero])
125 |                 add_text(ax, (0, 0), classes)
126 |             else:
127 |                 classes = ''
128 |                 add_text(ax, (0, 0), classes, color='salmon')
129 |         plt.tight_layout()
130 |         plt.show()
131 | 
132 |     def show_ground_truth(self, images, boxes, classes, dims=(3, 4),
133 |                           figsize=(12, 12)):
134 | 
135 |         n_colors = 12
136 |         cmap = get_cmap(n_colors)
137 |         colors_list = [cmap(float(x)) for x in range(n_colors)]
138 | 
139 |         fig, axes = plt.subplots(*dims, figsize=figsize)
140 |         for i, ax in enumerate(axes.flat):
141 |             image = images[i]
142 |             image_classes = classes[i]
143 |             image_boxes = [box for box in boxes[i].reshape(-1, 4)]
144 |             self.draw_image(image, ax=ax)
145 |             for j, (box, target) in enumerate(zip(image_boxes, image_classes)):
146 |                 if box[2] <= 0:
147 |                     continue
148 |                 box = to_voc(box)
149 |                 color = colors_list[j % n_colors]
150 |                 add_rect(ax, box, color=color)
151 |                 add_text(ax, box[:2], f'{j}: {target}', color=color)
152 | 
153 |     def draw_image(self, image, grid=False, n_cells=8, ax=None):
154 |         if not ax:
155 |             fig, ax = plt.subplots(**self.fig_kwargs)
156 |         ax.imshow(image)
157 |         if grid:
158 |             width, height = image.shape[:2]
159 |             ax.set_xticks(np.linspace(0, width, n_cells))
160 |             ax.set_yticks(np.linspace(0, height, n_cells))
161 |             ax.grid()
162 |         ax.set_xticklabels([])
163 |         ax.set_yticklabels([])
164 |         return ax
165 | 
166 | 
167 | def get_cmap(n):
168 |     color_norm = mcolors.Normalize(vmin=0, vmax=n - 1)
169 |     return cmx.ScalarMappable(norm=color_norm, cmap='Set3').to_rgba
170 | 
171 | 
172 | def add_rect(ax, bbox, outline=4, color='white'):
173 |     """Adds a stroke rectangle to the axes."""
174 | 
175 |     rect = patches.Rectangle(
176 |         bbox[:2], *bbox[-2:], fill=False, edgecolor=color, lw=2)
177 |     patch = ax.add_patch(rect)
178 |     add_outline(patch, outline)
179 | 
180 | 
181 | def add_text(ax, xy, text, size=14, outline=1, color='white'):
182 |     """Adds a text object to the axes."""
183 | 
184 |     text = ax.text(
185 |         *xy, text, va='top', color=color, fontsize=size, weight='bold')
186 |     add_outline(text, outline)
187 | 
188 | 
189 | def add_outline(obj, lw=4):
190 |     """Adds outline effect to the graphical object."""
191 | 
192 |     effects = [
193 |         patheffects.Stroke(linewidth=lw, foreground='black'),
194 |         patheffects.Normal()]
195 |     obj.set_path_effects(effects)
196 |
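
# A minimal usage sketch for VOCPlotter; the random arrays are stand-ins for a
# real VOC batch, and the shapes and id2cat mapping below are illustrative
# assumptions rather than a fixed dataset format:
#
#     images = np.random.rand(12, 3, 128, 128)     # CHW float images
#     boxes = np.random.randint(0, 64, (12, 16))   # four 4-element boxes per image
#     classes = np.random.randint(0, 3, (12, 4))   # class ids, 0 means background
#     with VOCPlotter(id2cat={1: 'cat', 2: 'dog'}, figsize=(12, 12)) as plotter:
#         plotter.plot_boxes(images, boxes, classes, dims=(3, 4))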
-------------------------------------------------------------------------------- /ios.py: -------------------------------------------------------------------------------- 1 | import math 2 | from pathlib import Path 3 | from multiprocessing import cpu_count 4 | 5 | import matplotlib.pyplot as plt 6 | 7 | import numpy as np 8 | import torch 9 | from torch import nn 10 | from torch import optim 11 | from torch.optim.lr_scheduler import CosineAnnealingLR 12 | from torch.nn import functional as F 13 | from torch.utils.data import DataLoader 14 | from torchvision import transforms 15 | from torchvision.datasets import EMNIST 16 | from torchvision.models.resnet import resnet18 17 | from torchvision.utils import make_grid 18 | import onnx 19 | from onnx import onnx_pb 20 | import onnx_coreml 21 | from onnx_coreml import convert 22 | 23 | from core.loop import Loop 24 | from core.metrics import accuracy 25 | from core.callbacks import default_callbacks 26 | 27 | 28 | DATA_ROOT = Path.home() / 'data' / 'emnist' 29 | DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 30 | STATS = [0.17325], [0.33163] 31 | 32 | 33 | def conv3x3(ni, nf, stride=1, padding=1): 34 | return nn.Conv2d(ni, nf, kernel_size=3, stride=stride, padding=padding, 35 | bias=False) 36 | 37 | 38 | class IdentityBlock(nn.Module): 39 | 40 | def __init__(self, ni, nf=None, stride=1): 41 | super().__init__() 42 | 43 | nf = ni if nf is None else nf 44 | self.conv1 = conv3x3(ni, nf, stride=stride) 45 | self.bn1 = nn.BatchNorm2d(nf) 46 | self.conv2 = conv3x3(nf, nf) 47 | self.bn2 = nn.BatchNorm2d(nf) 48 | 49 | if ni != nf: 50 | self.downsample = nn.Sequential( 51 | nn.Conv2d(ni, nf, kernel_size=1, stride=stride, bias=False), 52 | nn.BatchNorm2d(nf)) 53 | 54 | def forward(self, x): 55 | shortcut = x 56 | 57 | out = self.conv1(x) 58 | out = self.bn1(out) 59 | out = F.leaky_relu(out) 60 | 61 | out = self.conv2(out) 62 | out = self.bn2(out) 63 | 64 | if hasattr(self, 'downsample'): 65 | shortcut = self.downsample(x) 66 | 67 | out += shortcut 68 | out = F.leaky_relu(out) 69 | 70 | return out 71 | 72 | 73 | class Flatten(nn.Module): 74 | 75 | def forward(self, x): 76 | return x.view(x.size(0), -1) 77 | 78 | 79 | class ResNet(nn.Module): 80 | 81 | def __init__(self, num_of_classes): 82 | super().__init__() 83 | self.conv = nn.Conv2d(1, 10, kernel_size=3, stride=1, padding=2) 84 | self.blocks = nn.ModuleList([ 85 | IdentityBlock(10, 20, stride=2), 86 | IdentityBlock(20, 40, stride=2), 87 | IdentityBlock(40, 80, stride=2) 88 | ]) 89 | self.pool = nn.AvgPool2d(4) 90 | self.flatten = Flatten() 91 | self.fc = nn.Linear(80, num_of_classes) 92 | self.init() 93 | 94 | def forward(self, x): 95 | x = self.conv(x) 96 | for block in self.blocks: 97 | x = block(x) 98 | x = self.pool(x) 99 | x = self.flatten(x) 100 | x = self.fc(x) 101 | return x 102 | 103 | def init(self): 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 107 | elif isinstance(m, nn.BatchNorm2d): 108 | nn.init.constant_(m.weight, 1) 109 | nn.init.constant_(m.bias, 0) 110 | 111 | 112 | def load_dataset(data_transforms, root=DATA_ROOT, split='digits', 113 | batch_size=1024, num_workers=0): 114 | 115 | datasets = {} 116 | for name in ('train', 'valid'): 117 | is_training = name == 'train' 118 | dataset = EMNIST( 119 | root=root, split=split, train=is_training, download=True, 120 | transform=data_transforms[name]) 121 | loader = DataLoader( 122 | dataset, batch_size=batch_size, num_workers=num_workers) 123 
| datasets[name] = {'dataset': dataset, 'loader': loader}
124 |     return datasets
125 | 
126 | 
127 | def random_sample(dataset, n=16):
128 |     loader = DataLoader(dataset, batch_size=n, shuffle=True)
129 |     return next(iter(loader))
130 | 
131 | 
132 | def compute_stats(dataset):
133 |     n = len(dataset) // 1000
134 |     loader = DataLoader(
135 |         dataset,
136 |         batch_size=n,
137 |         num_workers=cpu_count())
138 |     mean, std, total = 0., 0., 0
139 |     for batch, _ in iter(loader):
140 |         image = batch.squeeze()
141 |         mean += image.mean().item()
142 |         std += image.std().item()
143 |         total += 1
144 |     mean /= total
145 |     std /= total
146 |     print(mean, std)
147 | 
148 | 
149 | def show_predictions(images, suptitle='', titles=None, dims=(4, 4), figsize=(12, 12)):
150 |     f, ax = plt.subplots(*dims, figsize=figsize)
151 |     titles = titles or []
152 |     f.suptitle(suptitle)
153 |     [mean], [std] = STATS
154 |     # invert the (x - mean)/std normalization applied by the transforms
155 |     images = images*std + mean
156 |     for i, (img, ax) in enumerate(zip(images, ax.flat)):
157 |         ax.imshow(img.reshape(28, 28))
158 |         if i < len(titles):
159 |             ax.set_title(titles[i])
160 |     plt.show()
161 | 
162 | 
163 | def to_np(*tensors):
164 | 
165 |     def convert_to_numpy(obj):
166 |         return obj.detach().cpu().numpy()
167 | 
168 |     if len(tensors) == 1:
169 |         return convert_to_numpy(tensors[0])
170 |     return [convert_to_numpy(tensor) for tensor in tensors]
171 | 
172 | 
173 | def main():
174 |     batch_size = 10000
175 |     num_workers = cpu_count()
176 |     data_transforms = {
177 |         'train': transforms.Compose([
178 |             transforms.RandomRotation(4),
179 |             transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
180 |             transforms.ToTensor(),
181 |             transforms.Normalize(*STATS)
182 |         ]),
183 |         'valid': transforms.Compose([
184 |             transforms.ToTensor(),
185 |             transforms.Normalize(*STATS)
186 |         ])
187 |     }
188 |     datasets = load_dataset(
189 |         data_transforms,
190 |         batch_size=batch_size,
191 |         num_workers=num_workers)
192 | 
193 |     n_samples = len(datasets['train']['dataset'])
194 |     n_batches = math.ceil(n_samples / batch_size)
195 | 
196 |     model = ResNet(10)
197 |     opt = optim.Adam(model.parameters(), lr=1e-2)
198 |     sched = CosineAnnealingLR(opt, T_max=n_batches/4, eta_min=1e-5)
199 |     loop = Loop(model, opt, sched, device=DEVICE)
200 | 
201 |     loop.run(train_data=datasets['train']['loader'],
202 |              valid_data=datasets['valid']['loader'],
203 |              loss_fn=F.cross_entropy,
204 |              metrics=[accuracy],
205 |              callbacks=default_callbacks(),
206 |              epochs=3)
207 | 
208 |     best_model = loop['Checkpoint'].best_model
209 |     weights = torch.load(best_model)
210 |     model.load_state_dict(weights)
211 |     x, y = random_sample(datasets['valid']['dataset'])
212 |     y_pred = model(x.to(DEVICE))
213 |     valid_acc = accuracy(y_pred, y.to(DEVICE))
214 |     title = f'Validation accuracy: {valid_acc:2.2%}'
215 |     titles = [str(x) for x in to_np(y_pred.argmax(dim=1))]
216 | 
217 |     show_predictions(
218 |         images=to_np(x.permute(0, 3, 2, 1)),
219 |         suptitle=title,
220 |         titles=titles)
221 | 
222 |     dummy_input = torch.randn(16, 1, 28, 28, requires_grad=True).cuda()
223 |     torch.onnx.export(model, dummy_input, 'trivial.onnx', export_params=True)
224 |     core_ml_model = convert('trivial.onnx')
225 |     core_ml_model.save('digits.mlmodel')
226 |     print('CoreML model was saved onto disk')
227 | 
228 | 
229 | if __name__ == '__main__':
230 |     main()
231 | 
--------------------------------------------------------------------------------
/core/utils.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import math
3 | from pathlib import Path
4 | 
5 | import
numpy as np 6 | from sklearn.preprocessing import LabelBinarizer 7 | 8 | 9 | _labels_sources = {} 10 | 11 | 12 | class LabelledImagesDataset: 13 | 14 | def __new__(cls, labels_from: str='file', **kwargs): 15 | if issubclass(cls, LabelledImagesDataset): 16 | cls = get_dataset(labels_from) 17 | return object.__new__(cls) 18 | 19 | def __init__(self, batch_size: int=32, 20 | image_extensions: str='*.png|*.jpeg|*.tiff|*.bmp', 21 | one_hot: bool=True, transforms=None, **kwargs): 22 | 23 | self.batch_size = batch_size 24 | self.image_extensions = image_extensions 25 | self.one_hot = one_hot 26 | self.transforms = transforms 27 | 28 | # should be initialized in descendant classes 29 | self._uid_to_verbose = None 30 | self._files = None 31 | self._classes = None 32 | self._binarizer = None 33 | self._verbose_classes = None 34 | self._verbose_to_label = None 35 | self._label_to_verbose = None 36 | self._one_hot = None 37 | self.init() 38 | 39 | def init(self): 40 | raise NotImplementedError() 41 | 42 | @property 43 | def n_classes(self): 44 | return len(self._classes) 45 | 46 | @property 47 | def classes(self): 48 | return self._classes 49 | 50 | @property 51 | def verbose_classes(self): 52 | return self._verbose_classes 53 | 54 | def to_label(self, names): 55 | return np.array([self._verbose_to_label[name] for name in names]) 56 | 57 | def to_verbose(self, labels): 58 | return np.array([self._label_to_verbose[label] for label in labels]) 59 | 60 | def __iter__(self): 61 | return SupervisedIterator( 62 | x=self._files, 63 | y=self._one_hot, 64 | batch_size=self.batch_size, 65 | one_hot=self.one_hot, 66 | transforms=self.transforms) 67 | 68 | 69 | class SupervisedIterator: 70 | 71 | def __init__(self, x, y, batch_size, one_hot=True, transforms=None): 72 | self.transforms = transforms 73 | self.batch_size = batch_size 74 | self.one_hot = one_hot 75 | 76 | observations = BatchArrayIterator( 77 | x, batch_size=batch_size, infinite=True) 78 | targets = BatchArrayIterator( 79 | y, batch_size=batch_size, infinite=True) 80 | 81 | self.curr_iter = 0 82 | self.steps_ = observations.n_batches 83 | self.iter_ = zip(observations, targets) 84 | 85 | def __len__(self): 86 | return self.steps_ 87 | 88 | def __iter__(self): 89 | self.curr_iter = 0 90 | return self 91 | 92 | def __next__(self): 93 | if self.curr_iter >= self.steps_: 94 | raise StopIteration() 95 | 96 | x, y = next(self.iter_) 97 | if self.transforms is not None: 98 | for transform in self.transforms: 99 | x, y = transform(x, y) 100 | self.curr_iter += 1 101 | return x, y 102 | 103 | 104 | class _LabelsFromFolderNames(LabelledImagesDataset): 105 | 106 | def __init__(self, root: str, **kwargs): 107 | self.root = root 108 | super().__init__(**kwargs) 109 | 110 | def init(self): 111 | uid_to_verbose = {} 112 | files = [] 113 | 114 | for subdir in Path(self.root).iterdir(): 115 | for filename in subdir.iterdir(): 116 | uid = filename.stem 117 | class_name = subdir.stem 118 | uid_to_verbose[uid] = class_name 119 | files.append(filename) 120 | 121 | string_classes = list(uid_to_verbose.values()) 122 | binarizer = LabelBinarizer() 123 | one_hot = binarizer.fit_transform(string_classes) 124 | numerical_classes = one_hot.argmax(axis=1) 125 | 126 | self._uid_to_verbose = uid_to_verbose 127 | self._files = files 128 | self._classes = np.unique(numerical_classes) 129 | self._binarizer = binarizer 130 | self._verbose_classes = np.unique(string_classes) 131 | self._verbose_to_label = dict(zip(string_classes, numerical_classes)) 132 | self._label_to_verbose = { 133 
| v: k for k, v in self._verbose_to_label.items()} 134 | self._one_hot = one_hot 135 | 136 | 137 | def register_source(name, cls): 138 | global _labels_sources 139 | _labels_sources[name] = cls 140 | 141 | 142 | def get_dataset(name): 143 | if name not in _labels_sources: 144 | raise ValueError('dispatcher with name \'%s\' is not found' % name) 145 | return _labels_sources[name] 146 | 147 | 148 | register_source('folders', _LabelsFromFolderNames) 149 | 150 | 151 | def read_labels(filename: str, class_column: str, id_column: str='id', 152 | skip_header: bool=True): 153 | """Reads CSV file with labels. 154 | The file should have at least two columns: the one with unique identifiers 155 | and the another one - with class names. 156 | Args: 157 | filename: Path to file with labels. 158 | class_column: Column with class names. 159 | id_column: Column with unique identifiers. 160 | skip_header: If True, then the first row in the file is ignored. 161 | Returns: 162 | labels: The mapping from ID to verbose label. 163 | """ 164 | path = Path(str(filename)) 165 | 166 | if not path.exists(): 167 | raise ValueError('labels file is not found: %s' % filename) 168 | 169 | with open(path.as_posix()) as file: 170 | reader = csv.DictReader(file, fieldnames=[id_column, class_column]) 171 | if skip_header: 172 | _ = next(reader) 173 | try: 174 | labels = { 175 | strip_exts(row[id_column]): row[class_column] 176 | for row in reader} 177 | except KeyError: 178 | raise ValueError( 179 | 'please check your CSV file to make sure that \'%s\' and ' 180 | '\'%s\' columns exist' % (id_column, class_column)) 181 | else: 182 | return labels 183 | 184 | 185 | def strip_exts(filename, exts=None, strip_all=True): 186 | """Removes specific extensions from file name.""" 187 | 188 | if filename.startswith('.'): 189 | return filename 190 | 191 | if strip_all and exts is None: 192 | basename, *_ = filename.split('.') 193 | return basename 194 | 195 | elif not strip_all and exts is None: 196 | return filename 197 | 198 | list_of_extensions = exts.split('|') if '|' in exts else [exts] 199 | for ext in list_of_extensions: 200 | filename = filename.replace('.%s' % ext, '') 201 | return filename 202 | 203 | 204 | class FilesIterator: 205 | 206 | def __init__(self, folder: str, pattern: str, batch_size: int=32, 207 | infinite: bool=False, same_size_batches: bool=False): 208 | 209 | self.folder = str(folder) 210 | self.pattern = pattern 211 | self.infinite = infinite 212 | self.same_size_batches = same_size_batches 213 | self.batch_size = batch_size 214 | 215 | extensions = pattern.split('|') if '|' in pattern else [pattern] 216 | files = list(glob(self.folder, extensions)) 217 | 218 | self._extensions = extensions 219 | self._files = files 220 | self._n = len(self._files) 221 | self._iter = BatchArrayIterator( 222 | self._files, batch_size=batch_size, infinite=infinite) 223 | 224 | @property 225 | def batch_index(self): 226 | return self._iter.batch_index 227 | 228 | @property 229 | def epoch_index(self): 230 | return self._iter.epoch_index 231 | 232 | @property 233 | def extensions(self): 234 | return self._extensions 235 | 236 | def next(self): 237 | return next(self._iter) 238 | 239 | 240 | class BatchArrayIterator: 241 | """Iterates an array or several arrays in smaller batches. 242 | 243 | Attributes: 244 | batch_size: Size of batch. 245 | infinite: If True, then the iterator doesn't raise StopIteration 246 | exception when the array is completely traversed but restarts the 247 | process again. 
248 |         same_size_batches: If True and `infinite` attribute is True, then all
249 |             the batches yielded by the iterator have the same size even if
250 |             the total length of the iterated array is not evenly divided by the
251 |             `batch_size`. If the last batch is smaller than `batch_size`, it is
252 |             discarded.
253 | 
254 |     """
255 |     def __init__(self,
256 |                  array, *arrays,
257 |                  batch_size: int=32,
258 |                  infinite: bool=False,
259 |                  same_size_batches: bool=False):
260 | 
261 |         if not infinite and same_size_batches:
262 |             raise ValueError('Incompatible configuration: cannot guarantee '
263 |                              'same size of batches when yielding finite '
264 |                              'number of files.')
265 | 
266 |         arrays = _convert_to_arrays(array, *arrays)
267 | 
268 |         self.arrays = arrays
269 |         self.batch_size = batch_size
270 |         self.infinite = infinite
271 |         self.same_size_batches = same_size_batches
272 | 
273 |         self._n = _num_of_batches(arrays, batch_size, same_size_batches)
274 |         self._batch_index = 0
275 |         self._epoch_index = 0
276 | 
277 |     def __iter__(self):
278 |         return self
279 | 
280 |     def __next__(self):
281 |         return self.next()
282 | 
283 |     @property
284 |     def n_batches(self):
285 |         return self._n
286 | 
287 |     @property
288 |     def batch_index(self):
289 |         return self._batch_index
290 | 
291 |     @property
292 |     def epoch_index(self):
293 |         return self._epoch_index
294 | 
295 |     def next(self):
296 |         if self._batch_index >= self._n:
297 |             if not self.infinite:
298 |                 raise StopIteration()
299 |             self._batch_index = 0
300 |             self._epoch_index += 1
301 | 
302 |         batches = tuple([self._take_next_batch(arr) for arr in self.arrays])
303 |         self._batch_index += 1
304 |         return batches[0] if len(batches) == 1 else batches
305 | 
306 |     def _take_next_batch(self, array):
307 |         start = self._batch_index * self.batch_size
308 |         end = (self._batch_index + 1) * self.batch_size
309 |         return array[start:end]
310 | 
311 | 
312 | def _convert_to_arrays(seq, *seqs):
313 |     sequences = [seq] + list(seqs)
314 |     arrays = [np.asarray(seq) for seq in sequences]
315 |     n = len(arrays[0])
316 |     for arr in arrays[1:]:
317 |         if len(arr) != n:
318 |             raise ValueError('arrays should have the same length')
319 |     return arrays
320 | 
321 | 
322 | def _num_of_batches(arrays, batch_size, same_size):
323 |     n = len(arrays[0])
324 |     if same_size:
325 |         return n // batch_size
326 |     return int(math.ceil(n / batch_size))
327 | 
328 | 
329 | def glob(folder, extensions):
330 |     for ext in extensions:
331 |         for path in Path(folder).glob('*.'
+ ext): 332 | yield path.as_posix() 333 | -------------------------------------------------------------------------------- /rnn/rnn_plain.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import math 4 | import textwrap 5 | from os.path import expanduser, join 6 | 7 | import numpy as np 8 | 9 | import torch 10 | from torch import nn 11 | from torch import optim 12 | from torchtext.data import Field 13 | from torch.nn import functional as F 14 | from torch.optim.lr_scheduler import _LRScheduler 15 | 16 | 17 | PATH = expanduser(join('~', 'data', 'fastai', 'nietzsche')) 18 | TRAIN_PATH = join(PATH, 'trn', 'train.txt') 19 | VALID_PATH = join(PATH, 'val', 'valid.txt') 20 | 21 | 22 | DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu' 23 | 24 | 25 | def create_dataset(bptt, batch_size): 26 | field = Field(lower=True, tokenize=list) 27 | dataset = Dataset(field, min_freq=5) 28 | factory = lambda seq: SequenceIterator(seq, bptt, batch_size) 29 | dataset.build(TRAIN_PATH, VALID_PATH, factory) 30 | return dataset, field 31 | 32 | 33 | class Dataset: 34 | """ 35 | Represents a set of encoded texts prepared for model training and 36 | validation. 37 | """ 38 | def __init__(self, field: Field, min_freq: int=1): 39 | self.field = field 40 | self.min_freq = min_freq 41 | self.subsets = {} 42 | self.vocab_size = None 43 | 44 | def build(self, train: str, valid: str, iterator_factory): 45 | content_per_file = {} 46 | for name, path in [('train', train), ('valid', valid)]: 47 | file_content = [] 48 | new_line = False 49 | with open(path) as file: 50 | for line in file: 51 | if line.endswith('\n'): 52 | new_line = True 53 | if line == '\n': 54 | continue 55 | file_content += self.field.preprocess(line) 56 | if new_line: 57 | file_content.append(' ') 58 | new_line = False 59 | content_per_file[name] = file_content 60 | 61 | train_text = content_per_file['train'] 62 | self.field.build_vocab(train_text, min_freq=self.min_freq) 63 | self.vocab_size = len(self.field.vocab.itos) 64 | 65 | for name, content in content_per_file.items(): 66 | sequence = self.field.numericalize(content) 67 | iterator = iterator_factory(sequence.view(-1)) 68 | self.subsets[name] = iterator 69 | 70 | def __getitem__(self, item): 71 | if item not in self.subsets: 72 | raise ValueError(f'Unexpected dataset name: {item}') 73 | return self.subsets[item] 74 | 75 | 76 | class SequenceIterator: 77 | """ 78 | A simple iterator to convert an array of encoded characters into group of 79 | batches reshaped into format, appropriate for the RNN training process. 
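
    For example, a ten-element sequence [0, 1, ..., 9] with batch_size=2 is
    truncated, reshaped to (2, 5) and transposed, so that every column holds
    a contiguous piece of the original text:

        0 5
        1 6
        2 7
        3 8
        4 9

    Each iteration then yields `bptt` consecutive rows as the input X, and
    the same rows shifted one step forward as the target y.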
80 | """ 81 | def __init__(self, seq, bptt=10, batch_size=64, random_length=True, 82 | flatten_target=True): 83 | 84 | # Converting dataset into batches: 85 | # 1) truncate text length to evenly fit into number of batches 86 | # 2) reshape the text into N (# of batches) * M (batch size) 87 | # 3) transpose to convert into "long" format with fixed number of cols 88 | 89 | n_batches = seq.size(0) // batch_size 90 | truncated = seq[:n_batches * batch_size] 91 | batches = truncated.view(batch_size, -1).t().contiguous() 92 | 93 | self.bptt = bptt 94 | self.batch_size = batch_size 95 | self.random_length = random_length 96 | self.flatten_target = flatten_target 97 | self.batches = batches 98 | self.curr_line = 0 99 | self.curr_iter = 0 100 | self.total_lines = batches.size(0) 101 | self.total_iters = self.total_lines // self.bptt - 1 102 | 103 | @property 104 | def completed(self): 105 | if self.curr_line >= self.total_lines - 1: 106 | return True 107 | if self.curr_iter >= self.total_iters: 108 | return True 109 | return False 110 | 111 | def __iter__(self): 112 | self.curr_line = self.curr_iter = 0 113 | return self 114 | 115 | def __next__(self): 116 | return self.next() 117 | 118 | def next(self): 119 | if self.completed: 120 | raise StopIteration() 121 | seq_len = self.get_sequence_length() 122 | batch = self.get_batch(seq_len) 123 | self.curr_line += seq_len 124 | self.curr_iter += 1 125 | return batch 126 | 127 | def get_sequence_length(self): 128 | """ 129 | Returns a length of sequence taken from the dataset to form a batch. 130 | 131 | By default, this value is based on the value of bptt parameter but 132 | randomized during training process to pick sequences of characters with 133 | a bit different length. 134 | """ 135 | if self.random_length is None: 136 | return self.bptt 137 | bptt = self.bptt 138 | if np.random.random() >= 0.95: 139 | bptt /= 2 140 | seq_len = max(5, int(np.random.normal(bptt, 5))) 141 | return seq_len 142 | 143 | def get_batch(self, seq_len): 144 | """ 145 | Picks training and target batches from the source depending on current 146 | iteration number. 147 | """ 148 | i, source = self.curr_line, self.batches 149 | seq_len = min(seq_len, self.total_lines - 1 - i) 150 | X = source[i:i + seq_len].contiguous() 151 | y = source[(i + 1):(i + 1) + seq_len].contiguous() 152 | if self.flatten_target: 153 | y = y.view(-1) 154 | return X, y 155 | 156 | 157 | class CosineAnnealingLR(_LRScheduler): 158 | """ 159 | A scheduler implementing cosine annealing with restarts and an increasing 160 | period of the decay. 
161 | """ 162 | def __init__(self, optimizer, t_max=200, eta_min=0.0005, 163 | cycle_mult=2, last_epoch=-1): 164 | 165 | self.t_max = t_max 166 | self.eta_min = eta_min 167 | self.cycle_mult = cycle_mult 168 | self.cycle_epoch = last_epoch 169 | super().__init__(optimizer, last_epoch) 170 | 171 | def get_lr(self): 172 | self.cycle_epoch += 1 173 | 174 | t_max = self.t_max 175 | eta_min = self.eta_min 176 | t = self.cycle_epoch % t_max 177 | 178 | learning_rates = [] 179 | for lr in self.base_lrs: 180 | delta = lr - eta_min 181 | new_lr = eta_min + delta*(1 + math.cos(math.pi * t/t_max)) / 2 182 | learning_rates.append(new_lr) 183 | 184 | if t == 0: 185 | self.cycle_epoch = 0 186 | self.t_max *= self.cycle_mult 187 | 188 | return learning_rates 189 | 190 | 191 | class RNN(nn.Module): 192 | 193 | def __init__(self, vocab_size, n_factors, batch_size, n_hidden, 194 | n_recurrent=1, architecture=nn.RNN, dropout=0.5, 195 | device=DEVICE): 196 | 197 | self.vocab_size = vocab_size 198 | self.n_hidden = n_hidden 199 | self.n_recurrent = n_recurrent 200 | self.device = device 201 | 202 | super().__init__() 203 | self.embed = nn.Embedding(vocab_size, n_factors) 204 | self.rnn = architecture( 205 | n_factors, n_hidden, 206 | dropout=dropout, num_layers=n_recurrent) 207 | self.out = nn.Linear(n_hidden, vocab_size) 208 | self.hidden_state = self.init_hidden(batch_size).to(device) 209 | self.batch_size = batch_size 210 | self.to(device) 211 | 212 | def forward(self, batch): 213 | bs = batch.size(1) 214 | if bs != self.batch_size: 215 | self.hidden_state = self.init_hidden(bs) 216 | self.batch_size = bs 217 | embeddings = self.embed(batch) 218 | rnn_outputs, h = self.rnn(embeddings, self.hidden_state) 219 | self.hidden_state = truncate_history(h) 220 | linear = self.out(rnn_outputs) 221 | return F.log_softmax(linear, dim=-1).view(-1, self.vocab_size) 222 | 223 | def init_hidden(self, batch_size): 224 | if type(self.rnn) == nn.LSTM: 225 | # an LSTM cell requires two hidden states 226 | h = torch.zeros(2, self.n_recurrent, batch_size, self.n_hidden) 227 | else: 228 | h = torch.zeros(self.n_recurrent, batch_size, self.n_hidden) 229 | return h.to(self.device) 230 | 231 | 232 | def truncate_history(v): 233 | """ 234 | Detaches tensor from its computational history. 235 | """ 236 | if type(v) == torch.Tensor: 237 | return v.detach() 238 | else: 239 | return tuple(truncate_history(x) for x in v) 240 | 241 | 242 | def generate_text(model, field, seed, n=500): 243 | """ 244 | Generates text using trained model and an initial seed. 
245 | """ 246 | string = seed 247 | for i in range(n): 248 | indexes = field.numericalize(string) 249 | predictions = model(indexes.transpose(0, 1)) 250 | last_output = predictions[-1] 251 | [most_probable] = torch.multinomial(last_output.exp(), 1) 252 | char = field.vocab.itos[most_probable] 253 | seed = seed[1:] + char 254 | string += char 255 | return string 256 | 257 | 258 | def pretty_print(text, width=80): 259 | print('\n'.join(textwrap.wrap(text, width=width))) 260 | 261 | 262 | def main(): 263 | bs = 64 264 | bptt = 8 265 | n_factors = 50 266 | n_hidden = 256 267 | 268 | dataset, field = create_dataset(bptt, bs) 269 | 270 | # model and optimizer initialization 271 | model = RNN( 272 | vocab_size=dataset.vocab_size, 273 | n_factors=n_factors, 274 | batch_size=bs, 275 | n_hidden=n_hidden, 276 | architecture=nn.LSTM) 277 | optimizer = optim.RMSprop(model.parameters(), lr=1e-3) 278 | sched = CosineAnnealingLR(optimizer, t_max=dataset['train'].total_iters) 279 | 280 | # performance metrics 281 | train_avg_loss = 0 282 | valid_avg_loss = 0 283 | train_batch_num = 0 284 | valid_batch_num = 0 285 | n_epochs = 20 286 | alpha = 0.98 287 | patience = 3 288 | no_improvement = 0 289 | best_loss = np.inf 290 | best_model = None 291 | 292 | for epoch in range(1, n_epochs + 1): 293 | 294 | # update model's parameters 295 | for x, y in dataset['train']: 296 | train_batch_num += 1 297 | sched.step() 298 | optimizer.zero_grad() 299 | loss = F.nll_loss(model(x), y.view(-1)) 300 | loss.backward() 301 | optimizer.step() 302 | train_avg_loss = train_avg_loss*alpha + loss.item()*(1 - alpha) 303 | 304 | # validate performance 305 | for x, y in dataset['valid']: 306 | valid_batch_num += 1 307 | with torch.no_grad(): 308 | loss = F.nll_loss(model(x), y.view(-1)) 309 | valid_avg_loss = valid_avg_loss*alpha + loss.item()*(1 - alpha) 310 | 311 | train_epoch_loss = train_avg_loss / (1 - alpha ** train_batch_num) 312 | valid_epoch_loss = valid_avg_loss / (1 - alpha ** valid_batch_num) 313 | print('Epoch %03d - train: %2.4f - valid: %2.4f' % ( 314 | epoch, train_epoch_loss, valid_epoch_loss 315 | )) 316 | 317 | if valid_epoch_loss >= best_loss: 318 | no_improvement += 1 319 | else: 320 | filename = f'model_{valid_epoch_loss:2.4f}.weights' 321 | torch.save(model.state_dict(), filename) 322 | best_loss = valid_epoch_loss 323 | best_model = filename 324 | no_improvement = 0 325 | 326 | if no_improvement >= patience: 327 | print('Early stopping...') 328 | break 329 | 330 | print('\nGenerated text:') 331 | model.load_state_dict(torch.load(best_model)) 332 | pretty_print(generate_text(model, field, 'For thos')) 333 | 334 | 335 | if __name__ == '__main__': 336 | main() 337 | -------------------------------------------------------------------------------- /rnn/.ipynb_checkpoints/rnn-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Text Classification with RNN\n", 8 | "\n", 9 | "In this notebook, we're going to classify lyrics authors. As a bonus, we'll build our own lyrics generator!" 
/rnn/.ipynb_checkpoints/rnn-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Text Classification with RNN\n",
8 |     "\n",
9 |     "In this notebook, we're going to classify lyrics authors. As a bonus, we'll build our own lyrics generator!"
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "markdown",
14 |    "metadata": {},
15 |    "source": [
16 |     "## Imports "
17 |    ]
18 |   },
19 |   {
20 |    "cell_type": "code",
21 |    "execution_count": 13,
22 |    "metadata": {},
23 |    "outputs": [],
24 |    "source": [
25 |     "from pathlib import Path\n",
26 |     "\n",
27 |     "import pandas as pd"
28 |    ]
29 |   },
30 |   {
31 |    "cell_type": "markdown",
32 |    "metadata": {},
33 |    "source": [
34 |     "## Dataset Downloading\n",
35 |     "\n",
36 |     "The lyrics data used in this analysis is taken from the [azlyrics.com](https://www.azlyrics.com) platform. We use a simple [HTML parser](../azlyrics.py) to collect a small subset of all available texts. \n",
37 |     "\n",
38 |     "> **Disclaimer:** The license agreement of the azlyrics platform allows using their data for educational and personal purposes only. All lyrics texts used in this notebook are the property of their owners.\n",
39 |     "\n",
40 |     "Each song is saved into a separate text file, and the files are gathered into a folder named after the author. Also, each author's folder contains a CSV file that maps the song file name (an ordinal number) onto the original song name.\n",
41 |     "\n",
42 |     "The folder structure used in this analysis looks like this:"
43 |    ]
44 |   },
45 |   {
46 |    "cell_type": "code",
47 |    "execution_count": 8,
48 |    "metadata": {},
49 |    "outputs": [
50 |     {
51 |      "name": "stdout",
52 |      "output_type": "stream",
53 |      "text": [
54 |       "ACDC\r\n",
55 |       "Black Sabbath\r\n",
56 |       "Creedence Clearwater Revival\r\n",
57 |       "Deep Purple\r\n",
58 |       "Dio\r\n",
59 |       "Grateful Dead\r\n",
60 |       "King Crimson\r\n",
61 |       "Nazareth\r\n",
62 |       "Rainbow\r\n",
63 |       "Who\r\n"
64 |      ]
65 |     }
66 |    ],
67 |    "source": [
68 |     "!ls -1 ~/data/azlyrics/many"
69 |    ]
70 |   },
71 |   {
72 |    "cell_type": "markdown",
73 |    "metadata": {},
74 |    "source": [
75 |     "A single folder contains a bunch of enumerated `*.txt` files:"
76 |    ]
77 |   },
78 |   {
79 |    "cell_type": "code",
80 |    "execution_count": 9,
81 |    "metadata": {},
82 |    "outputs": [
83 |     {
84 |      "name": "stdout",
85 |      "output_type": "stream",
86 |      "text": [
87 |       "0.txt\t17.txt\t24.txt\t31.txt\t39.txt\t46.txt\t53.txt\t60.txt\t68.txt\r\n",
88 |       "10.txt\t18.txt\t25.txt\t32.txt\t3.txt\t47.txt\t54.txt\t61.txt\t69.txt\r\n",
89 |       "11.txt\t19.txt\t26.txt\t33.txt\t40.txt\t48.txt\t55.txt\t62.txt\t6.txt\r\n",
90 |       "12.txt\t1.txt\t27.txt\t34.txt\t41.txt\t49.txt\t56.txt\t63.txt\t7.txt\r\n",
91 |       "13.txt\t20.txt\t28.txt\t35.txt\t42.txt\t4.txt\t57.txt\t64.txt\t8.txt\r\n",
92 |       "14.txt\t21.txt\t29.txt\t36.txt\t43.txt\t50.txt\t58.txt\t65.txt\t9.txt\r\n",
93 |       "15.txt\t22.txt\t2.txt\t37.txt\t44.txt\t51.txt\t59.txt\t66.txt\tsongs.csv\r\n",
94 |       "16.txt\t23.txt\t30.txt\t38.txt\t45.txt\t52.txt\t5.txt\t67.txt\r\n"
95 |      ]
96 |     }
97 |    ],
98 |    "source": [
99 |     "!ls ~/data/azlyrics/many/Rainbow"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": 16,
105 |    "metadata": {},
106 |    "outputs": [],
107 |    "source": [
108 |     "PATH = Path.home() / 'data' / 'azlyrics' / 'many'"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 51,
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "def get_songs(author):\n",
118 |     "    \"\"\"Gets the list of songs for a specific author\"\"\"\n",
119 |     "    \n",
120 |     "    records = []\n",
121 |     "    with open(PATH.joinpath(author, 'songs.csv')) as file:\n",
122 |     "        for line in file:\n",
123 |     "            order, _, header = line.strip().partition(',')\n",
124 |     "            order = int(order)\n",
125 |     "            record = {'index': order, 'song': header}\n",
126 |     "            with 
open(PATH.joinpath(author, f'{order}.txt')) as lyrics:\n", 127 | " record['text'] = lyrics.read()\n", 128 | " records.append(record)\n", 129 | " return pd.DataFrame(records).set_index('index')" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 53, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "dio_songs = get_songs('Dio')" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 54, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/html": [ 149 | "
\n", 150 | "\n", 163 | "\n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | "
songtext
index
0Stand Up And ShoutIt's the same old song\\nyou gotta be somewhere...
1Holy DiverHoly Diver\\nYou've been down too long in the m...
2GypsyYeah gypsy\\nshe was straight from home\\nbut yo...
3Caught In The MiddleLooking inside of yourself\\nyou might see some...
4Don't Talk To StrangersDon't talk to strangers hmm hmm hmm hmm hmm hm...
\n", 204 | "
" 205 | ], 206 | "text/plain": [ 207 | " song \\\n", 208 | "index \n", 209 | "0 Stand Up And Shout \n", 210 | "1 Holy Diver \n", 211 | "2 Gypsy \n", 212 | "3 Caught In The Middle \n", 213 | "4 Don't Talk To Strangers \n", 214 | "\n", 215 | " text \n", 216 | "index \n", 217 | "0 It's the same old song\\nyou gotta be somewhere... \n", 218 | "1 Holy Diver\\nYou've been down too long in the m... \n", 219 | "2 Yeah gypsy\\nshe was straight from home\\nbut yo... \n", 220 | "3 Looking inside of yourself\\nyou might see some... \n", 221 | "4 Don't talk to strangers hmm hmm hmm hmm hmm hm... " 222 | ] 223 | }, 224 | "execution_count": 54, 225 | "metadata": {}, 226 | "output_type": "execute_result" 227 | } 228 | ], 229 | "source": [ 230 | "dio_songs.head()" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 60, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "name": "stdout", 240 | "output_type": "stream", 241 | "text": [ 242 | "It's the same old song\n", 243 | "you gotta be somewhere at sometime\n", 244 | "and they'll never let you fly\n", 245 | "It's like broken glass\n", 246 | "you get cut before you see it\n", 247 | "so open up your eyes\n", 248 | "\n", 249 | "You've got desire\n", 250 | "so let it out\n", 251 | "you've got the power\n", 252 | "stand up and shout\n", 253 | "shout, shout, stand up and shout\n", 254 | "\n", 255 | "You got wings of steel\n", 256 | "but they never really move you\n", 257 | "you only seem to crawl\n", 258 | "You've been nailed to the wheel\n", 259 | "but never really turning\n", 260 | "you know you've got to work it out\n", 261 | "\n", 262 | "You've got desire\n", 263 | "so let it out\n", 264 | "you've got the power\n", 265 | "stand up and shout\n", 266 | "shout, shout, stand up and shout\n", 267 | "\n", 268 | "Let it out\n", 269 | "\n", 270 | "You are the strongest chain\n", 271 | "and you're not just some reflection\n", 272 | "so never hide again\n", 273 | "You are the driver\n", 274 | "you own the road\n", 275 | "you are the fire -- go on, explode\n", 276 | "\n", 277 | "Let it out\n", 278 | "\n", 279 | "Stand up and shout\n", 280 | "\n" 281 | ] 282 | } 283 | ], 284 | "source": [ 285 | "print(dio_songs.loc[0].text)" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "## Dataset Preparation" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 1, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "from os.path import join, expanduser, exists\n", 309 | "from urllib.error import URLError\n", 310 | "from urllib.request import urlopen" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 2, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "import numpy as np" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 3, 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [ 328 | "import torch\n", 329 | "from torch import nn\n", 330 | "from torch import optim\n", 331 | "from torch.nn import functional as F\n", 332 | "from torchtext import vocab, data" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 4, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "PATH = expanduser(join('~', 'data', 'fastai', 'nietzsche', 'nietzsche.txt'))" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 
5, 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [ 350 | "def set_random_seed(state=1):\n", 351 | " gens = (np.random.seed, torch.manual_seed, torch.cuda.manual_seed)\n", 352 | " for set_state in gens:\n", 353 | " set_state(state)" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 6, 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [ 362 | "RANDOM_STATE = 1\n", 363 | "set_random_seed(RANDOM_STATE)" 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": {}, 369 | "source": [ 370 | "## Dataset Downloading" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": 7, 376 | "metadata": {}, 377 | "outputs": [], 378 | "source": [ 379 | "def download(url, download_path, expected_size):\n", 380 | " if exists(download_path):\n", 381 | " print('The file was already downloaded')\n", 382 | " return\n", 383 | " \n", 384 | " try:\n", 385 | " r = urlopen(url)\n", 386 | " except URLError as e:\n", 387 | " print(f'Cannot download the data. Error: {e}')\n", 388 | " return\n", 389 | " \n", 390 | " if r.status != 200:\n", 391 | " print(f'HTTP Error: {r.status}')\n", 392 | " return\n", 393 | " \n", 394 | " data = r.read()\n", 395 | " if len(data) != expected_size:\n", 396 | " print(f'Invalid downloaded array size: {len(data)}')\n", 397 | " return\n", 398 | " \n", 399 | " text = data.decode(encoding='utf-8')\n", 400 | " with open(download_path, 'w') as file:\n", 401 | " file.write(text)\n", 402 | " \n", 403 | " print(f'Downloaded: {download_path}')" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 8, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [ 412 | "URL = 'https://s3.amazonaws.com/text-datasets/nietzsche.txt'" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": 9, 418 | "metadata": {}, 419 | "outputs": [ 420 | { 421 | "name": "stdout", 422 | "output_type": "stream", 423 | "text": [ 424 | "The file was already downloaded\n" 425 | ] 426 | } 427 | ], 428 | "source": [ 429 | "download(URL, PATH, 600901)" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 10, 435 | "metadata": {}, 436 | "outputs": [], 437 | "source": [ 438 | "def split(path, train_size=0.8):\n", 439 | " with open(path) as file:\n", 440 | " content = file.read()\n", 441 | " n = int(len(content) * train_size)\n", 442 | " return content[:n], content[n:]" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 11, 448 | "metadata": {}, 449 | "outputs": [ 450 | { 451 | "name": "stdout", 452 | "output_type": "stream", 453 | "text": [ 454 | "480714\n", 455 | "120179\n" 456 | ] 457 | } 458 | ], 459 | "source": [ 460 | "train_text, valid_text = split(PATH)\n", 461 | "print(len(train_text))\n", 462 | "print(len(valid_text))" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": 12, 468 | "metadata": {}, 469 | "outputs": [ 470 | { 471 | "name": "stdout", 472 | "output_type": "stream", 473 | "text": [ 474 | "Vocab size: 85\n" 475 | ] 476 | } 477 | ], 478 | "source": [ 479 | "text = train_text + valid_text\n", 480 | "chars = sorted(list(set(text)))\n", 481 | "vocab_size = len(chars) + 1\n", 482 | "print(f'Vocab size: {vocab_size}')" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 13, 488 | "metadata": {}, 489 | "outputs": [], 490 | "source": [ 491 | "chars.insert(0, '\\0')" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": 14, 497 | "metadata": {}, 498 | "outputs": [], 499 | 
"source": [ 500 | "char_to_index = {c: i for i, c in enumerate(chars)}\n", 501 | "index_to_char = {i: c for i, c in enumerate(chars)}\n", 502 | "train_indicies = [char_to_index[char] for char in train_text]\n", 503 | "valid_indicies = [char_to_index[char] for char in valid_text]" 504 | ] 505 | }, 506 | { 507 | "cell_type": "markdown", 508 | "metadata": {}, 509 | "source": [ 510 | "## Dataset Preparation" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": null, 516 | "metadata": {}, 517 | "outputs": [], 518 | "source": [] 519 | } 520 | ], 521 | "metadata": { 522 | "kernelspec": { 523 | "display_name": "Python 3", 524 | "language": "python", 525 | "name": "python3" 526 | }, 527 | "language_info": { 528 | "codemirror_mode": { 529 | "name": "ipython", 530 | "version": 3 531 | }, 532 | "file_extension": ".py", 533 | "mimetype": "text/x-python", 534 | "name": "python", 535 | "nbconvert_exporter": "python", 536 | "pygments_lexer": "ipython3", 537 | "version": "3.6.4" 538 | } 539 | }, 540 | "nbformat": 4, 541 | "nbformat_minor": 2 542 | } 543 | -------------------------------------------------------------------------------- /rnn/rnn_classify.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import argparse 3 | from textwrap import wrap 4 | from pathlib import Path 5 | from collections import Counter, defaultdict 6 | from multiprocessing import Pool, cpu_count 7 | 8 | import numpy as np 9 | 10 | import spacy 11 | from spacy.symbols import ORTH 12 | 13 | import torch 14 | from torch import nn 15 | from torch import optim 16 | from torch.nn import functional as F 17 | from torch.utils.data import Dataset 18 | 19 | from rules import default_rules 20 | from core.loop import Loop 21 | from core.metrics import accuracy 22 | from core.schedule import CosineAnnealingLR 23 | from core.callbacks import default_callbacks 24 | 25 | 26 | IMDB = Path.home() / 'data' / 'aclImdb' 27 | TRAIN_PATH = IMDB / 'train' 28 | TEST_PATH = IMDB / 'test' 29 | CLASSES = ['neg', 'pos', 'unsup'] 30 | 31 | BOS, FLD, UNK, PAD = SPECIAL_TOKENS = 'xxbox', 'xxfld', 'xxunk', 'xxpad' 32 | 33 | 34 | def main(): 35 | datasets = create_or_restore(IMDB) 36 | train_data = datasets['train_unsup'] 37 | test_data = datasets['test_unsup'] 38 | 39 | bs = 50 40 | bptt = 70 41 | 42 | train = SequenceIterator(to_sequence(train_data), bptt, bs) 43 | valid = SequenceIterator(to_sequence(test_data), bptt, bs) 44 | 45 | lm = LanguageModel( 46 | vocab_sz=train_data.vocab.size, 47 | embed_sz=400, n_hidden=1150) 48 | 49 | dev = device(force_cpu=True) if args.use_cpu else device(args.cuda) 50 | print('Selected device: %s' % dev) 51 | 52 | opt = optim.Adam( 53 | lm.parameters(), lr=1e-3, weight_decay=1e-7, betas=(0.8, 0.99)) 54 | cycle_length = len(train_data) // bs 55 | sched = CosineAnnealingLR(opt, t_max=cycle_length, cycle_mult=1, eta_min=1e-5) 56 | loop = Loop(lm, opt, sched, device=dev) 57 | 58 | loop.run(train_data=train, valid_data=valid, 59 | loss_fn=F.cross_entropy, 60 | metrics=[accuracy], 61 | callbacks=default_callbacks()) 62 | 63 | best_model = loop['Checkpoint'].best_model 64 | print('Best model: %s' % best_model) 65 | with open('best', 'w') as file: 66 | file.write(best_model + '\n') 67 | 68 | 69 | def parse_args(): 70 | argparse.ArgumentParser() 71 | 72 | 73 | def create_or_restore(path: Path): 74 | """Prepared IMDB datasets from raw files, or loads previously saved objects 75 | into memory. 
76 | """ 77 | datasets_dir = path / 'datasets' 78 | 79 | if datasets_dir.exists(): 80 | print('Loading data from %s' % datasets_dir) 81 | datasets = {} 82 | for filename in datasets_dir.glob('*.pickle'): 83 | datasets[filename.stem] = ImdbDataset.load(filename) 84 | 85 | else: 86 | print('Creating folder %s' % datasets_dir) 87 | datasets_dir.mkdir(parents=True) 88 | 89 | print('Preparing datasets...') 90 | 91 | train_sup = ImdbDataset( 92 | IMDB, supervised=True, train=True, 93 | tokenizer=tokenize_in_parallel, 94 | make_vocab=Vocab.make_vocab) 95 | 96 | test_sup = ImdbDataset( 97 | IMDB, supervised=True, train=False, 98 | tokenizer=tokenize_in_parallel, 99 | vocab=train_sup.vocab) 100 | 101 | train_unsup = ImdbDataset( 102 | IMDB, supervised=False, train=True, 103 | tokenizer=tokenize_in_parallel, 104 | make_vocab=Vocab.make_vocab) 105 | 106 | test_unsup = ImdbDataset( 107 | IMDB, supervised=False, train=False, 108 | tokenizer=tokenize_in_parallel, 109 | vocab=train_unsup.vocab) 110 | 111 | datasets = { 112 | 'train_sup': train_sup, 113 | 'test_sup': test_sup, 114 | 'train_unsup': train_unsup, 115 | 'test_unsup': test_unsup 116 | } 117 | 118 | for name, dataset in datasets.items(): 119 | print(f'Saving dataset {name}') 120 | dataset.save(datasets_dir / f'{name}.pickle') 121 | 122 | for name, dataset in datasets.items(): 123 | print(f'{name} vocab size: {dataset.vocab.size}') 124 | 125 | return datasets 126 | 127 | 128 | class ImdbDataset(Dataset): 129 | """Represents the IMDB movie reviews dataset. 130 | 131 | The dataset contains 50000 supervised, and 50000 unsupervised movie reviews 132 | with positive and negative sentiment ratings. The supervised subset of data 133 | is separated into two equally sized sets, with 12500 instances per class. 134 | 135 | The two flags, `supervised` and `train` define which subset of the data 136 | we're going to load. There are four possible cases: 137 | 138 | +-------+------------+--------+-------+---------+ 139 | | Train | Supervised | Folder | Size | Labels? | 140 | +-------+------------+--------+-------+---------+ 141 | | True | True | train | 25000 | Yes | 142 | | False | True | test | 25000 | Yes | 143 | | True | False | train | 75000 | No | 144 | | False | False | test | 25000 | No | 145 | +-------+------------+--------+-------+---------+ 146 | """ 147 | def __init__(self, root: Path, train=True, supervised=False, 148 | tokenizer=None, vocab=None, make_vocab=None): 149 | """ 150 | Args: 151 | root: Path to the folder with train and tests subfolders. 152 | supervised: If True, then the data from supervised subset is loaded. 153 | train: If True, then the data from training subset is loaded. 154 | vocab: Dataset vocab used to convert tokens into digits. 155 | make_vocab: Callable creating vocab from tokens. Note that this 156 | parameter should be provided in case if `vocab` doesn't present. 
157 | 
158 |         """
159 |         assert vocab or make_vocab, 'Neither vocab nor make_vocab provided'
160 | 
161 |         self.root = root
162 |         self.train = train
163 |         self.supervised = supervised
164 | 
165 |         subfolder = root / ('train' if train else 'test')
166 |         if tokenizer is None:
167 |             tokenizer = lambda x: x
168 | 
169 |         if supervised:
170 |             texts, labels = [], []
171 |             for index, label in enumerate(CLASSES):
172 |                 if label == 'unsup':
173 |                     continue
174 |                 for filename in (subfolder/label).glob('*.txt'):
175 |                     texts.append(filename.open('r').read())
176 |                     labels.append(index)
177 |             if train:
178 |                 self.train_labels = labels
179 |             else:
180 |                 self.test_labels = labels
181 | 
182 |         else:
183 |             texts = []
184 |             for label in CLASSES:
185 |                 files_folder = subfolder/label
186 |                 for filename in files_folder.glob('*.txt'):
187 |                     texts.append(filename.open('r').read())
188 | 
189 |         tokens = tokenizer(texts)
190 |         if make_vocab:
191 |             vocab = make_vocab(tokens)
192 |         num_tokens = vocab.numericalize(tokens)
193 | 
194 |         self.vocab = vocab
195 |         if train:
196 |             self.train_data = num_tokens
197 |         else:
198 |             self.test_data = num_tokens
199 | 
200 |     def __getitem__(self, index):
201 |         if self.train and self.supervised:
202 |             return self.train_data[index], self.train_labels[index]
203 |         elif self.train and not self.supervised:
204 |             return self.train_data[index]
205 |         elif not self.train and self.supervised:
206 |             return self.test_data[index], self.test_labels[index]
207 |         else:
208 |             return self.test_data[index]
209 | 
210 |     def __len__(self):
211 |         return len(self.train_data if self.train else self.test_data)
212 | 
213 |     def save(self, path):
214 |         with path.open('wb') as file:
215 |             pickle.dump(self, file)
216 | 
217 |     @staticmethod
218 |     def load(path):
219 |         with path.open('rb') as file:
220 |             dataset = pickle.load(file)
221 |         return dataset
222 | 
223 | 
224 | class SpacyTokenizer:
225 |     """A thin wrapper on top of spaCy tokenization tools."""
226 | 
227 |     def __init__(self, lang='en', rules=default_rules, special_tokens=SPECIAL_TOKENS):
228 |         tokenizer = spacy.load(lang).tokenizer
229 |         if special_tokens:
230 |             for token in special_tokens:
231 |                 tokenizer.add_special_case(token, [{ORTH: token}])
232 | 
233 |         self.rules = rules or []
234 |         self.tokenizer = tokenizer
235 | 
236 |     def tokenize(self, text: str):
237 |         """Converts a single string into a list of tokens."""
238 | 
239 |         for rule in self.rules:
240 |             text = rule(text)
241 |         return [t.text for t in self.tokenizer(text)]
242 | 
243 | 
244 | def tokenize_in_parallel(texts):
245 |     n_workers = cpu_count()
246 |     parts = split_into(texts, len(texts)//n_workers + 1)
247 |     with Pool(n_workers) as pool:
248 |         results = pool.map(tokenize, parts)
249 |     return sum(results, [])
250 | 
251 | 
252 | def tokenize(texts):
253 |     tokenizer = SpacyTokenizer()
254 |     return [tokenizer.tokenize(text) for text in texts]
255 | 
256 | 
257 | def split_into(arr, n):
258 |     return [arr[i:i + n] for i in range(0, len(arr), n)]
259 | 
260 | 
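Editor's note: `split_into` chunks the corpus so that each worker in `tokenize_in_parallel` receives roughly `len(texts) / cpu_count()` documents. A quick check of its behavior (not part of the repository):

parts = split_into(list(range(7)), 3)
print(parts)   # [[0, 1, 2], [3, 4, 5], [6]]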
261 | class Vocab:
262 | 
263 |     def __init__(self, itos):
264 |         self.itos = itos
265 |         self.stoi = defaultdict(int, {v: k for k, v in enumerate(itos)})
266 |         self.size = len(itos)
267 | 
268 |     def __eq__(self, other):
269 |         if not isinstance(other, Vocab):
270 |             raise TypeError(
271 |                 'can only compare with another Vocab instance, '
272 |                 'got %s' % type(other))
273 |         return self.itos == other.itos
274 | 
275 |     def save(self, path: Path):
276 |         with path.open('wb') as file:
277 |             pickle.dump(self.itos, file)
278 | 
279 |     @staticmethod
280 |     def load(path: Path) -> 'Vocab':
281 |         with path.open('rb') as file:
282 |             itos = pickle.load(file)
283 |         return Vocab(itos)
284 | 
285 |     @staticmethod
286 |     def make_vocab(tokens, min_freq: int=3, max_vocab: int=60000, pad=PAD, unknown=UNK) -> 'Vocab':
287 |         freq = Counter(token for sentence in tokens for token in sentence)
288 |         most_common = freq.most_common(max_vocab)
289 |         itos = [token for token, count in most_common if count >= min_freq]
290 |         itos.insert(0, pad)
291 |         if unknown in itos:
292 |             itos.remove(unknown)
293 |         itos.insert(0, unknown)
294 |         return Vocab(itos)
295 | 
296 |     def numericalize(self, texts):
297 |         return [
298 |             np.array([self.stoi[token] for token in text], dtype=np.int64)
299 |             for text in texts]
300 | 
301 |     def textify_all(self, samples):
302 |         return [self.textify(sample) for sample in samples]
303 | 
304 |     def textify(self, tokens):
305 |         return ' '.join([self.itos[number] for number in tokens])
306 | 
307 | 
308 | def compact_print(string):
309 |     print('\n'.join(wrap(string, width=80)))
310 | 
311 | 
312 | 
313 | class SequenceIterator:
314 |     """A wrapper on top of the IMDB dataset that converts numericalized
315 |     observations into a format suitable for training a language model.
316 | 
317 |     To train a language model, one needs to convert an unsupervised dataset
318 |     into two 2D arrays of tokens. The first array contains the "previous"
319 |     words, and the second one the "next" words. Each "previous" word is used
320 |     to predict the "next" one, so we end up with a supervised training task.
321 |     """
322 |     def __init__(self, seq, bptt=10, split_size=64, random_length=True,
323 |                  flatten_target=True):
324 | 
325 |         n_batches = seq.shape[0] // split_size
326 |         truncated = seq[:n_batches * split_size]
327 |         batches = truncated.view(split_size, -1).t().contiguous()
328 | 
329 |         self.bptt = bptt
330 |         self.split_size = split_size
331 |         self.random_length = random_length
332 |         self.flatten_target = flatten_target
333 |         self.batches = batches
334 |         self.curr_iter = 0
335 |         self.curr_line = 0
336 |         self.total_lines = batches.shape[0]
337 |         self.total_iters = self.total_lines // self.bptt - 1
338 | 
339 |     @property
340 |     def completed(self):
341 |         if self.curr_line >= self.total_lines - 1:
342 |             return True
343 |         if self.curr_iter >= self.total_iters:
344 |             return True
345 |         return False
346 | 
347 |     def __iter__(self):
348 |         self.curr_line = self.curr_iter = 0
349 |         return self
350 | 
351 |     def __next__(self):
352 |         return self.next()
353 | 
354 |     def next(self):
355 |         if self.completed:
356 |             raise StopIteration()
357 |         seq_len = self.get_sequence_length()
358 |         batch = self.get_batch(seq_len)
359 |         self.curr_line += seq_len
360 |         self.curr_iter += 1
361 |         return batch
362 | 
363 |     def get_sequence_length(self):
364 |         """
365 |         Returns the length of the sequence taken from the dataset to form a batch.
366 | 
367 |         By default, this value is based on the bptt parameter, but during
368 |         training it is randomized so that batches are built from sequences
369 |         of slightly different lengths.
370 |         """
371 |         if not self.random_length:
372 |             return self.bptt
373 |         bptt = self.bptt
374 |         if np.random.random() >= 0.95:
375 |             bptt /= 2
376 |         seq_len = max(5, int(np.random.normal(bptt, 5)))
377 |         return seq_len
378 | 
379 |     def get_batch(self, seq_len):
380 |         """
381 |         Picks training and target batches from the source depending on the
382 |         current iteration number.
383 | """ 384 | i, source = self.curr_line, self.batches 385 | seq_len = min(seq_len, self.total_lines - 1 - i) 386 | X = source[i:i + seq_len].contiguous() 387 | y = source[(i + 1):(i + 1) + seq_len].contiguous() 388 | if self.flatten_target: 389 | y = y.view(-1) 390 | return X, y 391 | 392 | 393 | def to_sequence(dataset): 394 | seq = concat(dataset.train_data if dataset.train else dataset.test_data) 395 | return torch.LongTensor(seq) 396 | 397 | 398 | def concat(arrays): 399 | seq = [] 400 | dtype = arrays[0].dtype 401 | for arr in arrays: 402 | seq.extend(arr.tolist()) 403 | return np.array(seq, dtype=dtype) 404 | 405 | 406 | def to_np(tensor): 407 | return tensor.detach().cpu().numpy() 408 | 409 | 410 | class RNNCore(nn.Module): 411 | 412 | init_range = 0.1 413 | 414 | def __init__(self, vocab_sz: int, embed_sz: int, n_hidden: int, 415 | n_layers: int, pad_idx: int): 416 | 417 | def get_size(index): 418 | """Returns RNN cell input and hidden size depending on its position 419 | in the network. 420 | """ 421 | if index == 0: 422 | return embed_sz, n_hidden 423 | elif index == n_layers - 1: 424 | return n_hidden, embed_sz 425 | return n_hidden, n_hidden 426 | 427 | 428 | def create_lstm(): 429 | return [nn.LSTM(*get_size(l), 1) for l in range(n_layers)] 430 | 431 | 432 | super().__init__() 433 | self.encoder = nn.Embedding(vocab_sz, embed_sz, padding_idx=pad_idx) 434 | self.rnns = nn.ModuleList(create_lstm()) 435 | 436 | self.hidden_sizes = [layer.hidden_size for layer in self.rnns] 437 | self.embed_sz = embed_sz 438 | self.n_hidden = n_hidden 439 | self.n_layers = n_layers 440 | self.bs = None 441 | self.hidden = None 442 | self.weights = None 443 | self._init() 444 | 445 | @property 446 | def output_size(self): 447 | return self.hidden_sizes[-1] 448 | 449 | def forward(self, tensor): 450 | seq_len, bs = tensor.size() 451 | if bs != self.bs: 452 | self.bs = bs 453 | self.create_hidden() 454 | 455 | raw_output = self.encoder(tensor) 456 | raw_outputs, new_hidden = [], [] 457 | for index, rnn in enumerate(self.rnns): 458 | raw_output, new_h = rnn(raw_output, self.hidden[index]) 459 | new_hidden.append(new_h) 460 | raw_outputs.append(raw_output) 461 | self.hidden = truncate_history(new_hidden) 462 | return raw_outputs 463 | 464 | def reset(self): 465 | [r.reset() for r in self.rnns if hasattr(r, 'reset')] 466 | 467 | def create_hidden(self): 468 | self.reset() 469 | self.weights = next(self.parameters()).data 470 | self.hidden = [ 471 | (self._hidden(sz), self._hidden(sz)) 472 | for sz in self.hidden_sizes] 473 | 474 | def _hidden(self, sz): 475 | return self.weights.new(1, self.bs, sz).zero_() 476 | 477 | def _init(self): 478 | a = self.init_range 479 | self.encoder.weight.data.uniform_(-a, a) 480 | 481 | 482 | class WeightDropout(nn.Module): 483 | 484 | def __init__(self, module: nn.Module, weight_p: float, 485 | layer_names=('weight_hh_10')): 486 | 487 | super().__init__() 488 | self.module = module 489 | self.weight_p = weight_p 490 | self.layer_names = layer_names 491 | 492 | for layer in self.layer_names: 493 | w = getattr(self.module, layer) 494 | self.register_parameter(f'{layer}_raw', nn.Parameter(w.data)) 495 | 496 | def forward(self, *tensors): 497 | self._set_weights() 498 | return self.module.forward(*tensors) 499 | 500 | def reset(self): 501 | for layer in self.layer_names: 502 | raw_w = getattr(self, f'{layer}_raw') 503 | self.module._parameters[layer] = F.dropout( 504 | raw_w, p=self.weight_p, training=False) 505 | if hasattr(self.module, 'reset'): 506 | 
507 | 
508 |     def _set_weights(self):
509 |         for layer in self.layer_names:
510 |             raw_w = getattr(self, f'{layer}_raw')
511 |             self.module._parameters[layer] = F.dropout(
512 |                 raw_w, p=self.weight_p, training=self.training)
513 | 
514 | 
515 | class LanguageModel(nn.Module):
516 |     """An RNN-based model predicting the next word from the previous one."""
517 | 
518 |     init_range = 0.1
519 | 
520 |     def __init__(self, vocab_sz: int, embed_sz: int, n_hidden: int=1000,
521 |                  n_layers: int=3, bias: bool=True, padding_idx=1):
522 | 
523 |         super().__init__()
524 |         self.rnn = RNNCore(vocab_sz, embed_sz, n_hidden, n_layers, padding_idx)
525 |         self.decoder = nn.Linear(self.rnn.output_size, vocab_sz, bias=bias)
526 |         self._init(bias)
527 | 
528 |     def forward(self, tensor):
529 |         raw_outputs = self.rnn.forward(tensor)
530 |         last = raw_outputs[-1]
531 |         input_shape = last.size(0)*last.size(1), last.size(2)
532 |         decoded = self.decoder(last.view(input_shape))
533 |         return decoded
534 | 
535 |     def _init(self, bias):
536 |         a = self.init_range
537 |         self.decoder.weight.data.uniform_(-a, a)
538 |         if bias:
539 |             self.decoder.bias.data.zero_()
540 | 
541 | 
542 | def truncate_history(v):
543 |     """
544 |     Detaches a tensor (or a tuple of tensors) from its computational history.
545 |     """
546 |     if isinstance(v, torch.Tensor):
547 |         return v.detach()
548 |     else:
549 |         return tuple(truncate_history(x) for x in v)
550 | 
551 | 
552 | def device(i=0, force_cpu=False):
553 |     name = f'cuda:{i}' if torch.cuda.is_available() else 'cpu'
554 |     if force_cpu:
555 |         name = 'cpu'
556 |     return torch.device(name)
557 | 
558 | 
559 | if __name__ == '__main__':
560 |     main()
561 | --------------------------------------------------------------------------------
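Editor's note: `WeightDropout` is defined in this file but never wired into `RNNCore`. A minimal sketch of how it would wrap a single-layer LSTM; this relies on the same private `_parameters` trick the class itself uses, which newer PyTorch releases may not accept:

import torch
from torch import nn

lstm = nn.LSTM(input_size=10, hidden_size=20)
wrapped = WeightDropout(lstm, weight_p=0.5, layer_names=('weight_hh_l0',))
x = torch.randn(7, 4, 10)            # (seq_len, batch, input_size)
out, (h, c) = wrapped(x)
print(out.shape)                     # torch.Size([7, 4, 20])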