├── day_2 ├── datautils │ ├── __init__.py │ ├── serve.py │ └── vocabulary.py ├── 0_favorite_practices_and_debugging.ipynb └── 01_Trump_Tweet_LM.ipynb ├── requirements.txt ├── .gitignore ├── README.md └── day_1 ├── 0_Using_Pretrained_Embeddings.ipynb ├── figures └── intro_to_pytorch │ ├── computational_graph_forward.svg │ ├── pytorch_variable.svg │ └── computational_graph_backward.svg ├── 1_PyTorch_basics.ipynb └── 4_Chinese_document_classification.ipynb /day_2/datautils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | nltk 3 | annoy 4 | seaborn 5 | numpy 6 | matplotlib 7 | scikit-learn 8 | tqdm 9 | -------------------------------------------------------------------------------- /day_2/datautils/serve.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.data import DataLoader 3 | from torch.autograd import Variable 4 | 5 | 6 | class DataServer(object): 7 | def __init__(self, vectorized_data): 8 | self.vectorized_data = vectorized_data 9 | self.gpu_mode = False 10 | self.volatile_mode = False 11 | 12 | def serve_batches(self, batch_size, num_batches=-1, num_workers=0): 13 | datagen = DataLoader(self.vectorized_data, batch_size=batch_size, 14 | shuffle=True, num_workers=num_workers) 15 | for batch_index, batch in enumerate(datagen): 16 | out = {} 17 | for key, val in batch.items(): 18 | if not isinstance(val, Variable): 19 | val = Variable(val) 20 | if self.gpu_mode: 21 | val = val.cuda() 22 | if self.volatile_mode: 23 | val = val.volatile() 24 | out[key] = val 25 | 26 | yield out 27 | if num_batches > 0 and batch_index > num_batches: 28 | break 29 | 30 | def enable_gpu_mode(self): 31 | self.gpu_mode = True 32 | 33 | def disable_gpu_mode(self): 34 | self.gpu_mode = False 35 | 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | day_1/data/* 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | .DS_Store 104 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pytorch-nlp-tutorial 2 | ## Day 1 3 | [Day 1 Slides](https://drive.google.com/open?id=1uCzIGJT2ni9_ZlcwUHEcUJuyJbjHTKc3izsFrPyHw4M) 4 | 5 | ### Day 1 Data 6 | ``` 7 | 1. mkdir day_1/data 8 | 2. copy contents of this drive in the data folder day_1/data 9 | 10 | https://drive.google.com/open?id=0B1sSP-aCtfuHRFJWTkdUbjFUZDQ 11 | 12 | 3. Download glove and unpack contents in day_1/data/glove 13 | http://nlp.stanford.edu/data/glove.6B.zip 14 | ``` 15 | 16 | ## Day 2 17 | [Day 2 Slides](https://drive.google.com/open?id=1ZRzwllU7tMlQJevGhLYJo_woEA4ABPXUQHGFb8Ijbws) 18 | 19 | 20 | ### Day 2 Data 21 | 22 | 1. [Trump Tweets](https://drive.google.com/a/joostware.com/file/d/0B2hg7DTHpfLsNUxFcndiNlVxSmM/view?usp=sharing) 23 | 2. Not-pruned Names dataset 24 | a. [Train](https://drive.google.com/open?id=0B2hg7DTHpfLsTVNfNnpDVGZmZTQ) 25 | b. [Test](https://drive.google.com/open?id=0B2hg7DTHpfLsWmtQT1lXREx2Qmc) 26 | c. [Day One Version](https://drive.google.com/open?id=0B2hg7DTHpfLsMzg5QlRyMzhfQ1U) 27 | 3. [Stanford NLI dataset](https://drive.google.com/open?id=0B2hg7DTHpfLsTy1BTlk0dTBwREU) 28 | 4. [Amazon Reviews small train](https://drive.google.com/open?id=0B2hg7DTHpfLsbk1yME5HN0dxVmc) 29 | 30 | ``` 31 | # install anaconda (if needed) 32 | 33 | conda create -n dl4nlp python=3.6 34 | source activate dl4nlp 35 | conda install ipython 36 | conda install jupyter 37 | python -m ipykernel install --user --name dl4nlp 38 | 39 | # install pytorch 40 | # visit pytorch.org 41 | 42 | # assume we are inside a folder dl4nlp 43 | # note: that if you alternatively download the zip and unzip it to 44 | # a folder, it will be named something else 45 | git clone https://github.com/joosthub/pytorch-nlp-tutorial.git 46 | cd pytorch-nlp-tutorial 47 | 48 | pip install -r requirements.txt 49 | 50 | # going back to root folder 51 | cd .. 52 | 53 | # install torch text 54 | git clone https://github.com/pytorch/text.git 55 | cd text 56 | python setup.py install 57 | ``` 58 | 59 | -------------------------------------------------------------------------------- /day_1/0_Using_Pretrained_Embeddings.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "from torchtext.vocab import load_word_vectors\n", 11 | "from annoy import AnnoyIndex\n", 12 | "from tqdm import tqdm_notebook as tqdm\n", 13 | "\n", 14 | "from __future__ import print_function" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "the -0.038194 -0.24487 0.72812 -0.39961 0.083172 0.043953 -0.39141 0.3344 -0.57545 0.087459 0.28787 -0.06731 0.30906 -0.26384 -0.13231 -0.20757 0.33395 -0.33848 -0.31743 -0.48336 0.1464 -0.37304 0.34577 0.052041 0.44946 -0.46971 0.02628 -0.54155 -0.15518 -0.14107 -0.039722 0.28277 0.14393 0.23464 -0.31021 0.086173 0.20397 0.52624 0.17164 -0.082378 -0.71787 -0.41531 0.20335 -0.12763 0.41367 0.55187 0.57908 -0.33477 -0.36559 -0.54857 -0.062892 0.26584 0.30205 0.99775 -0.80481 -3.0243 0.01254 -0.36942 2.2167 0.72201 -0.24978 0.92136 0.034514 0.46745 1.1079 -0.19358 -0.074575 0.23353 -0.052062 -0.22044 0.057162 -0.15806 -0.30798 -0.41625 0.37972 0.15006 -0.53212 -0.2055 -1.2526 0.071624 0.70565 0.49744 -0.42063 0.26148 -1.538 -0.30223 -0.073438 -0.28312 0.37104 -0.25217 0.016215 -0.017099 -0.38984 0.87424 -0.72569 -0.51058 -0.52028 -0.1459 0.8278 0.27062\r\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "!head -n 1 data/glove/glove.6B.100d.txt" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 17, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "class PreTrainedEmbeddings(object):\n", 41 | " def __init__(self, path='data/glove', file='glove.6B', dimension=100):\n", 42 | " self.word_to_index, self.word_vectors, self.word_vector_size = load_word_vectors(path, file, dimension)\n", 43 | " self.index_to_word = {v: k for k, v in self.word_to_index.items()}\n", 44 | " self.index = AnnoyIndex(self.word_vector_size, metric='euclidean')\n", 45 | " print('Building Index')\n", 46 | " for _, i in tqdm(self.word_to_index.items()):\n", 47 | " self.index.add_item(i, self.word_vectors[i])\n", 48 | " self.index.build(50)\n", 49 | " \n", 50 | " def get_embedding(self, word):\n", 51 | " return self.word_vectors[self.word_to_index[word]]\n", 52 | " \n", 53 | " def closest(self, word, n=1):\n", 54 | " vector = self.get_embedding(word)\n", 55 | " nn_indices = self.index.get_nns_by_vector(vector, n)\n", 56 | " return [self.index_to_word[neighbor] for neighbor in nn_indices]\n", 57 | " \n", 58 | " def closest_v(self, vector, n=1):\n", 59 | " nn_indices = self.index.get_nns_by_vector(vector, n)\n", 60 | " return [self.index_to_word[neighbor] for neighbor in nn_indices]\n", 61 | " \n", 62 | " def sim(self, w1, w2):\n", 63 | " return torch.dot(self.get_embedding(w1), self.get_embedding(w2))" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 18, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "name": "stdout", 73 | "output_type": "stream", 74 | "text": [ 75 | "loading word vectors from data/glove/glove.6B.100d.pt\n", 76 | "Building Index\n" 77 | ] 78 | }, 79 | { 80 | "data": { 81 | "application/vnd.jupyter.widget-view+json": { 82 | "model_id": "b50fede3640846779df8ab026431259c" 83 | } 84 | }, 85 | "metadata": {}, 86 | "output_type": "display_data" 87 | }, 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "glove = PreTrainedEmbeddings()" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 19, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "['apple', 'microsoft', 'ibm', 'intel', 'pc']" 109 | ] 110 | }, 111 | "execution_count": 19, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | } 115 | ], 116 | "source": [ 117 | "glove.closest('apple', n=5)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 20, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/plain": [ 128 | "['plane', 'jet', 'flight', 'crashed', 'crash']" 129 | ] 130 | }, 131 | "execution_count": 20, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "glove.closest('plane', n=5)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 21, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "(26.873451232910156, 16.50149154663086)" 149 | ] 150 | }, 151 | "execution_count": 21, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "glove.sim('beer', 'wine'), glove.sim('beer', 'gasoline')" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 22, 163 | "metadata": { 164 | "collapsed": true 165 | }, 166 | "outputs": [], 167 | "source": [ 168 | "def SAT_analogy(w1, w2, w3):\n", 169 | " '''\n", 170 | " Solves problems of the type:\n", 171 | " w1 : w2 :: w3 : __\n", 172 | " '''\n", 173 | " closest_words = []\n", 174 | " try:\n", 175 | " w1v = glove.get_embedding(w1)\n", 176 | " w2v = glove.get_embedding(w2)\n", 177 | " w3v = glove.get_embedding(w3)\n", 178 | " w4v = w3v + (w2v - w1v)\n", 179 | " closest_words = glove.closest_v(w4v, n=5)\n", 180 | " closest_words = [w for w in closest_words if w not in [w1, w2, w3]]\n", 181 | " except:\n", 182 | " pass\n", 183 | " if len(closest_words) == 0:\n", 184 | " print(':-(')\n", 185 | " else:\n", 186 | " print('{} : {} :: {} : {}'.format(w1, w2, w3, closest_words[0]))" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 23, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "name": "stdout", 196 | "output_type": "stream", 197 | "text": [ 198 | "man : superman :: woman : supergirl\n" 199 | ] 200 | } 201 | ], 202 | "source": [ 203 | "SAT_analogy('man', 'superman', 'woman')" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 24, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "name": "stdout", 213 | "output_type": "stream", 214 | "text": [ 215 | "fly : plane :: sail : ship\n" 216 | ] 217 | } 218 | ], 219 | "source": [ 220 | "SAT_analogy('fly', 'plane', 'sail')" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 25, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "name": "stdout", 230 | "output_type": "stream", 231 | "text": [ 232 | "man : king :: woman : queen\n" 233 | ] 234 | } 235 | ], 236 | "source": [ 237 | "SAT_analogy('man', 'king', 'woman')" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 26, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "name": "stdout", 247 | "output_type": "stream", 248 | "text": [ 249 | "eat : breakfast :: drink : drinks\n" 250 | ] 251 | } 252 | ], 253 | "source": [ 254 | "SAT_analogy('eat', 'breakfast', 'drink')" 255 | ] 256 | } 257 | ], 258 | "metadata": { 259 | "kernelspec": { 260 | "display_name": "pinkslip", 261 | "language": "python", 262 | "name": "pinkslip" 263 | }, 264 | "language_info": { 265 | "codemirror_mode": { 266 | "name": "ipython", 267 | "version": 3 268 | }, 269 | "file_extension": ".py", 270 | "mimetype": "text/x-python", 271 | "name": "python", 272 | "nbconvert_exporter": "python", 273 | "pygments_lexer": "ipython3", 274 | "version": "3.6.1" 275 | } 276 | }, 277 | "nbformat": 4, 278 | "nbformat_minor": 2 279 | } 280 | -------------------------------------------------------------------------------- /day_2/datautils/vocabulary.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | 3 | import numpy as np 4 | from torch.utils.data import Dataset 5 | import six 6 | 7 | 8 | class Vocabulary(object): 9 | """ 10 | An implementation that manages the interface between a token dataset and the 11 | machine learning algorithm. 12 | """ 13 | 14 | def __init__(self, use_unks=False, unk_token="", 15 | use_mask=False, mask_token="", use_start_end=False, 16 | start_token="", end_token=""): 17 | """ 18 | Args: 19 | use_unks (bool): The vocabulary will output UNK tokens for out of 20 | vocabulary items. 21 | [default=False] 22 | unk_token (str): The token used for unknown tokens. 23 | If `use_unks` is True, this will be added to the vocabulary. 24 | [default=''] 25 | use_mask (bool): The vocabulary will reserve the 0th index for a mask token. 26 | This is used to handle variable lengths in sequence models. 27 | [default=False] 28 | mask_token (str): The token used for the mask. 29 | Note: mostly a placeholder; it's unlikely the token will be seen. 30 | [default=''] 31 | use_start_end (bool): The vocabulary will reserve indices for two tokens 32 | that represent the start and end of a sequence. 33 | [default=False] 34 | start_token: The token used to indicate the start of a sequence. 35 | If `use_start_end` is True, this will be added to the vocabulary. 36 | [default=''] 37 | end_token: The token used to indicate the end of a sequence 38 | If `use_start_end` is True, this will be added to the vocabulary. 39 | [default=''] 40 | """ 41 | 42 | self._mapping = {} # str -> int 43 | self._flip = {} # int -> str; 44 | self._counts = Counter() # int -> int; count occurrences 45 | self._forced_unks = set() # force tokens to unk (e.g. if < 5 occurrences) 46 | self._i = 0 47 | self._frozen = False 48 | self._frequency_threshold = -1 49 | 50 | # mask token for use in masked recurrent networks 51 | # usually need to be the 0th index 52 | self.use_mask = use_mask 53 | self.mask_token = mask_token 54 | if self.use_mask: 55 | self.add(self.mask_token) 56 | 57 | # unk token for out of vocabulary tokens 58 | self.use_unks = use_unks 59 | self.unk_token = unk_token 60 | if self.use_unks: 61 | self.add(self.unk_token) 62 | 63 | # start token for sequence models 64 | self.use_start_end = use_start_end 65 | self.start_token = start_token 66 | self.end_token = end_token 67 | if self.use_start_end: 68 | self.add(self.start_token) 69 | self.add(self.end_token) 70 | 71 | def iterkeys(self): 72 | for k in self._mapping.keys(): 73 | if k == self.unk_token or k == self.mask_token: 74 | continue 75 | else: 76 | yield k 77 | 78 | def keys(self): 79 | return list(self.iterkeys()) 80 | 81 | def iteritems(self): 82 | for key, value in self._mapping.items(): 83 | if key == self.unk_token or key == self.mask_token: 84 | continue 85 | yield key, value 86 | 87 | def items(self): 88 | return list(self.iteritems()) 89 | 90 | def values(self): 91 | return [value for _, value in self.iteritems()] 92 | 93 | def __getitem__(self, k): 94 | if self._frozen: 95 | if k in self._mapping: 96 | out_index = self._mapping[k] 97 | elif self.use_unks: 98 | out_index = self.unk_index 99 | else: # case: frozen, don't want unks, raise exception 100 | raise VocabularyException("Vocabulary is frozen. " + 101 | "Key '{}' not found.".format(k)) 102 | if out_index in self._forced_unks: 103 | out_index = self.unk_index 104 | elif k in self._mapping: # case: normal 105 | out_index = self._mapping[k] 106 | self._counts[out_index] += 1 107 | else: 108 | out_index = self._mapping[k] = self._i 109 | self._i += 1 110 | self._flip[out_index] = k 111 | self._counts[out_index] = 1 112 | 113 | return out_index 114 | 115 | def add(self, k): 116 | return self.__getitem__(k) 117 | 118 | def add_many(self, x): 119 | return [self.add(k) for k in x] 120 | 121 | def lookup(self, i): 122 | try: 123 | return self._flip[i] 124 | except KeyError: 125 | raise VocabularyException("Key {} not in Vocabulary".format(i)) 126 | 127 | def lookup_many(self, x): 128 | for k in x: 129 | yield self.lookup(k) 130 | 131 | def map(self, sequence, include_start_end=False): 132 | if include_start_end: 133 | yield self.start_index 134 | 135 | for item in sequence: 136 | yield self[item] 137 | 138 | if include_start_end: 139 | yield self.end_index 140 | 141 | def freeze(self, use_unks=False, frequency_cutoff=-1): 142 | self.use_unks = use_unks 143 | self._frequency_cutoff = frequency_cutoff 144 | 145 | if use_unks and self.unk_token not in self: 146 | self.add(self.unk_token) 147 | 148 | if self._frequency_cutoff > 0: 149 | for token, count in self._counts.items(): 150 | if count < self._frequency_cutoff: 151 | self._forced_unks.add(token) 152 | 153 | self._frozen = True 154 | 155 | def unfreeze(self): 156 | self._frozen = False 157 | 158 | def get_counts(self): 159 | return {self._flip[i]: count for i, count in self._counts.items()} 160 | 161 | def get_count(self, token=None, index=None): 162 | if token is None and index is None: 163 | return None 164 | elif token is not None and index is not None: 165 | print("Cannot do two things at once; choose one") 166 | elif token is not None: 167 | return self._counts[self[token]] 168 | elif index is not None: 169 | return self._counts[index] 170 | else: 171 | raise Exception("impossible condition") 172 | 173 | @property 174 | def unk_index(self): 175 | if self.unk_token not in self: 176 | return None 177 | return self._mapping[self.unk_token] 178 | 179 | @property 180 | def mask_index(self): 181 | if self.mask_token not in self: 182 | return None 183 | return self._mapping[self.mask_token] 184 | 185 | @property 186 | def start_index(self): 187 | if self.start_token not in self: 188 | return None 189 | return self._mapping[self.start_token] 190 | 191 | @property 192 | def end_index(self): 193 | if self.end_token not in self: 194 | return None 195 | return self._mapping[self.end_token] 196 | 197 | def __contains__(self, k): 198 | return k in self._mapping 199 | 200 | def __len__(self): 201 | return len(self._mapping) 202 | 203 | def __repr__(self): 204 | return "".format(len(self), self._frozen) 205 | 206 | 207 | def get_serializable_contents(self): 208 | """ 209 | Creats a dict containing the necessary information to recreate this instance 210 | """ 211 | config = {"_mapping": self._mapping, 212 | "_flip": self._flip, 213 | "_frozen": self._frozen, 214 | "_i": self._i, 215 | "_counts": list(self._counts.items()), 216 | "_frequency_threshold": self._frequency_threshold, 217 | "use_unks": self.use_unks, 218 | "unk_token": self.unk_token, 219 | "use_mask": self.use_mask, 220 | "mask_token": self.mask_token, 221 | "use_start_end": self.use_start_end, 222 | "start_token": self.start_token, 223 | "end_token": self.end_token} 224 | return config 225 | 226 | @classmethod 227 | def deserialize_from_contents(cls, content): 228 | """ 229 | Recreate a Vocabulary instance; expect same dict as output in `serialize` 230 | """ 231 | try: 232 | _mapping = content.pop("_mapping") 233 | _flip = content.pop("_flip") 234 | _i = content.pop("_i") 235 | _frozen = content.pop("_frozen") 236 | _counts = content.pop("_counts") 237 | _frequency_threshold = content.pop("_frequency_threshold") 238 | except KeyError: 239 | raise Exception("unable to deserialize vocabulary") 240 | if isinstance(list(_flip.keys())[0], six.string_types): 241 | _flip = {int(k): v for k, v in _flip.items()} 242 | out = cls(**content) 243 | out._mapping = _mapping 244 | out._flip = _flip 245 | out._i = _i 246 | out._counts = Counter(dict(_counts)) 247 | out._frequency_threshold = _frequency_threshold 248 | 249 | if _frozen: 250 | out.freeze(out.use_unks) 251 | 252 | return out 253 | 254 | -------------------------------------------------------------------------------- /day_1/figures/intro_to_pytorch/computational_graph_forward.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /day_1/figures/intro_to_pytorch/pytorch_variable.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /day_1/figures/intro_to_pytorch/computational_graph_backward.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /day_1/1_PyTorch_basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 8, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import torch\n", 12 | "import numpy as np\n", 13 | "from __future__ import print_function" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "## What is a Tensor?\n", 21 | "- Scalar is a single number.\n", 22 | "- Vector is an array of numbers.\n", 23 | "- Matrix is a 2-D array of numbers.\n", 24 | "- Tensors are N-D arrays of numbers.\n", 25 | "![caption](figures/intro_to_pytorch/tensors.svg)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 9, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "\n", 40 | " 0.0000e+00 -8.5899e+09 -1.0257e-38\n", 41 | "-8.5920e+09 nan 4.5817e-41\n", 42 | " 6.7019e-10 4.6067e-05 4.1955e-08\n", 43 | " 1.9726e+02 2.9481e+03 1.9527e+02\n", 44 | " 1.2389e+01 4.5699e-08 -1.3346e-38\n", 45 | "[torch.FloatTensor of size 5x3]\n", 46 | "\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "x = torch.Tensor(5, 3)\n", 52 | "print(x)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 10, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "text/plain": [ 65 | "\n", 66 | " 0 0 0\n", 67 | " 0 0 0\n", 68 | " 0 0 0\n", 69 | " 0 0 0\n", 70 | " 0 0 0\n", 71 | "[torch.FloatTensor of size 5x3]" 72 | ] 73 | }, 74 | "execution_count": 10, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "x.zero_()" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 12, 86 | "metadata": { 87 | "collapsed": false 88 | }, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "\n", 94 | " 1 2 3\n", 95 | " 4 5 6\n", 96 | " 7 8 9\n", 97 | "[torch.FloatTensor of size 3x3]" 98 | ] 99 | }, 100 | "execution_count": 12, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "torch.Tensor([[1, 2, 3], # rank 2 tensor\n", 107 | " [4, 5, 6],\n", 108 | " [7, 8, 9]])" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 13, 114 | "metadata": { 115 | "collapsed": false 116 | }, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "torch.Size([5, 3])" 122 | ] 123 | }, 124 | "execution_count": 13, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "x.size()" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 15, 136 | "metadata": { 137 | "collapsed": false 138 | }, 139 | "outputs": [ 140 | { 141 | "name": "stdout", 142 | "output_type": "stream", 143 | "text": [ 144 | "\n", 145 | " 0.1871 0.6335 0.3372\n", 146 | " 0.5040 0.2603 0.7920\n", 147 | " 0.2832 0.6153 0.2146\n", 148 | " 0.9213 0.6229 0.4232\n", 149 | " 0.0121 0.2337 0.6289\n", 150 | "[torch.FloatTensor of size 5x3]\n", 151 | "\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "x = torch.rand(5, 3)\n", 157 | "print(x)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 25, 163 | "metadata": { 164 | "collapsed": false 165 | }, 166 | "outputs": [ 167 | { 168 | "name": "stdout", 169 | "output_type": "stream", 170 | "text": [ 171 | "\n", 172 | " 0.0141 0.9075 0.3624\n", 173 | " 0.1488 0.9011 0.1987\n", 174 | " 0.4160 0.3243 0.3444\n", 175 | " 0.3825 0.1793 0.9735\n", 176 | " 0.9978 0.1261 0.7800\n", 177 | "[torch.DoubleTensor of size 5x3]\n", 178 | "\n" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "npy = np.random.rand(5, 3)\n", 184 | "y = torch.from_numpy(npy)\n", 185 | "print(y)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 29, 191 | "metadata": { 192 | "collapsed": false 193 | }, 194 | "outputs": [ 195 | { 196 | "ename": "TypeError", 197 | "evalue": "add received an invalid combination of arguments - got (torch.DoubleTensor), but expected one of:\n * (float value)\n didn't match because some of the arguments have invalid types: (\u001b[31;1mtorch.DoubleTensor\u001b[0m)\n * (torch.FloatTensor other)\n didn't match because some of the arguments have invalid types: (\u001b[31;1mtorch.DoubleTensor\u001b[0m)\n * (torch.SparseFloatTensor other)\n didn't match because some of the arguments have invalid types: (\u001b[31;1mtorch.DoubleTensor\u001b[0m)\n * (float value, torch.FloatTensor other)\n * (float value, torch.SparseFloatTensor other)\n", 198 | "output_type": "error", 199 | "traceback": [ 200 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 201 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 202 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 203 | "\u001b[0;32m~/anaconda3/envs/pinkslip/lib/python3.6/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36m__add__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;31m# TODO: add tests for operators\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__add__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0m__radd__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m__add__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 204 | "\u001b[0;31mTypeError\u001b[0m: add received an invalid combination of arguments - got (torch.DoubleTensor), but expected one of:\n * (float value)\n didn't match because some of the arguments have invalid types: (\u001b[31;1mtorch.DoubleTensor\u001b[0m)\n * (torch.FloatTensor other)\n didn't match because some of the arguments have invalid types: (\u001b[31;1mtorch.DoubleTensor\u001b[0m)\n * (torch.SparseFloatTensor other)\n didn't match because some of the arguments have invalid types: (\u001b[31;1mtorch.DoubleTensor\u001b[0m)\n * (float value, torch.FloatTensor other)\n * (float value, torch.SparseFloatTensor other)\n" 205 | ] 206 | } 207 | ], 208 | "source": [ 209 | "z = x + y" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 30, 215 | "metadata": { 216 | "collapsed": false 217 | }, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": [ 222 | "('torch.FloatTensor', 'torch.DoubleTensor')" 223 | ] 224 | }, 225 | "execution_count": 30, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "x.type(), y.type()" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 32, 237 | "metadata": { 238 | "collapsed": false 239 | }, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "\n", 246 | " 0.2012 1.5410 0.6996\n", 247 | " 0.6528 1.1614 0.9908\n", 248 | " 0.6993 0.9397 0.5590\n", 249 | " 1.3038 0.8022 1.3967\n", 250 | " 1.0099 0.3598 1.4089\n", 251 | "[torch.FloatTensor of size 5x3]\n", 252 | "\n" 253 | ] 254 | } 255 | ], 256 | "source": [ 257 | "z = x + y.float()\n", 258 | "print(z)" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 33, 264 | "metadata": { 265 | "collapsed": false 266 | }, 267 | "outputs": [ 268 | { 269 | "data": { 270 | "text/plain": [ 271 | "\n", 272 | " 0.2012 1.5410 0.6996\n", 273 | " 0.6528 1.1614 0.9908\n", 274 | " 0.6993 0.9397 0.5590\n", 275 | " 1.3038 0.8022 1.3967\n", 276 | " 1.0099 0.3598 1.4089\n", 277 | "[torch.FloatTensor of size 5x3]" 278 | ] 279 | }, 280 | "execution_count": 33, 281 | "metadata": {}, 282 | "output_type": "execute_result" 283 | } 284 | ], 285 | "source": [ 286 | "torch.add(x, y.float())" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 34, 292 | "metadata": { 293 | "collapsed": false 294 | }, 295 | "outputs": [ 296 | { 297 | "data": { 298 | "text/plain": [ 299 | "\n", 300 | " 0.1871 0.6335 0.3372\n", 301 | " 0.5040 0.2603 0.7920\n", 302 | " 0.2832 0.6153 0.2146\n", 303 | " 0.9213 0.6229 0.4232\n", 304 | " 0.0121 0.2337 0.6289\n", 305 | "[torch.FloatTensor of size 5x3]" 306 | ] 307 | }, 308 | "execution_count": 34, 309 | "metadata": {}, 310 | "output_type": "execute_result" 311 | } 312 | ], 313 | "source": [ 314 | "x" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 35, 320 | "metadata": { 321 | "collapsed": false 322 | }, 323 | "outputs": [ 324 | { 325 | "data": { 326 | "text/plain": [ 327 | "\n", 328 | " 1.1871 1.6335 1.3372\n", 329 | " 1.5040 1.2603 1.7920\n", 330 | " 1.2832 1.6153 1.2146\n", 331 | " 1.9213 1.6229 1.4232\n", 332 | " 1.0121 1.2337 1.6289\n", 333 | "[torch.FloatTensor of size 5x3]" 334 | ] 335 | }, 336 | "execution_count": 35, 337 | "metadata": {}, 338 | "output_type": "execute_result" 339 | } 340 | ], 341 | "source": [ 342 | "x.add_(1)" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 36, 348 | "metadata": { 349 | "collapsed": false 350 | }, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "\n", 356 | " 1.1871 1.6335 1.3372\n", 357 | " 1.5040 1.2603 1.7920\n", 358 | " 1.2832 1.6153 1.2146\n", 359 | " 1.9213 1.6229 1.4232\n", 360 | " 1.0121 1.2337 1.6289\n", 361 | "[torch.FloatTensor of size 5x3]" 362 | ] 363 | }, 364 | "execution_count": 36, 365 | "metadata": {}, 366 | "output_type": "execute_result" 367 | } 368 | ], 369 | "source": [ 370 | "x" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": 38, 376 | "metadata": { 377 | "collapsed": false 378 | }, 379 | "outputs": [ 380 | { 381 | "data": { 382 | "text/plain": [ 383 | "\n", 384 | " 1.1871 1.6335\n", 385 | " 1.5040 1.2603\n", 386 | "[torch.FloatTensor of size 2x2]" 387 | ] 388 | }, 389 | "execution_count": 38, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "x[:2, :2]" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 43, 401 | "metadata": { 402 | "collapsed": false 403 | }, 404 | "outputs": [ 405 | { 406 | "data": { 407 | "text/plain": [ 408 | "\n", 409 | " 0.0167 1.4824 0.4846\n", 410 | " 0.2238 1.1357 0.3562\n", 411 | " 0.5339 0.5239 0.4184\n", 412 | " 0.7349 0.2910 1.3855\n", 413 | " 1.0099 0.1556 1.2705\n", 414 | "[torch.FloatTensor of size 5x3]" 415 | ] 416 | }, 417 | "execution_count": 43, 418 | "metadata": {}, 419 | "output_type": "execute_result" 420 | } 421 | ], 422 | "source": [ 423 | "x * y.float()" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 44, 429 | "metadata": { 430 | "collapsed": false 431 | }, 432 | "outputs": [ 433 | { 434 | "data": { 435 | "text/plain": [ 436 | "\n", 437 | " 3.2775 5.1219 3.8083\n", 438 | " 4.4997 3.5265 6.0016\n", 439 | " 3.6083 5.0296 3.3689\n", 440 | " 6.8300 5.0679 4.1505\n", 441 | " 2.7514 3.4339 5.0984\n", 442 | "[torch.FloatTensor of size 5x3]" 443 | ] 444 | }, 445 | "execution_count": 44, 446 | "metadata": {}, 447 | "output_type": "execute_result" 448 | } 449 | ], 450 | "source": [ 451 | "torch.exp(x)" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": 48, 457 | "metadata": { 458 | "collapsed": false 459 | }, 460 | "outputs": [ 461 | { 462 | "data": { 463 | "text/plain": [ 464 | "\n", 465 | " 1.1871 1.5040 1.2832 1.9213 1.0121\n", 466 | " 1.6335 1.2603 1.6153 1.6229 1.2337\n", 467 | " 1.3372 1.7920 1.2146 1.4232 1.6289\n", 468 | "[torch.FloatTensor of size 3x5]" 469 | ] 470 | }, 471 | "execution_count": 48, 472 | "metadata": {}, 473 | "output_type": "execute_result" 474 | } 475 | ], 476 | "source": [ 477 | "torch.transpose(x, 0, 1)" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": 55, 483 | "metadata": { 484 | "collapsed": true 485 | }, 486 | "outputs": [], 487 | "source": [ 488 | "#transposing, indexing, slicing, mathematical operations, linear algebra, random numbers" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": 56, 494 | "metadata": { 495 | "collapsed": false 496 | }, 497 | "outputs": [ 498 | { 499 | "data": { 500 | "text/plain": [ 501 | "3.66198992729187" 502 | ] 503 | }, 504 | "execution_count": 56, 505 | "metadata": {}, 506 | "output_type": "execute_result" 507 | } 508 | ], 509 | "source": [ 510 | "torch.trace(x)" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": 57, 516 | "metadata": { 517 | "collapsed": false 518 | }, 519 | "outputs": [ 520 | { 521 | "data": { 522 | "text/plain": [ 523 | "array([[ 1.18707514, 1.63353515, 1.33718157],\n", 524 | " [ 1.50401783, 1.26031375, 1.79202783],\n", 525 | " [ 1.28324068, 1.61534214, 1.21460104],\n", 526 | " [ 1.92132044, 1.62293506, 1.42323995],\n", 527 | " [ 1.01209915, 1.23370588, 1.62891769]], dtype=float32)" 528 | ] 529 | }, 530 | "execution_count": 57, 531 | "metadata": {}, 532 | "output_type": "execute_result" 533 | } 534 | ], 535 | "source": [ 536 | "x.numpy()" 537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": 58, 542 | "metadata": { 543 | "collapsed": false 544 | }, 545 | "outputs": [ 546 | { 547 | "data": { 548 | "text/plain": [ 549 | "False" 550 | ] 551 | }, 552 | "execution_count": 58, 553 | "metadata": {}, 554 | "output_type": "execute_result" 555 | } 556 | ], 557 | "source": [ 558 | "torch.cuda.is_available()" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": 59, 564 | "metadata": { 565 | "collapsed": true 566 | }, 567 | "outputs": [], 568 | "source": [ 569 | "if torch.cuda.is_available():\n", 570 | " x_gpu = x.cuda()\n", 571 | " print(x_gpu)" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": 60, 577 | "metadata": { 578 | "collapsed": true 579 | }, 580 | "outputs": [], 581 | "source": [ 582 | "from torch.autograd import Variable" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": 65, 588 | "metadata": { 589 | "collapsed": false 590 | }, 591 | "outputs": [ 592 | { 593 | "name": "stdout", 594 | "output_type": "stream", 595 | "text": [ 596 | "\n", 597 | " 0.4417 0.6601 0.2827\n", 598 | " 0.2489 0.1172 0.1850\n", 599 | " 0.7840 0.0001 0.4800\n", 600 | " 0.9407 0.5935 0.3004\n", 601 | " 0.4582 0.3082 0.9570\n", 602 | "[torch.FloatTensor of size 5x3]\n", 603 | "\n" 604 | ] 605 | } 606 | ], 607 | "source": [ 608 | "x = torch.rand(5, 3)\n", 609 | "print(x)" 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": 66, 615 | "metadata": { 616 | "collapsed": false 617 | }, 618 | "outputs": [ 619 | { 620 | "name": "stdout", 621 | "output_type": "stream", 622 | "text": [ 623 | "Variable containing:\n", 624 | " 0.6768 0.3742 0.6032\n", 625 | " 0.0535 0.9765 0.6030\n", 626 | " 0.4677 0.0532 0.9079\n", 627 | " 0.8636 0.6691 0.4842\n", 628 | " 0.8701 0.6303 0.1738\n", 629 | "[torch.FloatTensor of size 5x3]\n", 630 | "\n" 631 | ] 632 | } 633 | ], 634 | "source": [ 635 | "x = Variable(torch.rand(5, 3))\n", 636 | "print(x)" 637 | ] 638 | }, 639 | { 640 | "cell_type": "markdown", 641 | "metadata": {}, 642 | "source": [ 643 | "Each variable holds data, a gradient, and information about the function that created it.\n", 644 | "" 645 | ] 646 | }, 647 | { 648 | "cell_type": "code", 649 | "execution_count": 67, 650 | "metadata": { 651 | "collapsed": false 652 | }, 653 | "outputs": [ 654 | { 655 | "data": { 656 | "text/plain": [ 657 | "\n", 658 | " 0.6768 0.3742 0.6032\n", 659 | " 0.0535 0.9765 0.6030\n", 660 | " 0.4677 0.0532 0.9079\n", 661 | " 0.8636 0.6691 0.4842\n", 662 | " 0.8701 0.6303 0.1738\n", 663 | "[torch.FloatTensor of size 5x3]" 664 | ] 665 | }, 666 | "execution_count": 67, 667 | "metadata": {}, 668 | "output_type": "execute_result" 669 | } 670 | ], 671 | "source": [ 672 | "x.data" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": 68, 678 | "metadata": { 679 | "collapsed": true 680 | }, 681 | "outputs": [], 682 | "source": [ 683 | "x.creator" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": 70, 689 | "metadata": { 690 | "collapsed": true 691 | }, 692 | "outputs": [], 693 | "source": [ 694 | "x.grad" 695 | ] 696 | }, 697 | { 698 | "cell_type": "code", 699 | "execution_count": 80, 700 | "metadata": { 701 | "collapsed": true 702 | }, 703 | "outputs": [], 704 | "source": [ 705 | "x = Variable(torch.Tensor([2]), requires_grad=False)\n", 706 | "w = Variable(torch.Tensor([3]), requires_grad=True)\n", 707 | "b = Variable(torch.Tensor([1]), requires_grad=True)" 708 | ] 709 | }, 710 | { 711 | "cell_type": "code", 712 | "execution_count": 81, 713 | "metadata": { 714 | "collapsed": false 715 | }, 716 | "outputs": [ 717 | { 718 | "data": { 719 | "text/plain": [ 720 | "Variable containing:\n", 721 | " 7\n", 722 | "[torch.FloatTensor of size 1]" 723 | ] 724 | }, 725 | "execution_count": 81, 726 | "metadata": {}, 727 | "output_type": "execute_result" 728 | } 729 | ], 730 | "source": [ 731 | "z = x * w\n", 732 | "y = z + b\n", 733 | "y" 734 | ] 735 | }, 736 | { 737 | "cell_type": "markdown", 738 | "metadata": {}, 739 | "source": [ 740 | "![caption](figures/intro_to_pytorch/computational_graph_forward.svg)" 741 | ] 742 | }, 743 | { 744 | "cell_type": "code", 745 | "execution_count": 82, 746 | "metadata": { 747 | "collapsed": false 748 | }, 749 | "outputs": [ 750 | { 751 | "data": { 752 | "text/plain": [ 753 | "" 754 | ] 755 | }, 756 | "execution_count": 82, 757 | "metadata": {}, 758 | "output_type": "execute_result" 759 | } 760 | ], 761 | "source": [ 762 | "z.creator" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": 83, 768 | "metadata": { 769 | "collapsed": false 770 | }, 771 | "outputs": [ 772 | { 773 | "data": { 774 | "text/plain": [ 775 | "" 776 | ] 777 | }, 778 | "execution_count": 83, 779 | "metadata": {}, 780 | "output_type": "execute_result" 781 | } 782 | ], 783 | "source": [ 784 | "y.creator" 785 | ] 786 | }, 787 | { 788 | "cell_type": "code", 789 | "execution_count": 84, 790 | "metadata": { 791 | "collapsed": true 792 | }, 793 | "outputs": [], 794 | "source": [ 795 | "w.grad" 796 | ] 797 | }, 798 | { 799 | "cell_type": "code", 800 | "execution_count": 85, 801 | "metadata": { 802 | "collapsed": true 803 | }, 804 | "outputs": [], 805 | "source": [ 806 | "y.backward()" 807 | ] 808 | }, 809 | { 810 | "cell_type": "markdown", 811 | "metadata": {}, 812 | "source": [ 813 | "$ y = 2w + b$, since $x = 2$\n", 814 | "\n", 815 | "Say we want: $\\displaystyle\\frac{\\partial y}{\\partial w}$" 816 | ] 817 | }, 818 | { 819 | "cell_type": "code", 820 | "execution_count": 86, 821 | "metadata": { 822 | "collapsed": false 823 | }, 824 | "outputs": [ 825 | { 826 | "data": { 827 | "text/plain": [ 828 | "Variable containing:\n", 829 | " 2\n", 830 | "[torch.FloatTensor of size 1]" 831 | ] 832 | }, 833 | "execution_count": 86, 834 | "metadata": {}, 835 | "output_type": "execute_result" 836 | } 837 | ], 838 | "source": [ 839 | "w.grad" 840 | ] 841 | }, 842 | { 843 | "cell_type": "code", 844 | "execution_count": 91, 845 | "metadata": { 846 | "collapsed": true 847 | }, 848 | "outputs": [], 849 | "source": [ 850 | "a = Variable(torch.Tensor([2]), requires_grad=True)" 851 | ] 852 | }, 853 | { 854 | "cell_type": "markdown", 855 | "metadata": {}, 856 | "source": [ 857 | "Let's compute, $\\displaystyle\\frac{\\partial}{\\partial a}(3a^2 + 2a + 1)$ when $a = 2$
" 858 | ] 859 | }, 860 | { 861 | "cell_type": "code", 862 | "execution_count": 92, 863 | "metadata": { 864 | "collapsed": false 865 | }, 866 | "outputs": [], 867 | "source": [ 868 | "y = 3*a*a + 2*a + 1" 869 | ] 870 | }, 871 | { 872 | "cell_type": "code", 873 | "execution_count": 93, 874 | "metadata": { 875 | "collapsed": true 876 | }, 877 | "outputs": [], 878 | "source": [ 879 | "y.backward()" 880 | ] 881 | }, 882 | { 883 | "cell_type": "code", 884 | "execution_count": 95, 885 | "metadata": { 886 | "collapsed": false 887 | }, 888 | "outputs": [ 889 | { 890 | "data": { 891 | "text/plain": [ 892 | "Variable containing:\n", 893 | " 14\n", 894 | "[torch.FloatTensor of size 1]" 895 | ] 896 | }, 897 | "execution_count": 95, 898 | "metadata": {}, 899 | "output_type": "execute_result" 900 | } 901 | ], 902 | "source": [ 903 | "a.grad" 904 | ] 905 | }, 906 | { 907 | "cell_type": "markdown", 908 | "metadata": {}, 909 | "source": [ 910 | "checks out, since $\\displaystyle\\frac{\\partial}{\\partial a}(3a^2 + 2a + 1) = 6a + 2$ and $a = 2$" 911 | ] 912 | }, 913 | { 914 | "cell_type": "code", 915 | "execution_count": null, 916 | "metadata": { 917 | "collapsed": true 918 | }, 919 | "outputs": [], 920 | "source": [] 921 | } 922 | ], 923 | "metadata": { 924 | "kernelspec": { 925 | "display_name": "pinkslip", 926 | "language": "python", 927 | "name": "pinkslip" 928 | }, 929 | "language_info": { 930 | "codemirror_mode": { 931 | "name": "ipython", 932 | "version": 3 933 | }, 934 | "file_extension": ".py", 935 | "mimetype": "text/x-python", 936 | "name": "python", 937 | "nbconvert_exporter": "python", 938 | "pygments_lexer": "ipython3", 939 | "version": "3.6.1" 940 | } 941 | }, 942 | "nbformat": 4, 943 | "nbformat_minor": 2 944 | } 945 | -------------------------------------------------------------------------------- /day_2/0_favorite_practices_and_debugging.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## One way to software engineer the problem\n", 8 | "\n", 9 | "There are a few different ways of breaking up the processing pipeline for handling language data. In this notebook, we will be operating under the following division of labor:\n", 10 | "\n", 11 | "- Vocabulary\n", 12 | " + A vocabulary class is responsible managing the bijections between string tokens and integers.\n", 13 | " + It offers the follow API:\n", 14 | " + `add`, indexing with `[]`, `add_many` will take as input a string and return an integer (or a sequence of strings and a sequence of integers in the case of `add_many`\n", 15 | " + `lookup` takes as input an integer and returns the corresponding string toekn\n", 16 | " + It also handles the restriction of observed tokens that did not happen frequently enough. We will be restricting on the frequency of tokens rather than taking the N most frequent. This is because our primary concern is with statistical signal, not raw processing complexity.\n", 17 | "- TextDataset\n", 18 | " + TextDataset is responsible for loading the data from disk. it should process the data to a standardized, intermediate form\n", 19 | "- Vectorizer\n", 20 | " + The Vectorizer uses an existing TextDataset to instantiate one or more vocabularies (more than one is needed if dealing with different models or category labels). \n", 21 | " + Using the vocabs, it can convert any existing TextDataset to a VectorizedDataset, which interfaces with the learner\n", 22 | " + It is important to note the following pipeline:\n", 23 | " 1. Load the Training TextDataset\n", 24 | " 2. Load the Testing (Or Dev/Eval) TextDataset\n", 25 | " 2. Instantiate the Vectorizer\n", 26 | " + in this process, the vectorizer will be frozen, so it cannot grow its token-integer mapping\n", 27 | " + this is to preserve realistic testing conditions\n", 28 | " 3. Use Vectorizer to create a VectorizedDataset from the Training TextDataset\n", 29 | " 4. Use Vectorizer to create another VectorizedDataset from the Testing (or Dev/Eval) TextDataset\n", 30 | "- VectorizedDataset\n", 31 | " + A VectorizedDataset has the necessary structures required for learning\n", 32 | " + A VectorizedDataset is best created all at once from the vectorizer\n", 33 | " + It is also a computation that can be pre-cached for speed\n", 34 | "- DataLoader\n", 35 | " + The DataLoader uses the VectorizedDataset to issue batches of data. \n", 36 | " + The DataLoader should be issuing randomized batches so that the model doesn't get entrenched on a specific pattern of data" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 10, 42 | "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "from torch.utils.data import Dataset\n", 48 | "from torch.utils.data import DataLoader\n", 49 | "from torch.autograd import Variable\n", 50 | "import pandas as pd\n", 51 | "import numpy as np\n", 52 | "\n", 53 | "from collections import Counter\n", 54 | "\n", 55 | "import numpy as np\n", 56 | "from torch.utils.data import Dataset\n", 57 | "import six" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 11, 63 | "metadata": { 64 | "collapsed": true 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "class Vocabulary(object):\n", 69 | " \"\"\"\n", 70 | " An implementation that manages the interface between a token dataset and the\n", 71 | " machine learning algorithm.\n", 72 | " \"\"\"\n", 73 | "\n", 74 | " def __init__(self, use_unks=False, unk_token=\"\",\n", 75 | " use_mask=False, mask_token=\"\", use_start_end=False,\n", 76 | " start_token=\"\", end_token=\"\"):\n", 77 | " \"\"\"\n", 78 | " Args:\n", 79 | " use_unks (bool): The vocabulary will output UNK tokens for out of\n", 80 | " vocabulary items.\n", 81 | " [default=False]\n", 82 | " unk_token (str): The token used for unknown tokens.\n", 83 | " If `use_unks` is True, this will be added to the vocabulary.\n", 84 | " [default='']\n", 85 | " use_mask (bool): The vocabulary will reserve the 0th index for a mask token.\n", 86 | " This is used to handle variable lengths in sequence models.\n", 87 | " [default=False]\n", 88 | " mask_token (str): The token used for the mask.\n", 89 | " Note: mostly a placeholder; it's unlikely the token will be seen.\n", 90 | " [default='']\n", 91 | " use_start_end (bool): The vocabulary will reserve indices for two tokens\n", 92 | " that represent the start and end of a sequence.\n", 93 | " [default=False]\n", 94 | " start_token: The token used to indicate the start of a sequence.\n", 95 | " If `use_start_end` is True, this will be added to the vocabulary.\n", 96 | " [default='']\n", 97 | " end_token: The token used to indicate the end of a sequence\n", 98 | " If `use_start_end` is True, this will be added to the vocabulary.\n", 99 | " [default='']\n", 100 | " \"\"\"\n", 101 | "\n", 102 | " self._mapping = {} # str -> int\n", 103 | " self._flip = {} # int -> str;\n", 104 | " self._counts = Counter() # int -> int; count occurrences\n", 105 | " self._forced_unks = set() # force tokens to unk (e.g. if < 5 occurrences)\n", 106 | " self._i = 0\n", 107 | " self._frozen = False\n", 108 | " self._frequency_threshold = -1\n", 109 | "\n", 110 | " # mask token for use in masked recurrent networks\n", 111 | " # usually need to be the 0th index\n", 112 | " self.use_mask = use_mask\n", 113 | " self.mask_token = mask_token\n", 114 | " if self.use_mask:\n", 115 | " self.add(self.mask_token)\n", 116 | "\n", 117 | " # unk token for out of vocabulary tokens\n", 118 | " self.use_unks = use_unks\n", 119 | " self.unk_token = unk_token\n", 120 | " if self.use_unks:\n", 121 | " self.add(self.unk_token)\n", 122 | "\n", 123 | " # start token for sequence models\n", 124 | " self.use_start_end = use_start_end\n", 125 | " self.start_token = start_token\n", 126 | " self.end_token = end_token\n", 127 | " if self.use_start_end:\n", 128 | " self.add(self.start_token)\n", 129 | " self.add(self.end_token)\n", 130 | "\n", 131 | " def iterkeys(self):\n", 132 | " for k in self._mapping.keys():\n", 133 | " if k == self.unk_token or k == self.mask_token:\n", 134 | " continue\n", 135 | " else:\n", 136 | " yield k\n", 137 | "\n", 138 | " def keys(self):\n", 139 | " return list(self.iterkeys())\n", 140 | "\n", 141 | " def iteritems(self):\n", 142 | " for key, value in self._mapping.items():\n", 143 | " if key == self.unk_token or key == self.mask_token:\n", 144 | " continue\n", 145 | " yield key, value\n", 146 | "\n", 147 | " def items(self):\n", 148 | " return list(self.iteritems())\n", 149 | "\n", 150 | " def values(self):\n", 151 | " return [value for _, value in self.iteritems()]\n", 152 | "\n", 153 | " def __getitem__(self, k):\n", 154 | " if self._frozen:\n", 155 | " if k in self._mapping:\n", 156 | " out_index = self._mapping[k]\n", 157 | " elif self.use_unks:\n", 158 | " out_index = self.unk_index\n", 159 | " else: # case: frozen, don't want unks, raise exception\n", 160 | " raise VocabularyException(\"Vocabulary is frozen. \" +\n", 161 | " \"Key '{}' not found.\".format(k))\n", 162 | " if out_index in self._forced_unks:\n", 163 | " out_index = self.unk_index\n", 164 | " elif k in self._mapping: # case: normal\n", 165 | " out_index = self._mapping[k]\n", 166 | " self._counts[out_index] += 1\n", 167 | " else:\n", 168 | " out_index = self._mapping[k] = self._i\n", 169 | " self._i += 1\n", 170 | " self._flip[out_index] = k\n", 171 | " self._counts[out_index] = 1\n", 172 | "\n", 173 | " return out_index\n", 174 | "\n", 175 | " def add(self, k):\n", 176 | " return self.__getitem__(k)\n", 177 | "\n", 178 | " def add_many(self, x):\n", 179 | " return [self.add(k) for k in x]\n", 180 | "\n", 181 | " def lookup(self, i):\n", 182 | " try:\n", 183 | " return self._flip[i]\n", 184 | " except KeyError:\n", 185 | " raise VocabularyException(\"Key {} not in Vocabulary\".format(i))\n", 186 | "\n", 187 | " def lookup_many(self, x):\n", 188 | " for k in x:\n", 189 | " yield self.lookup(k)\n", 190 | "\n", 191 | " def map(self, sequence, include_start_end=False):\n", 192 | " if include_start_end:\n", 193 | " yield self.start_index\n", 194 | "\n", 195 | " for item in sequence:\n", 196 | " yield self[item]\n", 197 | "\n", 198 | " if include_start_end:\n", 199 | " yield self.end_index\n", 200 | "\n", 201 | " def freeze(self, use_unks=False, frequency_cutoff=-1):\n", 202 | " self.use_unks = use_unks\n", 203 | " self._frequency_cutoff = frequency_cutoff\n", 204 | "\n", 205 | " if use_unks and self.unk_token not in self:\n", 206 | " self.add(self.unk_token)\n", 207 | "\n", 208 | " if self._frequency_cutoff > 0:\n", 209 | " for token, count in self._counts.items():\n", 210 | " if count < self._frequency_cutoff:\n", 211 | " self._forced_unks.add(token)\n", 212 | "\n", 213 | " self._frozen = True\n", 214 | "\n", 215 | " def unfreeze(self):\n", 216 | " self._frozen = False\n", 217 | "\n", 218 | " def get_counts(self):\n", 219 | " return {self._flip[i]: count for i, count in self._counts.items()}\n", 220 | "\n", 221 | " def get_count(self, token=None, index=None):\n", 222 | " if token is None and index is None:\n", 223 | " return None\n", 224 | " elif token is not None and index is not None:\n", 225 | " print(\"Cannot do two things at once; choose one\")\n", 226 | " elif token is not None:\n", 227 | " return self._counts[self[token]]\n", 228 | " elif index is not None:\n", 229 | " return self._counts[index]\n", 230 | " else:\n", 231 | " raise Exception(\"impossible condition\")\n", 232 | "\n", 233 | " @property\n", 234 | " def unk_index(self):\n", 235 | " if self.unk_token not in self:\n", 236 | " return None\n", 237 | " return self._mapping[self.unk_token]\n", 238 | "\n", 239 | " @property\n", 240 | " def mask_index(self):\n", 241 | " if self.mask_token not in self:\n", 242 | " return None\n", 243 | " return self._mapping[self.mask_token]\n", 244 | "\n", 245 | " @property\n", 246 | " def start_index(self):\n", 247 | " if self.start_token not in self:\n", 248 | " return None\n", 249 | " return self._mapping[self.start_token]\n", 250 | "\n", 251 | " @property\n", 252 | " def end_index(self):\n", 253 | " if self.end_token not in self:\n", 254 | " return None\n", 255 | " return self._mapping[self.end_token]\n", 256 | "\n", 257 | " def __contains__(self, k):\n", 258 | " return k in self._mapping\n", 259 | "\n", 260 | " def __len__(self):\n", 261 | " return len(self._mapping)\n", 262 | "\n", 263 | " def __repr__(self):\n", 264 | " return \"\".format(len(self), self._frozen)\n", 265 | "\n", 266 | "\n", 267 | " def get_serializable_contents(self):\n", 268 | " \"\"\"\n", 269 | " Creats a dict containing the necessary information to recreate this instance\n", 270 | " \"\"\"\n", 271 | " config = {\"_mapping\": self._mapping,\n", 272 | " \"_flip\": self._flip,\n", 273 | " \"_frozen\": self._frozen,\n", 274 | " \"_i\": self._i,\n", 275 | " \"_counts\": list(self._counts.items()),\n", 276 | " \"_frequency_threshold\": self._frequency_threshold,\n", 277 | " \"use_unks\": self.use_unks,\n", 278 | " \"unk_token\": self.unk_token,\n", 279 | " \"use_mask\": self.use_mask,\n", 280 | " \"mask_token\": self.mask_token,\n", 281 | " \"use_start_end\": self.use_start_end,\n", 282 | " \"start_token\": self.start_token,\n", 283 | " \"end_token\": self.end_token}\n", 284 | " return config\n", 285 | "\n", 286 | " @classmethod\n", 287 | " def deserialize_from_contents(cls, content):\n", 288 | " \"\"\"\n", 289 | " Recreate a Vocabulary instance; expect same dict as output in `serialize`\n", 290 | " \"\"\"\n", 291 | " try:\n", 292 | " _mapping = content.pop(\"_mapping\")\n", 293 | " _flip = content.pop(\"_flip\")\n", 294 | " _i = content.pop(\"_i\")\n", 295 | " _frozen = content.pop(\"_frozen\")\n", 296 | " _counts = content.pop(\"_counts\")\n", 297 | " _frequency_threshold = content.pop(\"_frequency_threshold\")\n", 298 | " except KeyError:\n", 299 | " raise Exception(\"unable to deserialize vocabulary\")\n", 300 | " if isinstance(list(_flip.keys())[0], six.string_types):\n", 301 | " _flip = {int(k): v for k, v in _flip.items()}\n", 302 | " out = cls(**content)\n", 303 | " out._mapping = _mapping\n", 304 | " out._flip = _flip\n", 305 | " out._i = _i\n", 306 | " out._counts = Counter(dict(_counts))\n", 307 | " out._frequency_threshold = _frequency_threshold\n", 308 | "\n", 309 | " if _frozen:\n", 310 | " out.freeze(out.use_unks)\n", 311 | "\n", 312 | " return out\n", 313 | "\n" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "As an example, the names dataset" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 12, 326 | "metadata": { 327 | "code_folding": [], 328 | "collapsed": true 329 | }, 330 | "outputs": [], 331 | "source": [ 332 | "class constants:\n", 333 | " # usually this would be some sort of constants.py or something similar\n", 334 | " NAMES_TRAIN = '/research/data/names/names_train.csv'\n", 335 | " NAMES_TEST = '/research/data/names/names_test.csv'\n", 336 | " CHAR_START_TOKEN = \"^\"\n", 337 | " CHAR_END_TOKEN= \"_\"\n", 338 | "\n", 339 | "class NamesDataset(object):\n", 340 | " def __init__(self, data_path, delimiter=\"\\t\"):\n", 341 | " self.data = pd.read_csv(data_path, delimiter=delimiter)\n", 342 | " \n", 343 | " def get_data(self, nationality=None):\n", 344 | " data = self.data\n", 345 | " if nationality:\n", 346 | " data = self.data[self.data['label']==nationality]\n", 347 | " return data['name'].values, data['label'].values\n", 348 | " \n", 349 | "\n", 350 | "class NamesVectorizer(object):\n", 351 | " def __init__(self, chars_vocab, targets_vocab, max_sequence_length):\n", 352 | " self.chars_vocab = chars_vocab\n", 353 | " self.targets_vocab = targets_vocab\n", 354 | " self.max_sequence_length = max_sequence_length\n", 355 | "\n", 356 | " @classmethod\n", 357 | " def induce_from_text(cls, text, targets):\n", 358 | " chars_vocab = Vocabulary(use_unks=True,\n", 359 | " use_start_end=True,\n", 360 | " start_token=constants.CHAR_START_TOKEN,\n", 361 | " end_token=constants.CHAR_END_TOKEN)\n", 362 | " \n", 363 | " targets_vocab = Vocabulary(use_unks=False,\n", 364 | " use_start_end=False)\n", 365 | " \n", 366 | " for character_sequence in text:\n", 367 | " chars_vocab.add_many(set(character_sequence))\n", 368 | "\n", 369 | " targets_vocab.add_many(targets)\n", 370 | "\n", 371 | " # add two for the start, end tokens\n", 372 | " max_sequence_length = 2 + max(len(character_sequence) for character_sequence in text)\n", 373 | "\n", 374 | " return cls(chars_vocab, targets_vocab, max_sequence_length)\n", 375 | "\n", 376 | " def convert_dataset(self, char_sequences, targets):\n", 377 | " num_data = len(char_sequences)\n", 378 | " \n", 379 | " # create the intended output structures\n", 380 | " x_seq = np.zeros((num_data, self.max_sequence_length), dtype=np.int64)\n", 381 | " y_target = np.zeros((num_data), dtype=np.int64)\n", 382 | "\n", 383 | " # iterate our targets and sequences until they are populated \n", 384 | " \n", 385 | " for seq_i, target in enumerate(targets):\n", 386 | " y_target[seq_i] = self.targets_vocab[target]\n", 387 | "\n", 388 | " for seq_i, char_seq in enumerate(char_sequences):\n", 389 | " converted_seq = list(self.chars_vocab.map(char_seq, include_start_end=True))\n", 390 | " x_seq[seq_i, :len(converted_seq)] = converted_seq\n", 391 | "\n", 392 | " return VectorizedSingleIODataset(x_seq, y_target)\n", 393 | " \n", 394 | " def save(self, filename):\n", 395 | " vec_dict = {\"chars_vocab\": vectorizer.chars_vocab.get_serializable_contents(),\n", 396 | " \"targets_vocab\": vectorizer.targets_vocab.get_serializable_contents(),\n", 397 | " \"max_sequence_length\": vectorizer.max_sequence_length()}\n", 398 | "\n", 399 | " with open(filename, \"w\") as fp:\n", 400 | " json.dump(vec_dict, fp)\n", 401 | " \n", 402 | " @classmethod\n", 403 | " def load(cls, filename):\n", 404 | " with open(filename, \"r\") as fp:\n", 405 | " vec_dict = json.load(fp)\n", 406 | "\n", 407 | " vec_dict[\"chars_vocab\"] = Vocabulary.deserialize_from_contents(vec_dict[\"chars_vocab\"])\n", 408 | " vec_dict[\"targets_vocab\"] = Vocabulary.deserialize_from_contents(vec_dict[\"targets_vocab\"])\n", 409 | " return cls(**vec_dict)\n", 410 | " \n", 411 | "class VectorizedSingleIODataset(Dataset):\n", 412 | " def __init__(self, x_input, y_target):\n", 413 | " self.x_input = x_input\n", 414 | " self.y_target = y_target\n", 415 | " self.class_weights = 1 / np.bincount(self.y_target)\n", 416 | "\n", 417 | " def __len__(self):\n", 418 | " return len(self.x_input)\n", 419 | "\n", 420 | " def __getitem__(self, index):\n", 421 | " return {'x_input': self.x_input[index],\n", 422 | " 'y_target': self.y_target[index], \n", 423 | " 'class_weights': self.class_weights,\n", 424 | " 'x_seq_lengths': len(self.x_input[index].nonzero()[0])}\n" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 15, 430 | "metadata": { 431 | "collapsed": true 432 | }, 433 | "outputs": [], 434 | "source": [ 435 | "class DataServer(object):\n", 436 | " def __init__(self, vectorized_data):\n", 437 | " self.vectorized_data = vectorized_data\n", 438 | " self.gpu_mode = False\n", 439 | " self.volatile_mode = False\n", 440 | "\n", 441 | " def serve_batches(self, batch_size, num_batches=-1, num_workers=0):\n", 442 | " datagen = DataLoader(self.vectorized_data, batch_size=batch_size,\n", 443 | " shuffle=True, num_workers=num_workers)\n", 444 | " for batch_index, batch in enumerate(datagen):\n", 445 | " out = {}\n", 446 | " for key, val in batch.items():\n", 447 | " if not isinstance(val, Variable):\n", 448 | " val = Variable(val)\n", 449 | " if self.gpu_mode:\n", 450 | " val = val.cuda()\n", 451 | " if self.volatile_mode:\n", 452 | " val = val.volatile()\n", 453 | " out[key] = val\n", 454 | "\n", 455 | " yield out\n", 456 | " if num_batches > 0 and batch_index > num_batches:\n", 457 | " break\n", 458 | "\n", 459 | " def enable_gpu_mode(self):\n", 460 | " self.gpu_mode = True\n", 461 | "\n", 462 | " def disable_gpu_mode(self):\n", 463 | " self.gpu_mode = False" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": 14, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [ 472 | "train_dataset = NamesDataset(constants.NAMES_TRAIN)\n", 473 | "train_names, train_targets = train_dataset.get_data()\n", 474 | "\n", 475 | "test_dataset = NamesDataset(constants.NAMES_TEST)\n", 476 | "test_names, test_targets = test_dataset.get_data()\n", 477 | "\n", 478 | "vectorizer = NamesVectorizer.induce_from_text(list(train_names) + list(test_names), \n", 479 | " list(train_targets) + list(test_targets))\n", 480 | "\n", 481 | "vectorized_train = vectorizer.convert_dataset(train_names, train_targets)\n", 482 | "train_server = DataServer(vectorized_train)\n", 483 | "\n", 484 | "vectorized_test = vectorizer.convert_dataset(test_names, test_targets)\n", 485 | "test_server = DataServer(vectorized_test)" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": 16, 491 | "metadata": {}, 492 | "outputs": [ 493 | { 494 | "data": { 495 | "text/plain": [ 496 | "{'class_weights': Variable containing:\n", 497 | " \n", 498 | " Columns 0 to 9 \n", 499 | " 1.00000e-02 *\n", 500 | " 0.0624 0.0132 0.2445 0.1224 0.0343 0.1767 1.5873 0.1779 0.6211 0.4525\n", 501 | " 0.0624 0.0132 0.2445 0.1224 0.0343 0.1767 1.5873 0.1779 0.6211 0.4525\n", 502 | " 0.0624 0.0132 0.2445 0.1224 0.0343 0.1767 1.5873 0.1779 0.6211 0.4525\n", 503 | " 0.0624 0.0132 0.2445 0.1224 0.0343 0.1767 1.5873 0.1779 0.6211 0.4525\n", 504 | " 0.0624 0.0132 0.2445 0.1224 0.0343 0.1767 1.5873 0.1779 0.6211 0.4525\n", 505 | " \n", 506 | " Columns 10 to 17 \n", 507 | " 1.00000e-02 *\n", 508 | " 0.4255 0.5319 0.4785 0.4292 1.4286 1.2346 1.6949 0.9259\n", 509 | " 0.4255 0.5319 0.4785 0.4292 1.4286 1.2346 1.6949 0.9259\n", 510 | " 0.4255 0.5319 0.4785 0.4292 1.4286 1.2346 1.6949 0.9259\n", 511 | " 0.4255 0.5319 0.4785 0.4292 1.4286 1.2346 1.6949 0.9259\n", 512 | " 0.4255 0.5319 0.4785 0.4292 1.4286 1.2346 1.6949 0.9259\n", 513 | " [torch.DoubleTensor of size 5x18], 'x_input': Variable containing:\n", 514 | " \n", 515 | " Columns 0 to 12 \n", 516 | " 1 46 10 13 26 2 0 0 0 0 0 0 0\n", 517 | " 1 35 13 10 8 24 14 5 11 8 7 2 0\n", 518 | " 1 6 18 29 29 13 36 14 18 19 2 0 0\n", 519 | " 1 39 10 5 27 5 12 13 26 2 0 0 0\n", 520 | " 1 50 11 10 5 29 13 18 7 2 0 0 0\n", 521 | " \n", 522 | " Columns 13 to 21 \n", 523 | " 0 0 0 0 0 0 0 0 0\n", 524 | " 0 0 0 0 0 0 0 0 0\n", 525 | " 0 0 0 0 0 0 0 0 0\n", 526 | " 0 0 0 0 0 0 0 0 0\n", 527 | " 0 0 0 0 0 0 0 0 0\n", 528 | " [torch.LongTensor of size 5x22], 'x_seq_lengths': Variable containing:\n", 529 | " 6\n", 530 | " 12\n", 531 | " 11\n", 532 | " 10\n", 533 | " 10\n", 534 | " [torch.LongTensor of size 5], 'y_target': Variable containing:\n", 535 | " 14\n", 536 | " 1\n", 537 | " 9\n", 538 | " 1\n", 539 | " 1\n", 540 | " [torch.LongTensor of size 5]}" 541 | ] 542 | }, 543 | "execution_count": 16, 544 | "metadata": {}, 545 | "output_type": "execute_result" 546 | } 547 | ], 548 | "source": [ 549 | "next(train_server.serve_batches(5))" 550 | ] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "execution_count": null, 555 | "metadata": { 556 | "collapsed": true 557 | }, 558 | "outputs": [], 559 | "source": [] 560 | } 561 | ], 562 | "metadata": { 563 | "kernelspec": { 564 | "display_name": "Python 3", 565 | "language": "python", 566 | "name": "python3" 567 | }, 568 | "language_info": { 569 | "codemirror_mode": { 570 | "name": "ipython", 571 | "version": 3 572 | }, 573 | "file_extension": ".py", 574 | "mimetype": "text/x-python", 575 | "name": "python", 576 | "nbconvert_exporter": "python", 577 | "pygments_lexer": "ipython3", 578 | "version": "3.6.1" 579 | } 580 | }, 581 | "nbformat": 4, 582 | "nbformat_minor": 2 583 | } 584 | -------------------------------------------------------------------------------- /day_2/01_Trump_Tweet_LM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 82, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import math\n", 12 | "import re\n", 13 | "import random\n", 14 | "\n", 15 | "from ast import literal_eval\n", 16 | "import matplotlib.pyplot as plt\n", 17 | "import seaborn as sns\n", 18 | "import pandas as pd\n", 19 | "import numpy as np\n", 20 | "\n", 21 | "import torch\n", 22 | "import torch.nn as nn\n", 23 | "import torch.nn.functional as F\n", 24 | "from torch.nn import Parameter\n", 25 | "\n", 26 | "from torch.autograd import Variable\n", 27 | "from torch.utils.data import Dataset\n", 28 | "from torch.utils.data import DataLoader\n", 29 | "\n", 30 | "from datautils.vocabulary import Vocabulary\n", 31 | "\n", 32 | "from tqdm import tqdm_notebook\n", 33 | "import spacy\n", 34 | "\n", 35 | "import six\n", 36 | "\n", 37 | "plt.style.use('fivethirtyeight')\n", 38 | "plt.rcParams['figure.figsize'] = (14, 6)\n", 39 | "\n", 40 | "%matplotlib inline" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Constants" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 23, 53 | "metadata": { 54 | "collapsed": true 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "## CONSTANTS\n", 59 | "\n", 60 | "class constants:\n", 61 | " TRUMP_TWEET_CSV = \"/research/data/trump.csv\"\n", 62 | " TRAIN_PROPORTION = 0.8\n", 63 | " WORD_START_TOKEN = \"^\"\n", 64 | " WORD_END_TOKEN= \"_\"" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "## Data" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 96, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "['if',\n", 83 | " 'the',\n", 84 | " 'press',\n", 85 | " 'would',\n", 86 | " 'cover',\n", 87 | " 'me',\n", 88 | " 'accurately',\n", 89 | " 'honorably',\n", 90 | " 'i',\n", 91 | " 'would',\n", 92 | " 'have',\n", 93 | " 'far',\n", 94 | " 'less',\n", 95 | " 'reason',\n", 96 | " 'to',\n", 97 | " 'tweet',\n", 98 | " 'sadly',\n", 99 | " 'i',\n", 100 | " 'dont',\n", 101 | " 'know',\n", 102 | " 'if',\n", 103 | " 'that',\n", 104 | " 'will',\n", 105 | " 'ever',\n", 106 | " 'happen']" 107 | ] 108 | }, 109 | "execution_count": 96, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "df = pd.read_csv(constants.TRUMP_TWEET_CSV)\n", 116 | "df['tweet'] = df.tweet.apply(literal_eval)\n", 117 | "df['tweet'] = df.tweet.apply(lambda x: [xx.lower() for xx in x])\n", 118 | "df.tweet[0]" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 321, 124 | "metadata": { 125 | "code_folding": [], 126 | "collapsed": true 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "class TrumpTweetDataset(object):\n", 131 | " def __init__(self, data_path):\n", 132 | " self.data = pd.read_csv(data_path)\n", 133 | " self.data['tweet'] = self.data.tweet.apply(literal_eval)\n", 134 | " self.data['tweet'] = self.data.tweet.apply(lambda tweet: [word.lower() for word in tweet])\n", 135 | " \n", 136 | " def get_data(self):\n", 137 | " return self.data.tweet.tolist() \n", 138 | "\n", 139 | "class TrumpTweetVectorizer(object):\n", 140 | " def __init__(self, word_vocab, max_seq_length):\n", 141 | " self.word_vocab = word_vocab\n", 142 | " self.max_seq_length = max_seq_length\n", 143 | " \n", 144 | " def save(self, filename):\n", 145 | " vec_dict = {\"word_vocab\": self.word_vocab.get_serializable_contents(),\n", 146 | " 'max_seq_length': self.max_seq_length}\n", 147 | "\n", 148 | " with open(filename, \"w\") as fp:\n", 149 | " json.dump(vec_dict, fp)\n", 150 | " \n", 151 | " @classmethod\n", 152 | " def load(cls, filename):\n", 153 | " with open(filename, \"r\") as fp:\n", 154 | " vec_dict = json.load(fp)\n", 155 | "\n", 156 | " vec_dict[\"word_vocab\"] = Vocabulary.deserialize_from_contents(vec_dict[\"word_vocab\"])\n", 157 | " return cls(**vec_dict)\n", 158 | "\n", 159 | " @classmethod\n", 160 | " def induce_from_text(cls, tweets):\n", 161 | " \"\"\"\n", 162 | " \"\"\"\n", 163 | " vocab = Vocabulary(use_unks=True,\n", 164 | " use_start_end=True,\n", 165 | " start_token=constants.WORD_START_TOKEN,\n", 166 | " end_token=constants.WORD_END_TOKEN)\n", 167 | " max_seq_length = 0\n", 168 | " for tweet in tweets:\n", 169 | " vocab.add_many(tweet)\n", 170 | " if len(tweet) > max_seq_length:\n", 171 | " max_seq_length = len(tweet)\n", 172 | " max_seq_length = max_seq_length + 2\n", 173 | " return cls(vocab, max_seq_length)\n", 174 | "\n", 175 | " def convert_dataset(self, tweets):\n", 176 | "\n", 177 | " num_data = len(tweets)\n", 178 | " \n", 179 | " x_sequences = np.zeros((num_data, self.max_seq_length), dtype=np.int64)\n", 180 | " y_targets = np.zeros((num_data, self.max_seq_length), dtype=np.int64)\n", 181 | "\n", 182 | " for index, tweet in enumerate(tweets):\n", 183 | " converted = list(self.word_vocab.map(tweet, include_start_end=True))\n", 184 | " x_version = converted[:-1]\n", 185 | " y_version = converted[1:]\n", 186 | " \n", 187 | " x_sequences[index, :len(x_version)] = x_version\n", 188 | " y_targets[index, :len(y_version)] = y_version\n", 189 | " \n", 190 | "\n", 191 | " return VectorizedSingleIODataset(x_sequences, y_targets)\n", 192 | "\n", 193 | "class VectorizedSingleIODataset(Dataset):\n", 194 | " def __init__(self, x_input, y_target):\n", 195 | " self.x_input = x_input\n", 196 | " self.y_target = y_target\n", 197 | "\n", 198 | " def __len__(self):\n", 199 | " return len(self.x_input)\n", 200 | "\n", 201 | " def __getitem__(self, index):\n", 202 | " return {'x_input': self.x_input[index],\n", 203 | " 'y_target': self.y_target[index],\n", 204 | " 'x_lengths': len(self.x_input[index].nonzero()[0])}\n", 205 | " \n", 206 | "class DataServer(object):\n", 207 | " def __init__(self, vectorized_data):\n", 208 | " self.vectorized_data = vectorized_data\n", 209 | " self.gpu_mode = False\n", 210 | " self.volatile_mode = False\n", 211 | " \n", 212 | " def serve_batches(self, batch_size, num_batches=-1, num_workers=0):\n", 213 | " datagen = DataLoader(self.vectorized_data, batch_size=batch_size, \n", 214 | " shuffle=True, num_workers=num_workers)\n", 215 | " for batch_index, batch in enumerate(datagen):\n", 216 | " out = {}\n", 217 | " for key, val in batch.items():\n", 218 | " if not isinstance(val, Variable):\n", 219 | " val = Variable(val)\n", 220 | " if self.gpu_mode:\n", 221 | " val = val.cuda()\n", 222 | " if self.volatile_mode:\n", 223 | " val = val.volatile() \n", 224 | " out[key] = val\n", 225 | " \n", 226 | " yield out\n", 227 | " if num_batches > 0 and batch_index > num_batches:\n", 228 | " break\n", 229 | " \n", 230 | " def enable_gpu_mode(self):\n", 231 | " self.gpu_mode = True\n", 232 | " \n", 233 | " def disable_gpu_mode(self):\n", 234 | " self.gpu_mode = False" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "## Build Model" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 325, 247 | "metadata": { 248 | "code_folding": [], 249 | "collapsed": true 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "class TweetLanguageModel(nn.Module):\n", 254 | " def __init__(self, embedding_dim, hidden_dim, input_vocab_size, num_rnn_layers=1):\n", 255 | " super(TweetLanguageModel, self).__init__()\n", 256 | " \n", 257 | " self.emb = nn.Embedding(input_vocab_size, embedding_dim, padding_idx=0)\n", 258 | " self.rnn = nn.GRU(embedding_dim, hidden_dim, batch_first=True, num_layers=num_rnn_layers)\n", 259 | " self.affine = nn.Linear(hidden_dim, input_vocab_size)\n", 260 | " \n", 261 | " \n", 262 | " self._cached_config = dict(embedding_dim=embedding_dim, \n", 263 | " hidden_dim=hidden_dim,\n", 264 | " input_vocab_size=input_vocab_size,\n", 265 | " num_rnn_layers=num_rnn_layers)\n", 266 | " \n", 267 | " def hidden_from_x(self, x, zero_out=True):\n", 268 | " hidden = x.data.new(self.rnn.num_layers, \n", 269 | " x.size(0), \n", 270 | " self.rnn.hidden_size)\n", 271 | " if zero_out:\n", 272 | " hidden = hidden.zero_()\n", 273 | " else:\n", 274 | " stdv = 1. / math.sqrt(self.hidden.size(2))\n", 275 | " hidden.data.uniform_(-stdv, stdv)\n", 276 | " \n", 277 | " return Variable(hidden)\n", 278 | " \n", 279 | " def forward(self, x_in, apply_softmax=False, hidden=None, return_with_hidden=False):\n", 280 | " x_embedded = self.emb(x_in)\n", 281 | "\n", 282 | " if hidden is None:\n", 283 | " hidden = self.hidden_from_x(x_embedded, True)\n", 284 | " \n", 285 | " x_post_rnn, hidden = self.rnn(x_embedded, hidden)\n", 286 | " x_post_rnn = x_post_rnn.contiguous()\n", 287 | " x_reshaped = x_post_rnn.view(-1, x_post_rnn.size(2))\n", 288 | " \n", 289 | " x_out = self.affine(x_reshaped)\n", 290 | " \n", 291 | " if apply_softmax:\n", 292 | " x_out = F.softmax(x_out)\n", 293 | " \n", 294 | " if return_with_hidden:\n", 295 | " return x_out, hidden\n", 296 | " else:\n", 297 | " return x_out\n", 298 | " \n", 299 | " def save(self, json_filename, model_state_filename):\n", 300 | " torch.save(self.state_dict(), model_state_filename)\n", 301 | " with open(json_filename, \"w\") as fp:\n", 302 | " json.dump(self._cached_config, fp)\n", 303 | " \n", 304 | " @classmethod\n", 305 | " def load(cls, json_filename, model_state_filename):\n", 306 | " with open(json_filename, \"r\") as fp:\n", 307 | " config = json.load(fp)\n", 308 | " out = cls(**config)\n", 309 | " state_dict = torch.load(model_state_filename)\n", 310 | " out.load_state_dict(state_dict)\n", 311 | " return out" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 195, 317 | "metadata": { 318 | "collapsed": true 319 | }, 320 | "outputs": [], 321 | "source": [ 322 | "def accuracy(yhat, ytrue):\n", 323 | " pred = yhat.max(1)[1].type_as(ytrue)\n", 324 | " correct = pred.eq(ytrue)\n", 325 | " if not hasattr(correct, 'mean'):\n", 326 | " correct = correct.cpu()\n", 327 | " return correct.float().mean()" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 365, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [ 336 | "def variable_from_tokens(vectorizer, tokens):\n", 337 | " out = np.array(list(vectorizer.word_vocab.map(tokens)))\n", 338 | " return Variable(torch.LongTensor(out)).view(1,-1)\n", 339 | "\n", 340 | "def greedy_sample_from(model, vectorizer, temperature=1.0, seed_text=None, use_cuda=True, hx=None, n_length=10, burn=0):\n", 341 | " if seed_text is None:\n", 342 | " seed_text = [vectorizer.word_vocab.start_token]\n", 343 | " in_var = variable_from_tokens(vectorizer, seed_text)\n", 344 | " if use_cuda:\n", 345 | " in_var = in_var.cuda()\n", 346 | " for _ in range(n_length):\n", 347 | " if in_var.data.min() < 0:\n", 348 | " continue\n", 349 | " y_pred, hx = model(in_var, apply_softmax=False, hidden=hx, return_with_hidden=True)\n", 350 | " y_pred = torch.nn.functional.softmax(y_pred/temperature)\n", 351 | " in_var = torch.multinomial(y_pred)\n", 352 | " seed_text.append(vectorizer.word_vocab.lookup(in_var.data.cpu().numpy().ravel()[0]))\n", 353 | " if seed_text[-1] == constants.WORD_END_TOKEN:\n", 354 | " break\n", 355 | " return seed_text" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 335, 361 | "metadata": { 362 | "code_folding": [], 363 | "collapsed": true 364 | }, 365 | "outputs": [], 366 | "source": [ 367 | "class CrossEntTrainer(object):\n", 368 | " def __init__(self, model, optimizer_class, gpu_mode=False, learning_rate=0.001):\n", 369 | " self.model = model\n", 370 | " if isinstance(optimizer_class, six.string_types):\n", 371 | " if not hasattr(torch.optim, optimizer_class):\n", 372 | " raise Exception(\"{} is not a valid optimizer\".format(optimizer_class))\n", 373 | " optimizer_class = getattr(torch.optim, optimizer_class)\n", 374 | " self.optimizer = optimizer_class(model.parameters(), lr=learning_rate, weight_decay=1e-7)\n", 375 | " self.loss = nn.CrossEntropyLoss()\n", 376 | " self.gpu_mode = gpu_mode\n", 377 | " self.loss_history = []\n", 378 | " self.accuracy_history = []\n", 379 | " \n", 380 | " def _reset_gradient(self):\n", 381 | " self.optimizer.zero_grad()\n", 382 | " self.model.zero_grad()\n", 383 | " \n", 384 | " def train(self, data_server, batch_size=64, num_batches=-1, num_epochs=1):\n", 385 | " \n", 386 | " if self.gpu_mode:\n", 387 | " self.model.cuda()\n", 388 | " data_server.enable_gpu_mode()\n", 389 | " else:\n", 390 | " self.model.cpu()\n", 391 | " data_server.disable_gpu_mode()\n", 392 | " \n", 393 | " epoch_bar = tqdm_notebook(total=num_epochs, unit=\" epochs\", position=0)\n", 394 | " batch_bar = tqdm_notebook(unit=\" batches\", position=1)\n", 395 | " \n", 396 | " for _ in range(num_epochs):\n", 397 | " for batch in data_server.serve_batches(batch_size, num_batches):\n", 398 | " \n", 399 | " self._reset_gradient()\n", 400 | " yhat = self.model(batch['x_input'])\n", 401 | " \n", 402 | " # best to reshape y_target to match the yhat\n", 403 | " yhat = yhat.view(-1, yhat.size(1))\n", 404 | " y_target = batch['y_target'].view(-1)\n", 405 | " computed_loss = self.loss(yhat, y_target)\n", 406 | " computed_loss.backward()\n", 407 | " self.optimizer.step()\n", 408 | "\n", 409 | " computed_loss_ = float(computed_loss.data.cpu().numpy()[0])\n", 410 | " computed_accuracy = accuracy(yhat, y_target).data.cpu().numpy()[0]\n", 411 | " \n", 412 | " batch_bar.set_postfix(loss=computed_loss_, accuracy=computed_accuracy)\n", 413 | " self.loss_history.append(computed_loss_)\n", 414 | " self.accuracy_history.append(computed_accuracy)\n", 415 | " batch_bar.update(1)\n", 416 | " \n", 417 | " sampled_text = greedy_sample_from(model, vectorizer, use_cuda=self.gpu_mode, n_length=10)\n", 418 | " epoch_bar.set_postfix(sample=sampled_text)\n", 419 | " epoch_bar.update(1)\n", 420 | "\n", 421 | " batch_bar.clear()" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 374, 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "data": { 431 | "text/plain": [ 432 | "'wendy realdonaldtrump bradcooney1wise the mr trump the art of the deal is doing is why _'" 433 | ] 434 | }, 435 | "execution_count": 374, 436 | "metadata": {}, 437 | "output_type": "execute_result" 438 | } 439 | ], 440 | "source": [ 441 | "\" \".join(greedy_sample_from(model, vectorizer, temperature=0.9, n_length=30)[1:])" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": null, 447 | "metadata": { 448 | "collapsed": true 449 | }, 450 | "outputs": [], 451 | "source": [ 452 | "text_dataset = TrumpTweetDataset(constants.TRUMP_TWEET_CSV)\n", 453 | "tweet_sequences = text_dataset.get_data()\n", 454 | "\n", 455 | "n_train = int(constants.TRAIN_PROPORTION * len(tweet_sequences))\n", 456 | "train_tweets = tweet_sequences[:n_train]\n", 457 | "eval_tweets = tweet_sequences[n_train:]" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": 327, 463 | "metadata": {}, 464 | "outputs": [ 465 | { 466 | "name": "stdout", 467 | "output_type": "stream", 468 | "text": [ 469 | "Sucessfully Loaded!\n" 470 | ] 471 | } 472 | ], 473 | "source": [ 474 | "model_parameters = dict(embedding_dim=64, \n", 475 | " hidden_dim=256,\n", 476 | " num_rnn_layers=1)\n", 477 | "\n", 478 | "vectorizer_name = \"trump_twitter_{hidden_dim}h_{embedding_dim}e.vectorizer\".format(**model_parameters)\n", 479 | "model_json_name = \"trump_twitter_{hidden_dim}h_{embedding_dim}e.json\".format(**model_parameters)\n", 480 | "model_state_name = \"trump_twitter_{hidden_dim}h_{embedding_dim}e.state\".format(**model_parameters)\n", 481 | "\n", 482 | "import os\n", 483 | "if os.path.exists(vectorizer_name):\n", 484 | " vectorizer = TrumpTweetVectorizer.load(vectorizer_name)\n", 485 | " model = TweetLanguageModel.load(json_filename=model_json_name, model_state_filename=model_state_name)\n", 486 | " print(\"Sucessfully Loaded!\")\n", 487 | "else:\n", 488 | " vectorizer = TrumpTweetVectorizer.induce_from_text(tweet_sequences)\n", 489 | " model = TweetLanguageModel(input_vocab_size=len(vectorizer.word_vocab), **model_parameters)\n", 490 | " print(\"Successfully Created!\")" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": 328, 496 | "metadata": { 497 | "collapsed": true 498 | }, 499 | "outputs": [], 500 | "source": [ 501 | "vectorized_train_data = vectorizer.convert_dataset(train_tweets)\n", 502 | "vectorized_eval_data = vectorizer.convert_dataset(eval_tweets)\n", 503 | "\n", 504 | "train_server = DataServer(vectorized_train_data)\n", 505 | "eval_server = DataServer(vectorized_eval_data)" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 329, 511 | "metadata": { 512 | "collapsed": true 513 | }, 514 | "outputs": [], 515 | "source": [ 516 | "trainer = CrossEntTrainer(model, \"Adam\", True, learning_rate=0.001)" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": 330, 522 | "metadata": {}, 523 | "outputs": [ 524 | { 525 | "data": { 526 | "application/vnd.jupyter.widget-view+json": { 527 | "model_id": "af6b9bff5c0245398e14982eb3934a2e" 528 | } 529 | }, 530 | "metadata": {}, 531 | "output_type": "display_data" 532 | }, 533 | { 534 | "data": { 535 | "application/vnd.jupyter.widget-view+json": { 536 | "model_id": "27b439ad0fb8417485a060c77037de0a" 537 | } 538 | }, 539 | "metadata": {}, 540 | "output_type": "display_data" 541 | }, 542 | { 543 | "name": "stdout", 544 | "output_type": "stream", 545 | "text": [ 546 | "\n" 547 | ] 548 | } 549 | ], 550 | "source": [ 551 | "try:\n", 552 | " trainer.train(train_server, num_epochs=3)\n", 553 | "except KeyboardInterrupt:\n", 554 | " pass" 555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": 331, 560 | "metadata": { 561 | "collapsed": true 562 | }, 563 | "outputs": [], 564 | "source": [ 565 | "vectorizer.save(vectorizer_name)\n", 566 | "model.save(json_filename=model_json_name, model_state_filename=model_state_name)" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": 332, 572 | "metadata": {}, 573 | "outputs": [ 574 | { 575 | "data": { 576 | "text/plain": [ 577 | "" 578 | ] 579 | }, 580 | "execution_count": 332, 581 | "metadata": {}, 582 | "output_type": "execute_result" 583 | }, 584 | { 585 | "data": { 586 | "image/png": "iVBORw0KGgoAAAANSUhEUgAABUcAAAE4CAYAAAB45EL9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3XtY1GX+//HXhAgKiqGYpuAxo8gU8sAVIuuh1LWtdClL\n7bDhadEOpqiVZqhRYrNqkCWru1i6rl+tjNB1k7DEcypfNRVcLYS0EEURJgiE+f3Bl/k1C8qg5jjM\n83FdXMPcn/v+zPvjdZXw8j4YzGazWQAAAAAAAADgZG6xdwEAAAAAAAAAYA+EowAAAAAAAACcEuEo\nAAAAAAAAAKdEOAoAAAAAAADAKRGOAgAAAAAAAHBKhKMAAAAAAAAAnFIDexdwvRQUFNi7BAAAAAAA\nAMBpeHl52buEa8bMUQAAAAAAAABOiXAUAAAAAAAAgFMiHAUAAAAAAADglAhHAQAAAAAAADglwlEA\nAAAAAAAATolwFAAAAAAAAIBTIhwFAAAAAAAA4JQIRwEAAAAAAAA4JcJRAAAAAAAAAE6JcBQAAAAA\nAACAUyIcBQAAAAAAAOCUCEcBAAAAAAAAOCXCUQAAAAAAAABOiXAUAAAAAAAAgFMiHAUAAADg1M6k\n52ivcbPOpOfYuxQAAHCDGcxms9neRVwPBQUF9i4BAAAAgIM5k56jrdPWqeSsSe4tPNQ3NlwtA33t\nXRYAAA7By8vL3iVcM2aOAgAAAHBa2akZKjlrkiSVnDUpOzXDzhUBAIAbiXAUAAAAgNPy6+8v9xYe\nkiT3Fh7yG+Bv54oAAMCNxLJ6AAAAAE7tTHqOslMz5DfAXy27s6QeAABb1Ydl9YSjAAAAAAAAAOqs\nPoSjLKsHAAAAAAAA4JQIRwEAAAAAAAA4JcJRAAAAAAAAAE6JcBQAAAAAAACAUyIcBQAAAAAAAOCU\nCEcBAAAAAAAA3DCJiYkaMGCA7rnnHg0ZMkTJyclX7F9YWKhZs2apV69eCgwMVEREhHJycqz6bNy4\nUcOGDVNgYKD69++vmJgYFRUV1VoL4SgAAAAAAACAG2LVqlUyGo2aOHGikpKSNGLECEVFRSktLe2y\nYyIjI5WVlaUVK1boH//4h0wmk8aPH6+KigpJlcHoyy+/rCFDhmj9+vV67bXX9Pnnn2vOnDm11mMw\nm83m6/Z0dlRQUGDvEgAAAAAAAACn4eXlVaf+ZrNZYWFhGjRokF577TVL+8SJE1VQUKCVK1dWG5OW\nlqZJkyZpy5Yt8vb2liTl5OTo8OHD6tevn9zc3PTHP/5Rt912m5YsWWIZt3TpUsXFxel///d/1aBB\ng8vWdPkrAAAAAAAAAHCdfPfdd8rNzVWfPn2s2u+//37NmzdPJSUlcnd3t7qWmpqq3r17W4JRSfL1\n9ZWvr6/l/YoVK1ReXm41rnnz5iorK1NRUZGaNWt22ZpYVg8AAAAAAADgN3fy5ElJUps2bazafX19\nVVFRUW0fUUk6duyY2rdvr4SEBD344IMKDg7W5MmTlZ+fb+nj6elZbRbrli1b1K5duysGoxLhKAAA\nAAAAAIAbwGQySZIaNWpk1d64cWNJqvEApfz8fG3atEmZmZkyGo2KiYnRgQMHNHr0aF26dKnGz/n0\n00+VkpKiF198sdaabFpWX1paqoSEBH3++efKzc1VmzZtNHLkSI0aNeqyY7Zv365FixYpIyNDTZs2\n1bBhw/TSSy9Z1vjPmDFDn376abVxd9xxR60nVAEAAAAAAACo/y5duiQ3NzfFxsbKxcVFUmW4+uyz\nz2r79u0KCwuz6v/ZZ5/ptdde03PPPaehQ4fWen+bwtGYmBht3LhR0dHRCggI0JYtWzR37ly5ubkp\nPDy8Wv+DBw9q3LhxGjp0qN5880399NNPmjp1qsrLyzV9+nRLv8DAQMXFxVkXdIUNUgEAAAAAAAA4\npiZNmkiqPkO06n3V9V/z8PCQr6+vJRiVpKCgIBkMBmVmZlqFo6tXr1Z0dLTGjRunl19+2aaaal1W\nX1hYqLVr1yoyMlJDhgyRn5+fnnnmGYWEhCgpKanGMR999JFatGihmJgYdenSRX379lVUVJRWrlxp\n9fCurq7y8fGx+rr11lttKhwAAAAAAACA42jXrp0kVdtbNCsrS66urvLz86txzIULF6zaKioqZDab\n5eHhYWlLTk5WdHS0pk+fbnMwKtkQjnp6eiotLU0jRoywam/evLnOnz9f45gTJ06oW7duVrNA+/Xr\np9LSUu3du9fm4gAAAAAAAADUDx06dJCvr6+2bt1q1f71118rODhYDRs2rDYmNDRUBw4csDqAKT09\nXZJ05513SpIyMjL0yiuvaPLkyfrTn/5Up5pqDUcNBoO8vb2tNkotLi7Wrl271K1btxrHlJeXV1se\nf+utt8pgMCg7O7tOBQIAAAAAAACoHyZNmqRPPvlE69ev16lTp5SQkKDdu3crMjJSkmQ0GhUREWHp\n//DDD6t169Z68cUX9Z///Ee7d+9WdHS0goKC1KNHD0nS/Pnz1bFjRw0fPlx5eXlWX6WlpVes56o2\n+JwzZ44KCws1bty4Gq936NBBhw8fltlslsFgkCRlZmbKbDZbTqWSpHPnzmnq1Knav3+/SktL1adP\nH02ZMkU+Pj5XUxYAAAAAAACAm9ijjz4qk8mkuLg45ebmqkOHDoqPj1dQUJAkKS8vz2pyZcOGDZWY\nmKh58+bp8ccf1y233KKBAwdq5syZlj47duyQJPXp06fa53344Yfq3bv3ZesxmM1ms63Fm81mvfHG\nG1q3bp0WLVqkBx54oMZ+u3fv1tNPP63nn39eY8aM0dmzZxUVFaWMjAxFRkZq7Nixmjdvnnbs2KHx\n48frrrvu0vfff68FCxbI1dVV69evl5ubm61lSZIKCgrq1B8AAAAAAADA1fPy8rJ3CdfM5nC0vLxc\nr7zyijZt2qS//OUvGjhw4BX7//Of/9T8+fNVUlIib29vvfrqq3rrrbc0efJk/fGPf6xxzL59+zRy\n5EjFxsbqkUceqdODEI4CAAAAAAAAN059CEdtXlY/Z84cpaSkaPny5erZs2et/Z944gkNGzZM58+f\nl4+Pj0wmk6ZMmWLZKLUmd911l6TK6bMAAAAAAAAA8Fuq9UAmSVqzZo0+/vhjvf/++zYFo5mZmUpK\nSpKbm5tatWolFxcXbdiwQa1atVJAQIDKysr0+uuv68svv7Qad+TIEUlS+/bt6/4kAAAAAAAAAFAH\ntc4cNZlMMhqNCg8PV8eOHavN6vTx8ZHRaNSRI0e0fPlySdIPP/ygqKgo5efna+DAgfr2229lNBo1\na9YsGQwGubq6qqCgQDNnzlRZWZkCAgKUlZWlefPmqUuXLgoLC/ttnhYAAAAAAAAA/k+te47u2bNH\nTz311GWvZ2ZmasaMGdq3b582b95saV+5cqUSExP1008/qW3btho3bpyGDx9uuf7zzz8rPj5e//73\nv5Wbm6umTZsqLCxMU6dOVfPmzev8IOw5CgAAAAAAANw49WHP0TqdVn8zIxwFAACAUzp+SEpPkwJD\npc5d7V0NAABwIvUhHLVpz1EAAAAAN6Hjh6Sl0VLKusrX44fsXREAAIBDIRwFAAAAHFV6mnQxv/L7\ni/mV7wEAAGAzwlEAAADAUQWGSk29K79v6i0F9bVvPQAAAA6GPUcBAAAAR1a152hQX6nTPfauBgAA\nOJH6sOco4SgAAAAAAACAOqsP4SjL6gEAAAAAAAA4JcJRAAAAAAAAAE6JcBQAAAAAAACAUyIcBQAA\nAAAAAOCUCEcBAAAAAAAAOCXCUQAAAAAAAABOiXAUAAAAAAAAgFMiHAUAAAAAAADglAhHAQAAAAAA\nADglwlEAAAAAAAAATolwFAAAAAAAAIBTIhwFAAC4SmfSc7TXuFln0nPsXQoAAACAq0A4Cudy/JC0\ndknlKwAA1+BMeo62TlunjJW7tXXaOgJSAAAAwAERjsJ5HD8kLY2WUtZVvhKQAgCuQXZqhkrOmiRJ\nJWdNyk7NsHNFAAAAAOqKcBTOIz1Nuphf+f3F/Mr3AABcJb/+/nJv4SFJcm/hIb8B/nauCAAAAEBd\nNbB3AcANExgq7UmtDEabektBfe1dkWM6fqgyWA4MlTp3tXc1AGA3LQN91Tc2XNmpGfIb4K+W3X3t\nXRIAAACAOjKYzWazvYu4HgoKCuxdAhxBVbAX1FfqdI+9q3E8VVsTVAXM42cTkAIAAAAA4KS8vLzs\nXcI1Y+YonEvnroR516KmrQn48wQAAAAAAA6KPUcB2C4wtHLGqMTWBAAAAAAAwOGxrB5A3bA1AQAA\nAAAAUP1YVm/TzNHS0lLFx8dr0KBB6t69u4YOHapVq1Zdccz27dv12GOPqWvXrgoJCdE777yjS5cu\nWfXZtm2bhg8frq5du6pPnz5auHChKioqrv5pAPz2OneVHoskGAUAAAAAAA7Ppj1HY2JitHHjRkVH\nRysgIEBbtmzR3Llz5ebmpvDw8Gr9Dx48qHHjxmno0KF688039dNPP2nq1KkqLy/X9OnTJUlHjx7V\nhAkT9PTTT2vBggX6/vvvNXPmTEnS5MmTr+MjAgAAAAAAAEB1tS6rLywsVHBwsKKiovTss89a2iMi\nIlRWVqYPP/yw2pioqCjt2bNHX375pRo0qMxf165dqzlz5mjnzp3y9PTUlClTdPz4cX322WeWcStW\nrNDChQu1Y8cONW7cuE4PwrJ6AAAAAAAA4MZximX1np6eSktL04gRI6zamzdvrvPnz9c45sSJE+rW\nrZslGJWkfv36qbS0VHv37pUk7dy5UyEhIVbjQkJCVFxcrP3799f5QQAAAAAAAACgLmoNRw0Gg7y9\nvdWoUSNLW3FxsXbt2qVu3brVOKa8vNwqGJWkW2+9VQaDQdnZ2SoqKtK5c+fUpk0bqz6+vr6SpJMn\nT9b5QQAAAAAAAACgLmw6kOm/zZkzR4WFhRo3blyN1zt06KDDhw/r1yv2MzMzZTabZTKZZDKZJKna\n0nk3Nze5uLioqKjoasoCAAAAAAAAAJvVKRw1m82aPXu2kpKSFBsbKz8/vxr7Pfnkk8rKytJ7772n\nkpIS/fDDD5o7d64aN25cbUYpAAAAAAAAANiDzeFo1Unzn376qRYvXqwHHnjgsn179+6t6OhoLV++\nXIGBgRoxYoRGjx4tDw8PeXt7y9PTU5KqzRD9+eefVV5ebrkOAAAAAAAAAL8Vm6dxzpkzRykpKVq+\nfLl69uxZa/8nnnhCw4YN0/nz5+Xj4yOTyaQpU6bozjvvlIeHh3x8fJSdnW01pmqv0U6dOtXxMQAA\nAAAAAACgbmyaObpmzRp9/PHHev/9920KRjMzM5WUlCQ3Nze1atVKLi4u2rBhg1q1aqWAgABJUmho\nqLZt22a1L+lXX32lJk2aKCgo6CofBwAAAAAAAMDNLDExUQMGDNA999yjIUOGKDk5+Yr9CwsLNWvW\nLPXq1UuBgYGKiIhQTk5OtX6bN29Wjx499NRTT9lcS63hqMlkktFoVHh4uDp27Ki8vDyrL0kyGo2K\niIiwjPnhhx8UFRWlxMRE/fDDD9q0aZOMRqMmT54sg8EgSRozZoxOnz6t+fPnKycnRykpKVq2bJnG\njx+vhg0b2vwAAAAAAAAAABzDqlWrZDQaNXHiRCUlJWnEiBGKiopSWlraZcdERkYqKytLK1as0D/+\n8Q+ZTCaNHz9eFRUVkqRLly5p/vz5mjZtWrUD4GtjMP966mYN9uzZc8W0NTMzUzNmzNC+ffu0efNm\nS/vKlSuVmJion376SW3bttW4ceM0fPjwavd+++23dezYMTVv3lxPPvmkJkyYUKcHqFJQUHBV4wAA\nAAAAAADUnZeXV536m81mhYWFadCgQXrttdcs7RMnTlRBQYFWrlxZbUxaWpomTZqkLVu2yNvbW5KU\nk5Ojw4cPq1+/fnJzc9PRo0f14osv6r333tPbb7+t0tJSffTRRzbVVGs46igIRwEAAAAAAIAbp67h\n6IkTJ/T73/9eCQkJCgsLs7SvWrVK8+bNU3p6utzd3a3GREdH69SpU0pISLjsfS9cuKBbbrlFTZs2\nVURERJ3CUZtPqwcAAAAAAACAq1V1GHubNm2s2n19fVVRUVHjPqLHjh1T+/btlZCQoAcffFDBwcGa\nPHmy8vPzLX2aNWumpk2bXlVNhKMAAAAAAAAAfnMmk0mS1KhRI6v2qn1Ci4qKqo3Jz8/Xpk2blJmZ\nKaPRqJiYGB04cECjR4/WpUuXrrmmBtd8BwAAAAAAAAD4DVy6dElubm6KjY2Vi4uLpMpw9dlnn9X2\n7dutludfDWaOAgAAAAAAAPjNNWnSRFL1GaJV76uu/5qHh4f8/f0twagkBQUFyWAwKDMz85prIhwF\nAAAAAAAA8Jtr166dJFXbWzQrK0uurq7y8/OrccyFCxes2ioqKmQ2m+Xh4XHNNRGOAgAAAAAAAPjN\ndejQQb6+vtq6datV+9dff63g4GA1bNiw2pjQ0FAdOHDA6gCm9PR0SdKdd955zTURjgIAAAAAAAC4\nISZNmqRPPvlE69ev16lTp5SQkKDdu3crMjJSkmQ0GhUREWHp//DDD6t169Z68cUX9Z///Ee7d+9W\ndHS0goKC1KNHD0mVBz3l5eUpLy9PZWVlKisrs7wvKSm5Yj0cyAQAAAAAAADghnj00UdlMpkUFxen\n3NxcdejQQfHx8QoKCpIk5eXlKTs729K/YcOGSkxM1Lx58/T444/rlltu0cCBAzVz5kxLn7/97W+K\nj4+3+pw+ffpIkt566y0NHz78svUYzGaz+Xo+oL0UFBTYuwQAAAAAAK7KmfQcZadmyK+/v1oG+tq7\nHACwiZeXl71LuGaEowAAAAAA2NGZ9BxtnbZOJWdNcm/hob6x4QSkABxCfQhH2XMUAAAAAAA7yk7N\nUMlZkySp5KxJ2akZdq4IAJwH4SgAAAAAAHbk199f7i08JEnuLTzkN8DfzhUBgPNgWT0AAAAAAHZm\n2XN0gL9admdJPQDHUB+W1ROOAgAAAAAAAKiz+hCOsqweAAAAAAAAgFMiHAUAAAAAAA7vTHqO9ho3\n60x6jr1LAeBAWFYPAAAAAAAc2pn0HG2dtk4lZ01yb+GhvrHhahnI3q3Ab41l9bixjh+S1i6pfAUA\nAAAAAJKk7NQMlZw1SZJKzpqUnZph54oAOArCUUdx/JC0NFpKWVf5SkAKAAAAAIAkya+/v9xbeEiS\n3Ft4yG+Av50rAuAoGti7ANgoPU26mF/5/cX8yvedu9q3JgAAAAAAbgItA33VNzZc2akZ8hvgr5bd\nWVIPwDaEo44iMFTak1oZjDb1loL62rsiAAAAAABuGi0DfdlnFECdcSCTIzl+qHLGaFBfqdM99q4G\nAAAAAAAATqw+HMhEOAoAAAAAAACgzupDOMqBTAAAAAAAAACcEuEoAAAAAAAAAKdEOAoAAAAAAADA\nKdl0Wn1paakSEhL0+eefKzc3V23atNHIkSM1atSoy47ZuXOn4uLidOzYMVVUVCg4OFjTpk1T+/bt\nJUkzZszQp59+Wm3cHXfcoeTk5Kt7GgAAAAAAAACwkU3haExMjDZu3Kjo6GgFBARoy5Ytmjt3rtzc\n3BQeHl6t/7fffqsxY8Zo1KhRmjdvnoqLizV//nz96U9/UnJysjw8PCRJgYGBiouLsy6ogU0lAQAA\nAAAAAMA1qXVZfWFhodauXavIyEgNGTJEfn5+euaZZxQSEqKkpKQax2zYsEGenp6aMWOGOnbsqICA\nAL366qs6ffq09u7da+nn6uoqHx8fq69bb731+j0dAAAAAAAAAFxGrdM0PT09lZaWpkaNGlm1N2/e\nXEePHq1xjMFgsHxVcXV1tVwDAAAAAAAAAHurdeaowWCQt7e3VThaXFysXbt2qVu3bjWOGT58uEpK\nSrR8+XKVlJSouLhYS5YsUfv27RUcHHz9qgcAAAAAAACAq3RVG3zOmTNHhYWFGjduXI3XO3furPfe\ne08vvPCCjEajJKl9+/ZatmyZGjZsaOl37tw5TZ06Vfv371dpaan69OmjKVOmyMfH52rKAgAAAAAA\nAACb1Tpz9NfMZrNmz56tpKQkxcbGys/Pr8Z+x44d08svv6xhw4ZpzZo1SkxM1O23364JEyaoqKhI\nUuVyfUkKDQ3VBx98oFmzZmnv3r16+umn9csvv1zjYwEAAAAAAKDOjh+S1i6pfAWcgMFsNptt6Vhe\nXq5XXnlFmzZt0l/+8hcNHDjwsn1feOEFnTp1Sh9//LGlraioSCEhIZo8ebKeffbZGsft27dPI0eO\nVGxsrB555JE6PUhBQUGd+gMAAAAAAOBXjh+SlkZLF/Olpt7S+NlS5672rgo3MS8vL3uXcM1snjk6\nZ84cpaSkaPny5VcMRiXpxIkT6tixo1Wbp6enmjdvrpMnT1523F133SVJysvLs7UsAAAAAAAAXA/p\naZXBqFT5mp5m33pukAkTJmjixInX74bMvnUoNoWja9as0ccff6z3339fPXv2rLV/q1atlJWVZdVW\nWFioM2fOqFWrViorK9Prr7+uL7/80qrPkSNHJFXuTwoAAH5j/NAGAACAXwsMrZwxKlW+BvW1bz2O\nqGr2bcq6yld+1r7p1Xogk8lkktFoVHh4uDp27FhtVqePj4+MRqOOHDmi5cuXS5JGjx6tCRMmaOHC\nhXr44YdVWlqq+Ph4NWjQQIMHD5arq6sKCgo0c+ZMlZWVKSAgQFlZWZo3b566dOmisLCw3+ZpAQBA\npV8vmdqTypIpAAAAVP48OH525YzRoL5Sp3vsXZHjqWn2LT9n39RqDUcPHz6sgoICrV69WqtXr652\nPTMzU3l5ecrOzra09evXT/Hx8YqPj9fy5cvl6uqqe++9V4mJiWrXrp0k6a233lJ8fLwWLFig3Nxc\nNW3aVGFhYZo6dapcXV2v4yMCAIBq+KENAAAANenclZ8L/8uFCxf07rvvavv27SosLFTr1q0VHh6u\nJ5980tLnf/7nf7Ru3Tr9eOqUGqlc93ncoin+t6nF/82+tVz/8Uc1atRI9913n6ZMmaIWLVrY67Hw\nf2w+kOlmx4FMAOBczqTnKDs1Q379/dUy0Nfe5Tie/95sf8IbzAwAAACAU5owYYJcXFz03nvvVbtm\nNpsVERGhoqIiTZ8+Xbfddpu2b9+uRYsWafLkyXr88ce1a9cuvfTSS3r99dcVGBio8we/kfH9pXL3\nbqH3/rai+vXz52U0GuXu7l7jZzqS+nAgU60zRwEAuNmcSc/R1mnrVHLWpKxN36pvbDgBaV2xZAoA\nAACo1aFDh/Ttt98qLi5O9913nyRpxIgROnTokNauXavHH39cx44dU6NGjTRo0CC5uLiodeuHNf++\n+5WfX7lSq/r11po/f77lOuzL5tPqAQC4WWSnZqjkrEmSVHLWpOzUDDtX5KA6d5UeiyQYBQAAgN2d\nSc/RXuNmnUnPsXcpVo4ePSpJ6trVequBgIAAnTx5UiUlJerVq5fKyso0btw4ffbZZ/rxxx/VokUL\ndenSRZJqvQ77IhwFADgcv/7+atOmREG+x9SmTYn8BvjbuyQAAAAAV6lqZVjGyt3aOm3dTRWQFhUV\nyWAwqHHjxlbtVe+Liork7++vhIQE3XbbbVq0aJEeeeQRRURE6NixY5JU63XYF+EoAMDhtGxyQX3v\nOKy7W/+gvnccVkvPC/YuCcBVullniQAAgBvnZl4Z5unpKbPZLJPJZNVeFZp6enpKku6++27FxMRo\n8+bNiouLU0lJiSZPnqyKigqbrsN+CEcBAI4nPU0uP1cexOfyc0HlvpkAHM7NPEsEAADcOH79/eXe\nwkOS5N7C46ZaGXbXXXdJkg4ePGjVfujQIXXo0EHu7u46ePCgDh8+LElq0KCBevfurTFjxigvL08X\nL16s9Trsi3AUAOB4AkMrT1iXKl+D+tq3HgBX5WaeJQIAAG6cloG+6hsbLv/RvdV3Qbhadr/xh62W\nlZXp7Nmz1b78/f3VrVs3vfPOO9q7d69ycnL00Ucf6auvvtLo0aMlSWlpaZo2bZrS0tL0008/6dix\nY/rss8/UsWNHNWvWrNbrsC+D2Ww227uI66GgoMDeJQAAbqTjhzhpHXBwVTNHS86a5N7Cw26/DAEA\nAOc2YcIE7d+/v8ZrRqNR9957rxYtWqRt27bJZDLJ19dXo0aN0sMPPyxJunTpkpYsWaLNmzcrPz9f\nTZo0Uffu3TVp0iS1bdu21uuOzMvLy94lXDPCUQAAANjNmfQcZadmyG+AP8Eo4MAs/y3391fLQP5b\nBgBnQTh6EyEcBQAAAIAbr9os8NhwAlIAcBL1IRxlz1EAAAAAwFVj/2AAgCMjHAUAezh+SFq7pPIV\nAADAgd3Mp0wDAFAbwlEAuNGOH5KWRksp6ypfCUgBAIADuxlOmQYAOJbExEQNGDBA99xzj4YMGaLk\n5OQr9i8sLNSsWbPUq1cvBQYGKiIiQjk5OVZ9tm3bpuHDh6tr167q06ePFi5cqIqKilprIRwFgBst\nPU26mF/5/cX8yvcAAAAOrGWgr3pMeYBgFABQq1WrVsloNGrixIlKSkrSiBEjFBUVpbS0y/9uHBkZ\nqaysLK1YsUL/+Mc/ZDKZNH78eEv4efToUU2YMEHBwcFav3693njjDa1Zs0aLFy+utZ4G1+3JAAC2\nCQyV9qRWBqNNvaWgvvauCAAAAACA35zZbNbSpUv1xBNPaPjw4ZKkjh076ptvvtHSpUsVGhpabUxa\nWpoOHjyoLVu2yNvbW5K0YMECHT58WGVlZXJzc9OyZcvUqVMnTZs2TZLUqVMnnTp1SgsXLtT48ePV\nuHHjy9bEzFEAuNE6d5XGz5YGhksT3pA63WPvigAAAAAA+M199913ys3NVZ8+faza77//fu3bt08l\nJSXVxqSmpqp3796WYFSSfH19NXjwYLm5uUmSdu7cqZCQEKtxISEhKi4u1v79+69YE+EoANhD567S\nY5EEowAAAAAAp3Hy5ElJUps2bazafX19VVFRUW0fUUk6duyY2rdvr4SEBD344IMKDg7W5MmTlZ9f\nuV1dUVGRzp07V+M9f/2Zl0M4CgAAAAAAAOA3ZzKZJEmNGjWyaq9a9l5UVFRtTH5+vjZt2qTMzEwZ\njUbFxMR6mt8gAAAgAElEQVTowIEDGj16tC5dumS5538vnXdzc5OLi0uN9/w19hwFAAAAAAAAcFO6\ndOmS3NzcFBsbKxcXF0mV4eqzzz6r7du3y9/f/5ruz8xRAAAAAAAAQNJrr72mXr166ZNPPrF3KfVS\nkyZNJFWfIVr1vur6r3l4eMjf398SjEpSUFCQDAaDMjMz5enpWeM9f/75Z5WXl1uuXw7hKAAAAAAA\nAJxeYWGhtm7dqs6dO2vjxo32LqdeateunSRV21s0KytLrq6u8vPzq3HMhQsXrNoqKipkNpvl4eEh\nDw8P+fj4KDs726pP1V6jnTp1umJNhKMAAAAAgGtz/JC0dknlKwA4qC+++EJubm566aWXdPDgwRoP\nB8K16dChg3x9fbV161ar9q+//lrBwcFq2LBhtTGhoaE6cOCA5QAmSUpPT5ck3XnnnZY+27Ztk9ls\ntvT56quv1KRJEwUFBV2xJsJRAAAAAMDVO35IWhotpayrfCUgBeCgNmzYoIEDB6pnz55q3bp1tdmj\nx48f18SJE9W3b18NHTpU8+fPt1rKfaXrycnJ6tWrl3Jzcy39z549q169eik5OdmqT1pamn7/+99r\n9uzZlvtOnjxZ/fr1U2hoqEaNGqXU1FSr2n766SdFRUWpX79+evDBBzVz5kydPXtWBQUF6tOnj1at\nWmXV/9KlSxo4cKA++OCD6/cHaKNJkybpk08+0fr163Xq1CklJCRo9+7dioyMlCQZjUZFRERY+j/8\n8MNq3bq1XnzxRf3nP//R7t27FR0draCgIPXo0UOSNGbMGJ0+fVrz589XTk6OUlJStGzZMo0fP77G\nwPXXCEcBAABgP8w2Axxfepp08f9m81zMr3wPAA4mKytL3377rYYOHSqDwaAhQ4boX//6l2UmYn5+\nviZOnCgfHx/9/e9/V0xMjHbt2qV58+bZdL0u1qxZo0WLFumll15SRUWFXn75ZV26dEnLli3TP//5\nT4WFhem1117TiRMnJEm//PKLnn/+ef3yyy9aunSp3n33XeXk5Gjq1Kny8vLS7373O/3rX/+y+oxv\nvvlGFy9e1EMPPXSNf3J19+ijj+qVV15RXFycBg0apM8//1zx8fGWGZ55eXlWS+QbNmyoxMRENW3a\nVI8//rgiIyPVvXt3JSQkWPp06tRJf/3rX7Vnzx4NGTJEc+fO1dixYzV27Nha6+G0egAAANhH1Wyz\ni/nSnlRp/Gypc1d7VwWgrgJDK/8bvpgvNfWWgvrauyIAqLPk5GS1a9dOXbtW/izy0EMP6W9/+5vS\n09MVFBSkDRs2qKSkRDNmzJC7u7skaerUqUpNTdWlS5dqvV4XDz30kLp06SKpcm/NJUuWyNPTU82a\nNZMkPffcc/r73/+uvXv3qlOnTkpLS1N2drbi4uLUqlUrSdL06dO1Zs0aXbhwQY888ogmTpyoEydO\nWPbfTE1NVffu3dW2bdtr/8O7CqNGjdKoUaNqvPb2229Xa2vdurXee++9K97zag/SIhwFAACAfdQ0\n24xwFHA8nbtW/uNGelplMNrpHntXBMARHT9U+f+RwNAb/vNAeXm5/vWvf2n48OGWILNVq1bq1q2b\nNmzYoKCgIB09elTt27e3BJ+SFBISopCQEEmq9XpdVO2jKUm33HKLLl68qHfffVdHjx5VYWGhzGaz\nysvLVVBQYPlsLy8vSzAqSXfffbeio6MlST169FDbtm21ceNGPf/88yovL9fXX3+tSZMm1bm2+sim\nZfWlpaWKj4/XoEGD1L17dw0dOrTaXgX/befOnRo5cqR69OihoKAgRUZGKisry6rPtm3bNHz4cHXt\n2lV9+vTRwoULVVFRcdUPAwAAAAcSGFo5y0xithng6Dp3lR6LJBgFcHXsvHfxnj17lJeXp6VLl+r+\n+++3fB04cECpqakqKSlRYWGhGjVqdNl71Ha9Lho3bmz5/scff9SECRN0/vx5zZo1SytWrNDKlSvl\n6upq82cbDAb94Q9/0L///W9VVFRo3759+uWXXzRw4MDrUq+js2nmaExMjDZu3Kjo6GgFBARoy5Yt\nmjt3rtzc3BQeHl6t/7fffqsxY8Zo1KhRmjdvnoqLizV//nz96U9/UnJysjw8PHT06FFNmDBBTz/9\ntBYsWKDvv/9eM2fOlCRNnjz5+j4lAAAAbj7MNgMAAJLdV5MkJyfr3nvv1csvv2zVXlZWpj//+c/6\n+uuv1axZs2qT/n6ttus1KS4urrXP1q1bVVJSopiYGPn4+EiSLl68qLKyMqvPNplMV7zPQw89pISE\nBKWnpys1NVX9+vWzCmGdWa0zRwsLC7V27VpFRkZqyJAh8vPz0zPPPKOQkBAlJSXVOGbDhg3y9PTU\njBkz1LFjRwUEBOjVV1/V6dOntXfvXknSsmXL1KlTJ02bNk2dOnXSwIED9ec//1krVqzQzz//fH2f\nEgAAADcnZpsBAAA7riYpLCzU1q1bNXjwYN19991WX926dVPv3r21YcMG+fv767vvvlNhYaFl7I4d\nOzRu3DiVlJTUet3Dw0NSZbBZ5fDhw7XWV7XMv2q/UUnatGmTVR9/f39dvHhR33//vaXt2LFjGjt2\nrE6dOiVJ8vHx0f33368vvvhCX375pf7whz/U5Y+pXqs1HPX09FRaWppGjBhh1d68eXOdP3++xjEG\ng8HyVaVqum9V286dO6vtuxASEqLi4mLt37+/bk8BAAAAAAAAx1S1mmRguDThjRv6j6ZffPGFLl26\npP79+9d4fcCAAfrmm280aNAgubu7Kzo6WidPntTBgwe1aNEieXl5yd3dXQ8//PAVr3fp0kUuLi5a\nuXKlfvjhB+3cufOykw5/LSAgQJL04Ycf6vTp0/rkk0+0Y8cOtWnTRpmZmTp37pzCwsLUtm1bzZ07\nVydOnNCxY8e0YMEC/fLLL7r99tst93rkkUeUlJQkDw8Py8nwsCEcNRgM8vb2ttq7oLi4WLt27VK3\nbt1qHDN8+HCVlJRo+fLlKikpUXFxsZYsWaL27dsrODhYRUVFOnfunNq0aWM1ztfXV5J08uTJa3km\nAAAAAAAAOBI7rSbZsGGDunfvLm9v7xqvh4WFyWAwaNOmTYqPj9fFixc1evRoTZ8+Xd27d9frr78u\nSWrSpMkVr7dp00ZTp07Vvn379OSTT+pvf/ubpk+fXmt93bt31/jx47V27VqNGjVKu3fv1htvvKHw\n8HDt2bNHCxYskIuLi9599115eXnpueee06RJk9SiRQu98847VhMX77//fjVo0EAPPfSQVbuzM5jN\nZnNdB73yyivatGmTPvvsM/n5+dXYZ/v27XrhhRcsS+Tbt2+vhIQE+fr6Kjc3V3379tXbb7+tYcOG\nWY27++679eKLL2r8+PF1qqnqhC4AAAAAAAAA1rZv365p06YpKSlJzZs3vy739PLyui73sSebTquv\nYjabNXv2bCUlJSk2NvayweixY8f08ssva9iwYVqzZo0SExN1++23a8KECSoqKrouhQMAAAAAAAC4\nsgsXLuibb77RW2+9pVGjRl23YLS+sOm0ekkqLy+3zBhdvHixBg4ceNm+8fHxatu2reX0ealyj4SQ\nkBCtW7dOjz32mCRVC0p//vlnlZeXy9PTs67PAQAAAAAAAOC/zJw5U0ePHtXgwYM1duxYe5dz07E5\nHJ0zZ45SUlK0fPly9ezZ84p9T5w4obvvvtuqzdPTU82bN9fJkyfl4eEhHx8fZWdnW/Wp2mu0U6dO\ntpYFAAAAAAAA4DLi4+PtXcJNzaZl9WvWrNHHH3+s999/v9ZgVJJatWqlrKwsq7bCwkKdOXNGrVq1\nkiSFhoZq27Zt+vWWp1999ZWaNGnCiVkA6r0z6Tnaa9ysM+k59i4FAAAAAHAd8fueY6k1HDWZTDIa\njQoPD1fHjh2Vl5dn9SVJRqNRERERljGjR4/WwYMHtXDhQp04cUJHjx7VjBkz1KBBAw0ePFiSNGbM\nGJ0+fVrz589XTk6OUlJStGzZMo0fP14NGzb8jR4XAOzvTHqOtk5bp4yVu7V12jr+wgQAAACAeoLf\n9xxPrcvqDx8+rIKCAq1evVqrV6+udj0zM1N5eXlWS+T79eun+Ph4xcfHa/ny5XJ1ddW9996rxMRE\ntWvXTlLl0vm//vWvevvtt7Vy5Uo1b95cY8eOZe8DAPVedmqGSs6aJEklZ03KTs1Qy0BfO1cFAAAA\nALhW/L7neAzmX69rd2AFBQX2LgEAbFL1L4klZ01yb+GhvgvC1bI7f1kCAAAAgKNztt/3vLy87F3C\nNSMcBQA7OJOeo+zUDPkN8K/Xf1ECAAAAcCyW31X6+zPj8So50+97hKM3EcJRAAAAAACAq1dt1mNs\nOAEprqg+hKM2nVYPAAAAAACA+q2m/TKB+o5wFAAAAAAAAPLr7y/3Fh6SJPcWHvIb4G/nioDfHsvq\nAQAAAAAAIMm59svEtasPy+oJRwEAAAAAAADUWX0IR1lWDwAAAAAAAMApEY4CAAAAAAAAcEqEowAA\nAAAAAACcEuEoAAAAAAD2dvyQtHZJ5SsA4IYhHAUAAAAAwJ6OH5KWRksp6ypfCUgB4IYhHAUAwEmd\nSc/RXuNmnUnPsXcpAAA4t/Q06WJ+5fcX8yvfo+6YfQvgKhCOAgDghM6k52jrtHXKWLlbW6etIyAF\nAMCeAkOlpt6V3zf1loL62rceR8TsWwBXiXAUAAAnlJ2aoZKzJklSyVmTslMz7FwRAABOrHNXafxs\naWC4NOENqdM99q7I8TD7FsBVIhwFAMAJ+fX3l3sLD0mSewsP+Q3wt3NFAAA4uc5dpcciCUavFrNv\nAVwlg9lsNtu7iOuhoKDA3iUAAOBQzqTnKDs1Q34D/NWyu6+9ywEAALg2xw9VzhgN6kvIDNwgXl5e\n9i7hmhGOAgAAAAAAAKiz+hCOsqweAAAAAAAAgFMiHAUAAAAAAADglAhHAQAAAAAAADglwlEAAAAA\nAAAATolwFAAAAAAAAIBTIhwFAAAAAAAA4JQIRwEAAAAAAAA4JcJRAAAAAAAAAE6JcBQAAAAAAACA\nU2pgS6fS0lIlJCTo888/V25urtq0aaORI0dq1KhRNfZ/6qmntGfPnhqvTZo0Sc8//7xmzJihTz/9\ntNr1O+64Q8nJyXV4BAAAAAAAAACOIjExUR999JFyc3Pl6+uriRMn6qGHHqqxb1xcnOLj46u1N27c\nWOnp6Zb3ycnJWrp0qb7//nv5+Pho9OjRioiIqLUWm8LRmJgYbdy4UdHR0QoICNCWLVs0d+5cubm5\nKTw8vMaiy8rKrNp+/PFHjRw5Ur1797a0BQYGKi4uzrqgBjaVBAAAAAAAAMDBrFq1SkajUdHR0ere\nvbu2bt2qqKgoeXl5KTQ0tMYxrVq10rp166zabrnl/y+I/+KLLzRlyhQ999xzWrx4sQ4fPqxXX31V\n7u7ul53cWaXWJLKwsFBr165VVFSUhgwZIkl65plntHXrViUlJdUYjjZr1qxa25tvvqmwsDD16tXL\n0ubq6iofH5/aSgAAAAAAAADg4Mxms5YuXaonnnhCw4cPlyR17NhR33zzjZYuXXrZcNTFxeWKGWJi\nYqK6deum6dOnW+55/PhxffDBBxo5cqQMBsNlx9a656inp6fS0tI0YsQIq/bmzZvr/PnztQ2XJO3d\nu1cpKSmKioqyqT8AAAAAAACA+uW7775Tbm6u+vTpY9V+//33a9++fSopKbmq+544cUL33XefVVv/\n/v115swZHT9+/Ipjaw1HDQaDvL291ahRI0tbcXGxdu3apW7dutlU4JIlSzR48GC1b9/epv4AAAAA\nAAAA6peTJ09Kktq0aWPV7uvrq4qKCuXk5FzVfcvLy6tt1ent7S1Jys7OvuLYq9rgc86cOSosLNS4\nceNq7XvkyBFt375d69evr3bt3Llzmjp1qvbv36/S0lL16dNHU6ZMYak9AAAAAAAAUM+YTCZJspqE\nKVUeriRJRUVFNY4rLi7W7NmztXPnTl28eFH33XefoqKiLBMxO3TooMOHD1uNycjIsPrMy6l15uiv\nmc1mzZ49W0lJSYqNjZWfn1+tY1auXKn77rtPd911l1W7p6enJCk0NFQffPCBZs2apb179+rpp5/W\nL7/8UpeyAAAAAAAAANRDjRs3VqNGjdSlSxfFx8crNjZWP/74o5588knl5+dLkp588knt2LFDa9eu\nVVlZmTIzM/X+++/rlltuqfXwd5tnjpaXl+uVV17Rpk2btHjxYg0cOLDWMRUVFUpNTdVzzz1X7drM\nmTOt3nfp0kUtWrTQyJEjtWnTJj3yyCO2lgYAAAAAAADgJtekSRNJ1WeIVr2vuv5rERERioiIsLzv\n0qWLunTpot/97nf65z//qcjISA0bNkw//PCDoqOj9frrr+v222/X7NmzNXbsWMvy+suxORydM2eO\nUlJStHz5cvXs2dOmMfv379f58+cVFhZmU/+q2aV5eXm2lgUAAAAAAADAAbRr106SlJOTozvvvNPS\nnpWVJVdXV5tWqUtSq1at1KxZM0uGaDAY9MILL2jMmDEqLCxUy5YtlZmZKYPBoC5dulzxXjYtq1+z\nZo0+/vhjvf/++zYHo5L0zTffqHHjxlYPK0llZWV6/fXX9eWXX1q1HzlyRJI4uAkAAAAAAACoZzp0\n6CBfX19t3brVqv3rr79WcHCwGjZsWG3MggULtHbtWqu206dP6/z585YMMT09XSkpKWrcuLFuu+02\nGQwGJScnKzAw8NpnjppMJhmNRoWHh6tjx47VZnX6+PjIaDTqyJEjWr58udW1kydPqm3bttXu6erq\nqoKCAs2cOVNlZWUKCAhQVlaW5s2bpy5dutg80xQAAAAAAACA45g0aZJmzpypoKAg9ezZUxs2bNDu\n3bu1cuVKSaqWM5rNZs2bN08Gg0G9e/dWbm6uYmNj5ePjo0cffVSSdOjQIcXGxurNN99Ujx49tH37\ndn300UdKSEiotZ5aw9HDhw+roKBAq1ev1urVq6tdz8zMVF5enrKzs6tdKygokIeHR433feuttxQf\nH68FCxYoNzdXTZs2VVhYmKZOnSpXV9daCwcAAAAAAADgWB599FGZTCbFxcUpNzdXHTp0UHx8vIKC\ngiSpWs44ZcoUeXl56a9//auio6PVqFEjBQcH65133pGXl5ck6amnntL58+f1l7/8RefOnVPnzp21\nePFi9e7du9Z6DGaz2fzbPOqNVVBQYO8SAAAAAAAAAKdRFU46Mpv2HAUAAAAAAACA+oZwFAAAAAAA\nAIBTIhwFAAAAAAAA4JQIRwEAAAAAAAA4JcJRAAAAAAAAAE6JcBQAAAAAAACAUyIcBQAAAAAAAOCU\nCEcBAAAAAAAAOCXCUQAAAAAAAABOiXAUAAAAAAAAgFMiHAUAAAAAAADglAhHAQAAAAAAADglwlEA\nAADAgZ1Jz9Fe42adSc+xdykAAAAOx2A2m832LuJ6KCgosHcJAAAAwA11Jj1HW6etU8lZk9xbeKhv\nbLhaBvrauywAAOAkvLy87F3CNWPmKAAAAOCgslMzVHLWJEkqOWtSdmqGnSsCAABwLISjAAAAgIPy\n6+8v9xYekiT3Fh7yG+Bv54oAAAAcC8vqAQAAAAd2Jj1H2akZ8hvgr5bdWVIPAABunPqwrJ5wFE7F\n8stDf3/24wIAAAAAALgG9SEcZVk9nEbVgQUZK3dr67R1nOgKALh2xw9Ja5dUvgIAAABwOISjcBoc\nWAAAuK6OH5KWRksp6ypfCUgBAAAAh0M4CqfBgQUAgOsqPU26mF/5/cX8yvcAAAAAHEoDexcA3Cgt\nA33VNzacAwsAANdHYKi0J7UyGG3qLQX1tXdFAAAAAOqIA5kAAACu1vFDlTNGg/pKne6xdzUAAADA\nDVUfDmQiHAUAAAAAAABQZ/UhHGXPUQAAAADO7fghae0SDlYDAMAJ2RSOlpaWKj4+XoMGDVL37t01\ndOhQrVq16rL9n3rqKd155501fsXFxVn6bdu2TcOHD1fXrl3Vp08fLVy4UBUVFdf+VAAAAABgi+OH\npKXRUsq6ylcCUgAAnIpNBzLFxMRo48aNio6OVkBAgLZs2aK5c+fKzc1N4eHh1frHxcWprKzMqu3H\nH3/UyJEj1bt3b0nS0aNHNWHCBD399NNasGCBvv/+e82cOVOSNHny5Gt9LgAAAACoXXpa5cFqUuVr\neprUuat9awIAADdMrXuOFhYWKjg4WFFRUXr22Wct7RERESorK9OHH35o0we99NJLKisr03vvvSdJ\nmjJlio4fP67PPvvM0mfFihVauHChduzYocaNG9fpQdhzFAAAAECdVc0cvZgvNfWWJrzBAWsAANjI\nKfYc9fT0VFpamkaMGGHV3rx5c50/f96mD9m7d69SUlIUFRVladu5c6dCQkKs+oWEhKi4uFj79++3\n6b4AAAAAcE06d5XGz5YGhhOMAgDghGoNRw0Gg7y9vdWoUSNLW3FxsXbt2qVu3brZ9CFLlizR4MGD\n1b59e0lSUVGRzp07pzZt2lj18/X1lSSdPHnS1vqdypn0HO01btaZ9Bx7lwIAAADUH527So9FEowC\nAOCEbNpz9L/NmTNHhYWFGjduXK19jxw5ou3bt2v9+vWWNpPJJEnVls67ubnJxcVFRUVFV1NWvXYm\nPUdbp61TyVmTsjZ9q76x4WoZ6GvvsgAAAAAAAACHZdNp9VXMZrNmz56tpKQkxcbGys/Pr9YxK1eu\n1H333ae77rrrqouElJ2aoZKzlaFyyVmTslMz7FwRAAAAAAAA4NhsDkfLy8s1ffp0ffrpp1q8eLEe\neOCBWsdUVFQoNTVVv/vd7/5fe/cfU9V9/3H8daWg/JIGBI16UcEC/tgUGjt0IB1QOto0U+JWa0fr\nSv0xpE6jdmiMClJWpXdRYSbo6OomdQ6xDq1VS6Vi27VadW1XW2anCNUGcYjBKxaE+/2jkfV+QUCr\nHOt5PhISeX/Oub5OciKct5/P5zjVvby8JKndDNHLly+rpaWlbRz/Exgbpj79PCVJffp5KjAuzOBE\nAAAAAAAAwPdbt5fVZ2ZmqrS0VAUFBRo3bly3zjl69KguXLigmJgYp7qnp6f8/f1VVVXlVL+212hw\ncHB3Y5lGQLhVE1dPUdX+zxUYF6aAsSypBwAAAAAAAL6LbjVHt27dquLi4htqjErS4cOH5eHhodDQ\n0HZj0dHReuedd+RwOGSxWCRJb7/9try9vRUREdHtv8NMAsKt7DMKAAAAAAAA3CJdLqu32+2y2Wya\nMmWKgoKCVFtb6/QlSTabTSkpKe3OPX36tAYPHtzh5z777LM6e/asVq1aperqapWWluqPf/yjZs2a\nJTc3t+94WQAAAAAAAADQuS5njn766ae6ePGitmzZoi1btrQbr6ioUG1tbbsl8pJ08eJFeXp6dvi5\nwcHB2rhxo1588UVt3rxZfn5+mjFjhmbMmHETlwEAAAAAAAAAN8bicDgcRoe4FS5evGh0BAAAAAAA\nAMA0fHx8jI7wnXX7bfUAAAAAAAAAcDehOQoAAAAAAADAlGiOAgAAAAAAAOgxr7zyiuLi4jR69Ggl\nJiZq165d1z02NzdXoaGh7b7Cw8Odjtu9e7eSkpIUHh6uBx54QL/97W917ty5LrN0+UImAAAAAAAA\nALgVCgsLZbPZlJGRobFjx6q8vFyLFi2Sj4+PoqOjOzxnwIAB2rZtm1OtV6//zfksLS3V/PnztXDh\nQiUkJOj8+fPKyMjQ7NmztW3bNqdj/z+aowAAAAAAAABuO4fDofz8fE2dOlVJSUmSpKCgIB0+fFj5\n+fnXbY66uLjI39//up9bUlKisLAwzZgxQ5I0ZMgQzZ07V3PmzNGpU6cUHBx83XNZVg8AAAAAAADg\ntjt58qRqamoUFRXlVJ8wYYKOHDmiK1eu3NTnWiwWubi4ONXc3Ny6dS7NUQAAAAAAAAC33enTpyVJ\ngwYNcqpbrVa1traqurr6pj536tSpqqio0K5du9Tc3Kz6+noVFBTogQce6HTWqERzFAAAAAAAAEAP\nsNvtkiR3d3enuoeHhyTp0qVLHZ7X2Nio5cuXKyEhQZGRkZozZ44qKyvbxsePH68XXnhB6enpGjNm\njCIjI/X1119r3bp1XWaiOQoAAAAAAADgjuTh4SF3d3eFhIQoLy9Pq1ev1ldffaUnnnhCdXV1kqR/\n/OMfbS9gKioq0saNG9Xc3Ky5c+eqpaWl08/nhUwAAAAAAAAAbjtvb29J7WeIXvv+2vi3paSkKCUl\npe37kJAQhYSE6MEHH9Rf//pXpaamKicnR9HR0UpLS2s7LjAwUAkJCSorK1N8fPx1MzFzFAAAAAAA\nAMBtN2TIEElqt7doZWWlXF1dFRgY2K3PGTBggO69917V1tZK+uZFT0FBQU7HWK1WWSwWp+X3HaE5\nCgAAAAAAAOC2GzZsmKxWq8rLy53qBw4cUGRkZIdvmM/JyVFRUZFT7ezZs7pw4YKGDh0qSerfv3+7\nJuipU6fkcDg0YMCATjPRHAUAAAAAAADQI9LS0rR9+3bt2LFDZ86c0YYNG/TBBx8oNTVVkmSz2ZyW\n0TscDmVlZWnbtm2qrq7Whx9+qHnz5snf31+TJk2SJCUnJ2vfvn3atGmTKisr9dFHH2np0qXq16+f\nJk6c2Gke9hwFAAAAAAAA0CMmTZoku92u3Nxc1dTUaNiwYcrLy1NERIQkqba2VlVVVW3HL1iwQD4+\nPtq4caMyMjLk7u6uyMhIvfTSS/Lx8ZEkPfnkk3Jzc1NBQYFycnLUp08fjRs3Ti+88IL69u3baR6L\nw+Fw3L7L7TkXL140OgIAAAAAAABgGteak99nLKsHcEPOHavWh7Y3de5YddcHAwAAAAAA3MGYOQqg\n284dq1b589t05bxdffp5auLqKQoItxodCwAAAAAAGICZowBMpWr/57py3i5JunLerqr9nxucCAAA\nAAAA4ObRHAXQbYGxYerTz1OS1KefpwLjwgxOBAAAAAAAcPNYVg/ghpw7Vq2q/Z8rMC5MAWNZUg8A\nAHiLYn0AAA49SURBVAAAgFndDcvqaY4CAAAAAAAAuGF3Q3OUZfUAAAAAAAAATInmKAAAAAAAAABT\nojkKAAAAAAAAwJRojgIAAAAAAAAwJZqjAAAAAAAAAEzpnu4c1NTUpA0bNmjnzp2qqanRoEGDNG3a\nND355JPXPaehoUGrV6/W3r171dzcrIiICK1YsUJWq1WSlJ6ertdee63deffdd5927dp1k5cDAAAA\nAAAAAN3TreZodna2du/erYyMDI0aNUplZWVauXKlevfurSlTpnR4TmpqqiRp06ZNkqSMjAzNmjVL\nu3btUq9e30xYDQ8PV25urnOge7oVCQAAAAAAAAC+ky47kQ0NDSoqKtKiRYuUmJgoSXr66adVXl6u\nkpKSDpujBw8e1Mcff6yysjL5+vpKknJycvTpp5+qublZvXv3liS5urrK39//Vl4PAAAAAAAAAHRL\nl81RLy8vHTx4UO7u7k51Pz8/ffbZZx2es3//fv3oRz9qa4xKktVqbVtSDwAAAAAAAABG6/KFTBaL\nRb6+vk7N0cbGRr3//vsaM2ZMh+f8+9//1tChQ7VhwwYlJCQoMjJS8+fPV11d3a1LDgAAAAAAAADf\nwU29rT4zM1MNDQ2aOXNmh+N1dXXas2ePKioqZLPZlJ2drY8++ki//OUvdfXq1bbj/vvf/2rhwoWK\njY1VVFSU0tPTVVtbe3NXAgAAAAAAAAA34IbefuRwOLRixQqVlJRozZo1CgwM7PC4q1evqnfv3lq9\nerVcXFwkSe7u7po+fbreffddxcTEyMvLS5IUHR2tmTNn6tSpU8rJydFTTz2lHTt2tO1LCgAAAAAA\nAAC3Q7eboy0tLVq8eLH27NmjtWvXKj4+/rrHenp6ymq1tjVGJSkiIkIWi0UVFRWKiYnR0qVLnc4J\nCQlRv379NG3aNO3Zs0c/+9nPbuJyAAAAAAAAAKB7ut0czczMVGlpqQoKCjRu3LhOjx0yZEi7/UVb\nW1vlcDjk6el53fNGjBghSSytBwAAAAAAAHDbdas5unXrVhUXF3erMSp9s1Q+MzNTdXV1bW+sP3bs\nmCQpNDRUzc3NWrlypWJiYhQXF9d23vHjxyVJQ4cOvdHrkI+Pzw2fAwAAAAAAAMC8LA6Hw9HZAXa7\nXT/5yU/0yCOPaM6cOe3G/f39ZbPZdPz4cRUUFEiSmpqa9NhjjykgIEDLli1TXV2dli1bJl9fX23Z\nskWS9Jvf/EaHDh3S8uXLNWrUKFVWViorK0tubm7avn27XF1db8PlAgAAAAAAAMA3umyOHjp0SMnJ\nydcdr6ioUHp6uo4cOaI333yzrf7VV18pKytL7733nnr16qX4+HgtXbpU3t7ekqTLly8rLy9Pe/fu\nVU1Njfr27auYmBgtXLhQfn5+t+jyAAAAAAAAAKBjXTZHAQAAAAAAAOBu1MvoAAAAAAAAAABgBJqj\nAAAAAAAAAEyJ5igAAAAAAAAAU6I5CgAAAAAAAMCUaI4CAAAAAAAAMCWaowAAAAAAAABMieYoAAAA\nAAAAAFOiOfo98corryguLk6jR49WYmKidu3aZXQkmFBTU5Py8vL08MMPa+zYsXr00UdVWFhodCyY\n2KVLlxQdHa3Y2Fijo8CEjh07pqlTp+qHP/yhoqKiZLPZ1NraanQsmMi1n8s//elP9YMf/EAxMTHK\ny8tTU1OT0dFwl2ttbdW6desUFham3Nxcp7GWlhb9/ve/V3R0tEaPHq3JkyfrvffeMygp7mad3YeX\nLl1SVlaWYmNjFR4erqSkJL3xxhsGJcXdrLP78NvOnj2r8PBwJScn92A6dNc9RgdA1woLC2Wz2ZSR\nkaGxY8eqvLxcixYtko+Pj6Kjo42OBxPJzs7W7t27lZGRoVGjRqmsrEwrV65U7969NWXKFKPjwYTW\nrFmjCxcuKCAgwOgoMJkvvvhCzzzzjGbMmKGcnBx98sknWrJkiby8vDRr1iyj48EkbDabiouLlZ2d\nrREjRuizzz7TkiVL1NDQoMWLFxsdD3epuro6LVy4UF9++aV69Wo/1+bafZmVlaWgoCDt2LFDM2fO\n1Pbt2xUSEmJAYtyNuroP58+fr//85z/KyMiQ1WpVcXGx5s2bp3vvvVfjx483IDHuRl3dh9+WmZmp\n5ubmHkqGG8XM0Tucw+FQfn6+pk6dqqSkJAUFBWn69OmKjY1Vfn6+0fFgIg0NDSoqKlJqaqoSExMV\nGBiop59+Wj/+8Y9VUlJidDyY0CeffKKioiI9+uijRkeBCa1fv17R0dFKTU2V1WrVI488ory8PEVG\nRhodDSby97//XZMnT1ZCQoKsVqsSEhKUlJSknTt3Gh0Nd7GSkhK5uLho27ZtcnFxcRq7dOmSNm/e\nrF//+td66KGHFBwcrAULFmj48OEqKCgwKDHuRp3dhydOnFB5ebmWLFmi6OhoDR06VAsWLNDQoUN5\nbsEt1dl9+G1vvvmmjh07pgcffLDnwuGG0By9w508eVI1NTWKiopyqk+YMEFHjhzRlStXDEoGs/Hy\n8tLBgwf1+OOPO9X9/Px04cIFg1LBrFpaWrR8+XKlpKRo8ODBRseBybS2turtt99WYmKiUz0qKkpj\nxowxKBXMyGKx6J57nBeCubm5yWKxGJQIZhAXF6f8/Hz17du33diRI0f09ddfd/js8u677/ZURJhA\nZ/dhcHCw3nnnHcXExDjV+/Xrx3MLbqnO7sNr7Ha7srKy9Pzzz8vLy6sH0+FG0By9w50+fVqSNGjQ\nIKe61WpVa2urqqurjYgFE7JYLPL19ZW7u3tbrbGxUe+//z7NAPS4zZs3y263a/bs2UZHgQmdOXNG\ndrtdHh4emjt3riZMmKD4+Hht2rTJ6GgwmWnTpqmkpEQff/yxHA6HTpw4oZKSknb/kQncSlar9brL\nR6uqqiR1/OxSW1ury5cv3/Z8MIfO7sNevXrJ399frq6ubbXa2lr961//4rkFt1Rn9+E1a9euldVq\nVVJSUg+lws1gz9E7nN1ulySnhpQkeXh4SPpm6QpglMzMTDU0NGjmzJlGR4GJ1NTUaO3atcrLy5Ob\nm5vRcWBCdXV1kr7Zh3n69OmaPXu2Dhw4oFWrVqmxsZGmPXpMWlqa6urq9POf/1yurq5qbm7W448/\nrueee87oaDApu90ui8XS6bPLtT8DPeXq1at6/vnn5e3trWnTphkdByZy/Phxbd26Vdu3b2dVxx2O\n5iiAG+ZwOLRixQqVlJRozZo1CgwMNDoSTOTam0cnTJhgdBSY1LXN9B977DE98cQTkqSRI0fq5MmT\n+vOf/6xZs2bxCzB6xMaNG7V79269+OKLGjFihCoqKrRq1Sr5+vpq3rx5RscDAMM1NTVp7ty5+uc/\n/6mXX35ZPj4+RkeCSbS2tmrZsmWaPn26goODjY6DLtAcvcN5e3tLaj9D9Nr318aBntLS0qLFixdr\nz549Wrt2reLj442OBBMpKyvT4cOH9frrrxsdBSZ2bb+okSNHOtXvv/9+lZSU6Pz58/L39zciGkyk\nvr5e69at0+LFizV58mRJUlhYmJqamrRixQo99dRT8vX1NTglzMbb21sOh0OXLl1y2luPZxcYobGx\nUampqTp+/Lj+9Kc/aezYsUZHgom8+uqramhoUGpqqtFR0A00R+9wQ4YMkSRVV1crNDS0rV5ZWSlX\nV1dm7KHHZWZmqrS0VAUFBRo3bpzRcWAy+/btU319vaKjo9tqra2tcjgcGjlypFJTU5WWlmZgQpjB\ntf2lLl686FRvbW2VJDbbR4+oqqpSc3OzgoKCnOqBgYG6evWqvvzyS5qj6HHffnYZMWJEW72yslID\nBw5st9weuF1aWlo0b948nThxQoWFhRo+fLjRkWAye/fuVVVVlcLDw9tq335uyc7O1qRJkwxMiG+j\nOXqHGzZsmKxWq8rLy51m6B04cECRkZHst4cetXXrVhUXF9MYhWHmzZunX/3qV061V199VW+99ZYK\nCgrk5+dnUDKYiaenpyIiIlRWVtY2Y0+Sjh49qsDAQB7+0SMGDBggSTp16pTGjx/fVj958qTTONCT\n7r//fnl4eOjgwYNtzVGHw6Hy8nJNnDjR4HQwk7Vr1+ro0aPasmULjVEYIjs7W42NjU61NWvWqKam\nRr/73e/4OX2HoTn6PZCWlqalS5cqIiJC48aN0+uvv64PPvhAmzdvNjoaTMRut8tms2nKlCkKCgpS\nbW2t0zhLSNET+vfvr/79+zvV/Pz85OrqqpCQEINSwYzS0tL0zDPPKD8/X4mJiSorK9Mbb7yh5cuX\nGx0NJhEQEKCHH35Y69evV0BAgEJDQ/XFF19o/fr1ioqKUkBAgNERcZeqr69v23tZki5fvtz2e6Gv\nr69SUlK0YcMGDR8+XPfdd582bdqkc+fO6dlnnzUqMu5Cnd2HTU1NKigo0Ny5c+Xj4+P03OLi4sKs\netwynd2HAwcOlIuLi9Pxffv2VUNDA88tdyCLw+FwGB0CXSssLNTLL7+smpoaDRs2TPPnz1dsbKzR\nsWAihw4dUnJy8nXHKyoqejAN8D+5ubl67bXXtH//fqOjwGT27dun3NxcnTp1SgEBAZo9e7Z+8Ytf\nGB0LJmK325WXl6edO3eqrq5Ovr6+euihh7RgwQK2d8Btk5ycrEOHDnU49tZbb2ngwIH6wx/+oL/9\n7W+qr6/XiBEjlJ6eroiIiB5OirtZZ/fhc889p9zc3A7HBg0axO+MuGW6+vdw8ODBTrX09HSdOXNG\nf/nLX3oiHm4AzVEAAAAAAAAAptTL6AAAAAAAAAAAYASaowAAAAAAAABMieYoAAAAAAAAAFOiOQoA\nAAAAAADAlGiOAgAAAAAAADAlmqMAAAAAAAAATInmKAAAAAAAAABTojkKAAAAAAAAwJRojgIAAAAA\nAAAwpf8D1054DBBxhF0AAAAASUVORK5CYII=\n", 587 | "text/plain": [ 588 | "" 589 | ] 590 | }, 591 | "metadata": {}, 592 | "output_type": "display_data" 593 | } 594 | ], 595 | "source": [ 596 | "sns.set_context(\"poster\")\n", 597 | "sns.set_style(\"dark\", {'axes.facecolor':'0.94'})\n", 598 | "\n", 599 | "average_step = 5\n", 600 | "\n", 601 | "_, ax = plt.subplots(1, 1, figsize=(20,5))\n", 602 | "\n", 603 | "loss_history = trainer.loss_history[:len(trainer.loss_history)//average_step*average_step]\n", 604 | "acc_history = trainer.accuracy_history[:len(trainer.accuracy_history)//average_step*average_step]\n", 605 | "\n", 606 | "loss_history = np.array(loss_history).reshape(-1, average_step).mean(axis=-1)\n", 607 | "acc_history = np.array(acc_history).reshape(-1, average_step).mean(axis=-1)\n", 608 | "\n", 609 | "\n", 610 | "loss_artist = ax.scatter(np.arange(len(loss_history)), \n", 611 | " loss_history, color='#942174', \n", 612 | " s=10+average_step//2, label=\"Loss\");\n", 613 | "\n", 614 | "ax2 = ax.twinx()\n", 615 | "acc_artist = ax2.scatter(np.arange(len(acc_history)), \n", 616 | " acc_history, color='#F05732', \n", 617 | " s=10+average_step//2, label=\"Accuracy\");\n", 618 | "artists = [loss_artist, acc_artist]\n", 619 | "labels = [artist.get_label() for artist in artists]\n", 620 | "plt.legend(artists, labels, loc=5)\n" 621 | ] 622 | } 623 | ], 624 | "metadata": { 625 | "kernelspec": { 626 | "display_name": "py3torch", 627 | "language": "python", 628 | "name": "py3torch" 629 | }, 630 | "language_info": { 631 | "codemirror_mode": { 632 | "name": "ipython", 633 | "version": 3 634 | }, 635 | "file_extension": ".py", 636 | "mimetype": "text/x-python", 637 | "name": "python", 638 | "nbconvert_exporter": "python", 639 | "pygments_lexer": "ipython3", 640 | "version": "3.6.1" 641 | }, 642 | "latex_envs": { 643 | "LaTeX_envs_menu_present": true, 644 | "autocomplete": true, 645 | "bibliofile": "biblio.bib", 646 | "cite_by": "apalike", 647 | "current_citInitial": 1, 648 | "eqLabelWithNumbers": true, 649 | "eqNumInitial": 1, 650 | "hotkeys": { 651 | "equation": "Ctrl-E", 652 | "itemize": "Ctrl-I" 653 | }, 654 | "labels_anchors": false, 655 | "latex_user_defs": false, 656 | "report_style_numbering": false, 657 | "user_envs_cfg": false 658 | }, 659 | "notify_time": "10", 660 | "varInspector": { 661 | "cols": { 662 | "lenName": 16, 663 | "lenType": 16, 664 | "lenVar": 40 665 | }, 666 | "kernels_config": { 667 | "python": { 668 | "delete_cmd_postfix": "", 669 | "delete_cmd_prefix": "del ", 670 | "library": "var_list.py", 671 | "varRefreshCmd": "print(var_dic_list())" 672 | }, 673 | "r": { 674 | "delete_cmd_postfix": ") ", 675 | "delete_cmd_prefix": "rm(", 676 | "library": "var_list.r", 677 | "varRefreshCmd": "cat(var_dic_list()) " 678 | } 679 | }, 680 | "types_to_exclude": [ 681 | "module", 682 | "function", 683 | "builtin_function_or_method", 684 | "instance", 685 | "_Feature" 686 | ], 687 | "window_display": false 688 | } 689 | }, 690 | "nbformat": 4, 691 | "nbformat_minor": 2 692 | } 693 | -------------------------------------------------------------------------------- /day_1/4_Chinese_document_classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Chinese document Classification" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "%matplotlib inline\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "import seaborn as sns\n", 21 | "import pandas as pd\n", 22 | "import numpy as np\n", 23 | "plt.style.use('fivethirtyeight')\n", 24 | "plt.rcParams['figure.figsize'] = (14, 6)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# Overview of Task" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## 1. Load data" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "data_path = 'data/zh_news/news.csv'" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/html": [ 62 | "
\n", 63 | "\n", 76 | "\n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | "
labeltitlecontent
01ti3 ca1o shi4 jie4 be1i : che2ng fe1i na2 pi2...su4 du4 : ( shuo1 mi2ng : dia3n ji1 zi4 do4ng ...
14da3o ha2ngdu2 jia1 ti2 go1ng me3i ri4 ba4o jia4 \\n re4 ...
21wa3ng yi4 ti3 yu4gu3n do4ng tu2 ji2 \\n be3n tu2 ji2 go4ng 7 zh...
33zi1 lia4o tu2 pia4n : dia4n shi4 ju4 < fu2 gu...wa3ng ye4 \\n bu4 zhi1 chi2 Flash\\n xi1n la4n...
42niu3 yua2n dui4 me3i yua2n : ku4 lu2n jia3ng ...xi1n xi1 la2n ca2i cha2ng ku4 lu2n fa1 bia3o j...
\n", 118 | "
" 119 | ], 120 | "text/plain": [ 121 | " label title \\\n", 122 | "0 1 ti3 ca1o shi4 jie4 be1i : che2ng fe1i na2 pi2... \n", 123 | "1 4 da3o ha2ng \n", 124 | "2 1 wa3ng yi4 ti3 yu4 \n", 125 | "3 3 zi1 lia4o tu2 pia4n : dia4n shi4 ju4 < fu2 gu... \n", 126 | "4 2 niu3 yua2n dui4 me3i yua2n : ku4 lu2n jia3ng ... \n", 127 | "\n", 128 | " content \n", 129 | "0 su4 du4 : ( shuo1 mi2ng : dia3n ji1 zi4 do4ng ... \n", 130 | "1 du2 jia1 ti2 go1ng me3i ri4 ba4o jia4 \\n re4 ... \n", 131 | "2 gu3n do4ng tu2 ji2 \\n be3n tu2 ji2 go4ng 7 zh... \n", 132 | "3 wa3ng ye4 \\n bu4 zhi1 chi2 Flash\\n xi1n la4n... \n", 133 | "4 xi1n xi1 la2n ca2i cha2ng ku4 lu2n fa1 bia3o j... " 134 | ] 135 | }, 136 | "execution_count": 3, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "data = pd.read_csv(data_path, names=['label', 'title', 'content'])\n", 143 | "data.head()" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 4, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "" 157 | ] 158 | }, 159 | "execution_count": 4, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | }, 163 | { 164 | "data": { 165 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAA7EAAAGPCAYAAACKzFetAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X2QlfV9///XsiAIi6x3haiVUIREqYRgkiKNVnGa2MZU\nk5pJnXqTeLOChthiXXEyMogFB4wRKRZwDai/sVNl6kRrYmesU2gCEgkaiLVU4kSHBkFFiC5oZHfP\n7w+/biUg7urC2c/6eMxkZve6Psd9n53PDOeZ61xna7Zv314JAAAAFKBXtQcAAACAjhKxAAAAFEPE\nAgAAUAwRCwAAQDFELAAAAMUQsQAAABRDxAIAAFAMEQsAAEAxRCwAAADFELEAAAAUQ8QCAABQDBEL\nAABAMUQsAAAAxRCxAAAAFEPEAgAAUAwRCwAAQDFELAAAAMUQsQAAABRDxAIAAFCM3gf6Bz799NOZ\nP39+Fi5cmGeffTY333xzamtr06dPn0yfPj2HH354fvCDH+SBBx5I7969881vfjOnnHJKtm/fnuuv\nvz5vvvlmjjzyyEybNi39+vXb61oAAAB6pgMasffcc08eeeSRHHzwwUmSW265Jddcc01GjhyZBx54\nIPfcc08uuOCC3Hfffbn77rvz1ltv5bLLLssf/dEf5c4778wXv/jFnHXWWbn77rvzwAMP5Atf+MJe\n1x500EEH8mkBAABwgBzQtxMfc8wxmT17dvv3M2fOzMiRI5Mkra2t6du3b5555pmMHj06Bx10UOrq\n6nLMMcfkl7/8ZdauXZtx48YlSU4++eSsXr36PdcCAADQMx3QiJ0wYUJ69/6/i79HHHFEkmTdunVZ\nunRpzjvvvOzYsSN1dXXta/r375/m5ubdjg8YMGCPY+9eCwAAQM90wO+J/V2PPvpolixZkltvvTWH\nHnpoBgwYkJ07d7af37lzZ+rq6tqP9+vXrz1e32ttR23YsOEDz33Oo9s+8GP5aPrBnx5a7RHaDfnH\nadUegYJsvmJGtUdo13vDjdUegcK0jLi+2iO0+/8e+261R6AgF5zxd9Ueod2yZcuqPQIFOe200z70\nf2PEiBH7PF/ViH3kkUfywAMPZMGCBRk0aFCS5IQTTsiCBQvy29/+Nrt27crzzz+f4cOHZ/To0Vm5\ncmXOOuusPP744xkzZsx7ru2o9/vl7NOjT3zwx/KR9KH2Wxd7vdoDUJTutHd/9cH/v0c+orrT/s1j\n1R6AknSnvSti6YwDsXerFrGtra255ZZbMnjw4Fx77bVJkrFjx6ahoSFf//rX09DQkEqlkkmTJqVv\n3765+OKLc8MNN+QHP/hB6uvrc+ONN+bggw/e61oAAAB6pgMesUcddVQWL16cJPn3f//3va4555xz\ncs455+x27PDDD8+8efM6tBYAAICe6YB+sBMAAAB8GCIWAACAYohYAAAAiiFiAQAAKIaIBQAAoBgi\nFgAAgGKIWAAAAIohYgEAACiGiAUAAKAYIhYAAIBiiFgAAACKIWIBAAAohogFAACgGCIWAACAYohY\nAAAAiiFiAQAAKIaIBQAAoBgiFgAAgGKIWAAAAIohYgEAACiGiAUAAKAYIhYAAIBiiFgAAACKIWIB\nAAAohogFAACgGCIWAACAYohYAAAAiiFiAQAAKIaIBQAAoBgiFgAAgGKIWAAAAIohYgEAACiGiAUA\nAKAYIhYAAIBiiFgAAACKIWIBAAAohogFAACgGCIWAACAYohYAAAAiiFiAQAAKIaIBQAAoBgiFgAA\ngGKIWAAAAIohYgEAACiGiAUAAKAYIhYAAIBiiFgAAACKIWIBAAAoRu8D/QOffvrpzJ8/PwsXLszG\njRszY8aMJMnw4cPT2NiYXr16pampKStWrEhtbW2mTJmSUaNGdWotAAAAPdMBvRJ7zz33ZObMmXnr\nrbeSJHPnzs3EiRPT1NSUSqWS5cuXZ/369XnyySezZMmSzJw5M3PmzOn0WgAAAHqmAxqxxxxzTGbP\nnt3+/fr16zN27Ngkyfjx47N69eqsXbs248aNS01NTYYMGZLW1tZs27atU2sBAADomQ7o24knTJiQ\nTZs2tX9fqVRSU1OTJOnfv3+am5vT3NycQYMGta9553hn1h566KEdmmfDhg1d8bSgQ7rTfhtS7QEo\nSnfauwf8HhiK1532L3SGvUupumLvjhgxYp/nq/p6oFev/7sQvHPnzgwcODB1dXXZuXPnHsc7s7aj\n3u+Xs0+PPvHBH8tH0ofab13s9WoPQFG60979ldd0dFJ32r95rNoDUJLutHeXLVtW7REoyIHYu1X9\ndOKRI0dmzZo1SZKVK1dmzJgxGT16dFatWpW2trZs3rw5bW1tqa+v79RaAAAAeqaqXom96qqrMmvW\nrOzatSvDhg3LhAkTUltbmzFjxuSSSy5JW1tbGhsbO70WAACAnumAR+xRRx2VxYsXJ0mGDh2aRYsW\n7bGmoaEhDQ0Nux3rzFoAAAB6pqq+nRgAAAA6Q8QCAABQDBELAABAMUQsAAAAxRCxAAAAFEPEAgAA\nUAwRCwAAQDFELAAAAMUQsQAAABRDxAIAAFAMEQsAAEAxRCwAAADFELEAAAAUQ8QCAABQDBELAABA\nMUQsAAAAxRCxAAAAFEPEAgAAUAwRCwAAQDFELAAAAMUQsQAAABRDxAIAAFAMEQsAAEAxRCwAAADF\nELEAAAAUQ8QCAABQDBELAABAMUQsAAAAxRCxAAAAFEPEAgAAUAwRCwAAQDFELAAAAMUQsQAAABRD\nxAIAAFAMEQsAAEAxRCwAAADFELEAAAAUQ8QCAABQDBELAABAMUQsAAAAxRCxAAAAFEPEAgAAUAwR\nCwAAQDFELAAAAMUQsQAAABRDxAIAAFAMEQsAAEAxeld7gJaWlkyfPj0vvvhievXqle985zupra3N\njBkzkiTDhw9PY2NjevXqlaampqxYsSK1tbWZMmVKRo0alY0bN+51LQAAAD1P1WtvxYoVaW1tzfe/\n//1ceumlWbBgQebOnZuJEyemqakplUoly5cvz/r16/Pkk09myZIlmTlzZubMmZMke10LAABAz1T1\niD322GPT2tqatra27NixI71798769eszduzYJMn48eOzevXqrF27NuPGjUtNTU2GDBmS1tbWbNu2\nba9rAQAA6Jmq/nbi/v3758UXX8zXvva1/OY3v8n3vve9PPXUU6mpqWk/39zcnObm5gwaNGi3xzU3\nN6dSqeyxFgAAgJ6p6hH7T//0Txk3blyuvPLKbNmyJVdccUV27drVfn7nzp0ZOHBg6urqsnPnzj2O\nv/v+13eOddSGDRu65klAB3Sn/Tak2gNQlO60d6v+jxbF6U77FzrD3qVUXbF3R4wYsc/zVX89cMgh\nh6R3797tX7e0tOQTn/hE1qxZk5NOOikrV67MZz7zmRxzzDH5h3/4h5x//vl56aWX0tbWlvr6+owc\nOXKPtR31fr+cfXr0iQ/+WD6SPtR+62KvV3sAitKd9u6vvKajk7rT/s1j1R6AknSnvbts2bJqj0BB\nDsTerXrEnnfeebnxxhtz2WWXpaWlJZMmTcrxxx+fWbNmZdeuXRk2bFgmTJiQ2trajBkzJpdcckna\n2trS2NiYJLnqqqv2WAsAAEDPVPWI7d+/f2666aY9ji9atGiPYw0NDWloaNjt2NChQ/e6FgAAgJ6n\n6p9ODAAAAB0lYgEAACiGiAUAAKAYIhYAAIBiiFgAAACKIWIBAAAohogFAACgGCIWAACAYohYAAAA\niiFiAQAAKIaIBQAAoBgiFgAAgGKIWAAAAIohYgEAACiGiAUAAKAYIhYAAIBiiFgAAACKIWIBAAAo\nhogFAACgGCIWAACAYohYAAAAiiFiAQAAKIaIBQAAoBgiFgAAgGKIWAAAAIrR4Yi98cYb8+tf/3qv\n51544YVMmTKly4YCAACAvem9r5ObN29u//qHP/xh/uRP/iS1tbV7rFuxYkVWr17d9dMBAADAu+wz\nYmfPnp3HH3+8/fvGxsa9rqtUKvnc5z7XtZMBAADA79hnxE6dOjWrVq1KpVLJTTfdlIsuuihHH330\nbmtqa2szcOBAEQsAAMB+t8+IHTx4cM4+++y3F/bunc9//vOpr68/IIMBAADA79pnxL7bWWedlba2\ntjzzzDN544030tbWtseaz372s106HAAAALxbhyP2mWeeybXXXpuXX345ydv3wSZJTU1NKpVKampq\nsmrVqv0zJQAAAKQTETt37tzU1tZm2rRp+b3f+7306uVPzAIAAHBgdThi//u//zszZszI6aefvj/n\nAQAAgPfU4cuphxxySPr27bs/ZwEAAIB96nDEfulLX8p9992X1tbW/TkPAAAAvKcOv524T58+efrp\np3POOedk1KhRe1yVrampyfTp07t6PgAAAGjX4Yh9+OGHU1dXl+Tt+2N/V01NTddNBQAAAHvR4Yh9\n8MEH9+ccAAAA8L78nRwAAACK0eErsZMmTXrfNQsWLPhQwwAAAMC+dDhiW1pa9jj2xhtv5IUXXsiA\nAQNy6qmndulgAAAA8Ls6HLFNTU17Pb59+/ZMmTIlxx13XJcNBQAAAHvzoe+Jra+vz0UXXZR77723\nK+YBAACA99QlH+xUqVTy6quvdsV/CgAAAN5Th99OvHr16j2OtbW15aWXXsrixYvzyU9+sksHAwAA\ngN/V4Yj91re+lZqamlQqlSTZ7evBgwdnypQp+2dCAAAA+H86HLHv9edz6urqctxxx6VXL39yFgAA\ngP2rwxE7duzY/TbEXXfdlf/8z/9MS0tL/vIv/zJjx47NjBkzkiTDhw9PY2NjevXqlaampqxYsSK1\ntbWZMmVKRo0alY0bN+51LQAAAD1Pp2rv+eefz9SpU/Onf/qnOfnkk3PmmWdm6tSpee655z7wAGvW\nrMm6dety5513ZuHChdmyZUvmzp2biRMnpqmpKZVKJcuXL8/69evz5JNPZsmSJZk5c2bmzJmTJHtd\nCwAAQM/U4Suxzz33XC699NL07t07p5xySg4//PC88sor+clPfpLHH388ixcvzvDhwzs9wKpVq3Lc\nccelsbExO3bsyOTJk/Pggw+2X/kdP358fvrTn2bo0KEZN25campqMmTIkLS2tmbbtm1Zv379HmtP\nP/30Ts8BAABA99fhiL399ttz9NFHZ+HChamrq2s/3tzcnCuuuCILFy7MzTff3OkBtm/fns2bN+d7\n3/teNm3alKuvvjptbW2pqalJkvTv3z/Nzc1pbm7OoEGD2h/3zvFKpbLH2o7asGFDp+eFD6o77bch\n1R6AonSnvdvhf7Tg/+lO+xc6w96lVF2xd0eMGLHP8x1+PfDUU0/l+uuv3y1gk7c/2Omiiy7KTTfd\n9IEGHDRoUD7+8Y+nT58+GTp0aA466KBs2bKl/fzOnTszcODA1NXVZefOnXscf/f9r+8c66j3++Xs\n06NPfPDH8pH0ofZbF3u92gNQlO60d3/lNR2d1J32bx6r9gCUpDvt3WXLllV7BApyIPZuh++J7dOn\nT/r06bPXcwcddFB27dr1gQb41Kc+lccffzyVSiUvv/xy3nzzzXz2s5/NmjVrkiQrV67MmDFjMnr0\n6KxatSptbW3ZvHlz2traUl9fn5EjR+6xFgAAgJ6pw1diTzjhhNx///35/Oc/3/723SSpVCq57777\ncsIJJ3ygAU455ZQ89dRT+cY3vpFKpZJrrrkmRx11VGbNmpVdu3Zl2LBhmTBhQmprazNmzJhccskl\naWtrS2NjY5Lkqquu2mMtAAAAPVOHI/byyy/PpZdemr/6q7/KGWeckcMOOyyvvvpqHnvssWzcuDHz\n58//wEN8+9vf3uPYokWL9jjW0NCQhoaG3Y4NHTp0r2sBAADoeTocsccff3zmzZuX22+/PYsXL06l\nUkmSfOITn8i8efP269+RBQAAgKQT98RWKpWsXr06ffv2zbJly/Lwww/nlltuyf/+7//mv/7rv/bn\njAAAAJCkExF711135Z577smJJ56Yfv365cgjj8wnP/nJnHvuubnzzjuzdOnS/TknAAAAdPztxP/6\nr/+aK6+8Mn/913/dfuyII47IFVdckbq6uixdujRf+9rX9suQAAAAkHTiSuzLL7+ckSNH7vXc8ccf\nnxdffLHLhgIAAIC96XDEHn300Vm1atVez61evTqDBw/usqEAAABgbzr8duKvfOUrmTt3bnbt2pXT\nTjsthx56aLZt25bly5dn6dKlmTx58v6cEwAAADoesV//+tezdevW3Hvvvbn//vuTvP2Jxb179855\n552X8847b78NCQAAAEknIjZJrrjiilx44YV5+umns3379gwcODCjRo1KfX39/poPAAAA2nUqYpOk\nrq4u48aN2x+zAAAAwD51+IOdAAAAoNpELAAAAMUQsQAAABRDxAIAAFAMEQsAAEAxRCwAAADFELEA\nAAAUQ8QCAABQDBELAABAMUQsAAAAxRCxAAAAFEPEAgAAUAwRCwAAQDFELAAAAMUQsQAAABRDxAIA\nAFAMEQsAAEAxRCwAAADFELEAAAAUQ8QCAABQDBELAABAMUQsAAAAxRCxAAAAFEPEAgAAUAwRCwAA\nQDFELAAAAMUQsQAAABRDxAIAAFAMEQsAAEAxRCwAAADFELEAAAAUQ8QCAABQDBELAABAMUQsAAAA\nxRCxAAAAFEPEAgAAUAwRCwAAQDFELAAAAMXoXe0B3vHqq6/mwgsvzPz581NbW5sZM2YkSYYPH57G\nxsb06tUrTU1NWbFiRWprazNlypSMGjUqGzdu3OtaAAAAep5uUXstLS256aab0rdv3yTJ3LlzM3Hi\nxDQ1NaVSqWT58uVZv359nnzyySxZsiQzZ87MnDlz3nMtAAAAPVO3iNjbbrstX/3qV3PkkUcmSdav\nX5+xY8cmScaPH5/Vq1dn7dq1GTduXGpqajJkyJC0trZm27Zte10LAABAz1T1txM//PDDqa+vz8kn\nn5y77747SVKpVFJTU5Mk6d+/f5qbm9Pc3JxBgwa1P+6d43tb21EbNmzowmcC+9ad9tuQag9AUbrT\n3q36P1oUpzvtX+gMe5dSdcXeHTFixD7PV/31wEMPPZSampqsXr06zz77bKZPn55t27a1n9+5c2cG\nDhyYurq67Ny5c4/j777/9Z1jHfV+v5x9evSJD/5YPpI+1H7rYq9XewCK0p327q+8pqOTutP+zWPV\nHoCSdKe9u2zZsmqPQEEOxN6t+tuJ77jjjixatCgLFy7MyJEjM3369Jx88slZs2ZNkmTlypUZM2ZM\nRo8enVWrVqWtrS2bN29OW1tb6uvrM3LkyD3WAgAA0DNV/Urs3lx11VWZNWtWdu3alWHDhmXChAmp\nra3NmDFjcskll6StrS2NjY3vuRYAAICeqVtF7MKFC9u/XrRo0R7nGxoa0tDQsNuxoUOH7nUtAAAA\nPU/V304MAAAAHSViAQAAKIaIBQAAoBgiFgAAgGKIWAAAAIohYgEAACiGiAUAAKAYIhYAAIBiiFgA\nAACKIWIBAAAohogFAACgGCIWAACAYohYAAAAiiFiAQAAKIaIBQAAoBgiFgAAgGKIWAAAAIohYgEA\nACiGiAUAAKAYIhYAAIBiiFgAAACKIWIBAAAohogFAACgGCIWAACAYohYAAAAiiFiAQAAKIaIBQAA\noBgiFgAAgGKIWAAAAIohYgEAACiGiAUAAKAYIhYAAIBiiFgAAACKIWIBAAAohogFAACgGCIWAACA\nYohYAAAAiiFiAQAAKIaIBQAAoBgiFgAAgGKIWAAAAIohYgEAACiGiAUAAKAYIhYAAIBiiFgAAACK\nIWIBAAAohogFAACgGL2rPUBLS0tuvPHGbNq0Kbt27crFF1+cYcOGZcaMGUmS4cOHp7GxMb169UpT\nU1NWrFiR2traTJkyJaNGjcrGjRv3uhYAAICep+q198gjj2TQoEFpamrKbbfdlptvvjlz587NxIkT\n09TUlEqlkuXLl2f9+vV58skns2TJksycOTNz5sxJkr2uBQAAoGeqesSeccYZufzyy5MklUoltbW1\nWb9+fcaOHZskGT9+fFavXp21a9dm3LhxqampyZAhQ9La2ppt27btdS0AAAA9U9XfTty/f/8kyY4d\nO3Lddddl4sSJmTdvXmpqatrPNzc3p7m5OYMGDdrtcc3NzalUKnus7agNGzZ04TOBfetO+21ItQeg\nKN1p71b9Hy2K0532L3SGvUupumLvjhgxYp/nu8XrgS1btuSaa67JueeemzPPPDPz589vP7dz584M\nHDgwdXV12blz5x7H333/6zvHOur9fjn79OgTH/yxfCR9qP3WxV6v9gAUpTvt3V95TUcndaf9m8eq\nPQAl6U57d9myZdUegYIciL1b9bcTb926NZMnT863vvWt/MVf/EWSZOTIkVmzZk2SZOXKlRkzZkxG\njx6dVatWpa2tLZs3b05bW1vq6+v3uhYAAICeqepXYu+666689tprWbx4cRYvXpwkmTJlSm655Zbs\n2rUrw4YNy4QJE1JbW5sxY8bkkksuSVtbWxobG5MkV111VWbNmrXbWgAAAHqmqkfs1VdfnauvvnqP\n44sWLdrjWENDQxoaGnY7NnTo0L2uBQAAoOep+tuJAQAAoKNELAAAAMUQsQAAABRDxAIAAFAMEQsA\nAEAxRCwAAADFELEAAAAUQ8QCAABQDBELAABAMUQsAAAAxRCxAAAAFEPEAgAAUAwRCwAAQDFELAAA\nAMUQsQAAABRDxAIAAFAMEQsAAEAxRCwAAADFELEAAAAUQ8QCAABQDBELAABAMUQsAAAAxRCxAAAA\nFEPEAgAAUAwRCwAAQDFELAAAAMUQsQAAABRDxAIAAFAMEQsAAEAxRCwAAADFELEAAAAUQ8QCAABQ\nDBELAABAMUQsAAAAxRCxAAAAFEPEAgAAUAwRCwAAQDFELAAAAMUQsQAAABRDxAIAAFAMEQsAAEAx\nRCwAAADFELEAAAAUQ8QCAABQDBELAABAMUQsAAAAxRCxAAAAFEPEAgAAUIze1R6gK7S1tWX27NnZ\nsGFDDjrooHznO9/J7//+71d7LAAAALpYj7gSu3z58rz11ltZvHhxrrzyytx2223VHgkAAID9oGb7\n9u2Vag/xYd16660ZNWpUvvCFLyRJvvSlL+WHP/xhlacCAACgq/WIK7E7duxIXV1d+/e9evVKS0tL\nFScCAABgf+gRETtgwIDs2LGj/ftKpZLevXvE7b4AAAC8S4+I2E996lNZuXJlkuQXv/hFhg8fXuWJ\nAAAA2B96xD2x73w68S9/+ctUKpVMmzYtH//4x6s9FgAAAF2sR0QsAAAAHw094u3EAAAAfDSIWAAA\nAIrhI3zpUk8//XTmz5+fhQsXVnsU6LCWlpbceOON2bRpU3bt2pWLL744p556arXHgvfV2tqaWbNm\n5YUXXkhNTU2mTp3qww0pyquvvpoLL7ww8+fP93kmFOWCCy7IgAEDkiRHHXVUpk2bVuWJPlpELF3m\nnnvuySOPPJKDDz642qNApzzyyCMZNGhQbrjhhvzmN7/J+eefL2Ipwo9//OMkyZ133pk1a9ZkwYIF\n+e53v1vlqaBjWlpactNNN6Vv377VHgU65be//W0qlYqLNlXk7cR0mWOOOSazZ8+u9hjQaWeccUYu\nv/zyJG//nena2toqTwQdc9ppp+W6665Lkrz44oupq6ur8kTQcbfddlu++tWv5sgjj6z2KNApGzZs\nyJtvvpnJkydn0qRJ+cUvflHtkT5yRCxdZsKECend28V9ytO/f/8MGDAgO3bsyHXXXZeJEydWeyTo\nsN69e2f69Om55ZZbcuaZZ1Z7HOiQhx9+OPX19Tn55JOrPQp0Wr9+/XL++edn3rx5mTp1aqZNm5aW\nlpZqj/WRImIBkmzZsiWTJk3Kn/3ZnwkBijN9+vQsXbo0s2bNyhtvvFHtceB9PfTQQ3niiScyceLE\nPPvss5k+fXpeeeWVao8FHXLsscfmzDPPTE1NTYYOHZpBgwZl69at1R7rI8VlM+Ajb+vWrZk8eXL+\n7u/+Lp/73OeqPQ502I9+9KO89NJL+cY3vpF+/fqlpqYmNTU11R4L3tcdd9zR/vXEiRMzderUHHHE\nEVWcCDruoYceynPPPZdrr702L7/8cnbs2JHDDz+82mN9pIhY4CPvrrvuymuvvZbFixdn8eLFSZK5\nc+emX79+VZ4M9u3000/PjBkz0tDQkJaWlkyZMsW+BdjPzj777Nxwww257LLLkiTXX3+9W+oOsJrt\n27dXqj0EAAAAdIR7YgEAACiGiAUAAKAYIhYAAIBiiFgAAACKIWIBAAAohogFgCo7++yzM23atA6v\nv+GGG3LWWWdV5WcDQLWJWAAAAIohYgEAAChG72oPAAD8n+3bt+eOO+7IT37yk7zyyivp379/Pv3p\nT+dv/uZvcvTRR++29sEHH8z3v//9bNu2LX/4h3+Yb3/72zn++OPbz7/22mv5x3/8xyxbtizNzc35\ngz/4g1x++eX54z/+4wP9tACgy7gSCwDdRKVSyd/+7d/mpz/9aa688srMnz8/l112WX72s59l1qxZ\nu63dunVrFi1alIaGhvz93/99mpubc8UVV2TTpk1JkrfeeitXXnll/uM//iMNDQ2ZM2dOjj322Fx9\n9dX58Y9/XI2nBwBdwpVYAOgmXnnllfTr1y+TJ0/O2LFjkyRjx47Nxo0b8y//8i+7rW1tbc3s2bNz\n4oknJklGjRqVr3zlK/nnf/7nTJkyJT/60Y/yP//zP1m0aFE+/elPJ0nGjx+f5ubm3HbbbTnllFMO\n7JMDgC4iYgGgmzjyyCOzYMGCVCqVbNq0KRs3bszzzz+fdevWpbW1Na2tramtrU2SDBkypD1gk+SI\nI47IiSeemKeeeipJ8rOf/Sz19fU58cQT09LS0r7ulFNOyezZs/Piiy/mYx/72IF9ggDQBUQsAHQj\n//Zv/5bbb789W7ZsySGHHJKRI0emb9++Sd5+u/E7Dj/88D0ee9hhh+XXv/51krfvrd2+fXvGjx+/\n15/z8ssvi1gAiiRiAaCb+PnPf57p06fn3HPPzQUXXJDBgwcnSebNm5e1a9futva1117b4/Fbt27N\nYYcdliSACalnAAABmklEQVSpq6vL0UcfnZkzZ+71Zw0dOrSLpweAA8MHOwFAN7Fu3bq0tbXl0ksv\nbQ/Y1tbWPPHEE0l2vxK7cePGvPDCC+3fb9myJevWrctJJ52U5O17aV966aXU19fnhBNOaP/funXr\nsmTJkvTq5SUAAGVyJRYAuolRo0YlSW699dZ8+ctfzuuvv577778/GzZsSJK88cYb6dOnT5Kkb9++\nueaaazJp0qRUKpUsWrQoAwcOzHnnnZck+fKXv5ylS5dm8uTJ+eY3v5mPfexjWbNmTZYsWZI///M/\nz8EHH1ydJwkAH5KIBYBu4qSTTkpjY2PuvffePPbYYznssMNy0kknZfbs2WlsbMzPf/7znHrqqUmS\nESNG5Itf/GJuvvnmvP766/nMZz6T7373u+33yh588MG54447cvvtt2f+/Plpbm7O4MGDc9lll+XC\nCy+s5tMEgA+lZvv27ZX3XwYAAADV54YYAAAAiiFiAQAAKIaIBQAAoBgiFgAAgGKIWAAAAIohYgEA\nACiGiAUAAKAYIhYAAIBiiFgAAACK8f8Du47tXG1Gl6UAAAAASUVORK5CYII=\n", 166 | "text/plain": [ 167 | "" 168 | ] 169 | }, 170 | "metadata": {}, 171 | "output_type": "display_data" 172 | } 173 | ], 174 | "source": [ 175 | "sns.countplot(data['label'])" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "## 2. Build vocab" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 5, 188 | "metadata": { 189 | "collapsed": true 190 | }, 191 | "outputs": [], 192 | "source": [ 193 | "chars = 'abcdefghijklmnopqrstuvwxyz-,;!?:\\'\\\\|_@#$%ˆ&*˜‘+-=<>()[]{} '\n", 194 | "char_to_index = {char:i for i, char in enumerate(chars)}\n", 195 | "index_to_char = {i: char for i, char in enumerate(chars)}" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "## 3. Find max sequence length" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 6, 208 | "metadata": { 209 | "collapsed": false 210 | }, 211 | "outputs": [ 212 | { 213 | "data": { 214 | "text/plain": [ 215 | "207" 216 | ] 217 | }, 218 | "execution_count": 6, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "maxlen = int(max(data['title'].apply(len)))\n", 225 | "maxlen" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "## 4. Convert sequences to Tensors" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 7, 238 | "metadata": { 239 | "collapsed": true 240 | }, 241 | "outputs": [], 242 | "source": [ 243 | "def encode_input(title, maxlen=207):\n", 244 | " title = title.lower().strip()\n", 245 | " encoding = np.zeros((len(chars), maxlen), dtype=np.int64)\n", 246 | " for i, char in enumerate(title[:maxlen]):\n", 247 | " index = char_to_index.get(char, 'unknown')\n", 248 | " if index is not 'unknown':\n", 249 | " encoding[index,i] = 1\n", 250 | " return encoding" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 8, 256 | "metadata": { 257 | "collapsed": false 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "import torch\n", 262 | "import torch.nn as nn\n", 263 | "from torch.autograd import Variable\n", 264 | "import torch.nn.functional as F" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 9, 270 | "metadata": { 271 | "collapsed": false 272 | }, 273 | "outputs": [ 274 | { 275 | "data": { 276 | "text/plain": [ 277 | "array([[0, 0, 0, ..., 0, 0, 0],\n", 278 | " [1, 0, 0, ..., 0, 0, 0],\n", 279 | " [0, 0, 0, ..., 0, 0, 0],\n", 280 | " ..., \n", 281 | " [0, 0, 0, ..., 0, 0, 0],\n", 282 | " [0, 0, 0, ..., 0, 0, 0],\n", 283 | " [0, 0, 0, ..., 0, 0, 0]])" 284 | ] 285 | }, 286 | "execution_count": 9, 287 | "metadata": {}, 288 | "output_type": "execute_result" 289 | } 290 | ], 291 | "source": [ 292 | "encode_input('Brian')" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 10, 298 | "metadata": { 299 | "collapsed": false 300 | }, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "text/plain": [ 305 | "(57, 207)" 306 | ] 307 | }, 308 | "execution_count": 10, 309 | "metadata": {}, 310 | "output_type": "execute_result" 311 | } 312 | ], 313 | "source": [ 314 | "encode_input('Brian').shape" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | "## 5. Build PyTorch Dataset and DataLoader" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 11, 327 | "metadata": { 328 | "collapsed": true 329 | }, 330 | "outputs": [], 331 | "source": [ 332 | "from torch.utils.data import Dataset, DataLoader" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 12, 338 | "metadata": { 339 | "collapsed": true 340 | }, 341 | "outputs": [], 342 | "source": [ 343 | "class SogouNews(Dataset):\n", 344 | " \"\"\"Sogou News dataset\"\"\"\n", 345 | " \n", 346 | " def __init__(self, data_path):\n", 347 | " self.data = pd.read_csv(data_path, names=['label', 'title', 'content']).dropna()\n", 348 | " del self.data['content']\n", 349 | " self.X = self.data['title']\n", 350 | " self.y = self.data['label']\n", 351 | " \n", 352 | " def __len__(self):\n", 353 | " return len(self.data)\n", 354 | " \n", 355 | " def __getitem__(self, index):\n", 356 | " content = torch.from_numpy(encode_input(self.data['title'][index])).float()\n", 357 | " label = self.data['label'][index] - 1\n", 358 | " sample = {'X': content, 'y': label}\n", 359 | " return sample" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 13, 365 | "metadata": { 366 | "collapsed": true 367 | }, 368 | "outputs": [], 369 | "source": [ 370 | "sogou_dataset = SogouNews(data_path)" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": 14, 376 | "metadata": { 377 | "collapsed": true 378 | }, 379 | "outputs": [], 380 | "source": [ 381 | "dataloader = DataLoader(sogou_dataset, batch_size=32, shuffle=True, num_workers=0)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 15, 387 | "metadata": { 388 | "collapsed": false 389 | }, 390 | "outputs": [ 391 | { 392 | "data": { 393 | "text/plain": [ 394 | "\n", 395 | " 1 0 0 ... 0 0 0\n", 396 | " 0 0 0 ... 0 0 0\n", 397 | " 0 0 0 ... 0 0 0\n", 398 | " ... ⋱ ... \n", 399 | " 0 0 0 ... 0 0 0\n", 400 | " 0 0 0 ... 0 0 0\n", 401 | " 0 0 0 ... 0 0 0\n", 402 | "[torch.FloatTensor of size 57x207]" 403 | ] 404 | }, 405 | "execution_count": 15, 406 | "metadata": {}, 407 | "output_type": "execute_result" 408 | } 409 | ], 410 | "source": [ 411 | "test_batch = next(iter(dataloader))\n", 412 | "test_batch['X'][0]" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "# Define Model" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 16, 425 | "metadata": { 426 | "collapsed": true 427 | }, 428 | "outputs": [], 429 | "source": [ 430 | "class CharCNN(nn.Module): \n", 431 | " def __init__(self, n_classes, vocab_size, max_seq_length, channel_size=128, pool_size=5):\n", 432 | " \n", 433 | " super(CharCNN, self).__init__()\n", 434 | " \n", 435 | " self.conv_stack = nn.ModuleList([nn.Conv1d(vocab_size, channel_size, 7), \n", 436 | " nn.ReLU(),\n", 437 | " nn.BatchNorm1d(num_features=channel_size),\n", 438 | " nn.MaxPool1d(pool_size),\n", 439 | " nn.Conv1d(channel_size, channel_size, 3, padding=1),\n", 440 | " nn.ReLU(),\n", 441 | " nn.BatchNorm1d(num_features=channel_size),\n", 442 | " nn.MaxPool1d(pool_size)])\n", 443 | " self.dropout1 = nn.Dropout(p=0.5)\n", 444 | " self.output = nn.Linear(1024, n_classes)\n", 445 | " \n", 446 | " \n", 447 | " def forward(self, x):\n", 448 | " for op in self.conv_stack:\n", 449 | " x = op(x)\n", 450 | " \n", 451 | " x = x.view(x.size(0),-1)\n", 452 | " x = self.dropout1(x)\n", 453 | " x = self.output(x)\n", 454 | " return x" 455 | ] 456 | }, 457 | { 458 | "cell_type": "markdown", 459 | "metadata": {}, 460 | "source": [ 461 | "## Define loss" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": 17, 467 | "metadata": { 468 | "collapsed": true 469 | }, 470 | "outputs": [], 471 | "source": [ 472 | "criterion = nn.CrossEntropyLoss()" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": 18, 478 | "metadata": { 479 | "collapsed": true 480 | }, 481 | "outputs": [], 482 | "source": [ 483 | "from tqdm import tqdm_notebook" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 22, 489 | "metadata": { 490 | "collapsed": true 491 | }, 492 | "outputs": [], 493 | "source": [ 494 | "def train(model, dataloader, num_epochs):\n", 495 | " cuda = torch.cuda.is_available()\n", 496 | " if cuda:\n", 497 | " model.cuda()\n", 498 | " optimizer = torch.optim.Adam(model.parameters())\n", 499 | " loss_history = []\n", 500 | " bar = tqdm_notebook(total=len(dataloader))\n", 501 | " for i in range(num_epochs):\n", 502 | " per_epoch_losses = []\n", 503 | " for batch in dataloader:\n", 504 | " X = Variable(batch['X'])\n", 505 | " y = Variable(batch['y'])\n", 506 | " if cuda:\n", 507 | " X = X.cuda()\n", 508 | " y = y.cuda()\n", 509 | " model.zero_grad()\n", 510 | " outputs = model(X)\n", 511 | " loss = criterion(outputs, y)\n", 512 | " loss.backward()\n", 513 | " optimizer.step()\n", 514 | " per_epoch_losses.append(loss.data[0])\n", 515 | " bar.set_postfix(loss=loss.data[0])\n", 516 | " bar.update(1)\n", 517 | " loss_history.append(np.mean(per_epoch_losses))\n", 518 | " print('epoch[%d] loss: %.4f' % (i, loss.data[0]))\n", 519 | " return loss_history" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 23, 525 | "metadata": { 526 | "collapsed": true 527 | }, 528 | "outputs": [], 529 | "source": [ 530 | "charcnn = CharCNN(n_classes=5, vocab_size=len(chars), max_seq_length=maxlen)" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": null, 536 | "metadata": { 537 | "collapsed": false 538 | }, 539 | "outputs": [ 540 | { 541 | "name": "stderr", 542 | "output_type": "stream", 543 | "text": [ 544 | "The installed widget Javascript is the wrong version. It must satisfy the semver range ~2.1.4.\n" 545 | ] 546 | }, 547 | { 548 | "data": { 549 | "application/vnd.jupyter.widget-view+json": { 550 | "model_id": "b87af9520b634d7dad6eb24e6bde0462" 551 | } 552 | }, 553 | "metadata": {}, 554 | "output_type": "display_data" 555 | } 556 | ], 557 | "source": [ 558 | "%time loss_history = train(charcnn, dataloader, 100)" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": 155, 564 | "metadata": { 565 | "collapsed": false 566 | }, 567 | "outputs": [ 568 | { 569 | "data": { 570 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAA5UAAAF8CAYAAABffJ+wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xlg1NW5//HPd/Zksq8IBpCwBUQQUAhapAoiuNe111Jb\nEZfaurTXWi8tdrEKt1SLva1WvbT16q+tioBVELcWKKuyiRIgBVmF7JNlMvvM749gwpiEQCSZSfJ+\n/SM5Z0af8QTlk+d8zzFcLldEAAAAAAC0gynWBQAAAAAAui5CJQAAAACg3QiVAAAAAIB2I1QCAAAA\nANqNUAkAAAAAaDdCJQAAAACg3QiVAAAAAIB2I1QCAAAAANqNUHkSiouLY10C2sAaxT/WKP6xRvGP\nNYp/rFH8Y43iH2vU9RAqAQAAAADtRqgEAAAAALQboRIAAAAA0G6ESgAAAABAuxEqAQAAAADtRqgE\nAAAAALQboRIAAAAA0G6ESgAAAABAuxEqAQAAAADtRqgEAAAAALQboRIAAAAA0G6EypNU6Q2p0huK\ndRkAAAAAEFcIlSdhQ5VJE5aU6rtrXIpEIrEuBwAAAADiBqHyBHyhiP5ro0vf/cSho56wlh3w6oXd\n9bEuCwAAAADiBqHyBEyGtKHEHzX28MZq/bs6EKOKAAAAACC+ECpPwGoy9OzEDCWYmra81gcjmrWq\nSoEw22ABAAAAgFDZhvxUi34wILpbuaU8oLlbamJUEQAAAADED0LlSbgqN6Qr+jqixp74qE5rjvpi\nVBEAAAAAxAdC5UkwDOmpC9J0RmLTv66IpDtXVcnlC8euMAAAAACIMULlScpwmPX7C9Ojxg65Q3pw\nvStGFQEAAABA7BEqT8FX+zj0neHOqLFX9nr08h6uGQEAAADQMxEqT9Gc0akanm6JGvvPdS7trw3G\nqCIAAAAAiB1C5SlyWAw9f1GG7OamsZpARHetrlKIa0YAAAAA9DCEynYoSLfqZ2NTo8bWlfj1m+11\nMaoIAAAAAGKDUNlOdxY4NbmPPWrs8S012lzmb+UdAAAAAND9ECrbyTAM/e7CdGXam/4VBiPSrFWV\nqgtwzQgAAACAnsHS1gvC4bDmzZun4uJi2Ww2zZ49W3l5eY3zv/71r7Vt2zYlJiZKkubPn69QKKTr\nr79e+fn5kqRJkybp5ptv7qCPEDu5iWb99sI0/cd7lY1je2pCmr2xWgsuSD/BOwEAAACge2gzVK5c\nuVJ+v18LFy7U9u3btWDBAs2fP79xfufOnXrqqaeUlpbWOLZx40ZdeumlevDBBzum6jgyvW+Cvj0k\nUX/c1XStyJ9312vymQ5d2S8hhpUBAAAAQMdrc/vr1q1bVVhYKEkaMWKEioqKGufC4bAOHjyoxx57\nTLfffrtef/11SVJRUZF27typO++8Uz/60Y9UXl7eQeXHh0fPS9XAlOh8ft8al47Uh2JUEQAAAAB0\njjY7lW63W0lJSY1fm0wmBYNBWSwWeTwe3XDDDbrlllsUCoV09913q6CgQP3791dBQYHOP/98vfXW\nW5o/f77mzp17UgUVFxe3/9N0oLbqmjPA0Le3ORSKGJKkSl9Y315xWAuG+2QyOqNCxOv3DpqwRvGP\nNYp/rFH8Y43iH2sU/1ij+DNo0KBW59oMlU6nU263u/HrSCQii6XhbQ6HQzfffLMcDockaezYsSou\nLtZFF13UODZp0iT94Q9/OC3FxkpxcXGbdQ2S9GNzrX62qaZxbL3LrPcCZ+g7w5NafyNOi5NZI8QW\naxT/WKP4xxrFP9Yo/rFG8Y816nra3P46cuRIrV27VpK0ffv2xsN3JOnAgQOaNWuWQqGQgsGgtm3b\npiFDhuiXv/yl3n//fUnSBx98oKFDh3ZQ+fHl3rOTNCHXFjX2s03V+qQyEKOKAAAAAKBjtdmpnDRp\nkjZs2KCZM2cqEolozpw5eumll5SXl6eJEydq2rRpuu2222SxWDR9+nTl5+frnnvu0aOPPqpFixYp\nISFBs2fP7ozPEnNmk6E/TEzXBUtLVeOPSJJ8IWnWykq9f2WOHBb2wQIAAADoXtoMlSaTSQ8//HDU\nWP/+/Rt/PWPGDM2YMSNqvk+fPnr66adPT4VdTF6SRU8WpmnmyqrGsR2uoH66qVpzx6Wd4J0AAAAA\n0PW0uf0Vp+66AYm6MT/6OpFndrj1/mFvjCoCAAAAgI5BqOwgvxqfpr5J5qixu1dXqcLLNSMAAAAA\nug9CZQdJtZn07MT0qOtESjxhfW+NS5FIJHaFAQAAAMBpRKjsQONz7fr+OclRY8sOePXC7voYVQQA\nAAAApxehsoM9NCpZY7KsUWMPb6zWv6u5ZgQAAABA10eo7GBWk6HnLsqQ87jrROqDEc1aVaVAmG2w\nAAAAALo2QmUnGJBi0ePjUqPGtpQHNHdLTYwqAgAAAIDTg1DZSWYMStSV/RxRY098VKc1R30xqggA\nAAAAvjxCZScxDEMLJqTpjMSmf+URSXeuqpLLF45dYQAAAADwJRAqO1GGw6zfX5geNXbIHdKD610x\nqggAAAAAvhxCZSf7ah+H7hmeFDX2yl6PXtnDNSMAAAAAuh5CZQzMGZOi4emWqLEfrHNpf20wRhUB\nAAAAQPsQKmPAbjb0/EUZspubxmoCEd21ukohrhkBAAAA0IUQKmOkIN2qn4+NvmZkXYlfv9leF6OK\nAAAAAODUESpj6I4Cpyb3sUeNPb6lRpvL/DGqCAAAAABODaEyhgzD0O8uTFemvWkZghFp1qpK1QW4\nZgQAAABA/CNUxlhuolm/vTAtamxPTUizN1bHqCIAAAAAOHmEyjgwvW+Cvj0kMWrsz7vr9cZ+T4wq\nAgAAAICTQ6iME4+el6qBKdHXjNy7xqUj9aEYVQQAAAAAbSNUxgmn1aTnL0qXxWgaq/SFdc/qKoUj\nXDMCAAAAID4RKuPIqCybZo9OiRp7/zOf/rDDHaOKAAAAAODECJVx5t6zkzQh1xY19tNN1fqkMhCj\nigAAAACgdYTKOGM2GfrDxHSl2Jr2wfpC0qyVlfIG2QYLAAAAIL4QKuNQXpJFTxZGXzOywxXUzzZx\nzQgAAACA+EKojFPXDUjUjfkJUWNP73Dr/cPeGFUEAAAAAM0RKuPYr8anqW+SOWrs7tVVqvByzQgA\nAACA+ECojGOpNpOenZgu03HXjJR4wvreGpciXDMCAAAAIA4QKuPc+Fy7vn9OctTYsgNevbC7PkYV\nAQAAAEATQmUX8NCoZI3JskaNPbyxWv+u5poRAAAAALFFqOwCrCZDz12UIaelaR9sfTCiWauqFAiz\nDRYAAABA7BAqu4gBKRY9Pi41amxLeUBzt9TEqCIAAAAAIFR2KTMGJerKfo6osSc+qtPao74YVQQA\nAACgpyNUdiGGYWjBhDSdkdi0bBFJd6yqkssXjl1hAAAAAHosQmUXk+Ew6/cXpkeNHXKH9OB6V4wq\nAgAAANCTESq7oK/2ceie4UlRY6/s9eiVPVwzAgAAAKBzESq7qDljUjQ83RI19oN1Lu2vDcaoIgAA\nAAA9EaGyi7KbDT1/UYbs5qaxmkBEd62ukj/ENSMAAAAAOgehsgsrSLfq52OjrxlZV+LXN96vkDdI\nsAQAAADQ8QiVXdwdBU5N7mOPGnv7kE83vVshd4ATYQEAAAB0LEtbLwiHw5o3b56Ki4tls9k0e/Zs\n5eXlNc7/+te/1rZt25SYmChJmj9/voLBoH7yk5/I6/UqOztbc+bMkcPhaO0fgS/BMAw9/ZV0TVtW\nrn/XND1PufKIT9e/U6G/Tc5Uio2fHQAAAADoGG2mjZUrV8rv92vhwoW65557tGDBgqj5nTt36qmn\nntIzzzyjZ555RklJSXr++ec1depUPffccxoyZIhee+21DvsAkLITzFo2PUvD0qJ/RrCuxK9rVpSr\nijssAQAAAHSQNkPl1q1bVVhYKEkaMWKEioqKGufC4bAOHjyoxx57TLfffrtef/11SdK2bds0fvx4\nSVJhYaE++OCDjqgdx8lJMOvv07I0MtMaNb65PKAr3ypXuTcUo8oAAAAAdGdtbn91u91KSmq6E9Fk\nMikYDMpiscjj8eiGG27QLbfcolAopLvvvlsFBQVR73E6naqrqzvpgoqLi9vxMTpevNb1RU8Oku7z\n27W9tulY2I8rA5qy5DP9foRXWbYYFtfBusoa9WSsUfxjjeIfaxT/WKP4xxrFP9Yo/gwaNKjVuTZD\npdPplNvtbvw6EonIYml4m8Ph0M0339z4vOTYsWNVXFwsp9Op+vp6ORyOZqH0yxQbK8XFxXFZV2uW\nDQzrpncqtLbE3zj2qceke4qStfSyLOUltbnsXU5XW6OeiDWKf6xR/GON4h9rFP9Yo/jHGnU9bW5/\nHTlypNauXStJ2r59u/Lz8xvnDhw4oFmzZikUCikYDGrbtm0aMmSIzjnnnMb3rFu3TqNGjeqg8tGS\nZKtJr16aqa/2jj4Vdm9tSNOXl2tfbbCVdwIAAADAqWkzVE6aNEk2m00zZ87Uk08+qQceeEAvvfSS\nVq1apbPOOkvTpk3TbbfdpjvvvFPTp09Xfn6+brvtNr399tu6/fbbtX37dt14442d8VlwnESLSX+5\nJFOX5UWfunuwLqRpy8q02xWIUWUAAAAAupM290GaTCY9/PDDUWP9+/dv/PWMGTM0Y8aMqPnMzEw9\n9dRTp6dCtJvDYuiFr2bojlVVWrLP0zh+pD6sy5eXa8nULA3PsJ7g7wAAAAAAJ8YFht2czWzo+YvS\ndVN+QtR4mTesK94q09ZyfyvvBAAAAIC2ESp7AIvJ0NNfSdetgxOjxqt8EV31Vrk2lvpiVBkAAACA\nro5Q2UOYDEO/mZCmOwucUeM1gYiuXVGh1UcIlgAAAABOHaGyBzEMQ3PHper+EdFXvLiDEd3wTrne\nO+yNUWUAAAAAuipCZQ9jGIYeGZOih89Njhr3hqSvv1uhZQc8rbwTAAAAAJojVPZAhmHooVEp+vnY\nlKhxf1j65vuVWvxpfYwqAwAAANDVECp7sHtHJOu/x6VGjQUj0syVVfrLvwmWAAAAANpGqOzh7hiW\npKcuSJNx3Fg4It29ukp/2uWOWV0AAAAAugZCJfTNwU79YWK6zEb0+P1rXXr6k7rYFAUAAACgSyBU\nQpJ0Y36iFk7KkOULwfLhjdV68qPa2BQFAAAAIO4RKtHo6v4JeumSTNnN0eM/21SjX26uUSQSiU1h\nAAAAAOIWoRJRpuY59LfJmUr4wl7YX22r1ZwPCZYAAAAAohEq0cyk3g4tujRTSV/YC/vbj+v04Ppq\nhQmWAAAAAI4hVKJFE3rZteSyLKXaooPl8zvduneNS6EwwRIAAAAAoRInMDbbpr9flqVMe/S3yYvF\n9bprdZWCBEsAAACgxyNU4oTOybTpjWlZyk2I/lZ5Za9H3/5npfwhgiUAAADQkxEq0aaCdKvenJal\nPonRx8L+fb9X33i/Qt4gwRIAAADoqQiVOCkDU616c3qW+iVFB8u3D/l007sVcgfCMaoMAAAAQCwR\nKnHS+idbtGx6tgamWKLGVx7x6fp3KlTjJ1gCAAAAPQ2hEqekj9OsZdOzNCwtOliuK/HrmhXlqvIR\nLAEAAICehFCJU5aTYNbfp2VpZKY1anxzeUBXvlWucm8oRpUBAAAA6GyESrRLpsOspVOzdF52dLD8\nuDKgK5aX62g9wRIAAADoCQiVaLc0u0mvTc3ShFxb1PhOV1DTl5XpYF0wRpUBAAAA6CyESnwpyVaT\nXr00U1/tbY8a31sb0vTl5dpXS7AEAAAAujNCJb60RItJf7kkU5flOaLGD9aFNG1ZmXa7AjGqDAAA\nAEBHI1TitHBYDL3w1Qxd0z8havxIfViXLy/XJ5UESwAAAKA7IlTitLGZDT1/Ubpuyo8OlmXesK54\nq0xby/0xqgwAAABARyFU4rSymAw9/ZV0fWtwYtR4lS+iq1aUa2OpL0aVAQAAAOgIhEqcdibD0JMT\n0nRngTNqvMYf0bUrKrT6CMESAAAA6C4IlegQhmFo7rhU3T8iKWrcHYzohnfK9e4hb4wqAwAAAHA6\nESrRYQzD0CNjUvTwuclR496QdMM7FXpwvUu1gXCMqgMAAABwOhAq0aEMw9BDo1L087EpUeMRSc8V\nuVW4uFTv0LUEAAAAuixCJTrFvSOS9d/jUpuNH3KHdMM7FbpjZaXKvaEYVAYAAADgyyBUotPcMSxJ\nS6dmqX+yudncy3s9GvdaqV7eU69IJBKD6gAAAAC0B6ESneqi3natvSZH3zs7SSYjeq7CF9Ydq6p0\n4zsVOlAXjE2BAAAAAE4JoRKdLtFi0i/OS9X7V2Tr7Axrs/l3DvtUuLhUf9hRp1CYriUAAAAQzwiV\niJlRWTb948pszRmTIvsXdsS6gxE9tKFa05aVa6crEJsCAQAAALSJUImYspoMff+cZP3r6hwV5tqa\nzW8s8+srS0s1d0uN/CG6lgAAAEC8sbT1gnA4rHnz5qm4uFg2m02zZ89WXl5es9c88MADmjhxoq67\n7jpFIhFdccUVja8bMWKE7rnnno75BOgWBqVa9ea0LP15V70e+bBaNYGmABkIS3O31mrpPo+euiBd\n5+U0D58AAAAAYqPNULly5Ur5/X4tXLhQ27dv14IFCzR//vyo1zzzzDOqra1t/PrQoUMaMmSInnji\nidNfMbotk2Ho20Odmprn0A/WubT8YPT9lUWuoC59s0x3FDj1kzEpSrLSaAcAAABirc0/lW/dulWF\nhYWSGjqORUVFUfPvvfeeDMPQ+PHjG8d27typsrIy3X333br//vu1f//+01w2urPeTrP+3yUZ+tOk\nDGU7or9FI5L+UOTW+MWleveQt+W/AQAAAIBO02an0u12KykpqfFrk8mkYDAoi8WiPXv2aMWKFZo7\nd66ef/75xtdkZmbq1ltv1eTJk7V161bNmTNHf/7zn0+qoOLi4nZ8jI4Xr3V1Z8Ml/WWk9JtPbXqj\nNPpb9ZA7pOvfqdC07KC+P8CvNCtr1BWwRvGPNYp/rFH8Y43iH2sU/1ij+DNo0KBW59oMlU6nU263\nu/HrSCQii6XhbW+++abKysr0ne98R0eOHJHFYlHv3r117rnnymxuOM5z1KhRKi8vVyQSkWEYLf4z\nTrbYWCkuLo7LunqKF4dJ//zMq/vWuLS/LhQ1t7zMoo01Nt3fz6PvTjjrpL7HEBv8Pop/rFH8Y43i\nH2sU/1ij+McadT1thsqRI0dq9erVmjJlirZv3678/PzGuXvvvbfx188++6wyMzNVWFio3/72t0pN\nTdU3v/lN7d69W7m5ufxhH1/KpN4Orb0mR49tqdXTO+p0/PWVFb6wfrLbrtX1Ffp1YZryktr8tgYA\nAABwmrT5TOWkSZNks9k0c+ZMPfnkk3rggQf00ksvadWqVa2+59Zbb9WWLVt055136je/+Y3mzJlz\nWotGz+S0mvTL81P17uXZGp7ePDi+fcinwsWleq6oTuEI148AAAAAncFwuVz86bsNtODjTyAc0YLt\ndfrvrTXyh5vPj8ux6akL0jQkzdr5xaFF/D6Kf6xR/GON4h9rFP9Yo/jHGnU93MmALslqMvSfI5P1\nr6tzVJjb/N7KDaV+fWVpaUPoDPFzEwAAAKCjECrRpQ1Os+rNaVl6KN+vZGv0c7v+sPTYllpNer1U\nH5b5Y1QhAAAA0L0RKtHlmQxD158R1LprcjQ1z9FsfocrqClvlOnhDS65Ay3slQUAAADQboRKdBtn\nJln010sytPCidGU5or+1I5Ke3uFW4ZJSvX/YG5sCAQAAgG6IUIluxTAMfW1AojZem6Ob8xOazR+o\nC+lrb1forlWVqvSGWvg7AAAAADgVhEp0SxkOs56ZmKHXLs1U3yRzs/m/7vHo/MWlWrS3XhGuHwEA\nAADajVCJbu3iPg6tvSZHdw9zyvjCXLk3rJkrq3Tze5U67KZrCQAAALQHoRLdXpLVpMfHpemdK7I1\nLM3SbH7FQa/GLy7R80V1CtO1BAAAAE4JoRI9xthsm/55VY7+69xk2b7wnV8biOg/11fr8uXl2u0K\nxKZAAAAAoAsiVKJHsZkN/XBUilZfnaPxObZm8+tK/Lpwaal+tbVG/hBdSwAAAKAthEr0SEPSrFo2\nPUu/Gp+qJEv005b+sPTLLbWa9PdSbS7zx6hCAAAAoGsgVKLHMhmGZhUkad21Obr0THuz+R1VQU1+\ns0xzPqiWJ0jXEgAAAGgJoRI9Xl6SRX+bnKnnL0pXpj36t0Q4Ij31cZ0uXFqidSW+GFUIAAAAxC9C\nJSDJMAxdPyBRG7+WoxvzE5rN76kJafqycv1wvUt1gXAMKgQAAADiE6ESOE6mw6xnJ2bolSmZ6pNo\njpqLSHq2yK0JS0q18jNvbAoEAAAA4gyhEmjBlDMdWndtjr49JLHZ3IG6kK5eUaH711Sp2k/XEgAA\nAD0boRJoRYrNpCcnpGvp1Cz1SzI3m//T7npNWFyqtw/StQQAAEDPRagE2nBRb7vWXpOju4Y5ZXxh\n7nB9SDe+W6G7VlWqykfXEgAAAD0PoRI4CU6rSXPHpWn59CwNTLE0m//rHo/GLS7R3/d7YlAdAAAA\nEDuESuAUjM+1a/XVObp/RJJMX2hblnrCmvF+pb71j0qVeUKxKRAAAADoZIRK4BQlWAz9dGyq3r08\nW8PSmnctl+zzaNziUr2yp16RSCQGFQIAAACdh1AJtNPobJv+eVWOHhqVLMsXupaVvrBmrarS19+r\n1GduupYAAADovgiVwJdgMxt6+NwU/fOqHI3MtDabf+ugV+OXlOj/drvpWgIAAKBbIlQCp8HZGVa9\nd0W2HhmTIvsXbh+p8Uf0vTUufe3tCh2oC8amQAAAAKCDECqB08RiMvTAOcladVWOzs+2NZv/x2c+\nTVhcqueL6hSmawkAAIBuglAJnGZD0qxaPj1Lj52fqgRz9MOWdcGI/nN9ta58q1x7a+haAgAAoOsj\nVAIdwGwy9J3hSVp7TY4u7NW8a7nmqF8XLCnV7z6pUyhM1xIAAABdF6ES6EBnpVj0+mVZeqIwTUlf\nOCLWE4po9sZqXbasTLtcgRhVCAAAAHw5hEqgg5kMQ7cNdWrdtTm6pI+92fwHZQF9ZWmpfr2tVgG6\nlgAAAOhiCJVAJ8lLsujVKZn63YVpSrVFdy39YekXm2t0yd/L9FGFP0YVAgAAAKeOUAl0IsMwdMsg\np9Zfm6tpeY5m8x9VBnTx38v0y8018oXoWgIAACD+ESqBGDgj0az/d0mG/veidGXao38bBiPSr7bV\natLrpdpURtcSAAAA8Y1QCcSIYRi6bkCiNnwtR187K6HZfJErqClvlmnOB9XyBOlaAgAAID4RKoEY\ny3KYtXBShv7v4gzlJkT/lgxHpKc+rtOFS0u0rsQXowoBAACA1hEqgThxZb8Erb82V18fmNhsbk9N\nSNOXleuH612qC4RjUB0AAADQMkIlEEfS7SY9/ZV0vTIlU30SzVFzEUnPFrk1YUmpVn7mjU2BAAAA\nwBcQKoE4NOVMh9Zdm6NvD2netTxQF9LVKyp0/5oqVfvpWgIAACC2CJVAnEqxmfTkhHQtnZqlfknm\nZvN/2l2vwsUlWnGQriUAAABih1AJxLmLetu19poc3TXMKeMLc5/Vh3XTuxX61j8qtbcmGJP6AAAA\n0LO1GSrD4bAef/xx3Xbbbbrrrrt08ODBFl9z3333adGiRZIkr9erhx56SLNmzdL999+vqqqq0185\n0IM4rSbNHZem5dOzNDDF0mx+yT6Pzn+tRA+uc6nME4pBhQAAAOip2gyVK1eulN/v18KFC3XPPfdo\nwYIFzV7zzDPPqLa2tvHrRYsWKT8/X88995ymT5+uhQsXnt6qgR5qfK5dq6/O0f0jkmT6QtsyGJGe\n2+nWua+WaO6WGtVySiwAAAA6QZuhcuvWrSosLJQkjRgxQkVFRVHz7733ngzD0Pjx4xvHtm3b1vie\nCRMmaOPGjaezZqBHS7AY+unYVL17ebbOybA2m68LRjR3a61Gv1qi54rq5A9FYlAlAAAAeorm++i+\nwO12KykpqfFrk8mkYDAoi8WiPXv2aMWKFZo7d66ef/75Ft+TmJgot9t90gUVFxefSv2dJl7rQpOe\ntkbJkp4rkN4uM+vp/VZ95ov+GVGZN6wH11drwdYq3d0voMlZoWbdzc7W09aoK2KN4h9rFP9Yo/jH\nGsU/1ij+DBo0qNW5NkOl0+mMCoWRSEQWS8Pb3nzzTZWVlek73/mOjhw5IovFot69e8vpdKq+vl6S\nVF9fHxVKv0yxsVJcXByXdaFJT16jIYOlO8ZHtHCnW7/aVqtKX/S210Nek2bvsuuVcqt+NjZFF/V2\nxKTOnrxGXQVrFP9Yo/jHGsU/1ij+sUZdT5uhcuTIkVq9erWmTJmi7du3Kz8/v3Hu3nvvbfz1s88+\nq8zMTBUWFmrv3r1as2aNhg8frrVr12rUqFEdUz0ASZLdbOju4Um6ZVCinvq4Tr//pE71wehtr1sr\nArp6RYUu6WPXI2NSdE6mLUbVAgAAoDtp85nKSZMmyWazaebMmXryySf1wAMP6KWXXtKqVatafc91\n112nvXv3atasWVq8eLFuv/3201o0gJal2Ez68egUbb4uV7cNccrcwnbX9w77NPH1Mt2xslL7armG\nBAAAAF9Om51Kk8mkhx9+OGqsf//+zV53xx13NP7a4XBo7ty5X746AO3SK9GsJyak6TvDnfrF5hot\n3edt9pqX93q0eJ9HM4c69eDIZGU6zDGoFAAAAF1dm51KAF3XwFSr/vzVTL17RbYu6NV8u2sgLD2z\nw61Rr5Zo/rZaubmGBAAAAKeIUAn0AGOzbXrjsiy9PDlTw9Kbb1CoDUT06OYajV5Uoj/udCsQ5hoS\nAAAAnBxCJdBDGIahS/McWn1Vjp7+SrrOdDbf7lriCeuBdS4VLi7V0n0eRSKESwAAAJwYoRLoYcwm\nQ18fmKgPv5arR89LUbq9+Wk+/64J6tZ/VGryG2X611FfDKoEAABAV0GoBHooh8XQd89O1pbreun7\n5yQpoYWjYjeVB3TF8nLd+E65Pq4MxKBKAAAAxDtCJdDDpdlNmjMmVZuuy9WtgxNlauEakrcP+fSV\npaW6e3XZ3IrqAAAgAElEQVSVDtZxDQkAAACaECoBSJJ6O81acEG61l2To8v7OprNRyT95d/1Gvta\niX68sVqV3lDnFwkAAIC4Q6gEEGVImlUvXZKpFdOzVJjb/BoSX0j6n0/qNGpRiZ78qFb1Qa4hAQAA\n6MkIlQBaNC7XrmXTsvSXSzI0NK35NSQ1/oh+tqlGYxeV6IXdbgW5hgQAAKBHIlQCaJVhGJrWN0Fr\nrs7R/1yYpj6Jza8h+aw+rHvXuHTBklK9uZ9rSAAAAHoaQiWANplNhr4xyKkPr8vVz8emKNXW/DSf\nXdVB3fJ+pS5bVq71JVxDAgAA0FMQKgGctASLoXtHJGvb9b1039lJsjdvXGpDqV+XLSvX19+t0E4X\n15AAAAB0d4RKAKcszW7Sz85L1aav5eobg1q+hmT5Qa8mLCnVd/9VpcNuTooFAADorgiVANrtzCSL\n/ufCdP3r6hxdltf8GpJwRHqxuF5jFh3Vb/dZVcE1JAAAAN0OoRLAlzYs3aq/Ts7UsmlZOj+7+TUk\n3pD0wiGrCv52VLevrNSaoz4O9AEAAOgmCJUATpsJvexacXmWXrw4Q4NTm19D4g9Lr+716PLl5Rq/\nuFS//6ROVT7uuQQAAOjKCJUATivDMHRFvwStvSZHT12QpjMSW/7PzK7qoP5rY7UK/nZEd62q1IYS\nupcAAABdEaESQIewmAx9c7BTm67L1S/OS9GZjpY7kt6Q9Nc9Hk1dVq4LlpbquaI6VfvpXgIAAHQV\nhEoAHSrRYtL3zk7WojFeLZmaqav7O2Rp4bRYSdpRFdSD66tV8Lej+u6/qrS5zE/3EgAAIM41f+gJ\nADqAyZAm9XZoUm+HSupDerG4Xn/e7daBuuYnwtYHI3qxuF4vFtfrnAyrvj3EqevzE5Rs5edgAAAA\n8YY/oQHodLmJZv1gZLK2XJerV6dkanpfR4t3XUrSR5UBPbDOpYK/HtUDa6u0rcLfucUCAADghOhU\nAogZs8nQ5DMdmnymQ5+5Q/q/Yrde2FWvw/XNu5d1wYj+uKtef9xVrzFZVn1riFNfOytBTrqXAAAA\nMcWfxgDEhd5Osx4alaJtN+TqL5dkaOqZdrXSvNSm8oC+t8algr8d1YPrXdpRFejUWgEAANCETiWA\nuGIxGZrWN0HT+iboQF1QL+yu14u73TrqaX4ibE0goueK3HquyK1xOTZ9a4hT1/RPUEJrJwEBAADg\ntKNTCSBu9U2y6MejU7T9xl76v4szdHFve6uv3VDq192rq1TwtyN6eINLu110LwEAADoDoRJA3LOa\nDF3ZL0GvTc3S1utz9cCIJGU5Wv7Pl8sf0dM73Dp/cakuX16mV/fWyxfiWhIAAICOQqgE0KX0T7bo\nkbGp2nFjL/1xUromntF693LNUb9uX1mlYX87qjkfVGtvTbATKwUAAOgZCJUAuiSb2dC1ZyXq9cuy\n9OHXcvTd4UnKsLf8n7QKX1hPfVyn0YtKdPVb5Vq6z6NAmO4lAADA6UCoBNDlDUy16tHzG7qXz05M\nV2GurdXXrjzi063/qNTwl4/q55uqta+W7iUAAMCXQagE0G04LIZuzE/U8unZWn9tju4scCrV1vJJ\nsKWesJ74qE7nvlqi698u1xv7PQrSvQQAADhlhEoA3dLQNKvmjU9T0U299PsL03R+dsvdy4ikdw/7\n9I33KzXilaN6bEuNDtXRvQQAADhZhEoA3VqixaT/GOTU21dk619X5+j2oU4lW1vuXh6pD+u/t9bq\nnFdLdNM75Vp+gO4lAABAWwiVAHqMszOsml/Y0L186oI0nZtlbfF14Yi04pBPX3+vUiNfKdHjdC8B\nAABaRagE0OMkWU365mCn/nFljv55ZbZuHZwop6Xl7uXh+pDmfd69fLdCbx30KET3EgAAoJEl1gUA\nQCyNyrJpQZZNvzgvVa/u9eiPu9zaXhlo9rpwRFpx0KsVB73qk2jWjMGJmjHYqT5OcwyqBgAAiB90\nKgFAUorNpNuGOrXqqmy9f0W2ZgxKVOIJupdzt9ZqxCtHdfO7FVpx0Ev3EgAA9Fh0KgHgOIZhaHS2\nTaOzbXr0/FS9urdef9xVr49b6V6+ddCrtw56daazoXv5jUF0LwEAQM9CpxIAWpFqM2nm0CStvipb\n712RrW+coHt5yB3S41saupdff7dCb9O9BAAAPQSdSgBog2EYGpNt05hsm355fqpe2VOvP+5y65Oq\n5ifChiPS8oNeLT/Wvfzmse5lb7qXAACgm2ozVIbDYc2bN0/FxcWy2WyaPXu28vLyGudfeeUVvfHG\nGzIMQ7fccoumTJmiSCSiK664ovF1I0aM0D333NNxnwIAOkmqzaTbC5I0c6hTm8oD+uMut17b65En\n1Lwrecgd0mNbajVva62m5jn0rcFOXdLHLrOp5W4nAABAV9RmqFy5cqX8fr8WLlyo7du3a8GCBZo/\nf74kyeVyadGiRXrxxRfl8/l00003afLkyTp06JCGDBmiJ554osM/AADEgmEYGptt09hsmx47P1Uv\nH+te7mihexmKSMsOeLXsQFP3csZgp85IpHsJAAC6vjafqdy6dasKCwslNXQci4qKGufS0tL04osv\nymKxqKKiQna7XYZhaOfOnSorK9Pdd9+t+++/X/v37++4TwAAMZZqM2lWQZLWXJ2jdy7P1i2DEpVg\nbv3Zy8e21Orsl4/qlvcq9M4hnr0EAABdm+FyuU74p5lHH31UF198sSZMmCBJuvLKK7V48WJZLE1N\nzpdfflnPPvusbrrpJs2aNUubN29WZWWlJk+erK1bt+rJJ5/Un//855MqqLi4+Et8HACID7VBaXmp\nRa8dtWhP/Yl/fneGPayrc4O6KjekbDsBEwAAxJ9Bgwa1Otfm9len0ym32934dSQSiQqUknTjjTfq\n2muv1X333acPP/xQZ599tszmhm1do0aNUnl5uSKRiAyj7eeITlRsrBQXF8dlXWjCGsW/nrhGowuk\n/4pE9EGZX3/aVa/Fn7b87OURn0nPHLDpuYPSZXkOfXuIU1/t3fnPXvbENepqWKP4xxrFP9Yo/rFG\nXU+b219HjhyptWvXSpK2b9+u/Pz8xrn9+/frhz/8YWPQtNlsMplMeu655/SXv/xFkrR7927l5uae\nVKAEgO7GMAydn2PX77+SrqKbeum/x6VqWFrLP88LRaQ3D3h1/TsVGrWoRPO31epofaiTKwYAADg1\nbXYqJ02apA0bNmjmzJmKRCKaM2eOXnrpJeXl5WnixIkaNGiQZs6cKUmaMGGCRo8erYEDB+qRRx7R\nmjVrZDabNWfOnA7/IAAQ79LsJt0xLEmzCpz6oMyvP+6q1+JP6+VtITcerAvp0c01enxLjablOfTt\noQ3dSxM/oAMAAHGmzWcqQQu+K2CN4h9r1DKXL6y/7anXn3a5VeRqfnLs8fommXXrYKduGZSoXh1w\ncixrFP9Yo/jHGsU/1ij+sUZdT5vbXwEAHSfNbtKdw5K09pocrZiepZvzE+RoJS8eqAvpF5trdPbL\nRzXj/Qq9ud8jXwvPaAIAAHSmNre/AgA6nmEYGpdr17hcu+aOC+uvx7qXO1voXgYj0t/3e/X3/V6l\n2gxd2S9B1w9I0Fd6df7hPgAAAIRKAIgzaXaT7hqWpDsLnNpQ6tcfd7m1ZJ9Hvhaevaz2R/Ricb1e\nLK5XToJJ1/RvCJjnZds4IA0AAHQKQiUAxCnDMDQ+167xn3cv/93QvdxV3fKzl6WesJ4tcuvZIrf6\nJpl13VkJum5AooanWwiYAACgwxAqAaALSLebdPfwJN01zKmNpX69utejxfs8KveGW3z9gbqQntxe\npye312lomkXXnZWg6wck6qwU/rMPAABOL/50AQBdyPHPXj4+LlUrj/j06l6P3tzvUU2g5UN7drqC\n+uWWWv1yS61GZ1l13YBEfe2sBJ3RASfIAgCAnodQCQBdlMVk6JI+Dl3SxyFvYZrePuTVok/rteKg\nt8W7LyVpc3lAm8ur9eON1bqgl003DEjUVf0TlG7nMHAAANA+hEoA6AYcFkNX9U/QVf0TVOMPa9kB\nrxbtrdf7n/nU0q0jEUn/OurXv4769Z/rXbq4j0MXJph1Rv+wkqwETAAAcPIIlQDQzaTYTLp5YKJu\nHpioCm9IS/d59ereeq0t8bf4+kBYWnHQqxWy67E9RzWtr0PXnZWgyWc6ZDdzwA8AADgxQiUAdGOZ\nDrNuG+rUbUOdOuwO6bVP67Vor0dbKwItvt4Tiui1Tz167VMPd2ACAICTQqgEgB6ij9Os752drO+d\nnax/Vwe06FOPFu31aHcrV5RwByYAADgZhEoA6IEGplr10CirfjgyWdsrA1q016O/FdfoqK/l5ylb\nugPz+gGJGp5h7eTKAQBAvCFUAkAPZhiGzsm06ZxMm/4jtVRVqX316l6PlpzkHZgFaRZdNyBR152V\nwB2YAAD0UPwJAAAgSTIZ0vhcu8bn2jX3JO/ALHIF9ejmGj26uUZjjt2BeS13YAIA0KMQKgEAzbTn\nDsxN5QFtKq/W7I3VurCXTddzByYAAD0CoRIAcELtuQNz9VG/Vh+7A3PSGXZdc1aCLu+boDQCJgAA\n3Q6hEgBw0tpzB+Y7h31657BP95saAubVZyXoCgImAADdBqESANAuLd2B+epej7a1cgdmVMA0XPpq\nbwImAADdAaESAPClneodmMFIdMCc1LtpiyzPYAIA0LUQKgEAp9Xxd2AWuYJa/KlHS/edOGC+e9in\ndwmYAAB0SYRKAECHMAxDw9KtGpZu1X+d2/6AeXX/BF3Rj4AJAEC8IlQCADpcSwFzyT6Plnx6cgHz\ngbUETAAA4hWhEgDQqY4PmA+PagqYSz/1aNdJBsyLett1DQETAIC4QKgEAMRMdAczRUVVAS0+iYD5\n3mGf3jsuYF7dP0FX9HUow2Hu5E8AAAAIlQCAuFGQblXBcQHz8y2yJxMwv79WBEwAAGKAUAkAiEuf\nB8yHjwuYS/d5tNNFwAQAIJ4QKgEAce/LBsyJZzRcU0LABADg9CNUAgC6lPYEzPc/8+n9z3x6YK10\nEQETAIDTilAJAOiyjg+YO10BLfnUoyUnCJghAiYAAKcdoRIA0C0MTbPqR+da9aPjAubSfR4VnWTA\nHJdj07Q8h6bmOTQo1SLDMDr5EwAA0DURKgEA3U57AubaEr/Wlvj1kw9rNCDZrKl5Dl2W51Bhrl02\nMwETAIDWECoBAN3a8QFzl6vpmpLWAqYk7a0N6ekdbj29w60Uq6GL+zQEzCln2pXJNlkAAKIQKgEA\nPcaQNKseGmXVQ6OaAubSfR7tqGo9YNYEIg1BdJ9HhqTzc2y67Ng22YI0tskCAECoBAD0SMcHzP21\nQa046NWKQ16tPuKTP9zyeyKSNpT6taHUr59tqlHfpKZtshf2ssvONlkAQA9EqAQA9Hj9ki26Y1iS\n7hiWpNpAWP/8zNcQMg96VeZtJWFKOlAX0nNFbj1X5JbTYuirve26rK9Dl57pUE4C22QBAD0DoRIA\ngOMkW026sl+CruyXoHAkoi3lAS0/FjC3VwZafZ87GNEbB7x644BXkjQmy9q4TXZEhpVtsgCAbotQ\nCQBAK0yGoTHZNo3JtunHo1N0qC6otw/5tOKgRyuP+OQNtf7eTeUBbSoP6JdbatUnsWGb7NQ8hyae\nYVeChYAJAOg+CJUAAJykM5Msum2oRbcNdcodCGvVEZ/eOtbFPOppfZvs4fqQFu5ya+EutxLMhi7q\nbW/sYp6RyDZZAEDX1maoDIfDmjdvnoqLi2Wz2TR79mzl5eU1zr/yyit64403ZBiGbrnlFk2ZMkVe\nr1ePPPKIKisr5XQ69cgjjyg9Pb1DPwgAAJ3JaTVpWt8ETeuboEgkom0VgYaAecirLeWtb5P1hCJ6\n66BXbx1s2CY7MrNhm+xleQ6NzLTKxDZZAEAX02aoXLlypfx+vxYuXKjt27drwYIFmj9/viTJ5XJp\n0aJFevHFF+Xz+XTTTTdp8uTJWrRokfLz8zVv3jy9/fbbWrhwoX7wgx90+IcBACAWDMPQqCybRmXZ\n9KNzU3SkPqR3Dnm1/IBX//zMJ08o0up7t1UEtK0ioHlba9UrwaRL8xyaeqZDk3rb5bSaOvFTAADQ\nPm2Gyq1bt6qwsFCSNGLECBUVFTXOpaWl6cUXX5TFYtFnn30mu90uwzC0bds2zZgxQ5I0YcIE/e//\n/m8HlQ8AQPw5I9Gsbw526puDnfIEI/rX0aZtsofcrT+IedQT1gu76/XC7nrZzdLEXk2nyeYl8cQK\nACA+tfl/KLfbraSkpMavTSaTgsGgLJaGt1osFr388st69tlnddNNNzV7T2Jiotxud0fUDgBA3Euw\nGJpypkNTznRo/viIPq5quBPzrYMebSoLqLUepi8kvXPYp3cO+yRVa3i6RWOcVl1krtewdKsGplpk\nNbFVFgAQe22GSqfTGRUKI5FIY6D83I033qhrr71W9913nz788EM5nU7V19dLkurr66NCaVuKi4tP\n+rWdKV7rQhPWKP6xRvGPNep4DklXJ0pXD5EqzpLWVpm1utKsDS6z6kOth8RPqoL6pMqqFw5VSZKs\nRkT9EyLKd4Y10BnWwMSw8hMjyrVHxGOZscXvo/jHGsU/1ij+DBo0qNW5NkPlyJEjtXr1ak2ZMkXb\nt29Xfn5+49z+/fv1u9/9TvPmzZPFYpHNZpPJZNLIkSO1Zs0aDR8+XGvXrtWoUaNOS7GxUlxcHJd1\noQlrFP9Yo/jHGnW+QZLGS/q+JF8oojXHtsm+ddCrA3UnuK9EUiBiqLjeUHG9SSprGk+xGRqWZtWw\ndKuGpVuO/dWqNDvPZ3YGfh/FP9Yo/rFGXU+boXLSpEnasGGDZs6cqUgkojlz5uill15SXl6eJk6c\nqEGDBmnmzJmSGp6fHD16tIYNG6af/vSnmjVrliwWi37xi190+AcBAKArs5sNXdzHoYv7ODRvXEQ7\nXZ9vk/VqY5lf4dbP+olS449ofalf60v9UeN9Es0alm5RQXpT4BySZpXdTFsTAPDlGC6X6yT/N9Vz\n8dOS+McaxT/WKP6xRvGrwhvS+4d9WrmnVEeUpB1VAR2pb/1ezJNlNqSBKZZmXc1+yWauNmknfh/F\nP9Yo/rFGXQ9HyQEAEOcyHWbdkJ+oUeGABg3KkiRVekPa4QpqR2VAO6oC2lEVVJEroNrAyf+sOBSR\ndlUHtas6qMX7msadFkND05pC5rB0q4ZnWJTlMJ/mTwYA6A4IlQAAdEEZDrMu7GXWhb3sjWORSEQH\n6kIqcjWEzB1VAe2oDGh3dVDBU9iX5A5GtKk8oE3lgajxnASTCtKauprD060amm5RooXnNQGgJyNU\nAgDQTRiGoX7JFvVLtuiyvKZxfyii4upjIbMq0NDhrAroYBuHAX1RqSesUo9PK4/4mv6Zks5KNjd0\nNDMagmZBmkUDUiyycOUJAPQIhEoAALo5m9nQ8AyrhmdYo8Zr/GEVVTV1NT85Fjpd/pNva0Yk7a0N\naW9tSG8c8DaO283SoFSrhqU1HA5UkG5RQZpVeUk8rwkA3Q2hEgCAHirFZtK4XLvG5UZvoT1SH27q\nah4LnbuqA/KdQmPTF5I+rgzo48qAJE/jeJLF0JDGoNkUOnMTTDIImwDQJREqAQBAI8Mw1NtpVm+n\nWZPPdDSOB8MR7a0JakdVsLGjuaMqoH21IZ3KMfJ1rTyvmW43NPTY/ZoFn4fONIsyOBwIAOIeoRIA\nALTJYjI0OM2qwWlWXXNWQuO4OxDWLtfxQbPhFNpSz6ldeVLli2hdiV/rSqLv1+yVYNLQ44LmsHSr\nhqRZlGzlcCAAiBeESgAA0G5Oq0mjs20anW2LGq/whlTkCqqoKqCiY0FzR1VA1afwvKYkHfWEddTj\n0z8/80WN900yN3YzP//r4FSrHBa20AJAZyNUAgCA0y6zlStPjtSHtfNYwPw8dO50BVV/KneeSDpQ\nF9KBupBWHGwaMxlSforluKDZcEBQPifRAkCHIlQCAIBOcfzzmhf3aXpeM/z5/ZrHBc0dVQEVVwfl\nP4VdtOGIVFwdVHF1UK/vbzqJ1maSBqU23K1ZkG7V0LSGX/flJFoAOC0IlQAAIKZMhqH+yRb1T7Zo\nWt+m8cCxw4E+3z77eejcUxNU+BQam/6w9ElVUJ9UBXX8SbQpVkPn5dg0Lsem8bl2jcmyysmzmgBw\nygiVAAAgLllNhoakWTUkzapr1HQ4kDcY0e7qhoC5syqgHce6mwfqTuHOE0k1gYjeO+zTe4d9kmpl\nMaRzMq0an2vT+By7xufalJPA6bMA0BZCJQAA6FIcFkPnZNp0Tmb04UC1x06i3VEV0E7XsQOCqgI6\nepIn0QYj0ubygDaXB/T7T9ySpAHJZo3Ltaswt6GjOTjVwn2aAPAFhEoAANAtJFtNGptt09gvnERb\nedxJtDuPhc4dVQG5TuIk2r21Ie2trddf/l0vScqwm45tl7VpfI5No7JsspsJmQB6NkIlAADo1jIc\nZl3Qy6wLvnASbXF1UOtL/Vpf4tf6Ep/21ra9fbbSF9byg14tP9hwEJDdLI3OagiY43Ptygp22McA\ngLhFqAQAAD2OYRganGbV4DSrvjnYKUkq9YQaAmapTxtK/NpWEVBbN534QtK6Er/Wlfil7XWSElWw\ns6Tx8J/xuTb1SzKzZRZAt0aoBAAAkJSTYNZV/RN0Vf+GQ4HcgbA2lQe0ocSn9aV+fVDqV02g7S2z\nRa6gilxB/Wl3w5bZXgkmjc+1a1yOTYW5Np2dYeXeTADdCqESAACgBU6rSRPPsGviGQ3bZkPhiHa4\nglpf4tOGY9tmD7nb3jJ71BPWkn0eLdnXcJ2J02JobHbTc5ljc2xK5ioTAF0YoRIAAOAkmE2GRmRY\nNSLDqlkFDWMH64KNAXN9qV+fVPoV0Ym7kO5gRCuP+LTyiE+SZDKkERnWxk7muBy7eju5ygRA10Go\nBAAAaKe8JIvykiy6fkCiJGlLUbEqkvOOHQDk06aygDyhE2+ZDUekbRUBbasI6NmihqtM+iaZo+7L\nHJpmkYnnMgHEKUIlAADAaZJkkc4906HJZzokSYFwRB9VBLTuuC2zZd627808UBfSgTqPXt7TsGU2\n3W5oXI5dF+TaVNjLrpGZVll5LhNAnCBUAgAAdBCrydCYbJvGZNv0XTVcZbK3JqT1pb7GLbPF1W3f\nQ1Lli+itg169dewqk8Rjz2UW5to0Ideu83KsSrTwXCaA2CBUAgAAdBLDMJSfalF+qkW3DGq4yqTc\nG9KGYwFzQ4lfWyr8CrTRzKwPRrTqiE+rjvgk1cpiSKOyrCrMtasw16bCXLvS7YRMAJ2DUAkAABBD\nWQ6zLu+XoMv7NVxl4glGtKXc3/hc5vpSv2r8J34uMxiRPiwL6MOygH77ccPYsDSLCns1hcw+HP4D\noIMQKgEAAOJIgsXQhF52Tehll5TceJXJuqM+rS3xa12JTyWetp/L3OEKaocrqP/d2XD4T78kc8N2\n2WNBc2CKRQaH/wA4DQiVAAAAcez4q0zuGNbwXOantSGtLfFpXYlfa4/69Glt2/dl7q8LaX+dR389\ndvhPtsPU2MUszLVpRIZVZg7/AdAOhEoAAIAuxDAMDUixaECKRd849lzm0fqQ1pV83sn065PKgE68\nYVYq84b1+n6vXt/fcPhPstXQ+TkNB/8U5to0Ossmh4WQCaBthEoAAIAurleiWdeelahrz2q4L9Pl\nC2tDacNW2XUlfm0ub/vwn9pARO8d9um9wz5Jks0kjcm2acKxbub5OTal2Dj8B0BzhEoAAIBuJs1u\n0tQ8h6bmNdyX6QlGtKm8YavsuhK/Npb65Q6euJfpD0vrjnU+pTqZDOnsdGvUc5k5CRz+A4BQCQAA\n0O0lWAxd2MuuC3vZJUnBcETbKwNae+yZzPUlflX4TtzKDEekjyoD+qgyoD8UNRz+MzDFcuy5zIZu\nZv9kM4f/AD0QoRIAAKCHsZgMnZtl07n/v737j626Ovg4/v7e7723P25LKy1UKaVoA1UQ2wE+U4ys\ncyMTw6Y+PnP+mji1zOpwcz9UQtaiRB0b4CDLNLow8Ikat2k2Yx62mS0hCA4Raa1CaZmjtKClPyht\nb9v78/v88S3f9tJCa2u5hX5eSdP2nC96msO53E/Pr0wvD81OwbIsqk+E7YN/GgLs/CxIvX/ww38O\ntoU52Bbmf2s6AbggwWBupr0fc94kD3MzNZspMh4oVIqIiIiMc4ZhkJ/uIT/dwz359uE/dR3hnuWv\n9pLZqtbwoP+d44HYfZkAU30mczM9zJtkh9jCDI/2ZoqcZxQqRURERKSfnBQ3OSlubs2zD/9p7o44\neyzfbQhQ0RwiMtgRs0C9P0K9P+KcMmsAM9PczJ3ktcNmppfZEz0kmFo2K3KuUqgUERERkUFlJJos\nyU1iSW4SAB2hKO83BtnxWZB/NQQobw7RHho8ZVrAgRNhDpwI8+pBu8zrgssn2gHzSz2zmjPS3Li0\nP1PknKBQKSIiIiKfW4rHRdGURIqm2CfMRi2LmhNh9jQG2dsUYk9TkMqW0KBXmYB90uwHTSE+aAo5\nZakeg8IMe1/myVnNqT4dBCQyFilUioiIiMiIufrsy7xjhl0WiFh83GIHzA+aQnzQGKT6RJghrJql\nPWSx/bMg2z8LOmWTk1x8KdPLvMyesJnpYWKiDgISiTeFShEREREZFQmmYc8yTvI6ZW3BKOXNdsD8\noCdsDuWkWYBjXVH+VtfN3+q6nbLpqSbz+sxmFmR4SHbrICCRs0mhUkRERETOmgleFwsvSmDhRQlO\nWUNnxAmY9ucgxwNDmc+EQ+0RDrV38fp/ugAwDbg03c28SV5nNvOyCzx4XFo2KzJaFCpFREREJK6y\nkk0WT0ti8TT7ECDLsjjUHulZNhvkg8YQFc0huoZw3GzEgo+Ph/n4eJiXqu37MxNNKMiwDwHKDJrM\nTe5mqs8k22fi82hWU2SkBg2V0WiUNWvWUFNTg9frZeXKleTk5Dj1r7zyCm+//TYACxYsoLi4GMuy\nWPp1yUkAABG9SURBVLJkifPcnDlzeOihh0bpRxARERGR84lhGFw8wc3FE9z8zyX2lSbhqMX+1jB7\nm4LsabRnNfcdH9q1Jt0R2HUsyK5jQSABDjY7deleg2yfyVSfydQUN9k9YfNk2ZRkE6+uOxE5o0FD\n5bZt2wgGg2zatInKyko2bNjA2rVrAThy5Ah//etf+f3vf4/L5aK4uJiioiISExPJz89n/fr1o/4D\niIiIiMj5z+0ymDPRw5yJHu6e6QOgMxzlw+ZQ77LZxiCftA9tf+ZJrUGL1qA9swmBAZ/JSnLFhM1s\nn0mOrzeAZiW5MLW8VsaxQUNleXk5V199NWDPOO7fv9+py8rKYuPGjZimfepWOBzG6/VSVVVFY2Mj\nJSUlJCQk8Mgjj5CbmztKP4KIiIiIjEfJbhdXZSVwVVbv/szjgagzm7mnJ2we6xrCvSZn0NAVpaEr\nGnPlSV9uAy5MPjnbaZKdHBtAp6aYZCS4dB2KnLcGDZV+v5+UlBTne5fLRTgcxu1243a7SU9Px7Is\nNm7cSH5+Prm5uTQ3N7N06VK+/vWvU15eTmlpKVu2bBlSg2pqaob/04yisdou6aU+GvvUR2Of+mjs\nUx+Nfeqj+MsBcnxwkw+sadAQNNjX7qKqw8XRgMGxgEFDwKAhaBCxRh70whbU+yP2KbbHBn4mwWWR\n5bWYnGCR1fNxYULU/tprf5+i004cGkdjz4wZM05bN+hfXZ/Ph9/vd763LAu3u/ePBQIBVq9ejc/n\n49FHHwVg1qxZzuxlYWEhTU1NWJY1pN/OnKmx8VJTUzMm2yW91Edjn/po7FMfjX3qo7FPfTQ2zQSu\n7fm6bx9FohbHuqMc8Uc40hMKj/jD1HdEnLKGruiQ7tUcTCBqcLjb4HD36Z+Z4Ond35mT4mZaiklu\nqsm0FDe5qeNntlPj6NwzaKgsKChg+/btLFq0iMrKSvLy8pw6y7L46U9/yvz581m6dKlT/uKLL5KW\nlsbdd99NdXU1WVlZ42IAiIiIiMi5w3QZXJRsclGyyfxJAz8TjFh82tk3dMZ+Xe8PD/n6k8G0hSza\nWsPsbx14f6fPbTAtxbQ/UntCZ0/4nJ7qJs1r6D23xMWgobKoqIhdu3Zx3333YVkWpaWlvPzyy+Tk\n5BCJRNi7dy+hUIh3330XgAcffJClS5dSVlbGjh07ME2T0tLSUf9BRERERES+aF7TIDfVTW7q6d82\n+0NRjp4SPPvOdh7xR+gIjzx4+sP2CbinC50TPEafsNk7w5mb4mZaqkmqrk+RUTJoqHS5XKxYsSKm\nbPr06c7X77zzzoB/7tlnnx1Zy0REREREzgE+j4sZaS5mpHkGrLcsixNBK2am84g/7OzDPOKPcNQf\nITiy84RoC1l81BLio5aBDxSamOCKXVLbJ3jmpJgkuxU6ZXi0HVhEREREZBQZhkF6gkF6govLJw4c\nPKOWRVPP/s66jgh1/gi17WFqOyLU9Xz2j3C2syUQpSUQpbx54NA5OckVs6Q2N7U3eE5NMUnQfZ1y\nGgqVIiIiIiJx5jIMJieZTE4y+VJm/3rLsmgJRKltj3C4I0JtR9j+3G5/PtwRpvvzXdHZz7GuKMe6\norzf2D90GsBFyS5yU93k9Ame01Ls77N9Cp3jmUKliIiIiMgYZxgGGYkmGYkmcwc4VMiyLI51RfuE\nTTto1nZEONweps4fITSC5bUWcLQzytHOIO82DPxMVpLLuaszx2fPbton2Zrk+EwuGCen145HCpUi\nIiIiIuc4wzDISjbJSjb5r8n96yNRi8+6os7M5qkznUf8ESIjPEuooStKQ1eUPU0DL6/1uQ0ndJ68\nNqU3hJpM8Zl4XAqd5yKFShERERGR85zpsu/AzPaZLBigPhy1OOKP2DObHWFnpvNwR4TD7RGOdkZG\nfF+nP2xx4ESYAyfCA9afXGKbaSYw40iLM8tph1B7mW2aV4cJjUUKlSIiIiIi45zb1ffqlIR+9YFI\nT+jsM9NZ226fXlvfEeHTrgjREaZOZ4ktJh+2dw34zASP0Sdsunu/9tkfFyWbmJrtPOsUKkVERERE\n5IwSTINLJri5ZMLA8SEUtTjac0VKXcfJsGnv5azvOc228wu4q7MtZLGvNcy+09zVaRowxRe7l3Oq\nz82MdDdXTPQwQTOdo0KhUkRERERERsQTM9PZn2VZHA9EqYsJnRHq/GHqe75v6BrhRZ1AxMK+kqUj\nMuCBQnkTTAoyvBRmeCjI8FCQ4SU9QUFzpBQqRURERERkVBmGwcREk4mJJgUZAz/THbY42hnhXwdq\niaZf6IRNO4SGqfdHCIzw2pR/t0X4d1sXb/ynd3ltbopJQYaHwkxvT9D0kJlojux/NM4oVIqIiIiI\nSNwluu0ltpH0KDNm+PrVRy2Lpu6os5y2riMcs9y2riNCS+Dzz3bWdtgHFL1Z2+2UTfWZXJHh6ZnR\ntMPmhckKmqejUCkiIiIiImOeyzCYnGQyOWnguzoB/KEoR/yRmL2ch9rDVDaHqD4RHvIJtvU9+0P/\n73Bv0LwwyUVBhocr+iyfzfaZunsThUoRERERETlP+DwuZqa7mJnu6VfXEYryUUuIimb7o7w5yIHW\n8JDv5/ysK8pn9QH+Vt97QFBmostZMntyRjM3ZfwFTYVKERERERE576V4XFyVlcBVWb1XpnSFLfYd\ntwPmybC573iI0BBX0TZ1R/nHkQD/ONIbNNO9hhMwCzI8FGZ4uXiCies8DpoKlSIiIiIiMi4luQ3m\nTfIyb5LXKQtELPYfD/FhS4jyphAVzUE+Oh4a8iFBrUGLbZ8G2PZpb9BM9RjMmeihMLN3RnPGBPd5\nc6emQqWIiIiIiEiPBNOgMNNLYaaXu2faZaGoRXVr2JnR/LDZDp1DvXuzPWSxsyHIzoYg4Acg2W0H\nzb4HAs2+wH1OLp1VqBQRERERETkDj8tg9kQPsyd6uHOGXRaJWhxsCzv7M0+GzfbQ0IJmZ9hi17Eg\nu44FAZjgMTh050Wce5FSoVJERERERORzM10G+eke8tM93JqXDNjXnvynLUJFT8gsb7aXz7YGBw+a\nczI85+y+S4VKERERERGRL4DLMMhLc5OX5ua/L7HLLMuitiPSM5MZpLzZ3qvZfMqdmgUZ/U+sPVco\nVIqIiIiIiIwSwzCYnupmeqqbG6cnAXbQPNoZpaInZFY0h1jQ51Tac41CpYiIiIiIyFlkGAbZPpNs\nXxI3TEuKd3NGzBXvBoiIiIiIiMi5S6FSREREREREhk2hUkRERERERIZNoVJERERERESGTaFSRERE\nREREhk2hUkRERERERIZNoVJERERERESGTaFSREREREREhk2hUkRERERERIZNoVJERERERESGTaFS\nREREREREhk2hUkRERERERIbNaG1tteLdCBERERERETk3aaZSREREREREhk2hUkRERERERIZNoVJE\nRERERESGTaFSREREREREhk2hUkRERERERIZNoVJERERERESGTaFSREREREREhs0d7waMJdFolDVr\n1lBTU4PX62XlypXk5OQ49X/+85954403cLvdfO973+Paa6+NY2vHp3A4zOrVqzl69CihUIh7772X\nhQsXOvWvvPIKf/nLX7jgggsAWLFiBbm5ufFq7rj13e9+F5/PB8CUKVMoLS116jSO4u+tt97irbfe\nAiAYDFJdXc3WrVtJTU0FYN26dVRUVJCcnAzA2rVrSUlJiVt7x5OPPvqI3/zmNzz//PPU1dXx5JNP\nApCXl8ejjz6Ky9X7u+Du7m7KyspoaWnB5/NRVlbmvPbJ6OnbR9XV1fzqV7/CNE08Hg+rVq0iIyMj\n5vkzvR7K6OjbRwcOHODHP/6x837ulltuYdGiRc6zGkfx0bePVq5cSXNzMwCffvopl19+OU899ZTz\nrGVZLFmyxOnDOXPm8NBDD8Wl3XJ6CpV9bNu2jWAwyKZNm6isrGTDhg2sXbsWgKamJl577TW2bNlC\nMBikuLiYL3/5y3i93ji3enzZunUraWlpPPHEE5w4cYK77rorJlRWVVWxatUqLrvssji2cnwLBAJY\nlsXzzz/fr07jaGxYsmQJS5YsAeCXv/wl3/zmN51ACfY42rhxI+np6fFq4rj00ksvsXXrVpKSkgD4\n9a9/zQMPPMC8efN45pln2LZtG1/96led519//XXy8vJYs2YNf//739m0aRM/+clP4tX8ceHUPlq3\nbh0/+9nPmDlzJm+88QYvvfQSjzzyiPP8mV4PZXSc2kf79+/njjvu4M477xzweY2js+/UPjoZINva\n2igpKYkZQwD19fXk5+ezfv36s95WGTotf+2jvLycq6++GrB/C7J//36nbt++fVxxxRV4vV5SUlKY\nOnUqBw8ejFdTx62vfe1rfP/73wfs31yZphlTX1VVxZYtWyguLmbz5s1xaKHU1NTQ3d3N8uXLKSkp\nobKy0qnTOBpb9u3bxyeffMLNN9/slEWjUerq6nj66ae5//77efPNN+PYwvFl6tSprFmzxvm+qqqK\nuXPnArBgwQJ2794d83xFRYXzb9aCBQt47733zl5jx6lT++ipp55i5syZAEQiERISEmKeP9ProYyO\ngcbRO++8w7Jly1i9ejV+vz/meY2js+/UPjrphRde4NZbbyUzMzOmvKqqisbGRkpKSvjRj35EbW3t\n2WqqfA4KlX34/f6YJV4ul4twODxgXXJyMh0dHWe9jeNdcnIyPp8Pv9/PihUreOCBB2LqFy1axOOP\nP85vf/tbKioq2L59e5xaOn4lJiZy1113sXHjRh5//HFKS0s1jsaozZs3c//998eUdXV18e1vf5sn\nn3ySDRs28Kc//Ymampo4tXB8ue6663C7excQWZaFYRjAwGOl73hKTk7u92ZZvnin9tHJN78ffvgh\nf/zjH7n99ttjnj/T66GMjlP7aPbs2Tz88MO88MILZGdn87vf/S7meY2js+/UPgJoaWlh9+7dziqa\nvjIyMli6dCnPPfcc99xzj5aQj1EKlX2cDCsnWZbl/KX3+Xx0dnY6dZ2dndpjFCcNDQ2UlJSwePFi\nrr/+eqfcsixuv/120tPT8Xg8XHPNNVRXV8expePTtGnTuP766zEMg9zcXNLS0py9EhpHY0d7ezu1\ntbXMnz8/pjwxMZHbbruNxMREfD4f8+fPV6iMk777Jzs7O2OWKEPseNJYip+3336bX/ziFzz77LP9\n9uKd6fVQzo6ioiJnS0xRUREHDhyIqdc4Ghv++c9/8o1vfKPfCjSAWbNm8ZWvfAWAwsJCmpqasCzr\nbDdRBqFQ2UdBQQE7d+4EoLKykry8PKdu1qxZlJeXEwgE6Ojo4NChQzH1cnY0NzezfPlyfvCDH/Ct\nb30rps7v93PbbbfR2dmJZVm8//77XHrppXFq6fj15ptvsmHDBgAaGxvx+/3OwRUaR2PH3r17ufLK\nK/uVHz58mOLiYiKRCOFwmIqKCvLz8+PQQpk5cyZ79uwBYOfOnRQWFsbUFxQUsGPHjtPWy+jbunUr\nf/jDH3juuefIzs7uV3+m10M5Ox5++GE+/vhjAHbv3t3vfYHG0djw3nvvsWDBggHrXnzxRV599VUA\nqqurycrKclZxyNihg3r6KCoqYteuXdx3331YlkVpaSkvv/wyOTk5LFy4kO985zssW7YMy7IoKSnp\nt3dCRt/mzZtpa2tj06ZNbNq0CYCbbrqJrq4ubr75Zh588EFKSkrwer1ceeWVXHPNNXFu8fhz4403\n8sQTT1BcXAzAz3/+c1577TWNozGmtrY25k1w39e6xYsXc++99+J2u7nhhhsU/OPkhz/8IU8//TSh\nUIiLL76Y6667DoDly5ezfv16brnlFlatWkVxcTFut5vVq1fHucXjSyQSYd26dWRlZfHYY48BMHfu\nXJYtW0ZZWRklJSUDvh6euuxPRtdjjz3G2rVrcbvdZGRksGLFCkDjaKw59d8k6O2jpUuXUlZWxo4d\nOzBNU8tfxyijtbVV88ciIiIiIiIyLFr+KiIiIiIiIsOmUCkiIiIiIiLDplApIiIiIiIiw6ZQKSIi\nIiIiIsOmUCkiIiIiIiLDplApIiIiIiIiw6ZQKSIiIiIiIsOmUCkiIiIiIiLD9v+y7+Y/Jg97XgAA\nAABJRU5ErkJggg==\n", 571 | "text/plain": [ 572 | "" 573 | ] 574 | }, 575 | "metadata": {}, 576 | "output_type": "display_data" 577 | } 578 | ], 579 | "source": [ 580 | "plt.plot(loss_history);" 581 | ] 582 | } 583 | ], 584 | "metadata": { 585 | "kernelspec": { 586 | "display_name": "pinkslip", 587 | "language": "python", 588 | "name": "pinkslip" 589 | }, 590 | "language_info": { 591 | "codemirror_mode": { 592 | "name": "ipython", 593 | "version": 3 594 | }, 595 | "file_extension": ".py", 596 | "mimetype": "text/x-python", 597 | "name": "python", 598 | "nbconvert_exporter": "python", 599 | "pygments_lexer": "ipython3", 600 | "version": "3.6.1" 601 | } 602 | }, 603 | "nbformat": 4, 604 | "nbformat_minor": 2 605 | } 606 | --------------------------------------------------------------------------------