├── .gitignore
├── README.md
├── Reinforcement_learning
│   ├── 01_DQN.ipynb
│   └── 02_REINFORCE_discrete.ipynb
├── deepnlp
│   ├── 01_DL_for_NLP_BoWClassifier.ipynb
│   ├── 02_DL_FOR_NLP_NGRAM.ipynb
│   ├── 03_DL_FOR_NLP_LSTM.ipynb
│   ├── 04_DL_FOR_NLP_BILSTMCRF.ipynb
│   ├── 05_LSTM_Batch.ipynb
│   ├── 06_Seq2Seq_basic.ipynb
│   ├── 06_Seq2Seq_vanilla.ipynb
│   ├── 07_Seq2Seq_Attention.ipynb
│   ├── 08_Relational_Network_for_bAbI(Not yet).ipynb
│   ├── 09_Transformer.ipynb
│   ├── 10_CNN_text_classification.ipynb
│   └── temp_Coref.ipynb
├── evolutionary_algorithms
│   ├── AutoML_Design_by_evolution.ipynb
│   ├── net_builder.py
│   ├── torch_models.py
│   └── worker.py
├── generative_model
│   ├── 01.Simple_Autoencoder.ipynb
│   ├── 02.Regularized_Autoencoders.ipynb
│   ├── 03.Variational_Autoencoder.ipynb
│   ├── 03_1_Appendix_Entropy&KL-Divergence.ipynb
│   ├── 04.Variational_Recurrent_Autoencoder.ipynb
│   └── 05.Controllable_Text_Generation.ipynb
├── mytutorial
│   └── 1_week_pytorch_basic.ipynb
└── tutorial
    ├── 00.XOR.ipynb
    ├── 01.Linear_Regression.ipynb
    ├── 02.Logistic_Regression.ipynb
    ├── 03.Feedforward_Neural_Network.ipynb
    ├── 04.Convolutional_Neural_Network.ipynb
    └── 10.GAN.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
2 | __pycache__
3 | data/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Pytorch Study
2 | 
3 | A PyTorch study repository.
4 | 
5 | Implementing everything I'm interested in (NLP, generative models, RL, ...) in code to understand it better.
6 | Implementations are based on classic models as well as what I've understood from reading papers, tutorials,
7 | lectures, and blog posts +_+
8 | 
9 | Until the day I can build models of my own...
10 | 
11 | 
12 | ## Docker image with a prebuilt Python 3.5 PyTorch environment
13 | 
14 | Ubuntu 16.04, Python 3.5.2, with a variety of ML/DL packages including TensorFlow, scikit-learn, and PyTorch
15 | 
16 | `docker pull dsksd/deepstudy:0.2`
17 | 
18 | 
19 | ## 1. Deep NLP Models
20 | 
21 | 1. BoWClassifier
22 | 2. NGRAM & CBOW
23 | 3. LSTM POS Tagger
24 | 4. Bidirectional LSTM POS Tagger
25 | 5. LSTM batch learning
26 | 6. Vanilla Sequence2Sequence (Encoder-Decoder)
27 | 7. Sequence2Sequence with Attention
28 | 8. Relational Network for bAbI task (in progress)
29 | 9. Transformer (Attention Is All You Need)
30 | 
31 | ### Papers I want to read and implement
32 | 
33 | 1. Poincaré Embeddings for Learning Hierarchical Representations
34 | 2. Neural Embeddings of Graphs in Hyperbolic Space
35 | 3. A Deep Reinforced Model for Abstractive Summarization
36 | 4. Controllable Text Generation
37 | 5. A simple neural network module for relational reasoning
38 | 
39 | ## 2. Generative Models
40 | 
41 | 1. Basic Auto-Encoder
42 | 2. Regularized Auto-Encoder
43 | 3. Variational Auto-Encoder
44 | 3-1. Appendix 1: Entropy and KL-divergence
45 | 4. Variational Recurrent Auto-Encoder
46 | 
47 | ## 3. Reinforcement Learning
48 | 
49 | ## 4.
Evolutionary Algorithms -------------------------------------------------------------------------------- /deepnlp/01_DL_for_NLP_BoWClassifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "" 12 | ] 13 | }, 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "output_type": "execute_result" 17 | } 18 | ], 19 | "source": [ 20 | "import torch\n", 21 | "import torch.autograd as autograd\n", 22 | "import torch.nn as nn\n", 23 | "import torch.nn.functional as F\n", 24 | "import torch.optim as optim\n", 25 | "\n", 26 | "torch.manual_seed(1)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# 1. Logistic Regression Bag-of-Words classifier" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "### 1. word2index 딕 준비 for Bag-of-Words" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "{'it': 7, 'to': 8, 'una': 13, 'Give': 6, 'good': 19, 'cafeteria': 5, 'comer': 2, 'not': 17, 'si': 24, 'on': 25, 'lost': 21, 'me': 0, 'creo': 10, 'en': 3, 'sea': 12, 'get': 20, 'No': 9, 'is': 16, 'que': 11, 'la': 4, 'idea': 15, 'at': 22, 'gusta': 1, 'Yo': 23, 'a': 18, 'buena': 14}\n", 53 | "{0: 'me', 1: 'gusta', 2: 'comer', 3: 'en', 4: 'la', 5: 'cafeteria', 6: 'Give', 7: 'it', 8: 'to', 9: 'No', 10: 'creo', 11: 'que', 12: 'sea', 13: 'una', 14: 'buena', 15: 'idea', 16: 'is', 17: 'not', 18: 'a', 19: 'good', 20: 'get', 21: 'lost', 22: 'at', 23: 'Yo', 24: 'si', 25: 'on'}\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "data = [ (\"me gusta comer en la cafeteria\".split(), \"SPANISH\"),\n", 59 | " (\"Give it to me\".split(), \"ENGLISH\"),\n", 60 | " (\"No creo que sea una buena idea\".split(), \"SPANISH\"),\n", 61 | " (\"No it is not a good idea to get lost at sea\".split(), \"ENGLISH\") ]\n", 62 | "\n", 63 | "test_data = [ (\"Yo creo que si\".split(), \"SPANISH\"),\n", 64 | " (\"it is lost on me\".split(), \"ENGLISH\")]\n", 65 | "\n", 66 | "# word_to_ix maps each word in the vocab to a unique integer, which will be its\n", 67 | "# index into the Bag of words vector\n", 68 | "word_to_ix = {}\n", 69 | "for sent, _ in data + test_data:\n", 70 | " for word in sent:\n", 71 | " if word not in word_to_ix:\n", 72 | " word_to_ix[word] = len(word_to_ix)\n", 73 | "\n", 74 | "ix_to_word = {v : k for k,v in word_to_ix.items()}\n", 75 | "\n", 76 | "print(word_to_ix)\n", 77 | "print(ix_to_word)\n", 78 | "\n", 79 | "VOCAB_SIZE = len(word_to_ix)\n", 80 | "NUM_LABELS = 2" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### 2. 
모델 선언 " 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 4, 93 | "metadata": { 94 | "collapsed": true 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "class BoWClassifier(nn.Module): # nn.Module을 상속받아서 클래스 만들어야 함\n", 99 | " \n", 100 | " def __init__(self, num_labels, vocab_size):\n", 101 | " # 파이토치의 nn.Module을 상속받아 \"모델 클래스\"를 만들 때는\n", 102 | " # 반드시 부모 클래스 nn.Module의 생성자를 초기화 해줘야 함\n", 103 | " super(BoWClassifier, self).__init__()\n", 104 | " \n", 105 | " # 선형 맵핑(아핀 변환?)\n", 106 | " # vocab_size만큼의 벡터를 -> spanish or english 2가지로 분류\n", 107 | " \n", 108 | " self.linear = nn.Linear(vocab_size, num_labels)\n", 109 | " \n", 110 | " # NOTE! The non-linearity log softmax does not have parameters! So we don't need\n", 111 | " # to worry about that here\n", 112 | " \n", 113 | " def forward(self, bow_vec): \n", 114 | " # nn.Module을 상속받은 클래스에서 forward는 예약어임\n", 115 | " # Pass the input through the linear layer,\n", 116 | " # then pass that through log_softmax.\n", 117 | " # Many non-linearities and other functions are in torch.nn.functional\n", 118 | " return F.log_softmax(self.linear(bow_vec))" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "### 3. 전처리 함수 선언 (문장 -> 벡터 / 레이블)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "텐서는 리스트로부터 바로 만들 수 있다. torch.Tensor(list) , default 타입은 floatTensor인데
\n", 133 | "integer 타입은 torch.LongTensor를 사용해야 함" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "Tensor.view 는 reshape 함수임~" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 6, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "def make_bow_vector(sentence, word_to_ix):\n", 152 | " vec = torch.zeros(len(word_to_ix))\n", 153 | " for word in sentence:\n", 154 | " vec[word_to_ix[word]] += 1\n", 155 | " return vec.view(1, -1) # reshape 하는 함수!!\n", 156 | "\n", 157 | "def make_target(label, label_to_ix):\n", 158 | " return torch.LongTensor([label_to_ix[label]]) # integer Tensor는 LongTensor 사용" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 8, 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "name": "stdout", 168 | "output_type": "stream", 169 | "text": [ 170 | "Parameter containing:\n", 171 | "\n", 172 | "Columns 0 to 9 \n", 173 | "-0.1808 -0.0890 -0.1295 -0.1729 0.1483 0.0669 -0.1575 0.0365 -0.0309 0.0673\n", 174 | " 0.1917 0.0630 0.0973 -0.0790 -0.0861 -0.0211 0.1135 -0.1090 -0.1556 -0.1673\n", 175 | "\n", 176 | "Columns 10 to 19 \n", 177 | " 0.1796 -0.0346 0.0130 -0.1186 0.0753 -0.0825 -0.0724 -0.1404 0.0732 0.1111\n", 178 | "-0.0204 -0.0121 0.1603 -0.1584 -0.0810 0.1582 -0.0832 -0.1492 -0.1451 0.0097\n", 179 | "\n", 180 | "Columns 20 to 25 \n", 181 | " 0.1313 -0.0343 -0.1889 -0.1827 0.0981 0.0486\n", 182 | "-0.1885 -0.1633 0.0701 0.1635 -0.1131 0.1610\n", 183 | "[torch.FloatTensor of size 2x26]\n", 184 | "\n", 185 | "Parameter containing:\n", 186 | "1.00000e-02 *\n", 187 | " -9.1960\n", 188 | " -7.8866\n", 189 | "[torch.FloatTensor of size 2]\n", 190 | "\n" 191 | ] 192 | } 193 | ], 194 | "source": [ 195 | "model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)\n", 196 | "\n", 197 | "for param in model.parameters():\n", 198 | " print(param) \n", 199 | " \n", 200 | " # Ax + b\n", 201 | " # nn.Linear가 가지고 있는 2x26 A\n", 202 | " # b\n", 203 | " " 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "토치에서 모델로 넘겨주는 모든 변수는 autograd.Variable()로 wrapping해줘야 한다!!" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 10, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "name": "stdout", 220 | "output_type": "stream", 221 | "text": [ 222 | "Variable containing:\n", 223 | "-0.9966 -0.4607\n", 224 | "[torch.FloatTensor of size 1x2]\n", 225 | "\n" 226 | ] 227 | } 228 | ], 229 | "source": [ 230 | "# To run the model, pass in a BoW vector, but wrapped in an autograd.Variable\n", 231 | "sample = data[0]\n", 232 | "bow_vector = make_bow_vector(sample[0], word_to_ix)\n", 233 | "log_probs = model(autograd.Variable(bow_vector)) # 이렇게 넣어주면 forward 함수로 바로 맵핑\n", 234 | "print(log_probs)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 11, 240 | "metadata": { 241 | "collapsed": true 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "label_to_ix = { \"SPANISH\": 0, \"ENGLISH\": 1 }" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 19, 251 | "metadata": { 252 | "collapsed": true 253 | }, 254 | "outputs": [], 255 | "source": [ 256 | "ix_to_label = {v:k for k,v in label_to_ix.items()}" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "### 4. 트레이닝!" 
264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "트레이닝 전 파라미터 확인 (before & after 해보려고) " 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 12, 276 | "metadata": {}, 277 | "outputs": [ 278 | { 279 | "name": "stdout", 280 | "output_type": "stream", 281 | "text": [ 282 | "Variable containing:\n", 283 | "-0.6785 -0.7080\n", 284 | "[torch.FloatTensor of size 1x2]\n", 285 | "\n", 286 | "Variable containing:\n", 287 | "-0.8051 -0.5925\n", 288 | "[torch.FloatTensor of size 1x2]\n", 289 | "\n", 290 | "Variable containing:\n", 291 | " 0.1796\n", 292 | "-0.0204\n", 293 | "[torch.FloatTensor of size 2]\n", 294 | "\n" 295 | ] 296 | } 297 | ], 298 | "source": [ 299 | "# Run on test data before we train, just to see a before-and-after\n", 300 | "for instance, label in test_data:\n", 301 | " bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))\n", 302 | " log_probs = model(bow_vec)\n", 303 | " print(log_probs)\n", 304 | "print(next(model.parameters())[:,word_to_ix[\"creo\"]]) # Print the matrix column corresponding to \"creo\"" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 13, 310 | "metadata": { 311 | "collapsed": true 312 | }, 313 | "outputs": [], 314 | "source": [ 315 | "loss_function = nn.NLLLoss() # negative log likelihood 로스\n", 316 | "optimizer = optim.SGD(model.parameters(), lr=0.1) # 옵티마이저\n", 317 | "\n", 318 | "# Usually you want to pass over the training data several times.\n", 319 | "# 100 is much bigger than on a real data set, but real datasets have more than\n", 320 | "# two instances. Usually, somewhere between 5 and 30 epochs is reasonable.\n", 321 | "for epoch in range(100):\n", 322 | " for instance, label in data:\n", 323 | " # 1. Pytorch는 gradients를 누적하기 때문에 항상 초기화해줘야 함\n", 324 | " model.zero_grad()\n", 325 | " \n", 326 | " # 2. 문장을 벡터로 만들어 준 후 autograd.Variable로 wrapping하기\n", 327 | " # target 역시 autograd.Variable로 wrapping\n", 328 | " bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))\n", 329 | " target = autograd.Variable(make_target(label, label_to_ix))\n", 330 | " \n", 331 | " # 3. forward path\n", 332 | " log_probs = model(bow_vec)\n", 333 | " \n", 334 | " # 4. 
loss 계산 후, loss로부터 backward(), 그리고 optimizer.step()\n", 335 | " loss = loss_function(log_probs, target)\n", 336 | " loss.backward()\n", 337 | " optimizer.step()" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 33, 343 | "metadata": {}, 344 | "outputs": [ 345 | { 346 | "name": "stdout", 347 | "output_type": "stream", 348 | "text": [ 349 | "pred : SPANISH && label : SPANISH\n", 350 | "pred : ENGLISH && label : ENGLISH\n" 351 | ] 352 | } 353 | ], 354 | "source": [ 355 | "for instance, label in test_data:\n", 356 | " bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))\n", 357 | " log_probs = model(bow_vec)\n", 358 | " values, indices = torch.max(log_probs,1)\n", 359 | " print('pred : ' ,ix_to_label[list(indices.data.numpy())[0][0]],'&& label : ', label)\n", 360 | " #print(ix_to_label[indice.numpy()[]])" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "metadata": { 367 | "collapsed": true 368 | }, 369 | "outputs": [], 370 | "source": [] 371 | } 372 | ], 373 | "metadata": { 374 | "kernelspec": { 375 | "display_name": "Python 3", 376 | "language": "python", 377 | "name": "python3" 378 | }, 379 | "language_info": { 380 | "codemirror_mode": { 381 | "name": "ipython", 382 | "version": 3 383 | }, 384 | "file_extension": ".py", 385 | "mimetype": "text/x-python", 386 | "name": "python", 387 | "nbconvert_exporter": "python", 388 | "pygments_lexer": "ipython3", 389 | "version": "3.5.2" 390 | } 391 | }, 392 | "nbformat": 4, 393 | "nbformat_minor": 2 394 | } 395 | -------------------------------------------------------------------------------- /deepnlp/02_DL_FOR_NLP_NGRAM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Word Embedding" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/plain": [ 18 | "" 19 | ] 20 | }, 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "output_type": "execute_result" 24 | } 25 | ], 26 | "source": [ 27 | "import torch\n", 28 | "import torch.autograd as autograd\n", 29 | "import torch.nn as nn\n", 30 | "import torch.nn.functional as F\n", 31 | "import torch.optim as optim\n", 32 | "\n", 33 | "torch.manual_seed(1)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "### nn.Embedding : # of Vocab -> Dimension" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "Variable containing:\n", 53 | "-2.9718 1.7070 -0.4305 -2.2820 0.5237\n", 54 | "[torch.FloatTensor of size 1x5]\n", 55 | "\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "word_to_ix = { \"hello\": 0, \"world\": 1 }\n", 61 | "embeds = nn.Embedding(2, 5) # 2 words in vocab, 5 dimensional embeddings\n", 62 | "lookup_tensor = torch.LongTensor([word_to_ix[\"hello\"]])\n", 63 | "hello_embed = embeds( autograd.Variable(lookup_tensor) )\n", 64 | "print(hello_embed)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "# N-Gram Language Modeling" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "$$ P(w_i | w_{i-1}, w_{i-2}, \\dots, w_{i-n+1} ) $$" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### 
1. 데이터 준비" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 158, 91 | "metadata": {}, 92 | "outputs": [ 93 | { 94 | "name": "stdout", 95 | "output_type": "stream", 96 | "text": [ 97 | "[(['When', 'forty'], 'winters'), (['forty', 'winters'], 'shall'), (['winters', 'shall'], 'besiege')]\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "CONTEXT_SIZE = 2\n", 103 | "EMBEDDING_DIM = 10\n", 104 | "# We will use Shakespeare Sonnet 2\n", 105 | "test_sentence = \"\"\"When forty winters shall besiege thy brow,\n", 106 | "And dig deep trenches in thy beauty's field,\n", 107 | "Thy youth's proud livery so gazed on now,\n", 108 | "Will be a totter'd weed of small worth held:\n", 109 | "Then being asked, where all thy beauty lies,\n", 110 | "Where all the treasure of thy lusty days;\n", 111 | "To say, within thine own deep sunken eyes,\n", 112 | "Were an all-eating shame, and thriftless praise.\n", 113 | "How much more praise deserv'd thy beauty's use,\n", 114 | "If thou couldst answer 'This fair child of mine\n", 115 | "Shall sum my count, and make my old excuse,'\n", 116 | "Proving his beauty by succession thine!\n", 117 | "This were to be new made when thou art old,\n", 118 | "And see thy blood warm when thou feel'st it cold.\"\"\".split()\n", 119 | "# we should tokenize the input, but we will ignore that for now\n", 120 | "# build a list of tuples. Each tuple is ([ word_i-2, word_i-1 ], target word)\n", 121 | "trigrams = [ ([test_sentence[i], test_sentence[i+1]], test_sentence[i+2]) for i in range(len(test_sentence) - 2) ]\n", 122 | "print(trigrams[:3]) # print the first 3, just so you can see what they look like" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "trigram 즉, 이전 2 단어가 주어지면 그 다음 단어를 예측하는 모델" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 177, 135 | "metadata": { 136 | "collapsed": true 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "vocab = set(test_sentence)\n", 141 | "word_to_ix = { word: i for i, word in enumerate(vocab) }\n", 142 | "ix_to_word = {v:k for k,v in word_to_ix.items()}" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "### 2. 모델링 " 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 160, 155 | "metadata": { 156 | "collapsed": true 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "class NGramLanguageModeler(nn.Module):\n", 161 | " \n", 162 | " # 역시나 부모 클래스 초기화 후,\n", 163 | " # 모델의 모듈을 차곡차곡 선언 후\n", 164 | " def __init__(self, vocab_size, embedding_dim, context_size):\n", 165 | " super(NGramLanguageModeler, self).__init__()\n", 166 | " self.embeddings = nn.Embedding(vocab_size, embedding_dim)\n", 167 | " self.linear1 = nn.Linear(context_size * embedding_dim, 128)\n", 168 | " self.linear2 = nn.Linear(128, vocab_size)\n", 169 | " \n", 170 | " # forward 함수에서 이어준다\n", 171 | " def forward(self, inputs):\n", 172 | " embeds = self.embeddings(inputs).view((1, -1))\n", 173 | " out = F.relu(self.linear1(embeds))\n", 174 | " out = self.linear2(out)\n", 175 | " log_probs = F.log_softmax(out)\n", 176 | " return log_probs" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "### 3. 
트레이닝" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 6, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "data": { 193 | "text/plain": [ 194 | "(['When', 'forty'], 'winters')" 195 | ] 196 | }, 197 | "execution_count": 6, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": [ 203 | "trigrams[0]" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "When forty 다음에 올 단어로 winters" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 166, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "name": "stdout", 220 | "output_type": "stream", 221 | "text": [ 222 | "0\n", 223 | "100\n", 224 | "200\n", 225 | "300\n", 226 | "400\n", 227 | "500\n", 228 | "600\n", 229 | "700\n", 230 | "800\n", 231 | "900\n", 232 | "\n", 233 | " 520.9233\n", 234 | "[torch.FloatTensor of size 1]\n", 235 | " \n", 236 | " 5.3252\n", 237 | "[torch.FloatTensor of size 1]\n", 238 | "\n" 239 | ] 240 | } 241 | ], 242 | "source": [ 243 | "losses = []\n", 244 | "loss_function = nn.NLLLoss() # Negative Log Likelihood\n", 245 | "model = NGramLanguageModeler(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE)\n", 246 | "optimizer = optim.SGD(model.parameters(), lr=0.001)\n", 247 | "\n", 248 | "for epoch in range(1000):\n", 249 | " total_loss = torch.Tensor([0])\n", 250 | " \n", 251 | " if epoch % 100 ==0: print(epoch)\n", 252 | " \n", 253 | " for context, target in trigrams:\n", 254 | " \n", 255 | " # 컨텍스트 워드들을 인덱스로 변환해서 인티저텐서(LongTensor)로 만든 후\n", 256 | " # autograd.Variable로 래핑\n", 257 | " context_idxs = list(map(lambda w: word_to_ix[w], context))\n", 258 | " context_var = autograd.Variable( torch.LongTensor(context_idxs) )\n", 259 | " \n", 260 | " # Torch는 gradient를 누적하기 떄문에 항상 초기화를 해줘야 함\n", 261 | " model.zero_grad()\n", 262 | " \n", 263 | " # forward path\n", 264 | " log_probs = model(context_var)\n", 265 | " \n", 266 | " # 예측값과 레이블값의 loss 계산\n", 267 | " # logits, labels 순서로 넣어준다\n", 268 | "\n", 269 | " loss = loss_function(log_probs, autograd.Variable(torch.LongTensor([word_to_ix[target]])))\n", 270 | " \n", 271 | " # Step 5. Do the backward pass and update the gradient\n", 272 | " loss.backward()\n", 273 | " optimizer.step()\n", 274 | " \n", 275 | " total_loss += loss.data\n", 276 | " losses.append(total_loss)\n", 277 | "print(losses[0],losses[-1]) # The loss decreased every iteration over the training data!\n" 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "로스 줄어든다~" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "### 4. 
테스트" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 167, 297 | "metadata": { 298 | "collapsed": true 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "import random" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 168, 308 | "metadata": { 309 | "collapsed": true 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "test = random.choice(trigrams)\n", 314 | "test_context = list(map(lambda x:word_to_ix[x], test[0]))\n", 315 | "test_input = autograd.Variable(torch.LongTensor(test_context))\n", 316 | "hypothesis = model(test_input)\n", 317 | "v,i = torch.max(hypothesis,1) # argmax " 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 169, 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "name": "stdout", 327 | "output_type": "stream", 328 | "text": [ 329 | "맥란 단어 : beauty by\n", 330 | "예측 단어 : succession\n", 331 | "실제 단어 : succession\n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "pred_ix = i.data.numpy()[0][0]\n", 337 | "print('맥란 단어 : ', *test[0]) # * 붙이면 unpack 된다 \n", 338 | "print('예측 단어 : ',ix_to_word[pred_ix])\n", 339 | "print('실제 단어 : ',test[1])" 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": { 345 | "collapsed": true 346 | }, 347 | "source": [ 348 | "# Continuous Bag-of-Words (CBOW)" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": {}, 354 | "source": [ 355 | "The CBOW model is as follows. Given a target word $w_i$ and an $N$ context window on each side, $w_{i-1}, \\dots, w_{i-N}$ and $w_{i+1}, \\dots, w_{i+N}$, referring to all context words collectively as $C$, CBOW tries to minimize $$ -\\log p(w_i | C) = \\log \\text{Softmax}(A(\\sum_{w \\in C} q_w) + b) $$ where $q_w$ is the embedding for word $w$.\n" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "양 옆에 2개씩 총 4개의 단어들 C가 주어졌을 때, 현재 단어 $w_i$ 를 예측하는 모델" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "### 1. 데이터 준비 " 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 181, 375 | "metadata": {}, 376 | "outputs": [ 377 | { 378 | "name": "stdout", 379 | "output_type": "stream", 380 | "text": [ 381 | "[(['We', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to'), (['about', 'to', 'the', 'idea'], 'study'), (['to', 'study', 'idea', 'of'], 'the'), (['study', 'the', 'of', 'a'], 'idea')]\n" 382 | ] 383 | } 384 | ], 385 | "source": [ 386 | "CONTEXT_SIZE = 2 # 2 words to the left, 2 to the right\n", 387 | "raw_text = \"\"\"We are about to study the idea of a computational process. Computational processes are abstract\n", 388 | "beings that inhabit computers. As they evolve, processes manipulate other abstract\n", 389 | "things called data. The evolution of a process is directed by a pattern of rules\n", 390 | "called a program. People create programs to direct processes. 
In effect,\n", 391 | "we conjure the spirits of the computer with our spells.\"\"\".split()\n", 392 | "word_to_ix = { word: i for i, word in enumerate(set(raw_text)) }\n", 393 | "ix_to_word = {v:k for k,v in word_to_ix.items()}\n", 394 | "data = []\n", 395 | "vocab = set(raw_text)\n", 396 | "for i in range(2, len(raw_text) - 2):\n", 397 | " context = [ raw_text[i-2], raw_text[i-1], raw_text[i+1], raw_text[i+2] ]\n", 398 | " target = raw_text[i]\n", 399 | " data.append( (context, target) )\n", 400 | "print(data[:5])" 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "### 2. 모델링 " 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 182, 413 | "metadata": {}, 414 | "outputs": [ 415 | { 416 | "data": { 417 | "text/plain": [ 418 | "Variable containing:\n", 419 | " 4\n", 420 | " 48\n", 421 | " 23\n", 422 | " 29\n", 423 | "[torch.LongTensor of size 4]" 424 | ] 425 | }, 426 | "execution_count": 182, 427 | "metadata": {}, 428 | "output_type": "execute_result" 429 | } 430 | ], 431 | "source": [ 432 | "# create your model and train. here are some functions to help you make the data ready for use by your module\n", 433 | "def make_context_vector(context, word_to_ix):\n", 434 | " idxs = list(map(lambda w: word_to_ix[w], context))\n", 435 | " tensor = torch.LongTensor(idxs)\n", 436 | " return autograd.Variable(tensor)\n", 437 | "\n", 438 | "make_context_vector(data[0][0], word_to_ix) # example" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": 183, 444 | "metadata": { 445 | "collapsed": true 446 | }, 447 | "outputs": [], 448 | "source": [ 449 | "class CBOW(nn.Module):\n", 450 | " \n", 451 | " def __init__(self, vocab_size,projection_dim):\n", 452 | " super(CBOW,self).__init__()\n", 453 | " self.embeddings = nn.Embedding(vocab_size, projection_dim)\n", 454 | " self.projection = nn.Linear(projection_dim, vocab_size)\n", 455 | "\n", 456 | " def forward(self, inputs):\n", 457 | " embeds = self.embeddings(inputs)\n", 458 | " sum_embeds = torch.sum(embeds,0) # row 기준으로 sum 혹은 average?\n", 459 | " out = self.projection(sum_embeds)\n", 460 | " probs = F.log_softmax(out)\n", 461 | " return probs\n", 462 | " \n", 463 | " def prediction(self, inputs):\n", 464 | " embeds = self.embeddings(inputs)\n", 465 | " \n", 466 | " return embeds" 467 | ] 468 | }, 469 | { 470 | "cell_type": "markdown", 471 | "metadata": {}, 472 | "source": [ 473 | "### 3. 
트레이닝 " 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": 184, 479 | "metadata": { 480 | "collapsed": true 481 | }, 482 | "outputs": [], 483 | "source": [ 484 | "PROJECTION = 10" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": 187, 490 | "metadata": {}, 491 | "outputs": [ 492 | { 493 | "name": "stdout", 494 | "output_type": "stream", 495 | "text": [ 496 | "0\n", 497 | "100\n", 498 | "200\n", 499 | "300\n", 500 | "400\n", 501 | "500\n", 502 | "600\n", 503 | "700\n", 504 | "800\n", 505 | "900\n", 506 | "\n", 507 | " 265.6400\n", 508 | "[torch.FloatTensor of size 1]\n", 509 | " \n", 510 | " 6.2804\n", 511 | "[torch.FloatTensor of size 1]\n", 512 | "\n" 513 | ] 514 | } 515 | ], 516 | "source": [ 517 | "losses = []\n", 518 | "loss_function = nn.NLLLoss() # Negative Log Likelihood\n", 519 | "model = CBOW(len(vocab),PROJECTION)\n", 520 | "optimizer = optim.SGD(model.parameters(), lr=0.001)\n", 521 | "\n", 522 | "for epoch in range(1000):\n", 523 | " total_loss = torch.Tensor([0])\n", 524 | " \n", 525 | " if epoch % 100 ==0: print(epoch)\n", 526 | "\n", 527 | " for context, target in data:\n", 528 | " \n", 529 | " model.zero_grad()\n", 530 | " \n", 531 | " inputs = make_context_vector(context,word_to_ix)\n", 532 | " pred = model(inputs)\n", 533 | " loss = loss_function(pred,autograd.Variable(torch.LongTensor([word_to_ix[target]])))\n", 534 | " \n", 535 | " \n", 536 | " loss.backward()\n", 537 | " optimizer.step()\n", 538 | " \n", 539 | " total_loss += loss.data\n", 540 | " losses.append(total_loss)\n", 541 | "print(losses[0],losses[-1]) " 542 | ] 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | "metadata": {}, 547 | "source": [ 548 | "### 4. 테스트 " 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": 197, 554 | "metadata": { 555 | "collapsed": true 556 | }, 557 | "outputs": [], 558 | "source": [ 559 | "from scipy.spatial.distance import cosine" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": 192, 565 | "metadata": { 566 | "collapsed": true 567 | }, 568 | "outputs": [], 569 | "source": [ 570 | "def transform(word,dic):\n", 571 | " \n", 572 | " return autograd.Variable(torch.LongTensor([dic[word]]))" 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": 264, 578 | "metadata": { 579 | "collapsed": true 580 | }, 581 | "outputs": [], 582 | "source": [ 583 | "def word_analogy(target,vocabs):\n", 584 | " target_idx = word_to_ix[target]\n", 585 | " target_V = model.prediction(transform(target,word_to_ix)).data.numpy()\n", 586 | " nearest_idx = -1\n", 587 | " minimum = 100\n", 588 | " \n", 589 | " for i in range(len(vocabs)):\n", 590 | " if i == target_idx: continue\n", 591 | " \n", 592 | " vector = model.prediction(transform(list(vocabs)[i],word_to_ix)).data.numpy()\n", 593 | " \n", 594 | " temp = cosine(target_V,vector)\n", 595 | " \n", 596 | " if temp < minimum:\n", 597 | " nearest_idx = i\n", 598 | " minimum = temp\n", 599 | " \n", 600 | " return ix_to_word[nearest_idx], minimum" 601 | ] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "execution_count": 269, 606 | "metadata": {}, 607 | "outputs": [ 608 | { 609 | "data": { 610 | "text/plain": [ 611 | "'rules'" 612 | ] 613 | }, 614 | "execution_count": 269, 615 | "metadata": {}, 616 | "output_type": "execute_result" 617 | } 618 | ], 619 | "source": [ 620 | "test = random.choice(list(vocab))\n", 621 | "test" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": 270, 627 | "metadata": {}, 628 | 
"outputs": [ 629 | { 630 | "data": { 631 | "text/plain": [ 632 | "('idea', 0.36502336690142312)" 633 | ] 634 | }, 635 | "execution_count": 270, 636 | "metadata": {}, 637 | "output_type": "execute_result" 638 | } 639 | ], 640 | "source": [ 641 | "word_analogy(test,vocab)" 642 | ] 643 | }, 644 | { 645 | "cell_type": "markdown", 646 | "metadata": {}, 647 | "source": [ 648 | "잘 된건가? 젠장,,,," 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": null, 654 | "metadata": { 655 | "collapsed": true 656 | }, 657 | "outputs": [], 658 | "source": [] 659 | } 660 | ], 661 | "metadata": { 662 | "kernelspec": { 663 | "display_name": "Python 3", 664 | "language": "python", 665 | "name": "python3" 666 | }, 667 | "language_info": { 668 | "codemirror_mode": { 669 | "name": "ipython", 670 | "version": 3 671 | }, 672 | "file_extension": ".py", 673 | "mimetype": "text/x-python", 674 | "name": "python", 675 | "nbconvert_exporter": "python", 676 | "pygments_lexer": "ipython3", 677 | "version": "3.5.2" 678 | } 679 | }, 680 | "nbformat": 4, 681 | "nbformat_minor": 2 682 | } 683 | -------------------------------------------------------------------------------- /deepnlp/05_LSTM_Batch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "" 12 | ] 13 | }, 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "output_type": "execute_result" 17 | } 18 | ], 19 | "source": [ 20 | "import torch\n", 21 | "from torch.autograd import Variable\n", 22 | "import torch.nn as nn\n", 23 | "import torch.nn.functional as F\n", 24 | "import torch.optim as optim\n", 25 | "import json\n", 26 | "import pickle\n", 27 | "import random\n", 28 | "from collections import Counter\n", 29 | "from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence\n", 30 | "\n", 31 | "torch.manual_seed(1)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "# 데이터 " 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "train = json.load(open('../../dataset/NER/NER_16000_train.json'))\n", 50 | "\n", 51 | "training_data=[]\n", 52 | "\n", 53 | "for sent in train:\n", 54 | " word=[]\n", 55 | " tag=[]\n", 56 | " for w,p,t in sent:\n", 57 | " word.append(w)\n", 58 | " tag.append(t)\n", 59 | " training_data.append((word,tag))" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": { 66 | "collapsed": true 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "training_data = [t for t in training_data if len(t[0])!=0]" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 4, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "11196" 82 | ] 83 | }, 84 | "execution_count": 4, 85 | "metadata": {}, 86 | "output_type": "execute_result" 87 | } 88 | ], 89 | "source": [ 90 | "len(training_data)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 5, 96 | "metadata": { 97 | "collapsed": true 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "def prepare_sequence(seq, to_ix):\n", 102 | " idxs = list(map(lambda w: to_ix[w], seq))\n", 103 | " tensor = torch.LongTensor(idxs)\n", 104 | " return Variable(tensor)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | 
"metadata": { 111 | "collapsed": true 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "PAD = \"\"" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "### 시퀀스 길이 분포 파악 " 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 7, 128 | "metadata": { 129 | "collapsed": true 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "Length = [len(t) for t,l in training_data]\n", 134 | "distribution = Counter(Length)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 8, 140 | "metadata": { 141 | "collapsed": true 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "bucket_config = [(5,5),(10,10),(20,20),(30,30)]" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "### 버킷에 나눠 담으면서 동시에 <패딩까지> 나중에는 동적으로 패딩하기 " 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 9, 158 | "metadata": { 159 | "collapsed": true 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "bucket = [[],[],[],[]]" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 10, 169 | "metadata": { 170 | "collapsed": true 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "for tr,label in training_data:\n", 175 | " length = len(tr)\n", 176 | " \n", 177 | " for i in range(len(bucket_config)):\n", 178 | " if bucket_config[i][0] >= length:\n", 179 | " \n", 180 | " while len(tr) < bucket_config[i][0]:\n", 181 | " tr.append(PAD)\n", 182 | " label.append(\"O\")\n", 183 | " bucket[i].append((tr,label))\n", 184 | " break" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 11, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "name": "stdout", 194 | "output_type": "stream", 195 | "text": [ 196 | "3184\n", 197 | "2824\n", 198 | "2568\n", 199 | "998\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "for b in bucket:\n", 205 | " print(len(b))" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 12, 211 | "metadata": { 212 | "collapsed": true 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "def getBatch(bucket,bucket_id,batch_size):\n", 217 | " random.shuffle(bucket[bucket_id])\n", 218 | " train_x=[]\n", 219 | " train_y=[]\n", 220 | " lengths=[]\n", 221 | " for tr,label in bucket[bucket_id][:batch_size]:\n", 222 | " temp = prepare_sequence(tr, word_to_ix)\n", 223 | " temp = temp.view(1,-1)\n", 224 | " train_x.append(temp)\n", 225 | " \n", 226 | " temp2 = prepare_sequence(label,tag_to_ix)\n", 227 | " temp2 = temp2.view(1,-1)\n", 228 | " train_y.append(temp2)\n", 229 | " \n", 230 | " length = [t for t in tr if t !='']\n", 231 | " lengths.append(len(length))\n", 232 | " inputs = torch.cat(train_x)\n", 233 | " targets = torch.cat(train_y)\n", 234 | " \n", 235 | " ### PAD 제외하고 로스 계산 ###\n", 236 | " t_out=[]\n", 237 | " for i in range(len(lengths)):\n", 238 | " t_out.append(targets[i][:lengths[i]])\n", 239 | " \n", 240 | " r_targets = torch.cat(t_out)\n", 241 | " \n", 242 | " del train_x\n", 243 | " del train_y\n", 244 | " del t_out\n", 245 | "\n", 246 | " \n", 247 | " return inputs,r_targets, lengths" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "### word2index, tag2index 딕 준비" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 13, 260 | "metadata": { 261 | "collapsed": true 262 | }, 263 | "outputs": [], 264 | "source": [ 265 | "NER_LIST = ['B-PER','I-PER', 'B-LOC', 'I-LOC', 'B-ORG', 'I-ORG','B-DATE', 
'I-DATE','B-TIME','I-TIME','B-MISC','I-MISC','O']\n", 266 | "\n", 267 | "word_to_ix = {}\n", 268 | "for sentence, tags in training_data:\n", 269 | " for word in sentence:\n", 270 | " if word not in word_to_ix:\n", 271 | " word_to_ix[word] = len(word_to_ix)\n", 272 | "\n", 273 | "ix_to_word = {v:k for k,v in word_to_ix.items()}\n", 274 | "\n", 275 | "tag_to_ix={}\n", 276 | "i=0\n", 277 | "for tag in NER_LIST: \n", 278 | " tag_to_ix[tag] = i\n", 279 | " i+=1\n", 280 | "\n", 281 | "ix_to_tag = {v:k for k,v in tag_to_ix.items()}" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "### Sanity Check" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "일단 가장 쉬운 길이 10개짜리로 고정해 놓고 배치
\n", 296 | "로스 계산 시에도 패딩까지 계산한다... (나중에 실제 길이 알려줘서 그것만 loss 계산하는 법 고민)" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 14, 302 | "metadata": { 303 | "collapsed": true 304 | }, 305 | "outputs": [], 306 | "source": [ 307 | "import random\n", 308 | "\n", 309 | "#bucket_id = random.choice(range(len(bucket_config)))\n", 310 | "bucket_id = 1" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 15, 316 | "metadata": { 317 | "collapsed": true 318 | }, 319 | "outputs": [], 320 | "source": [ 321 | "train_x=[]\n", 322 | "train_y=[]\n", 323 | "for tr,label in bucket[bucket_id]:\n", 324 | " temp = prepare_sequence(tr, word_to_ix)\n", 325 | " temp = temp.view(1,-1)\n", 326 | " train_x.append(temp)\n", 327 | " \n", 328 | " temp2 = prepare_sequence(label,tag_to_ix)\n", 329 | " temp2 = temp2.view(1,-1)\n", 330 | " train_y.append(temp2)" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 16, 336 | "metadata": { 337 | "collapsed": true 338 | }, 339 | "outputs": [], 340 | "source": [ 341 | "INPUT_SIZE = bucket_config[bucket_id][0]\n", 342 | "EMBEDDING_DIM = 100\n", 343 | "HIDDEN_DIM = 100\n", 344 | "BATCH_SIZE= 64\n", 345 | "NUM_LAYERS = 3" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 33, 351 | "metadata": { 352 | "collapsed": true 353 | }, 354 | "outputs": [], 355 | "source": [ 356 | "class RNN(nn.Module):\n", 357 | " def __init__(self,hidden_size, num_layers, num_classes,vocab_size,embedding_dim):\n", 358 | " super(RNN, self).__init__()\n", 359 | " self.hidden_size = hidden_size\n", 360 | " self.num_layers = num_layers\n", 361 | " self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)\n", 362 | " self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers, batch_first=True)\n", 363 | " self.fc = nn.Linear(hidden_size, num_classes)\n", 364 | "\n", 365 | " \n", 366 | " def forward(self, x,length):\n", 367 | " # Set initial states \n", 368 | " h0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) \n", 369 | " c0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))\n", 370 | " \n", 371 | " embeds = self.word_embeddings(x)\n", 372 | " # Forward propagate RNN\n", 373 | " out, _ = self.lstm(embeds, (h0, c0)) \n", 374 | " \n", 375 | " # batch_size, input_length, hidden_size\n", 376 | " \n", 377 | "\n", 378 | " ### PAD 제외하고 로스 계산 ### \n", 379 | " t_out=[]\n", 380 | " for i in range(len(length)): # len(length) = batch_size\n", 381 | " t_out.append(out[i][:length[i]]) # 실제 길이만 담기\n", 382 | " \n", 383 | " outwithoutpad = torch.cat(t_out) # row-wise concat\n", 384 | " del t_out\n", 385 | " \n", 386 | " tag_space = self.fc(outwithoutpad) \n", 387 | " tag_scores = F.log_softmax(tag_space)\n", 388 | " \n", 389 | " \n", 390 | " return tag_scores" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 34, 396 | "metadata": { 397 | "collapsed": true 398 | }, 399 | "outputs": [], 400 | "source": [ 401 | "model = RNN(HIDDEN_DIM, NUM_LAYERS,len(tag_to_ix),len(word_to_ix),EMBEDDING_DIM)\n", 402 | "loss_function = nn.CrossEntropyLoss()\n", 403 | "optimizer = optim.Adam(model.parameters(), lr=0.001)" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 35, 409 | "metadata": { 410 | "collapsed": true 411 | }, 412 | "outputs": [], 413 | "source": [ 414 | "x,y,l=getBatch(bucket,1,BATCH_SIZE)" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 36, 420 | "metadata": { 421 | "collapsed": true 422 | }, 423 | 
"outputs": [], 424 | "source": [ 425 | "o = model(x,l)" 426 | ] 427 | }, 428 | { 429 | "cell_type": "markdown", 430 | "metadata": {}, 431 | "source": [ 432 | "### 버킷이랑 같이 쓰는 모델 " 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": 22, 438 | "metadata": { 439 | "collapsed": true 440 | }, 441 | "outputs": [], 442 | "source": [ 443 | "class BUCKETRNN(nn.Module):\n", 444 | " \n", 445 | " def __init__(self,bucket_config,hidden_size, num_layers, num_classes,vocab_size,embedding_dim):\n", 446 | " self.models={}\n", 447 | " self.optims={}\n", 448 | " self.bucket_config=bucket_config\n", 449 | " for i in range(len(self.bucket_config)):\n", 450 | " self.models[i] = RNN(hidden_size, num_layers, num_classes,vocab_size,embedding_dim)\n", 451 | " self.optims[i] = optim.Adam(self.models[i].parameters(), lr=0.001)\n", 452 | " \n", 453 | " \n", 454 | " def select_bucket(self):\n", 455 | " bucket_id = random.choice(range(len(bucket_config)))\n", 456 | " \n", 457 | " return bucket_id\n", 458 | " \n", 459 | " " 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 23, 465 | "metadata": { 466 | "collapsed": true 467 | }, 468 | "outputs": [], 469 | "source": [ 470 | "bucket_model = BUCKETRNN(bucket_config,HIDDEN_DIM, NUM_LAYERS,len(tag_to_ix),len(word_to_ix),EMBEDDING_DIM)\n", 471 | "loss_function = nn.CrossEntropyLoss()" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": 24, 477 | "metadata": { 478 | "collapsed": true 479 | }, 480 | "outputs": [], 481 | "source": [ 482 | "losses=[]" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 25, 488 | "metadata": {}, 489 | "outputs": [ 490 | { 491 | "name": "stdout", 492 | "output_type": "stream", 493 | "text": [ 494 | "[0] loss : 2.5338616371154785 , bucket : 0\n", 495 | "[100] loss : 1.021700382232666 , bucket : 3\n", 496 | "[200] loss : 0.46864187717437744 , bucket : 0\n", 497 | "[300] loss : 0.5949462652206421 , bucket : 1\n", 498 | "[400] loss : 0.9982641339302063 , bucket : 2\n", 499 | "[500] loss : 0.8244224786758423 , bucket : 2\n", 500 | "[600] loss : 0.6691949367523193 , bucket : 3\n", 501 | "[700] loss : 0.5334180593490601 , bucket : 1\n", 502 | "[800] loss : 0.3589295446872711 , bucket : 0\n", 503 | "[900] loss : 0.6886817216873169 , bucket : 3\n", 504 | "[1000] loss : 0.41534432768821716 , bucket : 2\n", 505 | "[1100] loss : 0.49127447605133057 , bucket : 3\n", 506 | "[1200] loss : 0.3872307240962982 , bucket : 2\n", 507 | "[1300] loss : 0.47533732652664185 , bucket : 2\n", 508 | "[1400] loss : 0.4393002986907959 , bucket : 3\n", 509 | "[1500] loss : 0.3600185215473175 , bucket : 3\n", 510 | "[1600] loss : 0.4524793326854706 , bucket : 2\n", 511 | "[1700] loss : 0.09196716547012329 , bucket : 0\n", 512 | "[1800] loss : 0.13422846794128418 , bucket : 0\n", 513 | "[1900] loss : 0.3615540564060211 , bucket : 2\n", 514 | "[2000] loss : 0.13525305688381195 , bucket : 0\n", 515 | "[2100] loss : 0.3106137812137604 , bucket : 1\n", 516 | "[2200] loss : 0.23308174312114716 , bucket : 3\n", 517 | "[2300] loss : 0.07280982285737991 , bucket : 0\n", 518 | "[2400] loss : 0.25790470838546753 , bucket : 2\n", 519 | "[2500] loss : 0.3075273633003235 , bucket : 2\n", 520 | "[2600] loss : 0.20128652453422546 , bucket : 1\n", 521 | "[2700] loss : 0.267413854598999 , bucket : 2\n", 522 | "[2800] loss : 0.2660099267959595 , bucket : 2\n", 523 | "[2900] loss : 0.2145916223526001 , bucket : 0\n", 524 | "[3000] loss : 0.18937240540981293 , bucket : 1\n", 525 | "[3100] loss : 
0.13038747012615204 , bucket : 0\n", 526 | "[3200] loss : 0.26689308881759644 , bucket : 1\n", 527 | "[3300] loss : 0.16859322786331177 , bucket : 2\n", 528 | "[3400] loss : 0.08819016814231873 , bucket : 3\n", 529 | "[3500] loss : 0.18127848207950592 , bucket : 1\n", 530 | "[3600] loss : 0.1321011483669281 , bucket : 1\n", 531 | "[3700] loss : 0.14424817264080048 , bucket : 1\n", 532 | "[3800] loss : 0.12812256813049316 , bucket : 2\n", 533 | "[3900] loss : 0.12484750151634216 , bucket : 1\n", 534 | "[4000] loss : 0.12390623986721039 , bucket : 1\n", 535 | "[4100] loss : 0.09196265041828156 , bucket : 1\n", 536 | "[4200] loss : 0.12934979796409607 , bucket : 1\n", 537 | "[4300] loss : 0.10172194987535477 , bucket : 1\n", 538 | "[4400] loss : 0.0912589579820633 , bucket : 2\n", 539 | "[4500] loss : 0.061736565083265305 , bucket : 1\n", 540 | "[4600] loss : 0.08378574997186661 , bucket : 1\n", 541 | "[4700] loss : 0.05071571096777916 , bucket : 3\n", 542 | "[4800] loss : 0.11878789961338043 , bucket : 1\n", 543 | "[4900] loss : 0.02820456586778164 , bucket : 3\n" 544 | ] 545 | } 546 | ], 547 | "source": [ 548 | "for epoch in range(5000):\n", 549 | " \n", 550 | " bucket_id = bucket_model.select_bucket()\n", 551 | " inputs, targets,lengths = getBatch(bucket,bucket_id,BATCH_SIZE)\n", 552 | " \n", 553 | " bucket_model.models[bucket_id].zero_grad()\n", 554 | " \n", 555 | " outputs = bucket_model.models[bucket_id](inputs,lengths)\n", 556 | " \n", 557 | " loss = loss_function(outputs,targets)\n", 558 | " losses.append(loss)\n", 559 | " loss.backward()\n", 560 | " bucket_model.optims[bucket_id].step()\n", 561 | " \n", 562 | " if epoch % 100==0:\n", 563 | " print(\"[{epoch}] loss : {loss} , bucket : {bucket_id}\".format(epoch=epoch,loss=loss.data.numpy()[0],bucket_id=bucket_id))" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "### 테스트 " 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": 26, 576 | "metadata": {}, 577 | "outputs": [ 578 | { 579 | "name": "stdout", 580 | "output_type": "stream", 581 | "text": [ 582 | "혹시 강동구 보건소 도 한 번 물어봐 주 세요 \n", 583 | "\n", 584 | "O : O\n", 585 | "B-LOC : B-LOC\n", 586 | "I-LOC : I-LOC\n", 587 | "O : O\n", 588 | "O : O\n", 589 | "O : O\n", 590 | "O : O\n", 591 | "O : O\n", 592 | "O : O\n", 593 | "O : O\n" 594 | ] 595 | } 596 | ], 597 | "source": [ 598 | "test = random.choice(training_data)\n", 599 | "input_ = test[0]\n", 600 | "tag = test[1]\n", 601 | "print(' '.join(input_)+'\\n')\n", 602 | "\n", 603 | "length = len(input_)\n", 604 | "for i in range(len(bucket_config)):\n", 605 | " if bucket_config[i][0] == length:\n", 606 | " bucket_id = i\n", 607 | " break\n", 608 | "\n", 609 | "\n", 610 | "\n", 611 | "sentence_in = prepare_sequence(input_,word_to_ix)\n", 612 | "sentence_in=sentence_in.view(1,-1)\n", 613 | "\n", 614 | "scores = bucket_model.models[bucket_id](sentence_in,[len(input_)])\n", 615 | "v,i = torch.max(scores,1)\n", 616 | "for t in range(i.size()[0]):\n", 617 | " print(tag[t], ' : ', ix_to_tag[i.data.numpy()[t][0]])" 618 | ] 619 | }, 620 | { 621 | "cell_type": "code", 622 | "execution_count": 27, 623 | "metadata": {}, 624 | "outputs": [ 625 | { 626 | "name": "stderr", 627 | "output_type": "stream", 628 | "text": [ 629 | "/home/dsksd/.local/lib/python3.5/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type BUCKETRNN. It won't be checked for correctness upon loading.\n", 630 | " \"type \" + obj.__name__ + \". 
It won't be checked \"\n", 631 | "/home/dsksd/.local/lib/python3.5/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type RNN. It won't be checked for correctness upon loading.\n", 632 | " \"type \" + obj.__name__ + \". It won't be checked \"\n" 633 | ] 634 | } 635 | ], 636 | "source": [ 637 | "torch.save(bucket_model,'NER_model.pkl')" 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": 28, 643 | "metadata": { 644 | "collapsed": true 645 | }, 646 | "outputs": [], 647 | "source": [ 648 | "restore = torch.load('NER_model.pkl')" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 45, 654 | "metadata": {}, 655 | "outputs": [ 656 | { 657 | "name": "stdout", 658 | "output_type": "stream", 659 | "text": [ 660 | "영화 예매 좀 해 줘\n", 661 | "\n", 662 | "O : O\n", 663 | "O : O\n", 664 | "O : O\n", 665 | "O : O\n", 666 | "O : O\n" 667 | ] 668 | } 669 | ], 670 | "source": [ 671 | "test = random.choice(training_data)\n", 672 | "input_ = test[0]\n", 673 | "tag = test[1]\n", 674 | "print(' '.join(input_)+'\\n')\n", 675 | "\n", 676 | "length = len(input_)\n", 677 | "for i in range(len(bucket_config)):\n", 678 | " if bucket_config[i][0] == length:\n", 679 | " bucket_id = i\n", 680 | " break\n", 681 | "\n", 682 | "\n", 683 | "\n", 684 | "sentence_in = prepare_sequence(input_,word_to_ix)\n", 685 | "sentence_in=sentence_in.view(1,-1)\n", 686 | "\n", 687 | "scores = restore.models[bucket_id](sentence_in,[len(input_)])\n", 688 | "v,i = torch.max(scores,1)\n", 689 | "for t in range(i.size()[0]):\n", 690 | " print(tag[t], ' : ', ix_to_tag[i.data.numpy()[t][0]])" 691 | ] 692 | }, 693 | { 694 | "cell_type": "code", 695 | "execution_count": null, 696 | "metadata": { 697 | "collapsed": true 698 | }, 699 | "outputs": [], 700 | "source": [] 701 | } 702 | ], 703 | "metadata": { 704 | "kernelspec": { 705 | "display_name": "Python 3", 706 | "language": "python", 707 | "name": "python3" 708 | }, 709 | "language_info": { 710 | "codemirror_mode": { 711 | "name": "ipython", 712 | "version": 3 713 | }, 714 | "file_extension": ".py", 715 | "mimetype": "text/x-python", 716 | "name": "python", 717 | "nbconvert_exporter": "python", 718 | "pygments_lexer": "ipython3", 719 | "version": "3.5.2" 720 | } 721 | }, 722 | "nbformat": 4, 723 | "nbformat_minor": 2 724 | } 725 | -------------------------------------------------------------------------------- /deepnlp/06_Seq2Seq_basic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "" 12 | ] 13 | }, 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "output_type": "execute_result" 17 | } 18 | ], 19 | "source": [ 20 | "import torch\n", 21 | "from torch.autograd import Variable\n", 22 | "import torch.nn as nn\n", 23 | "import torch.nn.functional as F\n", 24 | "import torch.optim as optim\n", 25 | "import json\n", 26 | "import pickle\n", 27 | "import random\n", 28 | "import time\n", 29 | "import math\n", 30 | "import numpy as np\n", 31 | "from konlpy.tag import Mecab;tagger=Mecab()\n", 32 | "from collections import Counter\n", 33 | "from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence\n", 34 | "\n", 35 | "torch.manual_seed(1)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | 
"source": [ 46 | "USE_CUDA = False" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "# 데이터 " 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "일단 최대 길이 (10,10)으로 고정하고 PAD & Batch" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "SEQ_LENGTH=10\n", 72 | "SOS_token = 0\n", 73 | "EOS_token = 1" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 4, 79 | "metadata": { 80 | "collapsed": true 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "data = open('../../dataset/corpus/dsksd_chat.txt').readlines()\n", 85 | "data = [[t.split('\\\\t')[0],t.split('\\\\t')[1][:-1]] for t in data if t !='\\n']" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 5, 91 | "metadata": {}, 92 | "outputs": [ 93 | { 94 | "data": { 95 | "text/plain": [ 96 | "153" 97 | ] 98 | }, 99 | "execution_count": 5, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "DATA_SIZE = len(data) # 배치 사이즈\n", 106 | "DATA_SIZE" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "### 전처리 " 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "1. 형태소 분석\n", 121 | "2. 최대 길이 10보다 긴 것들 10으로 제한\n", 122 | "3. EOS 태그 달기\n", 123 | "4. 길이 10이 안되는 것들 PADDING\n", 124 | "5. [[Q,A]...] " 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 6, 130 | "metadata": { 131 | "collapsed": true 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "train=[]" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 7, 141 | "metadata": { 142 | "collapsed": true 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "for t0,t1 in data:\n", 147 | " token0 = tagger.morphs(t0)\n", 148 | " \n", 149 | " if len(token0)>=SEQ_LENGTH:\n", 150 | " token0= token0[:SEQ_LENGTH-1]\n", 151 | " token0.append(\"EOS\")\n", 152 | "\n", 153 | " token1 = tagger.morphs(t1)\n", 154 | " if len(token1)>=SEQ_LENGTH:\n", 155 | " token1=token1[:SEQ_LENGTH-1]\n", 156 | " \n", 157 | " token1.append(\"EOS\")\n", 158 | " while len(token0)