├── .gitignore
├── LICENSE.md
├── 1-1_Virtualenv.txt
├── README.md
├── 1-2_Basic_Autograd.ipynb
├── 3-5_MNIST_MLP_Dropout_ReLU_BN_HE_Adam.ipynb
├── 5_NLP_v2.ipynb
├── 3-1_MNIST_MLP_Dropout.ipynb
├── 3-2_MNIST_MLP_Dropout_ReLU.ipynb
└── 3-3_MNIST_MLP_Dropout_ReLU_BN.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints/
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Justin
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/1-1_Virtualenv.txt:
--------------------------------------------------------------------------------
1 | # Python Virtualenv - Windows
2 | - pip install virtualenv
3 | - virtualenv [name of virtualenv]
4 | - virtualenv [name of virtualenv] --python=3.6
5 | - call [name of virtualenv]\Scripts\activate
6 | - deactivate
7 |
8 | # Python Virtualenv - Linux
9 | - pip install virtualenv
10 | - virtualenv [name of virtualenv]
11 | - virtualenv [name of virtualenv] --python=3.6
12 | - source [name of virtualenv]/bin/activate
13 | - deactivate
14 |
15 | # Anaconda Virtualenv - Windows
16 | - conda create -n [name of virtualenv]
17 | - conda create -n [name of virtualenv] pandas pytorch
18 | - conda create -n [name of virtualenv] python=3.6
19 | - activate [name of virtualenv]
20 | - deactivate
21 |
22 | # Anaconda Virtualenv - Linux
23 | - conda create -n [name of virtualenv]
24 | - conda create -n [name of virtualenv] pandas pytorch
25 | - conda create -n [name of virtualenv] python=3.6
26 | - source activate [name of virtualenv]
27 | - source deactivate
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | > 현재, 출판된 책 내용의 **일부 소스 코드 속 콜론(:)** 이 누락되었습니다.
2 | > 확인 결과, 편집 과정 중 발생한 오류였으며 다음번 인쇄에 수정 예정입니다.
3 | > 독자분들께서는 책 속 코드를 직접 타이핑 하시기보단 현재 저장소에 저장된 코드를 참고하시길 바랍니다.
4 | > 불편을 드려 대단히 죄송합니다.
5 |
6 | # DeepLearning101
7 | - Python, Deep Learning, PyTorch
8 |
9 |
10 |
11 |
12 | # Authors
13 | ### 이경택
14 | - e-mail : lgt5512@naver.com
15 | - github : https://github.com/LeeGyeongTak
16 | - blog : https://bluediary8.tistory.com
17 |
18 | ### 방성수
19 | - e-mail : twilight057@gmail.com
20 | - github : https://github.com/8a496b
21 |
22 | ### 안상준
23 | - e-mail : justin_a@yonsei.ac.kr
24 | - github : https://github.com/Justin-A
25 |
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 
32 |
--------------------------------------------------------------------------------
/1-2_Basic_Autograd.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import torch"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "# Scalar"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "name": "stdout",
26 | "output_type": "stream",
27 | "text": [
28 | "tensor([1.])\n"
29 | ]
30 | }
31 | ],
32 | "source": [
33 | "scalar1 = torch.tensor([1.])\n",
34 | "print(scalar1)"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 3,
40 | "metadata": {},
41 | "outputs": [
42 | {
43 | "name": "stdout",
44 | "output_type": "stream",
45 | "text": [
46 | "tensor([3.])\n"
47 | ]
48 | }
49 | ],
50 | "source": [
51 | "scalar2 = torch.tensor([3.]) \n",
52 | "print(scalar2)"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 4,
58 | "metadata": {},
59 | "outputs": [
60 | {
61 | "name": "stdout",
62 | "output_type": "stream",
63 | "text": [
64 | "tensor([4.])\n"
65 | ]
66 | }
67 | ],
68 | "source": [
69 | "add_scalar = scalar1 + scalar2\n",
70 | "print(add_scalar)"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 5,
76 | "metadata": {},
77 | "outputs": [
78 | {
79 | "name": "stdout",
80 | "output_type": "stream",
81 | "text": [
82 | "tensor([-2.])\n"
83 | ]
84 | }
85 | ],
86 | "source": [
87 | "sub_scalar = scalar1 - scalar2\n",
88 | "print(sub_scalar)"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 6,
94 | "metadata": {},
95 | "outputs": [
96 | {
97 | "name": "stdout",
98 | "output_type": "stream",
99 | "text": [
100 | "tensor([3.])\n"
101 | ]
102 | }
103 | ],
104 | "source": [
105 | "mul_scalar = scalar1 * scalar2\n",
106 | "print(mul_scalar)"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 7,
112 | "metadata": {},
113 | "outputs": [
114 | {
115 | "name": "stdout",
116 | "output_type": "stream",
117 | "text": [
118 | "tensor([0.3333])\n"
119 | ]
120 | }
121 | ],
122 | "source": [
123 | "div_scalar = scalar1 / scalar2\n",
124 | "print(div_scalar)"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 8,
130 | "metadata": {},
131 | "outputs": [
132 | {
133 | "data": {
134 | "text/plain": [
135 | "tensor([4.])"
136 | ]
137 | },
138 | "execution_count": 8,
139 | "metadata": {},
140 | "output_type": "execute_result"
141 | }
142 | ],
143 | "source": [
144 | "torch.add(scalar1, scalar2)"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 9,
150 | "metadata": {},
151 | "outputs": [
152 | {
153 | "data": {
154 | "text/plain": [
155 | "tensor([-2.])"
156 | ]
157 | },
158 | "execution_count": 9,
159 | "metadata": {},
160 | "output_type": "execute_result"
161 | }
162 | ],
163 | "source": [
164 | "torch.sub(scalar1, scalar2)"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": 10,
170 | "metadata": {},
171 | "outputs": [
172 | {
173 | "data": {
174 | "text/plain": [
175 | "tensor([3.])"
176 | ]
177 | },
178 | "execution_count": 10,
179 | "metadata": {},
180 | "output_type": "execute_result"
181 | }
182 | ],
183 | "source": [
184 | "torch.mul(scalar1, scalar2)"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": 11,
190 | "metadata": {},
191 | "outputs": [
192 | {
193 | "data": {
194 | "text/plain": [
195 | "tensor([0.3333])"
196 | ]
197 | },
198 | "execution_count": 11,
199 | "metadata": {},
200 | "output_type": "execute_result"
201 | }
202 | ],
203 | "source": [
204 | "torch.div(scalar1, scalar2)"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {},
210 | "source": [
211 | "# Vector"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 12,
217 | "metadata": {},
218 | "outputs": [
219 | {
220 | "name": "stdout",
221 | "output_type": "stream",
222 | "text": [
223 | "tensor([1., 2., 3.])\n"
224 | ]
225 | }
226 | ],
227 | "source": [
228 | "vector1 = torch.tensor([1., 2., 3.])\n",
229 | "print(vector1)"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 13,
235 | "metadata": {},
236 | "outputs": [
237 | {
238 | "name": "stdout",
239 | "output_type": "stream",
240 | "text": [
241 | "tensor([4., 5., 6.])\n"
242 | ]
243 | }
244 | ],
245 | "source": [
246 | "vector2 = torch.tensor([4., 5., 6.])\n",
247 | "print(vector2)"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": 14,
253 | "metadata": {},
254 | "outputs": [
255 | {
256 | "name": "stdout",
257 | "output_type": "stream",
258 | "text": [
259 | "tensor([5., 7., 9.])\n"
260 | ]
261 | }
262 | ],
263 | "source": [
264 | "add_vector = vector1 + vector2\n",
265 | "print(add_vector)"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": 15,
271 | "metadata": {},
272 | "outputs": [
273 | {
274 | "name": "stdout",
275 | "output_type": "stream",
276 | "text": [
277 | "tensor([-3., -3., -3.])\n"
278 | ]
279 | }
280 | ],
281 | "source": [
282 | "sub_vector = vector1 - vector2\n",
283 | "print(sub_vector)"
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "execution_count": 16,
289 | "metadata": {},
290 | "outputs": [
291 | {
292 | "name": "stdout",
293 | "output_type": "stream",
294 | "text": [
295 | "tensor([ 4., 10., 18.])\n"
296 | ]
297 | }
298 | ],
299 | "source": [
300 | "mul_vector = vector1 * vector2\n",
301 | "print(mul_vector)"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 17,
307 | "metadata": {},
308 | "outputs": [
309 | {
310 | "name": "stdout",
311 | "output_type": "stream",
312 | "text": [
313 | "tensor([0.2500, 0.4000, 0.5000])\n"
314 | ]
315 | }
316 | ],
317 | "source": [
318 | "div_vector = vector1 / vector2\n",
319 | "print(div_vector)"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": 18,
325 | "metadata": {},
326 | "outputs": [
327 | {
328 | "data": {
329 | "text/plain": [
330 | "tensor([5., 7., 9.])"
331 | ]
332 | },
333 | "execution_count": 18,
334 | "metadata": {},
335 | "output_type": "execute_result"
336 | }
337 | ],
338 | "source": [
339 | "torch.add(vector1, vector2)"
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": 19,
345 | "metadata": {},
346 | "outputs": [
347 | {
348 | "data": {
349 | "text/plain": [
350 | "tensor([-3., -3., -3.])"
351 | ]
352 | },
353 | "execution_count": 19,
354 | "metadata": {},
355 | "output_type": "execute_result"
356 | }
357 | ],
358 | "source": [
359 | "torch.sub(vector1, vector2)"
360 | ]
361 | },
362 | {
363 | "cell_type": "code",
364 | "execution_count": 20,
365 | "metadata": {},
366 | "outputs": [
367 | {
368 | "data": {
369 | "text/plain": [
370 | "tensor([ 4., 10., 18.])"
371 | ]
372 | },
373 | "execution_count": 20,
374 | "metadata": {},
375 | "output_type": "execute_result"
376 | }
377 | ],
378 | "source": [
379 | "torch.mul(vector1, vector2)"
380 | ]
381 | },
382 | {
383 | "cell_type": "code",
384 | "execution_count": 21,
385 | "metadata": {},
386 | "outputs": [
387 | {
388 | "data": {
389 | "text/plain": [
390 | "tensor([0.2500, 0.4000, 0.5000])"
391 | ]
392 | },
393 | "execution_count": 21,
394 | "metadata": {},
395 | "output_type": "execute_result"
396 | }
397 | ],
398 | "source": [
399 | "torch.div(vector1, vector2)"
400 | ]
401 | },
402 | {
403 | "cell_type": "code",
404 | "execution_count": 22,
405 | "metadata": {},
406 | "outputs": [
407 | {
408 | "data": {
409 | "text/plain": [
410 | "tensor(32.)"
411 | ]
412 | },
413 | "execution_count": 22,
414 | "metadata": {},
415 | "output_type": "execute_result"
416 | }
417 | ],
418 | "source": [
419 | "torch.dot(vector1, vector2)"
420 | ]
421 | },
422 | {
423 | "cell_type": "markdown",
424 | "metadata": {},
425 | "source": [
426 | "# Matrix"
427 | ]
428 | },
429 | {
430 | "cell_type": "code",
431 | "execution_count": 23,
432 | "metadata": {},
433 | "outputs": [
434 | {
435 | "name": "stdout",
436 | "output_type": "stream",
437 | "text": [
438 | "tensor([[1., 2.],\n",
439 | " [3., 4.]])\n"
440 | ]
441 | }
442 | ],
443 | "source": [
444 | "matrix1 = torch.tensor([[1., 2.], [3., 4.]])\n",
445 | "print(matrix1)"
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": 24,
451 | "metadata": {},
452 | "outputs": [
453 | {
454 | "name": "stdout",
455 | "output_type": "stream",
456 | "text": [
457 | "tensor([[5., 6.],\n",
458 | " [7., 8.]])\n"
459 | ]
460 | }
461 | ],
462 | "source": [
463 | "matrix2 = torch.tensor([[5., 6.], [7., 8.]])\n",
464 | "print(matrix2)"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 25,
470 | "metadata": {},
471 | "outputs": [
472 | {
473 | "name": "stdout",
474 | "output_type": "stream",
475 | "text": [
476 | "tensor([[ 6., 8.],\n",
477 | " [10., 12.]])\n"
478 | ]
479 | }
480 | ],
481 | "source": [
482 | "sum_matrix = matrix1 + matrix2\n",
483 | "print(sum_matrix)"
484 | ]
485 | },
486 | {
487 | "cell_type": "code",
488 | "execution_count": 26,
489 | "metadata": {},
490 | "outputs": [
491 | {
492 | "name": "stdout",
493 | "output_type": "stream",
494 | "text": [
495 | "tensor([[-4., -4.],\n",
496 | " [-4., -4.]])\n"
497 | ]
498 | }
499 | ],
500 | "source": [
501 | "sub_matrix = matrix1 - matrix2\n",
502 | "print(sub_matrix)"
503 | ]
504 | },
505 | {
506 | "cell_type": "code",
507 | "execution_count": 27,
508 | "metadata": {},
509 | "outputs": [
510 | {
511 | "name": "stdout",
512 | "output_type": "stream",
513 | "text": [
514 | "tensor([[ 5., 12.],\n",
515 | " [21., 32.]])\n"
516 | ]
517 | }
518 | ],
519 | "source": [
520 | "mul_matrix = matrix1 * matrix2\n",
521 | "print(mul_matrix)"
522 | ]
523 | },
524 | {
525 | "cell_type": "code",
526 | "execution_count": 28,
527 | "metadata": {},
528 | "outputs": [
529 | {
530 | "name": "stdout",
531 | "output_type": "stream",
532 | "text": [
533 | "tensor([[0.2000, 0.3333],\n",
534 | " [0.4286, 0.5000]])\n"
535 | ]
536 | }
537 | ],
538 | "source": [
539 | "div_matrix = matrix1 / matrix2\n",
540 | "print(div_matrix)"
541 | ]
542 | },
543 | {
544 | "cell_type": "code",
545 | "execution_count": 29,
546 | "metadata": {},
547 | "outputs": [
548 | {
549 | "data": {
550 | "text/plain": [
551 | "tensor([[ 6., 8.],\n",
552 | " [10., 12.]])"
553 | ]
554 | },
555 | "execution_count": 29,
556 | "metadata": {},
557 | "output_type": "execute_result"
558 | }
559 | ],
560 | "source": [
561 | "torch.add(matrix1, matrix2)"
562 | ]
563 | },
564 | {
565 | "cell_type": "code",
566 | "execution_count": 30,
567 | "metadata": {},
568 | "outputs": [
569 | {
570 | "data": {
571 | "text/plain": [
572 | "tensor([[-4., -4.],\n",
573 | " [-4., -4.]])"
574 | ]
575 | },
576 | "execution_count": 30,
577 | "metadata": {},
578 | "output_type": "execute_result"
579 | }
580 | ],
581 | "source": [
582 | "torch.sub(matrix1, matrix2)"
583 | ]
584 | },
585 | {
586 | "cell_type": "code",
587 | "execution_count": 31,
588 | "metadata": {},
589 | "outputs": [
590 | {
591 | "data": {
592 | "text/plain": [
593 | "tensor([[ 5., 12.],\n",
594 | " [21., 32.]])"
595 | ]
596 | },
597 | "execution_count": 31,
598 | "metadata": {},
599 | "output_type": "execute_result"
600 | }
601 | ],
602 | "source": [
603 | "torch.mul(matrix1, matrix2)"
604 | ]
605 | },
606 | {
607 | "cell_type": "code",
608 | "execution_count": 32,
609 | "metadata": {},
610 | "outputs": [
611 | {
612 | "data": {
613 | "text/plain": [
614 | "tensor([[0.2000, 0.3333],\n",
615 | " [0.4286, 0.5000]])"
616 | ]
617 | },
618 | "execution_count": 32,
619 | "metadata": {},
620 | "output_type": "execute_result"
621 | }
622 | ],
623 | "source": [
624 | "torch.div(matrix1, matrix2)"
625 | ]
626 | },
627 | {
628 | "cell_type": "code",
629 | "execution_count": 33,
630 | "metadata": {},
631 | "outputs": [
632 | {
633 | "data": {
634 | "text/plain": [
635 | "tensor([[19., 22.],\n",
636 | " [43., 50.]])"
637 | ]
638 | },
639 | "execution_count": 33,
640 | "metadata": {},
641 | "output_type": "execute_result"
642 | }
643 | ],
644 | "source": [
645 | "torch.matmul(matrix1, matrix2)"
646 | ]
647 | },
648 | {
649 | "cell_type": "markdown",
650 | "metadata": {},
651 | "source": [
652 | "# Tensor"
653 | ]
654 | },
655 | {
656 | "cell_type": "code",
657 | "execution_count": 34,
658 | "metadata": {},
659 | "outputs": [
660 | {
661 | "name": "stdout",
662 | "output_type": "stream",
663 | "text": [
664 | "tensor([[[1., 2.],\n",
665 | " [3., 4.]],\n",
666 | "\n",
667 | " [[5., 6.],\n",
668 | " [7., 8.]]])\n"
669 | ]
670 | }
671 | ],
672 | "source": [
673 | "tensor1 = torch.tensor([[[1., 2.], [3., 4.]], [[5., 6.], [7., 8.]]])\n",
674 | "print(tensor1)"
675 | ]
676 | },
677 | {
678 | "cell_type": "code",
679 | "execution_count": 35,
680 | "metadata": {},
681 | "outputs": [
682 | {
683 | "name": "stdout",
684 | "output_type": "stream",
685 | "text": [
686 | "tensor([[[ 9., 10.],\n",
687 | " [11., 12.]],\n",
688 | "\n",
689 | " [[13., 14.],\n",
690 | " [15., 16.]]])\n"
691 | ]
692 | }
693 | ],
694 | "source": [
695 | "tensor2 = torch.tensor([[[9., 10.], [11., 12.]], [[13., 14.], [15., 16.]]])\n",
696 | "print(tensor2)"
697 | ]
698 | },
699 | {
700 | "cell_type": "code",
701 | "execution_count": 36,
702 | "metadata": {},
703 | "outputs": [
704 | {
705 | "name": "stdout",
706 | "output_type": "stream",
707 | "text": [
708 | "tensor([[[10., 12.],\n",
709 | " [14., 16.]],\n",
710 | "\n",
711 | " [[18., 20.],\n",
712 | " [22., 24.]]])\n"
713 | ]
714 | }
715 | ],
716 | "source": [
717 | "sum_tensor = tensor1 + tensor2\n",
718 | "print(sum_tensor)"
719 | ]
720 | },
721 | {
722 | "cell_type": "code",
723 | "execution_count": 37,
724 | "metadata": {},
725 | "outputs": [
726 | {
727 | "name": "stdout",
728 | "output_type": "stream",
729 | "text": [
730 | "tensor([[[-8., -8.],\n",
731 | " [-8., -8.]],\n",
732 | "\n",
733 | " [[-8., -8.],\n",
734 | " [-8., -8.]]])\n"
735 | ]
736 | }
737 | ],
738 | "source": [
739 | "sub_tensor = tensor1 - tensor2\n",
740 | "print(sub_tensor)"
741 | ]
742 | },
743 | {
744 | "cell_type": "code",
745 | "execution_count": 38,
746 | "metadata": {},
747 | "outputs": [
748 | {
749 | "name": "stdout",
750 | "output_type": "stream",
751 | "text": [
752 | "tensor([[[ 9., 20.],\n",
753 | " [ 33., 48.]],\n",
754 | "\n",
755 | " [[ 65., 84.],\n",
756 | " [105., 128.]]])\n"
757 | ]
758 | }
759 | ],
760 | "source": [
761 | "mul_tensor = tensor1 * tensor2\n",
762 | "print(mul_tensor)"
763 | ]
764 | },
765 | {
766 | "cell_type": "code",
767 | "execution_count": 39,
768 | "metadata": {},
769 | "outputs": [
770 | {
771 | "name": "stdout",
772 | "output_type": "stream",
773 | "text": [
774 | "tensor([[[0.1111, 0.2000],\n",
775 | " [0.2727, 0.3333]],\n",
776 | "\n",
777 | " [[0.3846, 0.4286],\n",
778 | " [0.4667, 0.5000]]])\n"
779 | ]
780 | }
781 | ],
782 | "source": [
783 | "div_tensor = tensor1 / tensor2\n",
784 | "print(div_tensor)"
785 | ]
786 | },
787 | {
788 | "cell_type": "code",
789 | "execution_count": 40,
790 | "metadata": {},
791 | "outputs": [
792 | {
793 | "data": {
794 | "text/plain": [
795 | "tensor([[[ 31., 34.],\n",
796 | " [ 71., 78.]],\n",
797 | "\n",
798 | " [[155., 166.],\n",
799 | " [211., 226.]]])"
800 | ]
801 | },
802 | "execution_count": 40,
803 | "metadata": {},
804 | "output_type": "execute_result"
805 | }
806 | ],
807 | "source": [
808 | "torch.matmul(tensor1, tensor2)"
809 | ]
810 | },
811 | {
812 | "cell_type": "code",
813 | "execution_count": 41,
814 | "metadata": {},
815 | "outputs": [
816 | {
817 | "data": {
818 | "text/plain": [
819 | "tensor([[[10., 12.],\n",
820 | " [14., 16.]],\n",
821 | "\n",
822 | " [[18., 20.],\n",
823 | " [22., 24.]]])"
824 | ]
825 | },
826 | "execution_count": 41,
827 | "metadata": {},
828 | "output_type": "execute_result"
829 | }
830 | ],
831 | "source": [
832 | "torch.add(tensor1, tensor2)"
833 | ]
834 | },
835 | {
836 | "cell_type": "code",
837 | "execution_count": 42,
838 | "metadata": {},
839 | "outputs": [
840 | {
841 | "data": {
842 | "text/plain": [
843 | "tensor([[[-8., -8.],\n",
844 | " [-8., -8.]],\n",
845 | "\n",
846 | " [[-8., -8.],\n",
847 | " [-8., -8.]]])"
848 | ]
849 | },
850 | "execution_count": 42,
851 | "metadata": {},
852 | "output_type": "execute_result"
853 | }
854 | ],
855 | "source": [
856 | "torch.sub(tensor1, tensor2)"
857 | ]
858 | },
859 | {
860 | "cell_type": "code",
861 | "execution_count": 43,
862 | "metadata": {},
863 | "outputs": [
864 | {
865 | "data": {
866 | "text/plain": [
867 | "tensor([[[ 9., 20.],\n",
868 | " [ 33., 48.]],\n",
869 | "\n",
870 | " [[ 65., 84.],\n",
871 | " [105., 128.]]])"
872 | ]
873 | },
874 | "execution_count": 43,
875 | "metadata": {},
876 | "output_type": "execute_result"
877 | }
878 | ],
879 | "source": [
880 | "torch.mul(tensor1, tensor2)"
881 | ]
882 | },
883 | {
884 | "cell_type": "code",
885 | "execution_count": 44,
886 | "metadata": {},
887 | "outputs": [
888 | {
889 | "data": {
890 | "text/plain": [
891 | "tensor([[[0.1111, 0.2000],\n",
892 | " [0.2727, 0.3333]],\n",
893 | "\n",
894 | " [[0.3846, 0.4286],\n",
895 | " [0.4667, 0.5000]]])"
896 | ]
897 | },
898 | "execution_count": 44,
899 | "metadata": {},
900 | "output_type": "execute_result"
901 | }
902 | ],
903 | "source": [
904 | "torch.div(tensor1, tensor2)"
905 | ]
906 | },
907 | {
908 | "cell_type": "code",
909 | "execution_count": 45,
910 | "metadata": {},
911 | "outputs": [
912 | {
913 | "data": {
914 | "text/plain": [
915 | "tensor([[[1., 2.],\n",
916 | " [3., 4.]],\n",
917 | "\n",
918 | " [[5., 6.],\n",
919 | " [7., 8.]]])"
920 | ]
921 | },
922 | "execution_count": 45,
923 | "metadata": {},
924 | "output_type": "execute_result"
925 | }
926 | ],
927 | "source": [
928 | "tensor1"
929 | ]
930 | },
931 | {
932 | "cell_type": "code",
933 | "execution_count": 46,
934 | "metadata": {},
935 | "outputs": [
936 | {
937 | "data": {
938 | "text/plain": [
939 | "tensor([[[ 9., 10.],\n",
940 | " [11., 12.]],\n",
941 | "\n",
942 | " [[13., 14.],\n",
943 | " [15., 16.]]])"
944 | ]
945 | },
946 | "execution_count": 46,
947 | "metadata": {},
948 | "output_type": "execute_result"
949 | }
950 | ],
951 | "source": [
952 | "tensor2"
953 | ]
954 | },
955 | {
956 | "cell_type": "code",
957 | "execution_count": 47,
958 | "metadata": {},
959 | "outputs": [
960 | {
961 | "data": {
962 | "text/plain": [
963 | "tensor([[[ 31., 34.],\n",
964 | " [ 71., 78.]],\n",
965 | "\n",
966 | " [[155., 166.],\n",
967 | " [211., 226.]]])"
968 | ]
969 | },
970 | "execution_count": 47,
971 | "metadata": {},
972 | "output_type": "execute_result"
973 | }
974 | ],
975 | "source": [
976 | "torch.matmul(tensor1, tensor2)"
977 | ]
978 | },
979 | {
980 | "cell_type": "code",
981 | "execution_count": 48,
982 | "metadata": {},
983 | "outputs": [
984 | {
985 | "name": "stdout",
986 | "output_type": "stream",
987 | "text": [
988 | "Iteration: 100 \t Loss: 527.76318359375\n",
989 | "Iteration: 200 \t Loss: 3.209841728210449\n",
990 | "Iteration: 300 \t Loss: 0.03574322536587715\n",
991 | "Iteration: 400 \t Loss: 0.0007254641968756914\n",
992 | "Iteration: 500 \t Loss: 8.230483217630535e-05\n"
993 | ]
994 | }
995 | ],
996 | "source": [
997 | "import torch\n",
998 | "\n",
999 | "if torch.cuda.is_available():\n",
1000 | " DEVICE = torch.device('cuda')\n",
1001 | "else:\n",
1002 | " DEVICE = torch.device('cpu')\n",
1003 | "\n",
1004 | "BATCH_SIZE = 64\n",
1005 | "INPUT_SIZE = 1000\n",
1006 | "HIDDEN_SIZE = 100\n",
1007 | "OUTPUT_SIZE = 10\n",
1008 | "\n",
1009 | "x = torch.randn(BATCH_SIZE, \n",
1010 | " INPUT_SIZE, \n",
1011 | " device = DEVICE, \n",
1012 | " dtype = torch.float, \n",
1013 | " requires_grad = False) \n",
1014 | "\n",
1015 | "y = torch.randn(BATCH_SIZE, \n",
1016 | " OUTPUT_SIZE, \n",
1017 | " device = DEVICE,\n",
1018 | " dtype = torch.float, \n",
1019 | " requires_grad = False) \n",
1020 | "\n",
1021 | "w1 = torch.randn(INPUT_SIZE, \n",
1022 | " HIDDEN_SIZE, \n",
1023 | " device = DEVICE, \n",
1024 | " dtype = torch.float,\n",
1025 | " requires_grad = True) \n",
1026 | "\n",
1027 | "w2 = torch.randn(HIDDEN_SIZE,\n",
1028 | " OUTPUT_SIZE, \n",
1029 | " device = DEVICE,\n",
1030 | " dtype = torch.float,\n",
1031 | " requires_grad = True) \n",
1032 | "\n",
1033 | "learning_rate = 1e-6 \n",
1034 | "for t in range(1, 501): \n",
1035 | " y_pred = x.mm(w1).clamp(min = 0).mm(w2) \n",
1036 | "\n",
1037 | " loss = (y_pred - y).pow(2).sum() \n",
1038 | " if t % 100 == 0:\n",
1039 | " print(\"Iteration: \", t, \"\\t\", \"Loss: \", loss.item()) \n",
1040 | " loss.backward() \n",
1041 | "\n",
1042 | " with torch.no_grad(): \n",
1043 | " w1 -= learning_rate * w1.grad \n",
1044 | " w2 -= learning_rate * w2.grad \n",
1045 | "\n",
1046 | " w1.grad.zero_() \n",
1047 | " w2.grad.zero_() "
1048 | ]
1049 | },
1050 | {
1051 | "cell_type": "code",
1052 | "execution_count": 49,
1053 | "metadata": {},
1054 | "outputs": [],
1055 | "source": [
1056 | "# Iteration: 100 \t Loss: 976.0719604492188\n",
1057 | "# Iteration: 200 \t Loss: 5.061180114746094\n",
1058 | "# Iteration: 300 \t Loss: 0.04102771729230881\n",
1059 | "# Iteration: 400 \t Loss: 0.0006718397489748895\n",
1060 | "# Iteration: 500 \t Loss: 6.94335249136202e-05"
1061 | ]
1062 | }
1063 | ],
1064 | "metadata": {
1065 | "kernelspec": {
1066 | "display_name": "Python 3",
1067 | "language": "python",
1068 | "name": "python3"
1069 | },
1070 | "language_info": {
1071 | "codemirror_mode": {
1072 | "name": "ipython",
1073 | "version": 3
1074 | },
1075 | "file_extension": ".py",
1076 | "mimetype": "text/x-python",
1077 | "name": "python",
1078 | "nbconvert_exporter": "python",
1079 | "pygments_lexer": "ipython3",
1080 | "version": "3.6.8"
1081 | }
1082 | },
1083 | "nbformat": 4,
1084 | "nbformat_minor": 4
1085 | }
1086 |
--------------------------------------------------------------------------------
/3-5_MNIST_MLP_Dropout_ReLU_BN_HE_Adam.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "''' 1. Module Import '''\n",
10 | "import numpy as np\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "import torch\n",
14 | "import torch.nn as nn\n",
15 | "import torch.nn.functional as F\n",
16 | "from torchvision import transforms, datasets"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "name": "stdout",
26 | "output_type": "stream",
27 | "text": [
28 | "Using PyTorch version: 1.6.0+cu101 Device: cuda\n"
29 | ]
30 | }
31 | ],
32 | "source": [
33 | "''' 2. 딥러닝 모델을 설계할 때 활용하는 장비 확인 '''\n",
34 | "if torch.cuda.is_available():\n",
35 | " DEVICE = torch.device('cuda')\n",
36 | "else:\n",
37 | " DEVICE = torch.device('cpu')\n",
38 | "print('Using PyTorch version:', torch.__version__, ' Device:', DEVICE)"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 3,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "BATCH_SIZE = 32\n",
48 | "EPOCHS = 10"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 4,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "''' 3. MNIST 데이터 다운로드 (Train set, Test set 분리하기) '''\n",
58 | "train_dataset = datasets.MNIST(root = \"../data/MNIST\",\n",
59 | " train = True,\n",
60 | " download = True,\n",
61 | " transform = transforms.ToTensor())\n",
62 | "\n",
63 | "test_dataset = datasets.MNIST(root = \"../data/MNIST\",\n",
64 | " train = False,\n",
65 | " transform = transforms.ToTensor())\n",
66 | "\n",
67 | "train_loader = torch.utils.data.DataLoader(dataset = train_dataset,\n",
68 | " batch_size = BATCH_SIZE,\n",
69 | " shuffle = True)\n",
70 | "\n",
71 | "test_loader = torch.utils.data.DataLoader(dataset = test_dataset,\n",
72 | " batch_size = BATCH_SIZE,\n",
73 | " shuffle = False)"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 5,
79 | "metadata": {},
80 | "outputs": [
81 | {
82 | "name": "stdout",
83 | "output_type": "stream",
84 | "text": [
85 | "X_train: torch.Size([32, 1, 28, 28]) type: torch.FloatTensor\n",
86 | "y_train: torch.Size([32]) type: torch.LongTensor\n"
87 | ]
88 | }
89 | ],
90 | "source": [
91 | "''' 4. 데이터 확인하기 (1) '''\n",
92 | "for (X_train, y_train) in train_loader:\n",
93 | " print('X_train:', X_train.size(), 'type:', X_train.type())\n",
94 | " print('y_train:', y_train.size(), 'type:', y_train.type())\n",
95 | " break"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 6,
101 | "metadata": {},
102 | "outputs": [
103 | {
104 | "data": {
105 | "image/png": "\n",
106 | "text/plain": [
107 | ""
108 | ]
109 | },
110 | "metadata": {
111 | "needs_background": "light"
112 | },
113 | "output_type": "display_data"
114 | }
115 | ],
116 | "source": [
117 | "''' 5. 데이터 확인하기 (2) '''\n",
118 | "pltsize = 1\n",
119 | "plt.figure(figsize=(10 * pltsize, pltsize))\n",
120 | "for i in range(10):\n",
121 | " plt.subplot(1, 10, i + 1)\n",
122 | " plt.axis('off')\n",
123 | " plt.imshow(X_train[i, :, :, :].numpy().reshape(28, 28), cmap = \"gray_r\")\n",
124 | " plt.title('Class: ' + str(y_train[i].item()))"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 7,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "''' 6. Multi Layer Perceptron (MLP) 모델 설계하기 '''\n",
134 | "class Net(nn.Module):\n",
135 | " def __init__(self):\n",
136 | " super(Net, self).__init__()\n",
137 | " self.fc1 = nn.Linear(28 * 28, 512)\n",
138 | " self.fc2 = nn.Linear(512, 256)\n",
139 | " self.fc3 = nn.Linear(256, 10)\n",
140 | " self.dropout_prob = 0.5\n",
141 | " self.batch_norm1 = nn.BatchNorm1d(512)\n",
142 | " self.batch_norm2 = nn.BatchNorm1d(256)\n",
143 | "\n",
144 | " def forward(self, x):\n",
145 | " x = x.view(-1, 28 * 28)\n",
146 | " x = self.fc1(x)\n",
147 | " x = self.batch_norm1(x)\n",
148 | " x = F.relu(x)\n",
149 | " x = F.dropout(x, training = self.training, p = self.dropout_prob)\n",
150 | " x = self.fc2(x)\n",
151 | " x = self.batch_norm2(x)\n",
152 | " x = F.relu(x)\n",
153 | " x = F.dropout(x, training = self.training, p = self.dropout_prob)\n",
154 | " x = self.fc3(x)\n",
155 | " x = F.log_softmax(x, dim = 1)\n",
156 | " return x"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": 8,
162 | "metadata": {},
163 | "outputs": [
164 | {
165 | "name": "stdout",
166 | "output_type": "stream",
167 | "text": [
168 | "Net(\n",
169 | " (fc1): Linear(in_features=784, out_features=512, bias=True)\n",
170 | " (fc2): Linear(in_features=512, out_features=256, bias=True)\n",
171 | " (fc3): Linear(in_features=256, out_features=10, bias=True)\n",
172 | " (batch_norm1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
173 | " (batch_norm2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
174 | ")\n"
175 | ]
176 | }
177 | ],
178 | "source": [
179 | "''' 7. Optimizer, Objective Function 설정하기 '''\n",
180 | "import torch.nn.init as init\n",
181 | "def weight_init(m):\n",
182 | " if isinstance(m, nn.Linear):\n",
183 | " init.kaiming_uniform_(m.weight.data)\n",
184 | "\n",
185 | "model = Net().to(DEVICE)\n",
186 | "model.apply(weight_init)\n",
187 | "optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)\n",
188 | "criterion = nn.CrossEntropyLoss()\n",
189 | "\n",
190 | "print(model)"
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": 9,
196 | "metadata": {},
197 | "outputs": [],
198 | "source": [
199 | "''' 8. MLP 모델 학습을 진행하며 학습 데이터에 대한 모델 성능을 확인하는 함수 정의 '''\n",
200 | "def train(model, train_loader, optimizer, log_interval):\n",
201 | "    ''' One pass over train_loader: forward, backward and optimizer step per mini-batch. '''\n",
202 | "    # DEVICE, criterion and epoch are not parameters; they are read from the notebook namespace.\n",
203 | "    model.train()\n",
204 | "    for step, (inputs, targets) in enumerate(train_loader):\n",
205 | "        inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)\n",
206 | "        optimizer.zero_grad()\n",
207 | "        batch_loss = criterion(model(inputs), targets)\n",
208 | "        batch_loss.backward()\n",
209 | "        optimizer.step()\n",
210 | "\n",
211 | "        if step % log_interval != 0:\n",
212 | "            continue\n",
213 | "        progress = 100. * step / len(train_loader)\n",
214 | "        print(\"Train Epoch: {} [{}/{} ({:.0f}%)]\\tTrain Loss: {:.6f}\".format(\n",
215 | "            epoch, step * len(inputs), len(train_loader.dataset), progress, batch_loss.item()))"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 10,
221 | "metadata": {},
222 | "outputs": [],
223 | "source": [
224 | "''' 9. 학습되는 과정 속에서 검증 데이터에 대한 모델 성능을 확인하는 함수 정의 '''\n",
225 | "def evaluate(model, test_loader):\n",
226 | "    ''' Return (mean loss per sample, accuracy in %) of `model` over `test_loader`. '''\n",
227 | "    # DEVICE and criterion are read from the notebook namespace (defined in other cells).\n",
228 | "    model.eval()\n",
229 | "    test_loss, correct = 0, 0\n",
230 | "    with torch.no_grad():\n",
231 | "        for image, label in test_loader:\n",
232 | "            image, label = image.to(DEVICE), label.to(DEVICE)\n",
233 | "            output = model(image)\n",
234 | "            # Assumes criterion averages over the batch (nn.CrossEntropyLoss default): re-weight by batch size.\n",
235 | "            test_loss += criterion(output, label).item() * label.size(0)\n",
236 | "            prediction = output.max(1, keepdim = True)[1]\n",
237 | "            correct += prediction.eq(label.view_as(prediction)).sum().item()\n",
238 | "\n",
239 | "    test_loss /= len(test_loader.dataset)\n",
240 | "    test_accuracy = 100. * correct / len(test_loader.dataset)\n",
241 | "    return test_loss, test_accuracy"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": 11,
247 | "metadata": {},
248 | "outputs": [
249 | {
250 | "name": "stdout",
251 | "output_type": "stream",
252 | "text": [
253 | "Train Epoch: 1 [0/60000 (0%)]\tTrain Loss: 3.199550\n",
254 | "Train Epoch: 1 [6400/60000 (11%)]\tTrain Loss: 0.513148\n",
255 | "Train Epoch: 1 [12800/60000 (21%)]\tTrain Loss: 0.923122\n",
256 | "Train Epoch: 1 [19200/60000 (32%)]\tTrain Loss: 0.233807\n",
257 | "Train Epoch: 1 [25600/60000 (43%)]\tTrain Loss: 0.355385\n",
258 | "Train Epoch: 1 [32000/60000 (53%)]\tTrain Loss: 0.256300\n",
259 | "Train Epoch: 1 [38400/60000 (64%)]\tTrain Loss: 0.213376\n",
260 | "Train Epoch: 1 [44800/60000 (75%)]\tTrain Loss: 0.442577\n",
261 | "Train Epoch: 1 [51200/60000 (85%)]\tTrain Loss: 0.195819\n",
262 | "Train Epoch: 1 [57600/60000 (96%)]\tTrain Loss: 0.126851\n",
263 | "\n",
264 | "[EPOCH: 1], \tTest Loss: 0.0043, \tTest Accuracy: 95.85 % \n",
265 | "\n",
266 | "Train Epoch: 2 [0/60000 (0%)]\tTrain Loss: 0.416017\n",
267 | "Train Epoch: 2 [6400/60000 (11%)]\tTrain Loss: 0.328205\n",
268 | "Train Epoch: 2 [12800/60000 (21%)]\tTrain Loss: 0.197520\n",
269 | "Train Epoch: 2 [19200/60000 (32%)]\tTrain Loss: 0.068057\n",
270 | "Train Epoch: 2 [25600/60000 (43%)]\tTrain Loss: 0.609262\n",
271 | "Train Epoch: 2 [32000/60000 (53%)]\tTrain Loss: 0.211904\n",
272 | "Train Epoch: 2 [38400/60000 (64%)]\tTrain Loss: 0.098402\n",
273 | "Train Epoch: 2 [44800/60000 (75%)]\tTrain Loss: 0.043566\n",
274 | "Train Epoch: 2 [51200/60000 (85%)]\tTrain Loss: 0.084222\n",
275 | "Train Epoch: 2 [57600/60000 (96%)]\tTrain Loss: 0.069517\n",
276 | "\n",
277 | "[EPOCH: 2], \tTest Loss: 0.0035, \tTest Accuracy: 96.63 % \n",
278 | "\n",
279 | "Train Epoch: 3 [0/60000 (0%)]\tTrain Loss: 0.120982\n",
280 | "Train Epoch: 3 [6400/60000 (11%)]\tTrain Loss: 0.034953\n",
281 | "Train Epoch: 3 [12800/60000 (21%)]\tTrain Loss: 0.106771\n",
282 | "Train Epoch: 3 [19200/60000 (32%)]\tTrain Loss: 0.325835\n",
283 | "Train Epoch: 3 [25600/60000 (43%)]\tTrain Loss: 0.071694\n",
284 | "Train Epoch: 3 [32000/60000 (53%)]\tTrain Loss: 0.203136\n",
285 | "Train Epoch: 3 [38400/60000 (64%)]\tTrain Loss: 0.087571\n",
286 | "Train Epoch: 3 [44800/60000 (75%)]\tTrain Loss: 0.578591\n",
287 | "Train Epoch: 3 [51200/60000 (85%)]\tTrain Loss: 0.058942\n",
288 | "Train Epoch: 3 [57600/60000 (96%)]\tTrain Loss: 0.083058\n",
289 | "\n",
290 | "[EPOCH: 3], \tTest Loss: 0.0030, \tTest Accuracy: 97.25 % \n",
291 | "\n",
292 | "Train Epoch: 4 [0/60000 (0%)]\tTrain Loss: 0.292411\n",
293 | "Train Epoch: 4 [6400/60000 (11%)]\tTrain Loss: 0.265194\n",
294 | "Train Epoch: 4 [12800/60000 (21%)]\tTrain Loss: 0.247585\n",
295 | "Train Epoch: 4 [19200/60000 (32%)]\tTrain Loss: 0.247720\n",
296 | "Train Epoch: 4 [25600/60000 (43%)]\tTrain Loss: 0.201436\n",
297 | "Train Epoch: 4 [32000/60000 (53%)]\tTrain Loss: 0.208042\n",
298 | "Train Epoch: 4 [38400/60000 (64%)]\tTrain Loss: 0.104799\n",
299 | "Train Epoch: 4 [44800/60000 (75%)]\tTrain Loss: 0.095104\n",
300 | "Train Epoch: 4 [51200/60000 (85%)]\tTrain Loss: 0.321322\n",
301 | "Train Epoch: 4 [57600/60000 (96%)]\tTrain Loss: 0.575262\n",
302 | "\n",
303 | "[EPOCH: 4], \tTest Loss: 0.0027, \tTest Accuracy: 97.41 % \n",
304 | "\n",
305 | "Train Epoch: 5 [0/60000 (0%)]\tTrain Loss: 0.342820\n",
306 | "Train Epoch: 5 [6400/60000 (11%)]\tTrain Loss: 0.028762\n",
307 | "Train Epoch: 5 [12800/60000 (21%)]\tTrain Loss: 0.215698\n",
308 | "Train Epoch: 5 [19200/60000 (32%)]\tTrain Loss: 0.226103\n",
309 | "Train Epoch: 5 [25600/60000 (43%)]\tTrain Loss: 0.128261\n",
310 | "Train Epoch: 5 [32000/60000 (53%)]\tTrain Loss: 0.232673\n",
311 | "Train Epoch: 5 [38400/60000 (64%)]\tTrain Loss: 0.109379\n",
312 | "Train Epoch: 5 [44800/60000 (75%)]\tTrain Loss: 0.319595\n",
313 | "Train Epoch: 5 [51200/60000 (85%)]\tTrain Loss: 0.329783\n",
314 | "Train Epoch: 5 [57600/60000 (96%)]\tTrain Loss: 0.254121\n",
315 | "\n",
316 | "[EPOCH: 5], \tTest Loss: 0.0026, \tTest Accuracy: 97.50 % \n",
317 | "\n",
318 | "Train Epoch: 6 [0/60000 (0%)]\tTrain Loss: 0.168219\n",
319 | "Train Epoch: 6 [6400/60000 (11%)]\tTrain Loss: 0.187313\n",
320 | "Train Epoch: 6 [12800/60000 (21%)]\tTrain Loss: 0.226820\n",
321 | "Train Epoch: 6 [19200/60000 (32%)]\tTrain Loss: 0.088462\n",
322 | "Train Epoch: 6 [25600/60000 (43%)]\tTrain Loss: 0.336217\n",
323 | "Train Epoch: 6 [32000/60000 (53%)]\tTrain Loss: 0.395337\n",
324 | "Train Epoch: 6 [38400/60000 (64%)]\tTrain Loss: 0.034127\n",
325 | "Train Epoch: 6 [44800/60000 (75%)]\tTrain Loss: 0.181321\n",
326 | "Train Epoch: 6 [51200/60000 (85%)]\tTrain Loss: 0.183680\n",
327 | "Train Epoch: 6 [57600/60000 (96%)]\tTrain Loss: 0.409360\n",
328 | "\n",
329 | "[EPOCH: 6], \tTest Loss: 0.0024, \tTest Accuracy: 97.73 % \n",
330 | "\n",
331 | "Train Epoch: 7 [0/60000 (0%)]\tTrain Loss: 0.041711\n",
332 | "Train Epoch: 7 [6400/60000 (11%)]\tTrain Loss: 0.208485\n",
333 | "Train Epoch: 7 [12800/60000 (21%)]\tTrain Loss: 0.121613\n",
334 | "Train Epoch: 7 [19200/60000 (32%)]\tTrain Loss: 0.011681\n",
335 | "Train Epoch: 7 [25600/60000 (43%)]\tTrain Loss: 0.573379\n",
336 | "Train Epoch: 7 [32000/60000 (53%)]\tTrain Loss: 0.144857\n",
337 | "Train Epoch: 7 [38400/60000 (64%)]\tTrain Loss: 0.083845\n",
338 | "Train Epoch: 7 [44800/60000 (75%)]\tTrain Loss: 0.326227\n",
339 | "Train Epoch: 7 [51200/60000 (85%)]\tTrain Loss: 0.051569\n",
340 | "Train Epoch: 7 [57600/60000 (96%)]\tTrain Loss: 0.037317\n",
341 | "\n",
342 | "[EPOCH: 7], \tTest Loss: 0.0023, \tTest Accuracy: 97.89 % \n",
343 | "\n",
344 | "Train Epoch: 8 [0/60000 (0%)]\tTrain Loss: 0.041605\n",
345 | "Train Epoch: 8 [6400/60000 (11%)]\tTrain Loss: 0.199083\n",
346 | "Train Epoch: 8 [12800/60000 (21%)]\tTrain Loss: 0.359119\n",
347 | "Train Epoch: 8 [19200/60000 (32%)]\tTrain Loss: 0.054514\n",
348 | "Train Epoch: 8 [25600/60000 (43%)]\tTrain Loss: 0.007979\n",
349 | "Train Epoch: 8 [32000/60000 (53%)]\tTrain Loss: 0.043286\n",
350 | "Train Epoch: 8 [38400/60000 (64%)]\tTrain Loss: 0.201188\n",
351 | "Train Epoch: 8 [44800/60000 (75%)]\tTrain Loss: 0.095515\n",
352 | "Train Epoch: 8 [51200/60000 (85%)]\tTrain Loss: 0.122469\n",
353 | "Train Epoch: 8 [57600/60000 (96%)]\tTrain Loss: 0.061429\n",
354 | "\n",
355 | "[EPOCH: 8], \tTest Loss: 0.0024, \tTest Accuracy: 97.64 % \n",
356 | "\n",
357 | "Train Epoch: 9 [0/60000 (0%)]\tTrain Loss: 0.152112\n",
358 | "Train Epoch: 9 [6400/60000 (11%)]\tTrain Loss: 0.136112\n",
359 | "Train Epoch: 9 [12800/60000 (21%)]\tTrain Loss: 0.071809\n",
360 | "Train Epoch: 9 [19200/60000 (32%)]\tTrain Loss: 0.021181\n",
361 | "Train Epoch: 9 [25600/60000 (43%)]\tTrain Loss: 0.035105\n",
362 | "Train Epoch: 9 [32000/60000 (53%)]\tTrain Loss: 0.632621\n",
363 | "Train Epoch: 9 [38400/60000 (64%)]\tTrain Loss: 0.077700\n",
364 | "Train Epoch: 9 [44800/60000 (75%)]\tTrain Loss: 0.144102\n",
365 | "Train Epoch: 9 [51200/60000 (85%)]\tTrain Loss: 0.061276\n",
366 | "Train Epoch: 9 [57600/60000 (96%)]\tTrain Loss: 0.066728\n",
367 | "\n",
368 | "[EPOCH: 9], \tTest Loss: 0.0024, \tTest Accuracy: 97.69 % \n",
369 | "\n",
370 | "Train Epoch: 10 [0/60000 (0%)]\tTrain Loss: 0.429119\n",
371 | "Train Epoch: 10 [6400/60000 (11%)]\tTrain Loss: 0.043460\n",
372 | "Train Epoch: 10 [12800/60000 (21%)]\tTrain Loss: 0.073910\n",
373 | "Train Epoch: 10 [19200/60000 (32%)]\tTrain Loss: 0.212445\n",
374 | "Train Epoch: 10 [25600/60000 (43%)]\tTrain Loss: 0.062954\n",
375 | "Train Epoch: 10 [32000/60000 (53%)]\tTrain Loss: 0.282957\n",
376 | "Train Epoch: 10 [38400/60000 (64%)]\tTrain Loss: 0.425912\n",
377 | "Train Epoch: 10 [44800/60000 (75%)]\tTrain Loss: 0.247884\n",
378 | "Train Epoch: 10 [51200/60000 (85%)]\tTrain Loss: 0.166647\n",
379 | "Train Epoch: 10 [57600/60000 (96%)]\tTrain Loss: 0.081299\n",
380 | "\n",
381 | "[EPOCH: 10], \tTest Loss: 0.0021, \tTest Accuracy: 97.97 % \n",
382 | "\n"
383 | ]
384 | }
385 | ],
386 | "source": [
387 | "''' 10. MLP 학습 실행하며 Train, Test set의 Loss 및 Test set Accuracy 확인하기 '''\n",
388 | "# The loop variable must stay named `epoch`: train() reads it as a global when it logs.\n",
389 | "for epoch in range(1, EPOCHS + 1):\n",
390 | "    train(model, train_loader, optimizer, log_interval = 200)\n",
391 | "    test_loss, test_accuracy = evaluate(model, test_loader)\n",
392 | "    print(\"\\n[EPOCH: {}], \\tTest Loss: {:.4f}, \\tTest Accuracy: {:.2f} % \\n\".format(epoch, test_loss, test_accuracy))"
393 | ]
394 | }
395 | ],
396 | "metadata": {
397 | "kernelspec": {
398 | "display_name": "Python 3",
399 | "language": "python",
400 | "name": "python3"
401 | },
402 | "language_info": {
403 | "codemirror_mode": {
404 | "name": "ipython",
405 | "version": 3
406 | },
407 | "file_extension": ".py",
408 | "mimetype": "text/x-python",
409 | "name": "python",
410 | "nbconvert_exporter": "python",
411 | "pygments_lexer": "ipython3",
412 | "version": "3.6.8"
413 | }
414 | },
415 | "nbformat": 4,
416 | "nbformat_minor": 4
417 | }
418 |
--------------------------------------------------------------------------------
/5_NLP_v2.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"accelerator":"GPU","colab":{"name":"5_NLP_v2.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.6.8"}},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"2XWOHqwyv9Kn","executionInfo":{"elapsed":811,"status":"ok","timestamp":1647777651500,"user":{"displayName":"seongsu bang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12261094723698722963"},"user_tz":-540},"outputId":"96d225df-49cb-4d65-9e8c-97a3123055af"},"source":["# ! pip list | grep \"torch\"\n","# torch 1.10.0+cu111\n","# torchaudio 0.10.0+cu111\n","# torchsummary 1.5.1\n","# torchtext 0.11.0\n","# torchvision 0.11.1+cu111\n","\n","https://pytorch.org/tutorials/beginner/text_sentiment_ngrams_tutorial.html\n","https://tutorials.pytorch.kr/beginner/text_sentiment_ngrams_tutorial.html\n","\n","# 해당 코드는 위 페이지를 기반으로 만들어진 코드입니다. \n","# torchtext가 편한 부분도 많지만, 버전 변화에 따른 코드 변화도 많네요. 앞으로도 버전 변경으로 인한 오류가 생길 가능성이 있습니다.\n","# 해당 파일의 리팩토링 피드백이 늦는다면, 위 공식 문서를 참고하여 변경 부분을 확인하시거나 위 library 버전을 사용하시길 권장합니다.\n","\n","# 이 파일에는 from_scratch, recurrent 모델들을 모두 선택해서 학습할 수 있도록 작성 했습니다. 
\n","# 그 과정에서 코드가 복잡해진 점에 대해 미리 사과드립니다."],"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":["torch 1.10.0+cu111\n","torchaudio 0.10.0+cu111\n","torchsummary 1.5.1\n","torchtext 0.11.0\n","torchvision 0.11.1+cu111\n"]}]},{"cell_type":"code","source":["import os\n","import time\n","import torch\n","from tqdm import tqdm\n","\n","from torch import nn\n","from torch.utils.data import DataLoader\n","from torch.nn.utils.rnn import pad_sequence\n","from torch.utils.data.dataset import random_split\n","\n","from torchtext.datasets import IMDB\n","from torchtext.data.utils import get_tokenizer\n","from torchtext.data.functional import to_map_style_dataset\n","from torchtext.vocab import build_vocab_from_iterator, Vectors\n","\n","\n","\n","config = {'pre_trained' : 'glove', # 'glove','fasttext', None \n"," 'max_length': 300,\n"," 'batch_size': 64,\n"," 'model_type': 'gru', # 'rnn', 'lstm', 'gru','avg_not_pad', None\n"," 'emb_dim' : 300,\n"," 'hidden_dim':128,\n"," 'is_bidirectional':True,\n"," 'epoch' : 15,\n"," 'LR': 5\n"," }\n"],"metadata":{"id":"Nd6V2m4QCJ4G","executionInfo":{"status":"ok","timestamp":1647959741676,"user_tz":-540,"elapsed":230,"user":{"displayName":"seongsu bang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12261094723698722963"}}},"execution_count":11,"outputs":[]},{"cell_type":"code","source":["\n","device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n","# device = torch.device(\"cpu\")\n","\n","# Tokenize & Vocab setup\n","tokenizer = get_tokenizer('basic_english')\n","train_iter = IMDB(split='train')\n","def yield_tokens(data_iter):\n"," for _, text in data_iter:\n"," yield tokenizer(text)\n","\n","vocab = build_vocab_from_iterator(yield_tokens(train_iter), min_freq= 2, specials=[\"\", \"\"])\n","vocab.set_default_index(vocab[\"\"]) # This index will be returned when OOV token is queried.\n","\n","num_class = 2\n","vocab_size = len(vocab)\n","idx_pad = 
vocab.get_stoi()['']"],"metadata":{"id":"zmeuB_A-dhMm","executionInfo":{"status":"ok","timestamp":1647959017797,"user_tz":-540,"elapsed":23064,"user":{"displayName":"seongsu bang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12261094723698722963"}}},"execution_count":1,"outputs":[]},{"cell_type":"code","source":["# 사이즈가 작은 파일들만 가져왔습니다. 다른 모델을 써보고 싶다면, 아래 링크를 참고해서 코드를 변경해서 사용하세요.\n","# https://pytorch.org/text/stable/_modules/torchtext/vocab/vectors.html#Vectors\n","\n","if config['pre_trained'] == 'glove':\n"," pretrained_vectors = Vectors(name = 'glove.6B.300d.txt', \n"," # cache = '[my_path]',\n"," url = 'http://nlp.stanford.edu/data/glove.6B.zip')\n"," pretrained_emb = pretrained_vectors.get_vecs_by_tokens(vocab.get_itos(), lower_case_backup=True)\n","\n","elif config['pre_trained'] == 'fasttext':\n"," pretrained_vectors = Vectors(name = 'wiki.simple.vec', \n"," # cache = '[my_path]',\n"," url = 'https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.simple.vec')\n"," pretrained_emb = pretrained_vectors.get_vecs_by_tokens(vocab.get_itos(), lower_case_backup=True)\n","else:\n"," pass\n"," "],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Gn-4UlcPo2Sm","executionInfo":{"status":"ok","timestamp":1647959271331,"user_tz":-540,"elapsed":245850,"user":{"displayName":"seongsu bang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12261094723698722963"}},"outputId":"dd7d63e7-4ea9-474e-c80b-ddf889fe8bc7"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stderr","text":[".vector_cache/glove.6B.zip: 862MB [02:40, 5.36MB/s] \n","100%|█████████▉| 399999/400000 [00:56<00:00, 7128.51it/s]\n"]}]},{"cell_type":"code","source":["config = {'pre_trained' : 'glove', # 'glove','fasttext', None \n"," 'max_length': 300,\n"," 'batch_size': 64,\n"," 'model_type': 'lstm', # 'rnn', 'lstm', 'gru','avg_not_pad', None\n"," 'emb_dim' : 300,\n"," 'hidden_dim':128,\n"," 'is_bidirectional':True,\n"," 
'epoch' : 10,\n"," 'LR': 5\n"," }"],"metadata":{"id":"qEFzPiWFIIZC","executionInfo":{"status":"ok","timestamp":1647961775746,"user_tz":-540,"elapsed":325,"user":{"displayName":"seongsu bang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12261094723698722963"}}},"execution_count":39,"outputs":[]},{"cell_type":"code","source":["# DataLoader Setup\n","text_pipeline = lambda x: vocab(tokenizer(x))[:config['max_length']]\n","label_pipeline = lambda x: {\"neg\":0, \"pos\":1}.get(x)\n","\n","def collate_batch(batch):\n"," label_list, text_list = [], [] \n"," for (_label, _text) in batch:\n"," processed_text = torch.tensor(text_pipeline(_text), dtype=torch.int64)\n"," text_list.append(processed_text)\n"," label_list.append(label_pipeline(_label))\n"," \n"," text_list = pad_sequence(text_list, batch_first= True, padding_value= idx_pad)\n"," label_list = torch.tensor(label_list, dtype=torch.int64)\n"," return text_list.to(device), label_list.to(device)\n","\n","train_iter, test_iter = IMDB()\n","train_dataset = to_map_style_dataset(train_iter)\n","test_dataset = to_map_style_dataset(test_iter)\n","\n","num_train = int(len(train_dataset) * 0.95)\n","split_train_, split_valid_ = random_split(train_dataset, [num_train, len(train_dataset) - num_train])\n","\n","train_dataloader = DataLoader(split_train_, batch_size=config['batch_size'], shuffle=True, collate_fn=collate_batch)\n","valid_dataloader = DataLoader(split_valid_, batch_size=config['batch_size'], shuffle=True, collate_fn=collate_batch)\n","test_dataloader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=True, collate_fn=collate_batch)\n"],"metadata":{"id":"woxfsknMdlvp","executionInfo":{"status":"ok","timestamp":1647959771286,"user_tz":-540,"elapsed":24464,"user":{"displayName":"seongsu bang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12261094723698722963"}}},"execution_count":12,"outputs":[]},{"cell_type":"code","source":["# Model 
setup\n","class TextClassificationModel(nn.Module):\n"," def __init__(self, vocab_size, num_class, **config):\n"," super(TextClassificationModel, self).__init__()\n"," self.model_type = config['model_type']\n"," self.pretrained = config['pre_trained']\n"," self.is_bidirectional = config['is_bidirectional']\n"," self.embed_dim = config['emb_dim']\n"," self.hidden_dim = config['hidden_dim']\n"," \n"," self.embedding = nn.Embedding(vocab_size, self.embed_dim,)\n"," if self.pretrained:\n"," self.embedding = nn.Embedding(vocab_size, self.embed_dim,).from_pretrained(pretrained_emb, freeze = False)\n","\n"," if self.model_type is None:\n"," self.fc = nn.Linear(self.embed_dim, num_class)\n","\n"," elif self.model_type == 'avg_not_pad':\n"," self.embedding = nn.EmbeddingBag(vocab_size, self.embed_dim, sparse=True, padding_idx = idx_pad)\n"," if self.pretrained:\n"," self.embedding = self.embedding.from_pretrained(pretrained_emb, freeze = False, sparse=True)\n"," self.fc = nn.Linear(self.embed_dim, num_class)\n","\n"," elif self.model_type in ['rnn','lstm','gru']:\n"," if self.model_type == 'rnn':\n"," self.Recurrent = nn.RNN(input_size = self.embed_dim, hidden_size = self.hidden_dim, \n"," bidirectional = self.is_bidirectional, batch_first = True)\n"," elif self.model_type == 'lstm':\n"," self.Recurrent = nn.LSTM(input_size = self.embed_dim, hidden_size = self.hidden_dim, \n"," bidirectional = self.is_bidirectional, batch_first = True)\n"," else:\n"," self.Recurrent = nn.GRU(input_size = self.embed_dim, hidden_size = self.hidden_dim, \n"," bidirectional = self.is_bidirectional, batch_first = True)\n","\n"," last_input_dim = self.hidden_dim * 2 if self.is_bidirectional else self.hidden_dim \n"," self.fc = nn.Linear(last_input_dim, num_class)\n","\n"," else:\n"," raise NameError('Select model_type in [rnn, lstm, gru, avg_not_pad]')\n","\n"," self.init_weights()\n","\n"," def init_weights(self):\n"," initrange = 0.5\n"," if self.pretrained:\n"," 
self.embedding.weight.data.uniform_(-initrange, initrange)\n"," self.fc.weight.data.uniform_(-initrange, initrange)\n"," self.fc.bias.data.zero_()\n","\n"," def forward(self, text):\n"," embedded = self.embedding(text)\n"," if self.model_type is None:\n"," embedded = torch.mean(embedded, dim=1)\n"," return self.fc(embedded)\n"," elif self.model_type == 'avg_not_pad':\n"," return self.fc(embedded)\n"," else:\n"," output, _ = self.Recurrent(embedded)\n"," last_output = output[:,-1,:]\n"," return self.fc(last_output)\n","\n","\n","model = TextClassificationModel(vocab_size, num_class, **config).to(device)\n","model"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UMfu_mmiyIIW","executionInfo":{"status":"ok","timestamp":1647961777737,"user_tz":-540,"elapsed":526,"user":{"displayName":"seongsu bang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12261094723698722963"}},"outputId":"83accf31-ead6-4845-8fa0-8b99a915f75d"},"execution_count":40,"outputs":[{"output_type":"execute_result","data":{"text/plain":["TextClassificationModel(\n"," (embedding): Embedding(51718, 300)\n"," (Recurrent): LSTM(300, 128, batch_first=True, bidirectional=True)\n"," (fc): Linear(in_features=256, out_features=2, bias=True)\n",")"]},"metadata":{},"execution_count":40}]},{"cell_type":"code","source":["# Training Setup\n","# Hyperparameters\n","EPOCHS = config['epoch']\n","LR = config['LR'] \n","\n","total_accu = None\n","\n","criterion = torch.nn.CrossEntropyLoss()\n","optimizer = torch.optim.SGD(model.parameters(), lr=LR)\n","scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)\n","\n","for epoch in range(1, EPOCHS + 1):\n"," epoch_start_time = time.time()\n","\n"," model.train()\n"," total_acc, total_count = 0, 0\n"," log_interval = 50\n"," start_time = time.time()\n","\n"," for idx, (text, label) in tqdm(enumerate(train_dataloader)):\n"," # Training\n"," optimizer.zero_grad()\n"," predicted_label = model(text)\n"," loss = 
criterion(predicted_label, label)\n"," loss.backward()\n"," \n"," torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)\n"," optimizer.step()\n"," total_acc += (predicted_label.argmax(1) == label).sum().item()\n"," total_count += label.size(0)\n"," if idx % log_interval == 0 and idx > 0:\n"," elapsed = time.time() - start_time\n"," print(f'| epoch {epoch:3d} | {idx:5d}/{len(train_dataloader):5d} batches | accuracy {total_acc/total_count:8.3f}') \n"," total_acc, total_count = 0, 0\n"," start_time = time.time()\n"," \n"," # Evaluation\n"," model.eval()\n"," total_acc, total_count = 0, 0\n"," with torch.no_grad():\n"," for idx, (text, label) in enumerate(valid_dataloader):\n"," predicted_label = model(text)\n"," loss = criterion(predicted_label, label)\n"," total_acc += (predicted_label.argmax(1) == label).sum().item()\n"," total_count += label.size(0)\n"," accu_val = total_acc/total_count \n"," \n"," if total_accu is not None and total_accu > accu_val:\n"," scheduler.step()\n"," else:\n"," total_accu = accu_val\n"," print('-' * 59)\n"," print(f'| end of epoch {epoch:3d} | time: {time.time() - epoch_start_time:5.2f}s | valid accuracy {accu_val:8.3f}')\n"," print('-' * 59)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"j611wMOaGFRN","executionInfo":{"status":"ok","timestamp":1647962358199,"user_tz":-540,"elapsed":579480,"user":{"displayName":"seongsu bang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12261094723698722963"}},"outputId":"020f6dc7-acd0-408f-c6cb-17d8e95f19c5"},"execution_count":41,"outputs":[{"output_type":"stream","name":"stderr","text":["52it [00:05, 10.12it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 1 | 50/ 372 batches | accuracy 0.493\n"]},{"output_type":"stream","name":"stderr","text":["102it [00:10, 10.07it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 1 | 100/ 372 batches | accuracy 0.500\n"]},{"output_type":"stream","name":"stderr","text":["152it [00:15, 
10.05it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 1 | 150/ 372 batches | accuracy 0.503\n"]},{"output_type":"stream","name":"stderr","text":["202it [00:20, 10.10it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 1 | 200/ 372 batches | accuracy 0.503\n"]},{"output_type":"stream","name":"stderr","text":["253it [00:25, 9.93it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 1 | 250/ 372 batches | accuracy 0.505\n"]},{"output_type":"stream","name":"stderr","text":["303it [00:30, 10.15it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 1 | 300/ 372 batches | accuracy 0.500\n"]},{"output_type":"stream","name":"stderr","text":["353it [00:35, 10.07it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 1 | 350/ 372 batches | accuracy 0.510\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:37, 9.91it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 1 | time: 38.28s | valid accuracy 0.486\n","-----------------------------------------------------------\n"]},{"output_type":"stream","name":"stderr","text":["52it [00:05, 10.10it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 2 | 50/ 372 batches | accuracy 0.495\n"]},{"output_type":"stream","name":"stderr","text":["102it [00:10, 9.97it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 2 | 100/ 372 batches | accuracy 0.523\n"]},{"output_type":"stream","name":"stderr","text":["151it [00:14, 10.07it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 2 | 150/ 372 batches | accuracy 0.498\n"]},{"output_type":"stream","name":"stderr","text":["202it [00:20, 9.76it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 2 | 200/ 372 batches | accuracy 0.517\n"]},{"output_type":"stream","name":"stderr","text":["253it [00:25, 9.98it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 2 | 250/ 372 batches | 
accuracy 0.503\n"]},{"output_type":"stream","name":"stderr","text":["303it [00:30, 10.00it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 2 | 300/ 372 batches | accuracy 0.516\n"]},{"output_type":"stream","name":"stderr","text":["353it [00:35, 10.01it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 2 | 350/ 372 batches | accuracy 0.519\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:37, 9.96it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 2 | time: 38.11s | valid accuracy 0.498\n","-----------------------------------------------------------\n"]},{"output_type":"stream","name":"stderr","text":["53it [00:05, 10.14it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 3 | 50/ 372 batches | accuracy 0.527\n"]},{"output_type":"stream","name":"stderr","text":["102it [00:10, 10.01it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 3 | 100/ 372 batches | accuracy 0.516\n"]},{"output_type":"stream","name":"stderr","text":["152it [00:15, 9.65it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 3 | 150/ 372 batches | accuracy 0.515\n"]},{"output_type":"stream","name":"stderr","text":["202it [00:20, 9.48it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 3 | 200/ 372 batches | accuracy 0.522\n"]},{"output_type":"stream","name":"stderr","text":["253it [00:25, 10.02it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 3 | 250/ 372 batches | accuracy 0.509\n"]},{"output_type":"stream","name":"stderr","text":["302it [00:30, 9.70it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 3 | 300/ 372 batches | accuracy 0.515\n"]},{"output_type":"stream","name":"stderr","text":["352it [00:35, 9.81it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 3 | 350/ 372 batches | accuracy 0.526\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:37, 
9.85it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 3 | time: 38.53s | valid accuracy 0.514\n","-----------------------------------------------------------\n"]},{"output_type":"stream","name":"stderr","text":["52it [00:05, 9.81it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 4 | 50/ 372 batches | accuracy 0.537\n"]},{"output_type":"stream","name":"stderr","text":["102it [00:10, 9.69it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 4 | 100/ 372 batches | accuracy 0.538\n"]},{"output_type":"stream","name":"stderr","text":["152it [00:15, 9.31it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 4 | 150/ 372 batches | accuracy 0.526\n"]},{"output_type":"stream","name":"stderr","text":["202it [00:20, 9.61it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 4 | 200/ 372 batches | accuracy 0.538\n"]},{"output_type":"stream","name":"stderr","text":["252it [00:26, 9.74it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 4 | 250/ 372 batches | accuracy 0.551\n"]},{"output_type":"stream","name":"stderr","text":["303it [00:31, 9.82it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 4 | 300/ 372 batches | accuracy 0.648\n"]},{"output_type":"stream","name":"stderr","text":["352it [00:36, 9.55it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 4 | 350/ 372 batches | accuracy 0.723\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:38, 9.68it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 4 | time: 39.19s | valid accuracy 0.738\n","-----------------------------------------------------------\n"]},{"output_type":"stream","name":"stderr","text":["53it [00:05, 9.92it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 5 | 50/ 372 batches | accuracy 
0.776\n"]},{"output_type":"stream","name":"stderr","text":["102it [00:10, 9.81it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 5 | 100/ 372 batches | accuracy 0.803\n"]},{"output_type":"stream","name":"stderr","text":["152it [00:15, 9.50it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 5 | 150/ 372 batches | accuracy 0.810\n"]},{"output_type":"stream","name":"stderr","text":["202it [00:20, 9.75it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 5 | 200/ 372 batches | accuracy 0.814\n"]},{"output_type":"stream","name":"stderr","text":["253it [00:25, 9.97it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 5 | 250/ 372 batches | accuracy 0.821\n"]},{"output_type":"stream","name":"stderr","text":["302it [00:31, 9.55it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 5 | 300/ 372 batches | accuracy 0.817\n"]},{"output_type":"stream","name":"stderr","text":["351it [00:35, 10.11it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 5 | 350/ 372 batches | accuracy 0.832\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:38, 9.77it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 5 | time: 38.81s | valid accuracy 0.810\n","-----------------------------------------------------------\n"]},{"output_type":"stream","name":"stderr","text":["52it [00:05, 9.70it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 6 | 50/ 372 batches | accuracy 0.855\n"]},{"output_type":"stream","name":"stderr","text":["103it [00:10, 10.12it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 6 | 100/ 372 batches | accuracy 0.853\n"]},{"output_type":"stream","name":"stderr","text":["152it [00:15, 9.70it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 6 | 150/ 372 batches | accuracy 0.857\n"]},{"output_type":"stream","name":"stderr","text":["202it [00:20, 
9.82it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 6 | 200/ 372 batches | accuracy 0.839\n"]},{"output_type":"stream","name":"stderr","text":["252it [00:25, 9.87it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 6 | 250/ 372 batches | accuracy 0.857\n"]},{"output_type":"stream","name":"stderr","text":["302it [00:30, 9.64it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 6 | 300/ 372 batches | accuracy 0.845\n"]},{"output_type":"stream","name":"stderr","text":["352it [00:36, 9.68it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 6 | 350/ 372 batches | accuracy 0.858\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:38, 9.77it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 6 | time: 38.83s | valid accuracy 0.822\n","-----------------------------------------------------------\n"]},{"output_type":"stream","name":"stderr","text":["52it [00:05, 9.57it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 7 | 50/ 372 batches | accuracy 0.878\n"]},{"output_type":"stream","name":"stderr","text":["102it [00:10, 9.74it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 7 | 100/ 372 batches | accuracy 0.878\n"]},{"output_type":"stream","name":"stderr","text":["152it [00:15, 9.82it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 7 | 150/ 372 batches | accuracy 0.869\n"]},{"output_type":"stream","name":"stderr","text":["202it [00:20, 9.58it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 7 | 200/ 372 batches | accuracy 0.873\n"]},{"output_type":"stream","name":"stderr","text":["252it [00:25, 9.68it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 7 | 250/ 372 batches | accuracy 0.883\n"]},{"output_type":"stream","name":"stderr","text":["302it [00:31, 9.36it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 7 | 300/ 372 batches | accuracy 
0.882\n"]},{"output_type":"stream","name":"stderr","text":["352it [00:36, 9.53it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 7 | 350/ 372 batches | accuracy 0.873\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:38, 9.70it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 7 | time: 39.10s | valid accuracy 0.850\n","-----------------------------------------------------------\n"]},{"output_type":"stream","name":"stderr","text":["52it [00:05, 9.84it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 8 | 50/ 372 batches | accuracy 0.901\n"]},{"output_type":"stream","name":"stderr","text":["102it [00:10, 9.62it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 8 | 100/ 372 batches | accuracy 0.892\n"]},{"output_type":"stream","name":"stderr","text":["152it [00:15, 9.83it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 8 | 150/ 372 batches | accuracy 0.898\n"]},{"output_type":"stream","name":"stderr","text":["202it [00:20, 9.63it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 8 | 200/ 372 batches | accuracy 0.892\n"]},{"output_type":"stream","name":"stderr","text":["252it [00:25, 9.92it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 8 | 250/ 372 batches | accuracy 0.904\n"]},{"output_type":"stream","name":"stderr","text":["303it [00:31, 10.18it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 8 | 300/ 372 batches | accuracy 0.893\n"]},{"output_type":"stream","name":"stderr","text":["353it [00:36, 10.18it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 8 | 350/ 372 batches | accuracy 0.897\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:37, 9.80it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 8 | time: 38.70s | valid accuracy 
0.810\n","-----------------------------------------------------------\n"]},{"output_type":"stream","name":"stderr","text":["52it [00:05, 9.78it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 9 | 50/ 372 batches | accuracy 0.926\n"]},{"output_type":"stream","name":"stderr","text":["102it [00:10, 9.66it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 9 | 100/ 372 batches | accuracy 0.927\n"]},{"output_type":"stream","name":"stderr","text":["152it [00:15, 9.81it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 9 | 150/ 372 batches | accuracy 0.932\n"]},{"output_type":"stream","name":"stderr","text":["203it [00:20, 10.08it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 9 | 200/ 372 batches | accuracy 0.924\n"]},{"output_type":"stream","name":"stderr","text":["252it [00:25, 9.86it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 9 | 250/ 372 batches | accuracy 0.934\n"]},{"output_type":"stream","name":"stderr","text":["303it [00:31, 9.93it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 9 | 300/ 372 batches | accuracy 0.937\n"]},{"output_type":"stream","name":"stderr","text":["352it [00:35, 9.70it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 9 | 350/ 372 batches | accuracy 0.935\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:37, 9.79it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 9 | time: 38.73s | valid accuracy 0.853\n","-----------------------------------------------------------\n"]},{"output_type":"stream","name":"stderr","text":["52it [00:05, 9.84it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 10 | 50/ 372 batches | accuracy 0.945\n"]},{"output_type":"stream","name":"stderr","text":["102it [00:10, 9.41it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 10 | 100/ 372 batches | accuracy 
0.935\n"]},{"output_type":"stream","name":"stderr","text":["152it [00:15, 9.63it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 10 | 150/ 372 batches | accuracy 0.933\n"]},{"output_type":"stream","name":"stderr","text":["202it [00:20, 9.75it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 10 | 200/ 372 batches | accuracy 0.941\n"]},{"output_type":"stream","name":"stderr","text":["252it [00:25, 9.73it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 10 | 250/ 372 batches | accuracy 0.938\n"]},{"output_type":"stream","name":"stderr","text":["302it [00:31, 9.78it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 10 | 300/ 372 batches | accuracy 0.939\n"]},{"output_type":"stream","name":"stderr","text":["352it [00:36, 9.74it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 10 | 350/ 372 batches | accuracy 0.939\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:38, 9.74it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 10 | time: 38.95s | valid accuracy 0.857\n","-----------------------------------------------------------\n"]},{"output_type":"stream","name":"stderr","text":["52it [00:05, 9.53it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 11 | 50/ 372 batches | accuracy 0.941\n"]},{"output_type":"stream","name":"stderr","text":["102it [00:10, 9.73it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 11 | 100/ 372 batches | accuracy 0.940\n"]},{"output_type":"stream","name":"stderr","text":["152it [00:15, 9.61it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 11 | 150/ 372 batches | accuracy 0.944\n"]},{"output_type":"stream","name":"stderr","text":["202it [00:20, 9.79it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 11 | 200/ 372 batches | accuracy 0.948\n"]},{"output_type":"stream","name":"stderr","text":["252it [00:26, 
9.87it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 11 | 250/ 372 batches | accuracy 0.935\n"]},{"output_type":"stream","name":"stderr","text":["302it [00:31, 9.54it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 11 | 300/ 372 batches | accuracy 0.943\n"]},{"output_type":"stream","name":"stderr","text":["352it [00:36, 9.55it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 11 | 350/ 372 batches | accuracy 0.947\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:38, 9.64it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 11 | time: 39.34s | valid accuracy 0.853\n","-----------------------------------------------------------\n"]},{"output_type":"stream","name":"stderr","text":["52it [00:05, 9.48it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 12 | 50/ 372 batches | accuracy 0.949\n"]},{"output_type":"stream","name":"stderr","text":["102it [00:10, 9.60it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 12 | 100/ 372 batches | accuracy 0.948\n"]},{"output_type":"stream","name":"stderr","text":["152it [00:15, 9.57it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 12 | 150/ 372 batches | accuracy 0.948\n"]},{"output_type":"stream","name":"stderr","text":["202it [00:20, 9.45it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 12 | 200/ 372 batches | accuracy 0.944\n"]},{"output_type":"stream","name":"stderr","text":["252it [00:26, 9.65it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 12 | 250/ 372 batches | accuracy 0.952\n"]},{"output_type":"stream","name":"stderr","text":["302it [00:31, 10.11it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 12 | 300/ 372 batches | accuracy 0.949\n"]},{"output_type":"stream","name":"stderr","text":["353it [00:36, 10.05it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 12 | 350/ 372 
batches | accuracy 0.947\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:38, 9.76it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 12 | time: 38.83s | valid accuracy 0.854\n","-----------------------------------------------------------\n"]},{"output_type":"stream","name":"stderr","text":["51it [00:05, 10.17it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 13 | 50/ 372 batches | accuracy 0.942\n"]},{"output_type":"stream","name":"stderr","text":["103it [00:10, 10.08it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 13 | 100/ 372 batches | accuracy 0.949\n"]},{"output_type":"stream","name":"stderr","text":["151it [00:14, 10.04it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 13 | 150/ 372 batches | accuracy 0.952\n"]},{"output_type":"stream","name":"stderr","text":["202it [00:20, 9.68it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 13 | 200/ 372 batches | accuracy 0.943\n"]},{"output_type":"stream","name":"stderr","text":["251it [00:25, 10.03it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 13 | 250/ 372 batches | accuracy 0.955\n"]},{"output_type":"stream","name":"stderr","text":["302it [00:30, 9.61it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 13 | 300/ 372 batches | accuracy 0.957\n"]},{"output_type":"stream","name":"stderr","text":["352it [00:35, 9.82it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 13 | 350/ 372 batches | accuracy 0.945\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:37, 9.96it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 13 | time: 38.08s | valid accuracy 0.854\n","-----------------------------------------------------------\n"]},{"output_type":"stream","name":"stderr","text":["51it [00:05, 
10.12it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 14 | 50/ 372 batches | accuracy 0.954\n"]},{"output_type":"stream","name":"stderr","text":["103it [00:10, 10.14it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 14 | 100/ 372 batches | accuracy 0.953\n"]},{"output_type":"stream","name":"stderr","text":["152it [00:15, 10.14it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 14 | 150/ 372 batches | accuracy 0.945\n"]},{"output_type":"stream","name":"stderr","text":["203it [00:20, 10.16it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 14 | 200/ 372 batches | accuracy 0.951\n"]},{"output_type":"stream","name":"stderr","text":["251it [00:24, 10.21it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 14 | 250/ 372 batches | accuracy 0.944\n"]},{"output_type":"stream","name":"stderr","text":["303it [00:30, 10.24it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 14 | 300/ 372 batches | accuracy 0.950\n"]},{"output_type":"stream","name":"stderr","text":["353it [00:35, 10.16it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 14 | 350/ 372 batches | accuracy 0.948\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:36, 10.10it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 14 | time: 37.58s | valid accuracy 0.854\n","-----------------------------------------------------------\n"]},{"output_type":"stream","name":"stderr","text":["53it [00:05, 10.11it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 15 | 50/ 372 batches | accuracy 0.950\n"]},{"output_type":"stream","name":"stderr","text":["102it [00:10, 9.56it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 15 | 100/ 372 batches | accuracy 0.958\n"]},{"output_type":"stream","name":"stderr","text":["153it [00:15, 9.95it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 15 | 150/ 372 
batches | accuracy 0.943\n"]},{"output_type":"stream","name":"stderr","text":["202it [00:20, 9.64it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 15 | 200/ 372 batches | accuracy 0.944\n"]},{"output_type":"stream","name":"stderr","text":["251it [00:25, 10.10it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 15 | 250/ 372 batches | accuracy 0.948\n"]},{"output_type":"stream","name":"stderr","text":["303it [00:30, 9.78it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 15 | 300/ 372 batches | accuracy 0.944\n"]},{"output_type":"stream","name":"stderr","text":["353it [00:35, 10.06it/s]"]},{"output_type":"stream","name":"stdout","text":["| epoch 15 | 350/ 372 batches | accuracy 0.961\n"]},{"output_type":"stream","name":"stderr","text":["372it [00:37, 9.94it/s]\n"]},{"output_type":"stream","name":"stdout","text":["-----------------------------------------------------------\n","| end of epoch 15 | time: 38.16s | valid accuracy 0.854\n","-----------------------------------------------------------\n"]}]},{"cell_type":"code","source":["print('Checking the results of test dataset.')\n","model.eval()\n","total_acc, total_count = 0, 0\n","with torch.no_grad():\n"," for idx, (text, label) in enumerate(test_dataloader):\n"," predicted_label = model(text, )\n"," loss = criterion(predicted_label, label)\n"," total_acc += (predicted_label.argmax(1) == label).sum().item()\n"," total_count += label.size(0)\n","accu_test = total_acc/total_count\n","print('test accuracy {:8.3f}'.format(accu_test))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"817s3toZH_dl","executionInfo":{"status":"ok","timestamp":1647962376760,"user_tz":-540,"elapsed":14116,"user":{"displayName":"seongsu bang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12261094723698722963"}},"outputId":"c32fe972-36e3-444e-aff9-01d787a1cb34"},"execution_count":42,"outputs":[{"output_type":"stream","name":"stdout","text":["Checking the 
results of test dataset.\n","test accuracy 0.853\n"]}]},{"cell_type":"code","source":["def predict(text, text_pipeline):\n"," with torch.no_grad():\n"," text = torch.tensor(text_pipeline(text))\n"," text = pad_sequence([text], batch_first=True, padding_value=idx_pad)\n"," output = model(text)\n"," return output.argmax(1).item()\n","\n","ex_text_str = \"It was very bad movie\"\n","\n","model = model.to(\"cpu\")\n","label_dict = {0:'neg', 1:'pos'}\n","print(f\"This is a {label_dict.get(predict(ex_text_str, text_pipeline))} comment\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"9Ehwb31w43wr","executionInfo":{"status":"ok","timestamp":1647962376761,"user_tz":-540,"elapsed":17,"user":{"displayName":"seongsu bang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12261094723698722963"}},"outputId":"e038ef02-3313-4f3d-9c15-4ce2bdb6083f"},"execution_count":43,"outputs":[{"output_type":"stream","name":"stdout","text":["This is a neg comment\n"]}]}]}
--------------------------------------------------------------------------------
/3-1_MNIST_MLP_Dropout.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "''' 1. Module Import '''\n",
10 | "import numpy as np\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "import torch\n",
14 | "import torch.nn as nn\n",
15 | "import torch.nn.functional as F\n",
16 | "from torchvision import transforms, datasets"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "name": "stdout",
26 | "output_type": "stream",
27 | "text": [
28 | "Using PyTorch version: 1.6.0+cu101 Device: cuda\n"
29 | ]
30 | }
31 | ],
32 | "source": [
33 | "''' 2. 딥러닝 모델을 설계할 때 활용하는 장비 확인 '''\n",
34 | "if torch.cuda.is_available():  # prefer the GPU whenever CUDA is usable\n",
35 | "    DEVICE = torch.device('cuda')\n",
36 | "else:\n",
37 | "    DEVICE = torch.device('cpu')  # CPU fallback\n",
38 | "print('Using PyTorch version:', torch.__version__, ' Device:', DEVICE)"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 3,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "BATCH_SIZE = 32\n",
48 | "EPOCHS = 10"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 4,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "''' 3. MNIST 데이터 다운로드 (Train set, Test set 분리하기) '''\n",
58 | "train_dataset = datasets.MNIST(root = \"../data/MNIST\",\n",
59 | " train = True,\n",
60 | " download = True,\n",
61 | " transform = transforms.ToTensor())\n",
62 | "\n",
63 | "test_dataset = datasets.MNIST(root = \"../data/MNIST\",\n",
64 | " train = False,\n",
65 | " transform = transforms.ToTensor())\n",
66 | "\n",
67 | "train_loader = torch.utils.data.DataLoader(dataset = train_dataset,\n",
68 | " batch_size = BATCH_SIZE,\n",
69 | " shuffle = True)\n",
70 | "\n",
71 | "test_loader = torch.utils.data.DataLoader(dataset = test_dataset,\n",
72 | " batch_size = BATCH_SIZE,\n",
73 | " shuffle = False)"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 5,
79 | "metadata": {},
80 | "outputs": [
81 | {
82 | "name": "stdout",
83 | "output_type": "stream",
84 | "text": [
85 | "X_train: torch.Size([32, 1, 28, 28]) type: torch.FloatTensor\n",
86 | "y_train: torch.Size([32]) type: torch.LongTensor\n"
87 | ]
88 | }
89 | ],
90 | "source": [
91 | "''' 4. 데이터 확인하기 (1) '''\n",
92 | "for (X_train, y_train) in train_loader:  # inspect the first mini-batch only\n",
93 | "    print('X_train:', X_train.size(), 'type:', X_train.type())\n",
94 | "    print('y_train:', y_train.size(), 'type:', y_train.type())\n",
95 | "    break  # X_train / y_train stay bound to this batch for later cells"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 6,
101 | "metadata": {},
102 | "outputs": [
103 | {
104 | "data": {
105 | "image/png": "\n",
106 | "text/plain": [
107 | ""
108 | ]
109 | },
110 | "metadata": {
111 | "needs_background": "light"
112 | },
113 | "output_type": "display_data"
114 | }
115 | ],
116 | "source": [
117 | "''' 5. 데이터 확인하기 (2) '''\n",
118 | "pltsize = 1  # scale factor for the figure size\n",
119 | "plt.figure(figsize=(10 * pltsize, pltsize))\n",
120 | "for i in range(10):  # show the first 10 images of the batch bound to X_train / y_train\n",
121 | "    plt.subplot(1, 10, i + 1)\n",
122 | "    plt.axis('off')\n",
123 | "    plt.imshow(X_train[i, :, :, :].numpy().reshape(28, 28), cmap = \"gray_r\")\n",
124 | "    plt.title('Class: ' + str(y_train[i].item()))"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 7,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "''' 6. Multi Layer Perceptron (MLP) 모델 설계하기 '''\n",
134 | "class Net(nn.Module):  # MLP 784 -> 512 -> 256 -> 10 with sigmoid activations and dropout\n",
135 | "    def __init__(self):\n",
136 | "        super(Net, self).__init__()\n",
137 | "        self.fc1 = nn.Linear(28 * 28, 512)\n",
138 | "        self.fc2 = nn.Linear(512, 256)\n",
139 | "        self.fc3 = nn.Linear(256, 10)\n",
140 | "        self.dropout_prob = 0.5  # drop probability applied after each hidden layer\n",
141 | "\n",
142 | "    def forward(self, x):  # returns per-class log-probabilities, one row per input sample\n",
143 | "        x = x.view(-1, 28 * 28)  # flatten every sample to a 784-vector\n",
144 | "        x = self.fc1(x)\n",
145 | "        x = torch.sigmoid(x)  # F.sigmoid is deprecated and emits a UserWarning\n",
146 | "        x = F.dropout(x, training = self.training, p = self.dropout_prob)  # no-op in eval mode\n",
147 | "        x = self.fc2(x)\n",
148 | "        x = torch.sigmoid(x)\n",
149 | "        x = F.dropout(x, training = self.training, p = self.dropout_prob)\n",
150 | "        x = self.fc3(x)\n",
151 | "        x = F.log_softmax(x, dim = 1)  # normalise over the 10 class scores\n",
152 | "        return x"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 8,
158 | "metadata": {},
159 | "outputs": [
160 | {
161 | "name": "stdout",
162 | "output_type": "stream",
163 | "text": [
164 | "Net(\n",
165 | " (fc1): Linear(in_features=784, out_features=512, bias=True)\n",
166 | " (fc2): Linear(in_features=512, out_features=256, bias=True)\n",
167 | " (fc3): Linear(in_features=256, out_features=10, bias=True)\n",
168 | ")\n"
169 | ]
170 | }
171 | ],
172 | "source": [
173 | "''' 7. Optimizer, Objective Function 설정하기 '''\n",
174 | "model = Net().to(DEVICE)\n",
175 | "optimizer = torch.optim.SGD(model.parameters(), lr = 0.01, momentum = 0.5)\n",
176 | "criterion = nn.CrossEntropyLoss()\n",
177 | "\n",
178 | "print(model)"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 9,
184 | "metadata": {},
185 | "outputs": [],
186 | "source": [
187 | "''' 8. MLP 모델 학습을 진행하며 학습 데이터에 대한 모델 성능을 확인하는 함수 정의 '''\n",
188 | "def train(model, train_loader, optimizer, log_interval):  # one optimisation pass over train_loader; logs every log_interval batches\n",
189 | "    model.train()  # switch the model to training mode\n",
190 | "    for batch_idx, (image, label) in enumerate(train_loader):\n",
191 | "        image = image.to(DEVICE)  # DEVICE is a notebook-level global\n",
192 | "        label = label.to(DEVICE)\n",
193 | "        optimizer.zero_grad()  # clear gradients left over from the previous step\n",
194 | "        output = model(image)\n",
195 | "        loss = criterion(output, label)  # criterion is a notebook-level global\n",
196 | "        loss.backward()\n",
197 | "        optimizer.step()\n",
198 | "\n",
199 | "        if batch_idx % log_interval == 0:\n",
200 | "            print(\"Train Epoch: {} [{}/{} ({:.0f}%)]\\tTrain Loss: {:.6f}\".format(\n",
201 | "                epoch, batch_idx * len(image),  # epoch is read from the caller's notebook-level loop variable\n",
202 | "                len(train_loader.dataset), 100. * batch_idx / len(train_loader), \n",
203 | "                loss.item()))"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": 10,
209 | "metadata": {},
210 | "outputs": [],
211 | "source": [
212 | "''' 9. 학습되는 과정 속에서 검증 데이터에 대한 모델 성능을 확인하는 함수 정의 '''\n",
213 | "def evaluate(model, test_loader):  # returns (mean per-batch loss, accuracy in percent) over test_loader\n",
214 | "    model.eval()  # switch the model to evaluation mode\n",
215 | "    test_loss = 0\n",
216 | "    correct = 0\n",
217 | "\n",
218 | "    with torch.no_grad():  # no gradients are needed for evaluation\n",
219 | "        for image, label in test_loader:\n",
220 | "            image = image.to(DEVICE)  # DEVICE is a notebook-level global\n",
221 | "            label = label.to(DEVICE)\n",
222 | "            output = model(image)\n",
223 | "            test_loss += criterion(output, label).item()  # criterion is a notebook-level global\n",
224 | "            prediction = output.max(1, keepdim = True)[1]  # index of the highest-scoring class\n",
225 | "            correct += prediction.eq(label.view_as(prediction)).sum().item()\n",
226 | "\n",
227 | "    test_loss /= len(test_loader)  # actual batch count; len(dataset) / BATCH_SIZE is fractional and depends on a global\n",
228 | "    test_accuracy = 100. * correct / len(test_loader.dataset)\n",
229 | "    return test_loss, test_accuracy"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 11,
235 | "metadata": {},
236 | "outputs": [
237 | {
238 | "name": "stderr",
239 | "output_type": "stream",
240 | "text": [
241 | "c:\\users\\justin\\101\\lib\\site-packages\\torch\\nn\\functional.py:1625: UserWarning: nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.\n",
242 | " warnings.warn(\"nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.\")\n"
243 | ]
244 | },
245 | {
246 | "name": "stdout",
247 | "output_type": "stream",
248 | "text": [
249 | "Train Epoch: 1 [0/60000 (0%)]\tTrain Loss: 2.465242\n",
250 | "Train Epoch: 1 [6400/60000 (11%)]\tTrain Loss: 2.306811\n",
251 | "Train Epoch: 1 [12800/60000 (21%)]\tTrain Loss: 2.269554\n",
252 | "Train Epoch: 1 [19200/60000 (32%)]\tTrain Loss: 2.332778\n",
253 | "Train Epoch: 1 [25600/60000 (43%)]\tTrain Loss: 2.253882\n",
254 | "Train Epoch: 1 [32000/60000 (53%)]\tTrain Loss: 2.392977\n",
255 | "Train Epoch: 1 [38400/60000 (64%)]\tTrain Loss: 2.271408\n",
256 | "Train Epoch: 1 [44800/60000 (75%)]\tTrain Loss: 2.266852\n",
257 | "Train Epoch: 1 [51200/60000 (85%)]\tTrain Loss: 2.284899\n",
258 | "Train Epoch: 1 [57600/60000 (96%)]\tTrain Loss: 2.321651\n",
259 | "\n",
260 | "[EPOCH: 1], \tTest Loss: 0.0714, \tTest Accuracy: 18.03 % \n",
261 | "\n",
262 | "Train Epoch: 2 [0/60000 (0%)]\tTrain Loss: 2.262630\n",
263 | "Train Epoch: 2 [6400/60000 (11%)]\tTrain Loss: 2.257258\n",
264 | "Train Epoch: 2 [12800/60000 (21%)]\tTrain Loss: 2.291954\n",
265 | "Train Epoch: 2 [19200/60000 (32%)]\tTrain Loss: 2.280940\n",
266 | "Train Epoch: 2 [25600/60000 (43%)]\tTrain Loss: 2.270547\n",
267 | "Train Epoch: 2 [32000/60000 (53%)]\tTrain Loss: 2.244217\n",
268 | "Train Epoch: 2 [38400/60000 (64%)]\tTrain Loss: 2.277828\n",
269 | "Train Epoch: 2 [44800/60000 (75%)]\tTrain Loss: 2.204417\n",
270 | "Train Epoch: 2 [51200/60000 (85%)]\tTrain Loss: 2.264808\n",
271 | "Train Epoch: 2 [57600/60000 (96%)]\tTrain Loss: 2.091909\n",
272 | "\n",
273 | "[EPOCH: 2], \tTest Loss: 0.0646, \tTest Accuracy: 39.75 % \n",
274 | "\n",
275 | "Train Epoch: 3 [0/60000 (0%)]\tTrain Loss: 2.206783\n",
276 | "Train Epoch: 3 [6400/60000 (11%)]\tTrain Loss: 2.064043\n",
277 | "Train Epoch: 3 [12800/60000 (21%)]\tTrain Loss: 1.923739\n",
278 | "Train Epoch: 3 [19200/60000 (32%)]\tTrain Loss: 1.672903\n",
279 | "Train Epoch: 3 [25600/60000 (43%)]\tTrain Loss: 1.902599\n",
280 | "Train Epoch: 3 [32000/60000 (53%)]\tTrain Loss: 1.836077\n",
281 | "Train Epoch: 3 [38400/60000 (64%)]\tTrain Loss: 1.688588\n",
282 | "Train Epoch: 3 [44800/60000 (75%)]\tTrain Loss: 1.541492\n",
283 | "Train Epoch: 3 [51200/60000 (85%)]\tTrain Loss: 1.430701\n",
284 | "Train Epoch: 3 [57600/60000 (96%)]\tTrain Loss: 1.311218\n",
285 | "\n",
286 | "[EPOCH: 3], \tTest Loss: 0.0386, \tTest Accuracy: 61.52 % \n",
287 | "\n",
288 | "Train Epoch: 4 [0/60000 (0%)]\tTrain Loss: 1.305975\n",
289 | "Train Epoch: 4 [6400/60000 (11%)]\tTrain Loss: 1.575562\n",
290 | "Train Epoch: 4 [12800/60000 (21%)]\tTrain Loss: 1.077880\n",
291 | "Train Epoch: 4 [19200/60000 (32%)]\tTrain Loss: 1.361190\n",
292 | "Train Epoch: 4 [25600/60000 (43%)]\tTrain Loss: 1.040486\n",
293 | "Train Epoch: 4 [32000/60000 (53%)]\tTrain Loss: 1.046731\n",
294 | "Train Epoch: 4 [38400/60000 (64%)]\tTrain Loss: 0.939643\n",
295 | "Train Epoch: 4 [44800/60000 (75%)]\tTrain Loss: 1.149699\n",
296 | "Train Epoch: 4 [51200/60000 (85%)]\tTrain Loss: 1.164859\n",
297 | "Train Epoch: 4 [57600/60000 (96%)]\tTrain Loss: 0.875005\n",
298 | "\n",
299 | "[EPOCH: 4], \tTest Loss: 0.0277, \tTest Accuracy: 71.57 % \n",
300 | "\n",
301 | "Train Epoch: 5 [0/60000 (0%)]\tTrain Loss: 1.081894\n",
302 | "Train Epoch: 5 [6400/60000 (11%)]\tTrain Loss: 1.370590\n",
303 | "Train Epoch: 5 [12800/60000 (21%)]\tTrain Loss: 1.087004\n",
304 | "Train Epoch: 5 [19200/60000 (32%)]\tTrain Loss: 1.052157\n",
305 | "Train Epoch: 5 [25600/60000 (43%)]\tTrain Loss: 1.223841\n",
306 | "Train Epoch: 5 [32000/60000 (53%)]\tTrain Loss: 0.740738\n",
307 | "Train Epoch: 5 [38400/60000 (64%)]\tTrain Loss: 0.943647\n",
308 | "Train Epoch: 5 [44800/60000 (75%)]\tTrain Loss: 0.790824\n",
309 | "Train Epoch: 5 [51200/60000 (85%)]\tTrain Loss: 0.729670\n",
310 | "Train Epoch: 5 [57600/60000 (96%)]\tTrain Loss: 0.912352\n",
311 | "\n",
312 | "[EPOCH: 5], \tTest Loss: 0.0237, \tTest Accuracy: 75.84 % \n",
313 | "\n",
314 | "Train Epoch: 6 [0/60000 (0%)]\tTrain Loss: 0.933268\n",
315 | "Train Epoch: 6 [6400/60000 (11%)]\tTrain Loss: 0.947774\n",
316 | "Train Epoch: 6 [12800/60000 (21%)]\tTrain Loss: 0.760683\n",
317 | "Train Epoch: 6 [19200/60000 (32%)]\tTrain Loss: 0.668853\n",
318 | "Train Epoch: 6 [25600/60000 (43%)]\tTrain Loss: 0.722454\n",
319 | "Train Epoch: 6 [32000/60000 (53%)]\tTrain Loss: 0.677378\n",
320 | "Train Epoch: 6 [38400/60000 (64%)]\tTrain Loss: 0.445301\n",
321 | "Train Epoch: 6 [44800/60000 (75%)]\tTrain Loss: 0.844144\n",
322 | "Train Epoch: 6 [51200/60000 (85%)]\tTrain Loss: 0.728118\n",
323 | "Train Epoch: 6 [57600/60000 (96%)]\tTrain Loss: 0.880896\n",
324 | "\n",
325 | "[EPOCH: 6], \tTest Loss: 0.0206, \tTest Accuracy: 80.45 % \n",
326 | "\n",
327 | "Train Epoch: 7 [0/60000 (0%)]\tTrain Loss: 0.751742\n",
328 | "Train Epoch: 7 [6400/60000 (11%)]\tTrain Loss: 0.687919\n",
329 | "Train Epoch: 7 [12800/60000 (21%)]\tTrain Loss: 0.826463\n",
330 | "Train Epoch: 7 [19200/60000 (32%)]\tTrain Loss: 0.584632\n",
331 | "Train Epoch: 7 [25600/60000 (43%)]\tTrain Loss: 0.752852\n",
332 | "Train Epoch: 7 [32000/60000 (53%)]\tTrain Loss: 0.558399\n",
333 | "Train Epoch: 7 [38400/60000 (64%)]\tTrain Loss: 0.810356\n",
334 | "Train Epoch: 7 [44800/60000 (75%)]\tTrain Loss: 0.726763\n",
335 | "Train Epoch: 7 [51200/60000 (85%)]\tTrain Loss: 0.547987\n",
336 | "Train Epoch: 7 [57600/60000 (96%)]\tTrain Loss: 0.431549\n",
337 | "\n",
338 | "[EPOCH: 7], \tTest Loss: 0.0181, \tTest Accuracy: 82.70 % \n",
339 | "\n",
340 | "Train Epoch: 8 [0/60000 (0%)]\tTrain Loss: 0.775636\n",
341 | "Train Epoch: 8 [6400/60000 (11%)]\tTrain Loss: 0.660315\n",
342 | "Train Epoch: 8 [12800/60000 (21%)]\tTrain Loss: 0.631106\n",
343 | "Train Epoch: 8 [19200/60000 (32%)]\tTrain Loss: 0.603776\n",
344 | "Train Epoch: 8 [25600/60000 (43%)]\tTrain Loss: 0.543177\n",
345 | "Train Epoch: 8 [32000/60000 (53%)]\tTrain Loss: 0.548135\n",
346 | "Train Epoch: 8 [38400/60000 (64%)]\tTrain Loss: 0.623748\n",
347 | "Train Epoch: 8 [44800/60000 (75%)]\tTrain Loss: 0.553701\n",
348 | "Train Epoch: 8 [51200/60000 (85%)]\tTrain Loss: 0.534373\n",
349 | "Train Epoch: 8 [57600/60000 (96%)]\tTrain Loss: 0.813260\n",
350 | "\n",
351 | "[EPOCH: 8], \tTest Loss: 0.0161, \tTest Accuracy: 84.64 % \n",
352 | "\n",
353 | "Train Epoch: 9 [0/60000 (0%)]\tTrain Loss: 0.589929\n",
354 | "Train Epoch: 9 [6400/60000 (11%)]\tTrain Loss: 0.443540\n",
355 | "Train Epoch: 9 [12800/60000 (21%)]\tTrain Loss: 0.651524\n",
356 | "Train Epoch: 9 [19200/60000 (32%)]\tTrain Loss: 0.367655\n",
357 | "Train Epoch: 9 [25600/60000 (43%)]\tTrain Loss: 0.863036\n",
358 | "Train Epoch: 9 [32000/60000 (53%)]\tTrain Loss: 0.724021\n",
359 | "Train Epoch: 9 [38400/60000 (64%)]\tTrain Loss: 0.875995\n",
360 | "Train Epoch: 9 [44800/60000 (75%)]\tTrain Loss: 0.445317\n",
361 | "Train Epoch: 9 [51200/60000 (85%)]\tTrain Loss: 0.467477\n",
362 | "Train Epoch: 9 [57600/60000 (96%)]\tTrain Loss: 0.581248\n",
363 | "\n",
364 | "[EPOCH: 9], \tTest Loss: 0.0148, \tTest Accuracy: 85.65 % \n",
365 | "\n",
366 | "Train Epoch: 10 [0/60000 (0%)]\tTrain Loss: 1.041706\n",
367 | "Train Epoch: 10 [6400/60000 (11%)]\tTrain Loss: 0.749753\n",
368 | "Train Epoch: 10 [12800/60000 (21%)]\tTrain Loss: 0.646062\n",
369 | "Train Epoch: 10 [19200/60000 (32%)]\tTrain Loss: 0.527050\n",
370 | "Train Epoch: 10 [25600/60000 (43%)]\tTrain Loss: 0.660469\n",
371 | "Train Epoch: 10 [32000/60000 (53%)]\tTrain Loss: 0.509758\n",
372 | "Train Epoch: 10 [38400/60000 (64%)]\tTrain Loss: 0.559824\n",
373 | "Train Epoch: 10 [44800/60000 (75%)]\tTrain Loss: 0.527527\n",
374 | "Train Epoch: 10 [51200/60000 (85%)]\tTrain Loss: 0.475507\n",
375 | "Train Epoch: 10 [57600/60000 (96%)]\tTrain Loss: 0.651053\n",
376 | "\n",
377 | "[EPOCH: 10], \tTest Loss: 0.0139, \tTest Accuracy: 86.52 % \n",
378 | "\n"
379 | ]
380 | }
381 | ],
382 | "source": [
383 | "''' 10. MLP 학습 실행하며 Train, Test set의 Loss 및 Test set Accuracy 확인하기 '''\n",
384 | "for epoch in range(1, EPOCHS + 1):  # train() reads this loop variable as a global when it logs\n",
385 | "    train(model, train_loader, optimizer, log_interval = 200)\n",
386 | "    test_loss, test_accuracy = evaluate(model, test_loader)\n",
387 | "    print(\"\\n[EPOCH: {}], \\tTest Loss: {:.4f}, \\tTest Accuracy: {:.2f} % \\n\".format(\n",
388 | "        epoch, test_loss, test_accuracy))"
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": null,
394 | "metadata": {},
395 | "outputs": [],
396 | "source": []
397 | }
398 | ],
399 | "metadata": {
400 | "kernelspec": {
401 | "display_name": "Python 3",
402 | "language": "python",
403 | "name": "python3"
404 | },
405 | "language_info": {
406 | "codemirror_mode": {
407 | "name": "ipython",
408 | "version": 3
409 | },
410 | "file_extension": ".py",
411 | "mimetype": "text/x-python",
412 | "name": "python",
413 | "nbconvert_exporter": "python",
414 | "pygments_lexer": "ipython3",
415 | "version": "3.6.8"
416 | }
417 | },
418 | "nbformat": 4,
419 | "nbformat_minor": 4
420 | }
421 |
--------------------------------------------------------------------------------
/3-2_MNIST_MLP_Dropout_ReLU.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "''' 1. Module Import '''\n",
10 | "import numpy as np\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "import torch\n",
14 | "import torch.nn as nn\n",
15 | "import torch.nn.functional as F\n",
16 | "from torchvision import transforms, datasets"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "name": "stdout",
26 | "output_type": "stream",
27 | "text": [
28 | "Using PyTorch version: 1.6.0+cu101 Device: cuda\n"
29 | ]
30 | }
31 | ],
32 | "source": [
33 | "''' 2. 딥러닝 모델을 설계할 때 활용하는 장비 확인 '''\n",
34 | "if torch.cuda.is_available():\n",
35 | " DEVICE = torch.device('cuda')\n",
36 | "else:\n",
37 | " DEVICE = torch.device('cpu')\n",
38 | "print('Using PyTorch version:', torch.__version__, ' Device:', DEVICE)"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 3,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "BATCH_SIZE = 32\n",
48 | "EPOCHS = 10"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 4,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "''' 3. MNIST 데이터 다운로드 (Train set, Test set 분리하기) '''\n",
58 | "train_dataset = datasets.MNIST(root = \"../data/MNIST\",\n",
59 | " train = True,\n",
60 | " download = True,\n",
61 | " transform = transforms.ToTensor())\n",
62 | "\n",
63 | "test_dataset = datasets.MNIST(root = \"../data/MNIST\",\n",
64 | " train = False,\n",
65 | " transform = transforms.ToTensor())\n",
66 | "\n",
67 | "train_loader = torch.utils.data.DataLoader(dataset = train_dataset,\n",
68 | " batch_size = BATCH_SIZE,\n",
69 | " shuffle = True)\n",
70 | "\n",
71 | "test_loader = torch.utils.data.DataLoader(dataset = test_dataset,\n",
72 | " batch_size = BATCH_SIZE,\n",
73 | " shuffle = False)"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 5,
79 | "metadata": {},
80 | "outputs": [
81 | {
82 | "name": "stdout",
83 | "output_type": "stream",
84 | "text": [
85 | "X_train: torch.Size([32, 1, 28, 28]) type: torch.FloatTensor\n",
86 | "y_train: torch.Size([32]) type: torch.LongTensor\n"
87 | ]
88 | }
89 | ],
90 | "source": [
91 | "''' 4. 데이터 확인하기 (1) '''\n",
92 | "for (X_train, y_train) in train_loader:\n",
93 | " print('X_train:', X_train.size(), 'type:', X_train.type())\n",
94 | " print('y_train:', y_train.size(), 'type:', y_train.type())\n",
95 | " break"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 6,
101 | "metadata": {},
102 | "outputs": [
103 | {
104 | "data": {
105 | "image/png": "\n",
106 | "text/plain": [
107 | ""
108 | ]
109 | },
110 | "metadata": {
111 | "needs_background": "light"
112 | },
113 | "output_type": "display_data"
114 | }
115 | ],
116 | "source": [
117 | "''' 5. 데이터 확인하기 (2) '''\n",
118 | "pltsize = 1\n",
119 | "plt.figure(figsize=(10 * pltsize, pltsize))\n",
120 | "for i in range(10):\n",
121 | " plt.subplot(1, 10, i + 1)\n",
122 | " plt.axis('off')\n",
123 | " plt.imshow(X_train[i, :, :, :].numpy().reshape(28, 28), cmap = \"gray_r\")\n",
124 | " plt.title('Class: ' + str(y_train[i].item()))"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 7,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "''' 6. Multi Layer Perceptron (MLP) 모델 설계하기 '''\n",
134 | "class Net(nn.Module):\n",
135 | " def __init__(self):\n",
136 | " super(Net, self).__init__()\n",
137 | " self.fc1 = nn.Linear(28 * 28, 512)\n",
138 | " self.fc2 = nn.Linear(512, 256)\n",
139 | " self.fc3 = nn.Linear(256, 10)\n",
140 | " self.dropout_prob = 0.5\n",
141 | "\n",
142 | " def forward(self, x):\n",
143 | " x = x.view(-1, 28 * 28)\n",
144 | " x = self.fc1(x)\n",
145 | " x = F.relu(x)\n",
146 | " x = F.dropout(x, training = self.training, p = self.dropout_prob)\n",
147 | " x = self.fc2(x)\n",
148 | " x = F.relu(x)\n",
149 | " x = F.dropout(x, training = self.training, p = self.dropout_prob)\n",
150 | " x = self.fc3(x)\n",
151 | " x = F.log_softmax(x, dim = 1)\n",
152 | " return x"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 8,
158 | "metadata": {},
159 | "outputs": [
160 | {
161 | "name": "stdout",
162 | "output_type": "stream",
163 | "text": [
164 | "Net(\n",
165 | " (fc1): Linear(in_features=784, out_features=512, bias=True)\n",
166 | " (fc2): Linear(in_features=512, out_features=256, bias=True)\n",
167 | " (fc3): Linear(in_features=256, out_features=10, bias=True)\n",
168 | ")\n"
169 | ]
170 | }
171 | ],
172 | "source": [
173 | "''' 7. Optimizer, Objective Function 설정하기 '''\n",
174 | "model = Net().to(DEVICE)\n",
175 | "optimizer = torch.optim.SGD(model.parameters(), lr = 0.01, momentum = 0.5)\n",
176 | "criterion = nn.CrossEntropyLoss()\n",
177 | "\n",
178 | "print(model)"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 9,
184 | "metadata": {},
185 | "outputs": [],
186 | "source": [
187 | "''' 8. MLP 모델 학습을 진행하며 학습 데이터에 대한 모델 성능을 확인하는 함수 정의 '''\n",
188 | "def train(model, train_loader, optimizer, log_interval):\n",
189 | " model.train()\n",
190 | " for batch_idx, (image, label) in enumerate(train_loader):\n",
191 | " image = image.to(DEVICE)\n",
192 | " label = label.to(DEVICE)\n",
193 | " optimizer.zero_grad()\n",
194 | " output = model(image)\n",
195 | " loss = criterion(output, label)\n",
196 | " loss.backward()\n",
197 | " optimizer.step()\n",
198 | "\n",
199 | " if batch_idx % log_interval == 0:\n",
200 | " print(\"Train Epoch: {} [{}/{} ({:.0f}%)]\\tTrain Loss: {:.6f}\".format(\n",
201 | " epoch, batch_idx * len(image), \n",
202 | " len(train_loader.dataset), 100. * batch_idx / len(train_loader), \n",
203 | " loss.item()))"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": 10,
209 | "metadata": {},
210 | "outputs": [],
211 | "source": [
212 | "''' 9. 학습되는 과정 속에서 검증 데이터에 대한 모델 성능을 확인하는 함수 정의 '''\n",
213 | "def evaluate(model, test_loader):\n",
214 | " model.eval()\n",
215 | " test_loss = 0\n",
216 | " correct = 0\n",
217 | "\n",
218 | " with torch.no_grad():\n",
219 | " for image, label in test_loader:\n",
220 | " image = image.to(DEVICE)\n",
221 | " label = label.to(DEVICE)\n",
222 | " output = model(image)\n",
223 | " test_loss += criterion(output, label).item()\n",
224 | " prediction = output.max(1, keepdim = True)[1]\n",
225 | " correct += prediction.eq(label.view_as(prediction)).sum().item()\n",
226 | " \n",
227 | " test_loss /= len(test_loader)\n",
228 | " test_accuracy = 100. * correct / len(test_loader.dataset)\n",
229 | " return test_loss, test_accuracy"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 11,
235 | "metadata": {},
236 | "outputs": [
237 | {
238 | "name": "stdout",
239 | "output_type": "stream",
240 | "text": [
241 | "Train Epoch: 1 [0/60000 (0%)]\tTrain Loss: 2.293252\n",
242 | "Train Epoch: 1 [6400/60000 (11%)]\tTrain Loss: 2.103498\n",
243 | "Train Epoch: 1 [12800/60000 (21%)]\tTrain Loss: 1.014423\n",
244 | "Train Epoch: 1 [19200/60000 (32%)]\tTrain Loss: 0.739293\n",
245 | "Train Epoch: 1 [25600/60000 (43%)]\tTrain Loss: 0.824886\n",
246 | "Train Epoch: 1 [32000/60000 (53%)]\tTrain Loss: 0.373551\n",
247 | "Train Epoch: 1 [38400/60000 (64%)]\tTrain Loss: 0.590951\n",
248 | "Train Epoch: 1 [44800/60000 (75%)]\tTrain Loss: 0.609839\n",
249 | "Train Epoch: 1 [51200/60000 (85%)]\tTrain Loss: 0.725992\n",
250 | "Train Epoch: 1 [57600/60000 (96%)]\tTrain Loss: 0.316456\n",
251 | "\n",
252 | "[EPOCH: 1], \tTest Loss: 0.0099, \tTest Accuracy: 91.00 % \n",
253 | "\n",
254 | "Train Epoch: 2 [0/60000 (0%)]\tTrain Loss: 0.434049\n",
255 | "Train Epoch: 2 [6400/60000 (11%)]\tTrain Loss: 0.423664\n",
256 | "Train Epoch: 2 [12800/60000 (21%)]\tTrain Loss: 0.223913\n",
257 | "Train Epoch: 2 [19200/60000 (32%)]\tTrain Loss: 0.185579\n",
258 | "Train Epoch: 2 [25600/60000 (43%)]\tTrain Loss: 0.516311\n",
259 | "Train Epoch: 2 [32000/60000 (53%)]\tTrain Loss: 0.170941\n",
260 | "Train Epoch: 2 [38400/60000 (64%)]\tTrain Loss: 0.679857\n",
261 | "Train Epoch: 2 [44800/60000 (75%)]\tTrain Loss: 0.206488\n",
262 | "Train Epoch: 2 [51200/60000 (85%)]\tTrain Loss: 0.293542\n",
263 | "Train Epoch: 2 [57600/60000 (96%)]\tTrain Loss: 0.532575\n",
264 | "\n",
265 | "[EPOCH: 2], \tTest Loss: 0.0070, \tTest Accuracy: 93.47 % \n",
266 | "\n",
267 | "Train Epoch: 3 [0/60000 (0%)]\tTrain Loss: 0.494590\n",
268 | "Train Epoch: 3 [6400/60000 (11%)]\tTrain Loss: 0.692674\n",
269 | "Train Epoch: 3 [12800/60000 (21%)]\tTrain Loss: 0.141787\n",
270 | "Train Epoch: 3 [19200/60000 (32%)]\tTrain Loss: 0.254909\n",
271 | "Train Epoch: 3 [25600/60000 (43%)]\tTrain Loss: 0.143211\n",
272 | "Train Epoch: 3 [32000/60000 (53%)]\tTrain Loss: 0.290160\n",
273 | "Train Epoch: 3 [38400/60000 (64%)]\tTrain Loss: 0.370776\n",
274 | "Train Epoch: 3 [44800/60000 (75%)]\tTrain Loss: 0.711377\n",
275 | "Train Epoch: 3 [51200/60000 (85%)]\tTrain Loss: 0.091218\n",
276 | "Train Epoch: 3 [57600/60000 (96%)]\tTrain Loss: 0.176025\n",
277 | "\n",
278 | "[EPOCH: 3], \tTest Loss: 0.0054, \tTest Accuracy: 94.96 % \n",
279 | "\n",
280 | "Train Epoch: 4 [0/60000 (0%)]\tTrain Loss: 0.298294\n",
281 | "Train Epoch: 4 [6400/60000 (11%)]\tTrain Loss: 0.155041\n",
282 | "Train Epoch: 4 [12800/60000 (21%)]\tTrain Loss: 0.358912\n",
283 | "Train Epoch: 4 [19200/60000 (32%)]\tTrain Loss: 0.178691\n",
284 | "Train Epoch: 4 [25600/60000 (43%)]\tTrain Loss: 0.085475\n",
285 | "Train Epoch: 4 [32000/60000 (53%)]\tTrain Loss: 0.272500\n",
286 | "Train Epoch: 4 [38400/60000 (64%)]\tTrain Loss: 0.203928\n",
287 | "Train Epoch: 4 [44800/60000 (75%)]\tTrain Loss: 0.455426\n",
288 | "Train Epoch: 4 [51200/60000 (85%)]\tTrain Loss: 0.123246\n",
289 | "Train Epoch: 4 [57600/60000 (96%)]\tTrain Loss: 0.114354\n",
290 | "\n",
291 | "[EPOCH: 4], \tTest Loss: 0.0044, \tTest Accuracy: 95.63 % \n",
292 | "\n",
293 | "Train Epoch: 5 [0/60000 (0%)]\tTrain Loss: 0.124653\n",
294 | "Train Epoch: 5 [6400/60000 (11%)]\tTrain Loss: 0.157264\n",
295 | "Train Epoch: 5 [12800/60000 (21%)]\tTrain Loss: 0.202116\n",
296 | "Train Epoch: 5 [19200/60000 (32%)]\tTrain Loss: 0.063491\n",
297 | "Train Epoch: 5 [25600/60000 (43%)]\tTrain Loss: 0.215543\n",
298 | "Train Epoch: 5 [32000/60000 (53%)]\tTrain Loss: 0.039489\n",
299 | "Train Epoch: 5 [38400/60000 (64%)]\tTrain Loss: 0.062817\n",
300 | "Train Epoch: 5 [44800/60000 (75%)]\tTrain Loss: 0.074638\n",
301 | "Train Epoch: 5 [51200/60000 (85%)]\tTrain Loss: 0.100656\n",
302 | "Train Epoch: 5 [57600/60000 (96%)]\tTrain Loss: 0.263398\n",
303 | "\n",
304 | "[EPOCH: 5], \tTest Loss: 0.0038, \tTest Accuracy: 96.28 % \n",
305 | "\n",
306 | "Train Epoch: 6 [0/60000 (0%)]\tTrain Loss: 0.243492\n",
307 | "Train Epoch: 6 [6400/60000 (11%)]\tTrain Loss: 0.134330\n",
308 | "Train Epoch: 6 [12800/60000 (21%)]\tTrain Loss: 0.132107\n",
309 | "Train Epoch: 6 [19200/60000 (32%)]\tTrain Loss: 0.210380\n",
310 | "Train Epoch: 6 [25600/60000 (43%)]\tTrain Loss: 0.166309\n",
311 | "Train Epoch: 6 [32000/60000 (53%)]\tTrain Loss: 0.154378\n",
312 | "Train Epoch: 6 [38400/60000 (64%)]\tTrain Loss: 0.415324\n",
313 | "Train Epoch: 6 [44800/60000 (75%)]\tTrain Loss: 0.091827\n",
314 | "Train Epoch: 6 [51200/60000 (85%)]\tTrain Loss: 0.088081\n",
315 | "Train Epoch: 6 [57600/60000 (96%)]\tTrain Loss: 0.156756\n",
316 | "\n",
317 | "[EPOCH: 6], \tTest Loss: 0.0035, \tTest Accuracy: 96.58 % \n",
318 | "\n",
319 | "Train Epoch: 7 [0/60000 (0%)]\tTrain Loss: 0.311500\n",
320 | "Train Epoch: 7 [6400/60000 (11%)]\tTrain Loss: 0.153611\n",
321 | "Train Epoch: 7 [12800/60000 (21%)]\tTrain Loss: 0.182327\n",
322 | "Train Epoch: 7 [19200/60000 (32%)]\tTrain Loss: 0.109722\n",
323 | "Train Epoch: 7 [25600/60000 (43%)]\tTrain Loss: 0.357384\n",
324 | "Train Epoch: 7 [32000/60000 (53%)]\tTrain Loss: 0.124124\n",
325 | "Train Epoch: 7 [38400/60000 (64%)]\tTrain Loss: 0.139059\n",
326 | "Train Epoch: 7 [44800/60000 (75%)]\tTrain Loss: 0.094896\n",
327 | "Train Epoch: 7 [51200/60000 (85%)]\tTrain Loss: 0.069713\n",
328 | "Train Epoch: 7 [57600/60000 (96%)]\tTrain Loss: 0.060162\n",
329 | "\n",
330 | "[EPOCH: 7], \tTest Loss: 0.0031, \tTest Accuracy: 97.02 % \n",
331 | "\n",
332 | "Train Epoch: 8 [0/60000 (0%)]\tTrain Loss: 0.219568\n",
333 | "Train Epoch: 8 [6400/60000 (11%)]\tTrain Loss: 0.265314\n",
334 | "Train Epoch: 8 [12800/60000 (21%)]\tTrain Loss: 0.136442\n",
335 | "Train Epoch: 8 [19200/60000 (32%)]\tTrain Loss: 0.182477\n",
336 | "Train Epoch: 8 [25600/60000 (43%)]\tTrain Loss: 0.057788\n",
337 | "Train Epoch: 8 [32000/60000 (53%)]\tTrain Loss: 0.180467\n",
338 | "Train Epoch: 8 [38400/60000 (64%)]\tTrain Loss: 0.163830\n",
339 | "Train Epoch: 8 [44800/60000 (75%)]\tTrain Loss: 0.146127\n",
340 | "Train Epoch: 8 [51200/60000 (85%)]\tTrain Loss: 0.078550\n",
341 | "Train Epoch: 8 [57600/60000 (96%)]\tTrain Loss: 0.366327\n",
342 | "\n",
343 | "[EPOCH: 8], \tTest Loss: 0.0028, \tTest Accuracy: 97.16 % \n",
344 | "\n",
345 | "Train Epoch: 9 [0/60000 (0%)]\tTrain Loss: 0.032364\n",
346 | "Train Epoch: 9 [6400/60000 (11%)]\tTrain Loss: 0.049485\n",
347 | "Train Epoch: 9 [12800/60000 (21%)]\tTrain Loss: 0.083032\n",
348 | "Train Epoch: 9 [19200/60000 (32%)]\tTrain Loss: 0.120306\n",
349 | "Train Epoch: 9 [25600/60000 (43%)]\tTrain Loss: 0.236849\n",
350 | "Train Epoch: 9 [32000/60000 (53%)]\tTrain Loss: 0.034962\n",
351 | "Train Epoch: 9 [38400/60000 (64%)]\tTrain Loss: 0.355975\n",
352 | "Train Epoch: 9 [44800/60000 (75%)]\tTrain Loss: 0.098553\n",
353 | "Train Epoch: 9 [51200/60000 (85%)]\tTrain Loss: 0.739664\n",
354 | "Train Epoch: 9 [57600/60000 (96%)]\tTrain Loss: 0.029625\n",
355 | "\n",
356 | "[EPOCH: 9], \tTest Loss: 0.0027, \tTest Accuracy: 97.21 % \n",
357 | "\n",
358 | "Train Epoch: 10 [0/60000 (0%)]\tTrain Loss: 0.316167\n",
359 | "Train Epoch: 10 [6400/60000 (11%)]\tTrain Loss: 0.184830\n",
360 | "Train Epoch: 10 [12800/60000 (21%)]\tTrain Loss: 0.160652\n",
361 | "Train Epoch: 10 [19200/60000 (32%)]\tTrain Loss: 0.162540\n",
362 | "Train Epoch: 10 [25600/60000 (43%)]\tTrain Loss: 0.044575\n",
363 | "Train Epoch: 10 [32000/60000 (53%)]\tTrain Loss: 0.090206\n",
364 | "Train Epoch: 10 [38400/60000 (64%)]\tTrain Loss: 0.021095\n",
365 | "Train Epoch: 10 [44800/60000 (75%)]\tTrain Loss: 0.275242\n",
366 | "Train Epoch: 10 [51200/60000 (85%)]\tTrain Loss: 0.117553\n",
367 | "Train Epoch: 10 [57600/60000 (96%)]\tTrain Loss: 0.028020\n",
368 | "\n",
369 | "[EPOCH: 10], \tTest Loss: 0.0026, \tTest Accuracy: 97.40 % \n",
370 | "\n"
371 | ]
372 | }
373 | ],
374 | "source": [
375 | "''' 10. MLP 학습 실행하며 Train, Test set의 Loss 및 Test set Accuracy 확인하기 '''\n",
376 | "for epoch in range(1, EPOCHS + 1):\n",
377 | " train(model, train_loader, optimizer, log_interval = 200)\n",
378 | " test_loss, test_accuracy = evaluate(model, test_loader)\n",
379 | " print(\"\\n[EPOCH: {}], \\tTest Loss: {:.4f}, \\tTest Accuracy: {:.2f} % \\n\".format(\n",
380 | " epoch, test_loss, test_accuracy))"
381 | ]
382 | }
383 | ],
384 | "metadata": {
385 | "kernelspec": {
386 | "display_name": "Python 3",
387 | "language": "python",
388 | "name": "python3"
389 | },
390 | "language_info": {
391 | "codemirror_mode": {
392 | "name": "ipython",
393 | "version": 3
394 | },
395 | "file_extension": ".py",
396 | "mimetype": "text/x-python",
397 | "name": "python",
398 | "nbconvert_exporter": "python",
399 | "pygments_lexer": "ipython3",
400 | "version": "3.6.8"
401 | }
402 | },
403 | "nbformat": 4,
404 | "nbformat_minor": 4
405 | }
406 |
--------------------------------------------------------------------------------
/3-3_MNIST_MLP_Dropout_ReLU_BN.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "''' 1. Module Import '''\n",
10 | "import numpy as np\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "import torch\n",
14 | "import torch.nn as nn\n",
15 | "import torch.nn.functional as F\n",
16 | "from torchvision import transforms, datasets"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "name": "stdout",
26 | "output_type": "stream",
27 | "text": [
28 | "Using PyTorch version: 1.6.0+cu101 Device: cuda\n"
29 | ]
30 | }
31 | ],
32 | "source": [
33 | "''' 2. 딥러닝 모델을 설계할 때 활용하는 장비 확인 '''\n",
34 | "if torch.cuda.is_available():\n",
35 | " DEVICE = torch.device('cuda')\n",
36 | "else:\n",
37 | " DEVICE = torch.device('cpu')\n",
38 | "print('Using PyTorch version:', torch.__version__, ' Device:', DEVICE)"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 3,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "BATCH_SIZE = 32\n",
48 | "EPOCHS = 10"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 4,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "''' 3. MNIST 데이터 다운로드 (Train set, Test set 분리하기) '''\n",
58 | "train_dataset = datasets.MNIST(root = \"../data/MNIST\",\n",
59 | " train = True,\n",
60 | " download = True,\n",
61 | " transform = transforms.ToTensor())\n",
62 | "\n",
63 | "test_dataset = datasets.MNIST(root = \"../data/MNIST\",\n",
64 | " train = False,\n",
65 | " transform = transforms.ToTensor())\n",
66 | "\n",
67 | "train_loader = torch.utils.data.DataLoader(dataset = train_dataset,\n",
68 | " batch_size = BATCH_SIZE,\n",
69 | " shuffle = True)\n",
70 | "\n",
71 | "test_loader = torch.utils.data.DataLoader(dataset = test_dataset,\n",
72 | " batch_size = BATCH_SIZE,\n",
73 | " shuffle = False)"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 5,
79 | "metadata": {},
80 | "outputs": [
81 | {
82 | "name": "stdout",
83 | "output_type": "stream",
84 | "text": [
85 | "X_train: torch.Size([32, 1, 28, 28]) type: torch.FloatTensor\n",
86 | "y_train: torch.Size([32]) type: torch.LongTensor\n"
87 | ]
88 | }
89 | ],
90 | "source": [
91 | "''' 4. 데이터 확인하기 (1) '''\n",
92 | "for (X_train, y_train) in train_loader:\n",
93 | " print('X_train:', X_train.size(), 'type:', X_train.type())\n",
94 | " print('y_train:', y_train.size(), 'type:', y_train.type())\n",
95 | " break"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 6,
101 | "metadata": {},
102 | "outputs": [
103 | {
104 | "data": {
105 | "image/png": "\n",
106 | "text/plain": [
107 | ""
108 | ]
109 | },
110 | "metadata": {
111 | "needs_background": "light"
112 | },
113 | "output_type": "display_data"
114 | }
115 | ],
116 | "source": [
117 | "''' 5. 데이터 확인하기 (2) '''\n",
118 | "pltsize = 1\n",
119 | "plt.figure(figsize=(10 * pltsize, pltsize))\n",
120 | "for i in range(10):\n",
121 | " plt.subplot(1, 10, i + 1)\n",
122 | " plt.axis('off')\n",
123 | " plt.imshow(X_train[i, :, :, :].numpy().reshape(28, 28), cmap = \"gray_r\")\n",
124 | " plt.title('Class: ' + str(y_train[i].item()))"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 7,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "''' 6. Multi Layer Perceptron (MLP) 모델 설계하기 '''\n",
134 | "class Net(nn.Module):\n",
135 | " def __init__(self):\n",
136 | " super(Net, self).__init__()\n",
137 | " self.fc1 = nn.Linear(28 * 28, 512)\n",
138 | " self.fc2 = nn.Linear(512, 256)\n",
139 | " self.fc3 = nn.Linear(256, 10)\n",
140 | " self.dropout_prob = 0.5\n",
141 | " self.batch_norm1 = nn.BatchNorm1d(512)\n",
142 | " self.batch_norm2 = nn.BatchNorm1d(256)\n",
143 | "\n",
144 | " def forward(self, x):\n",
145 | " x = x.view(-1, 28 * 28)\n",
146 | " x = self.fc1(x)\n",
147 | " x = self.batch_norm1(x)\n",
148 | " x = F.relu(x)\n",
149 | " x = F.dropout(x, training = self.training, p = self.dropout_prob)\n",
150 | " x = self.fc2(x)\n",
151 | " x = self.batch_norm2(x)\n",
152 | " x = F.relu(x)\n",
153 | " x = F.dropout(x, training = self.training, p = self.dropout_prob)\n",
154 | " x = self.fc3(x)\n",
155 | " x = F.log_softmax(x, dim = 1)\n",
156 | " return x"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": 8,
162 | "metadata": {},
163 | "outputs": [
164 | {
165 | "name": "stdout",
166 | "output_type": "stream",
167 | "text": [
168 | "Net(\n",
169 | " (fc1): Linear(in_features=784, out_features=512, bias=True)\n",
170 | " (fc2): Linear(in_features=512, out_features=256, bias=True)\n",
171 | " (fc3): Linear(in_features=256, out_features=10, bias=True)\n",
172 | " (batch_norm1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
173 | " (batch_norm2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
174 | ")\n"
175 | ]
176 | }
177 | ],
178 | "source": [
179 | "''' 7. Optimizer, Objective Function 설정하기 '''\n",
180 | "model = Net().to(DEVICE)\n",
181 | "optimizer = torch.optim.SGD(model.parameters(), lr = 0.01, momentum = 0.5)\n",
182 | "criterion = nn.CrossEntropyLoss()\n",
183 | "\n",
184 | "print(model)"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": 9,
190 | "metadata": {},
191 | "outputs": [],
192 | "source": [
193 | "''' 8. MLP 모델 학습을 진행하며 학습 데이터에 대한 모델 성능을 확인하는 함수 정의 '''\n",
194 | "def train(model, train_loader, optimizer, log_interval):\n",
195 | " model.train()\n",
196 | " for batch_idx, (image, label) in enumerate(train_loader):\n",
197 | " image = image.to(DEVICE)\n",
198 | " label = label.to(DEVICE)\n",
199 | " optimizer.zero_grad()\n",
200 | " output = model(image)\n",
201 | " loss = criterion(output, label)\n",
202 | " loss.backward()\n",
203 | " optimizer.step()\n",
204 | "\n",
205 | " if batch_idx % log_interval == 0:\n",
206 | " print(\"Train Epoch: {} [{}/{} ({:.0f}%)]\\tTrain Loss: {:.6f}\".format(\n",
207 | " epoch, batch_idx * len(image), \n",
208 | " len(train_loader.dataset), 100. * batch_idx / len(train_loader), \n",
209 | " loss.item()))"
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": 10,
215 | "metadata": {},
216 | "outputs": [],
217 | "source": [
218 | "''' 9. 학습되는 과정 속에서 검증 데이터에 대한 모델 성능을 확인하는 함수 정의 '''\n",
219 | "def evaluate(model, test_loader):\n",
220 | " model.eval()\n",
221 | " test_loss = 0\n",
222 | " correct = 0\n",
223 | "\n",
224 | " with torch.no_grad():\n",
225 | " for image, label in test_loader:\n",
226 | " image = image.to(DEVICE)\n",
227 | " label = label.to(DEVICE)\n",
228 | " output = model(image)\n",
229 | " test_loss += criterion(output, label).item()\n",
230 | " prediction = output.max(1, keepdim = True)[1]\n",
231 | " correct += prediction.eq(label.view_as(prediction)).sum().item()\n",
232 | " \n",
233 | " test_loss /= len(test_loader)\n",
234 | " test_accuracy = 100. * correct / len(test_loader.dataset)\n",
235 | " return test_loss, test_accuracy"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 11,
241 | "metadata": {},
242 | "outputs": [
243 | {
244 | "name": "stdout",
245 | "output_type": "stream",
246 | "text": [
247 | "Train Epoch: 1 [0/60000 (0%)]\tTrain Loss: 2.408397\n",
248 | "Train Epoch: 1 [6400/60000 (11%)]\tTrain Loss: 0.565749\n",
249 | "Train Epoch: 1 [12800/60000 (21%)]\tTrain Loss: 0.529340\n",
250 | "Train Epoch: 1 [19200/60000 (32%)]\tTrain Loss: 0.337603\n",
251 | "Train Epoch: 1 [25600/60000 (43%)]\tTrain Loss: 0.587682\n",
252 | "Train Epoch: 1 [32000/60000 (53%)]\tTrain Loss: 0.371249\n",
253 | "Train Epoch: 1 [38400/60000 (64%)]\tTrain Loss: 0.757439\n",
254 | "Train Epoch: 1 [44800/60000 (75%)]\tTrain Loss: 0.523711\n",
255 | "Train Epoch: 1 [51200/60000 (85%)]\tTrain Loss: 0.288144\n",
256 | "Train Epoch: 1 [57600/60000 (96%)]\tTrain Loss: 0.253793\n",
257 | "\n",
258 | "[EPOCH: 1], \tTest Loss: 0.0048, \tTest Accuracy: 95.26 % \n",
259 | "\n",
260 | "Train Epoch: 2 [0/60000 (0%)]\tTrain Loss: 0.408605\n",
261 | "Train Epoch: 2 [6400/60000 (11%)]\tTrain Loss: 0.290822\n",
262 | "Train Epoch: 2 [12800/60000 (21%)]\tTrain Loss: 0.197596\n",
263 | "Train Epoch: 2 [19200/60000 (32%)]\tTrain Loss: 0.256635\n",
264 | "Train Epoch: 2 [25600/60000 (43%)]\tTrain Loss: 0.263706\n",
265 | "Train Epoch: 2 [32000/60000 (53%)]\tTrain Loss: 0.269775\n",
266 | "Train Epoch: 2 [38400/60000 (64%)]\tTrain Loss: 0.330976\n",
267 | "Train Epoch: 2 [44800/60000 (75%)]\tTrain Loss: 0.063536\n",
268 | "Train Epoch: 2 [51200/60000 (85%)]\tTrain Loss: 0.278092\n",
269 | "Train Epoch: 2 [57600/60000 (96%)]\tTrain Loss: 0.370884\n",
270 | "\n",
271 | "[EPOCH: 2], \tTest Loss: 0.0037, \tTest Accuracy: 96.38 % \n",
272 | "\n",
273 | "Train Epoch: 3 [0/60000 (0%)]\tTrain Loss: 0.473225\n",
274 | "Train Epoch: 3 [6400/60000 (11%)]\tTrain Loss: 0.359397\n",
275 | "Train Epoch: 3 [12800/60000 (21%)]\tTrain Loss: 0.335095\n",
276 | "Train Epoch: 3 [19200/60000 (32%)]\tTrain Loss: 0.061883\n",
277 | "Train Epoch: 3 [25600/60000 (43%)]\tTrain Loss: 0.148324\n",
278 | "Train Epoch: 3 [32000/60000 (53%)]\tTrain Loss: 0.295295\n",
279 | "Train Epoch: 3 [38400/60000 (64%)]\tTrain Loss: 0.143966\n",
280 | "Train Epoch: 3 [44800/60000 (75%)]\tTrain Loss: 0.027375\n",
281 | "Train Epoch: 3 [51200/60000 (85%)]\tTrain Loss: 0.296227\n",
282 | "Train Epoch: 3 [57600/60000 (96%)]\tTrain Loss: 0.146117\n",
283 | "\n",
284 | "[EPOCH: 3], \tTest Loss: 0.0031, \tTest Accuracy: 96.92 % \n",
285 | "\n",
286 | "Train Epoch: 4 [0/60000 (0%)]\tTrain Loss: 0.128434\n",
287 | "Train Epoch: 4 [6400/60000 (11%)]\tTrain Loss: 0.144728\n",
288 | "Train Epoch: 4 [12800/60000 (21%)]\tTrain Loss: 0.251649\n",
289 | "Train Epoch: 4 [19200/60000 (32%)]\tTrain Loss: 0.080718\n",
290 | "Train Epoch: 4 [25600/60000 (43%)]\tTrain Loss: 0.185179\n",
291 | "Train Epoch: 4 [32000/60000 (53%)]\tTrain Loss: 0.388055\n",
292 | "Train Epoch: 4 [38400/60000 (64%)]\tTrain Loss: 0.169363\n",
293 | "Train Epoch: 4 [44800/60000 (75%)]\tTrain Loss: 0.044224\n",
294 | "Train Epoch: 4 [51200/60000 (85%)]\tTrain Loss: 0.206786\n",
295 | "Train Epoch: 4 [57600/60000 (96%)]\tTrain Loss: 0.246572\n",
296 | "\n",
297 | "[EPOCH: 4], \tTest Loss: 0.0028, \tTest Accuracy: 97.29 % \n",
298 | "\n",
299 | "Train Epoch: 5 [0/60000 (0%)]\tTrain Loss: 0.072247\n",
300 | "Train Epoch: 5 [6400/60000 (11%)]\tTrain Loss: 0.243650\n",
301 | "Train Epoch: 5 [12800/60000 (21%)]\tTrain Loss: 0.123755\n",
302 | "Train Epoch: 5 [19200/60000 (32%)]\tTrain Loss: 0.073167\n",
303 | "Train Epoch: 5 [25600/60000 (43%)]\tTrain Loss: 0.055972\n",
304 | "Train Epoch: 5 [32000/60000 (53%)]\tTrain Loss: 0.302183\n",
305 | "Train Epoch: 5 [38400/60000 (64%)]\tTrain Loss: 0.355575\n",
306 | "Train Epoch: 5 [44800/60000 (75%)]\tTrain Loss: 0.080272\n",
307 | "Train Epoch: 5 [51200/60000 (85%)]\tTrain Loss: 0.410138\n",
308 | "Train Epoch: 5 [57600/60000 (96%)]\tTrain Loss: 0.532726\n",
309 | "\n",
310 | "[EPOCH: 5], \tTest Loss: 0.0026, \tTest Accuracy: 97.54 % \n",
311 | "\n",
312 | "Train Epoch: 6 [0/60000 (0%)]\tTrain Loss: 0.082712\n",
313 | "Train Epoch: 6 [6400/60000 (11%)]\tTrain Loss: 0.242383\n",
314 | "Train Epoch: 6 [12800/60000 (21%)]\tTrain Loss: 0.073166\n",
315 | "Train Epoch: 6 [19200/60000 (32%)]\tTrain Loss: 0.065983\n",
316 | "Train Epoch: 6 [25600/60000 (43%)]\tTrain Loss: 0.153423\n",
317 | "Train Epoch: 6 [32000/60000 (53%)]\tTrain Loss: 0.106637\n",
318 | "Train Epoch: 6 [38400/60000 (64%)]\tTrain Loss: 0.181440\n",
319 | "Train Epoch: 6 [44800/60000 (75%)]\tTrain Loss: 0.217991\n",
320 | "Train Epoch: 6 [51200/60000 (85%)]\tTrain Loss: 0.047518\n",
321 | "Train Epoch: 6 [57600/60000 (96%)]\tTrain Loss: 0.152127\n",
322 | "\n",
323 | "[EPOCH: 6], \tTest Loss: 0.0024, \tTest Accuracy: 97.55 % \n",
324 | "\n",
325 | "Train Epoch: 7 [0/60000 (0%)]\tTrain Loss: 0.074586\n",
326 | "Train Epoch: 7 [6400/60000 (11%)]\tTrain Loss: 0.285396\n",
327 | "Train Epoch: 7 [12800/60000 (21%)]\tTrain Loss: 0.101163\n",
328 | "Train Epoch: 7 [19200/60000 (32%)]\tTrain Loss: 0.222260\n",
329 | "Train Epoch: 7 [25600/60000 (43%)]\tTrain Loss: 0.096511\n",
330 | "Train Epoch: 7 [32000/60000 (53%)]\tTrain Loss: 0.184161\n",
331 | "Train Epoch: 7 [38400/60000 (64%)]\tTrain Loss: 0.040291\n",
332 | "Train Epoch: 7 [44800/60000 (75%)]\tTrain Loss: 0.186297\n",
333 | "Train Epoch: 7 [51200/60000 (85%)]\tTrain Loss: 0.321589\n",
334 | "Train Epoch: 7 [57600/60000 (96%)]\tTrain Loss: 0.179110\n",
335 | "\n",
336 | "[EPOCH: 7], \tTest Loss: 0.0022, \tTest Accuracy: 97.74 % \n",
337 | "\n",
338 | "Train Epoch: 8 [0/60000 (0%)]\tTrain Loss: 0.041527\n",
339 | "Train Epoch: 8 [6400/60000 (11%)]\tTrain Loss: 0.089809\n",
340 | "Train Epoch: 8 [12800/60000 (21%)]\tTrain Loss: 0.173585\n",
341 | "Train Epoch: 8 [19200/60000 (32%)]\tTrain Loss: 0.122103\n",
342 | "Train Epoch: 8 [25600/60000 (43%)]\tTrain Loss: 0.102293\n",
343 | "Train Epoch: 8 [32000/60000 (53%)]\tTrain Loss: 0.140654\n",
344 | "Train Epoch: 8 [38400/60000 (64%)]\tTrain Loss: 0.190124\n",
345 | "Train Epoch: 8 [44800/60000 (75%)]\tTrain Loss: 0.364228\n",
346 | "Train Epoch: 8 [51200/60000 (85%)]\tTrain Loss: 0.095618\n",
347 | "Train Epoch: 8 [57600/60000 (96%)]\tTrain Loss: 0.050769\n",
348 | "\n",
349 | "[EPOCH: 8], \tTest Loss: 0.0022, \tTest Accuracy: 97.87 % \n",
350 | "\n",
351 | "Train Epoch: 9 [0/60000 (0%)]\tTrain Loss: 0.289604\n",
352 | "Train Epoch: 9 [6400/60000 (11%)]\tTrain Loss: 0.199143\n",
353 | "Train Epoch: 9 [12800/60000 (21%)]\tTrain Loss: 0.064629\n",
354 | "Train Epoch: 9 [19200/60000 (32%)]\tTrain Loss: 0.038997\n",
355 | "Train Epoch: 9 [25600/60000 (43%)]\tTrain Loss: 0.132849\n",
356 | "Train Epoch: 9 [32000/60000 (53%)]\tTrain Loss: 0.466395\n",
357 | "Train Epoch: 9 [38400/60000 (64%)]\tTrain Loss: 0.095477\n",
358 | "Train Epoch: 9 [44800/60000 (75%)]\tTrain Loss: 0.124449\n",
359 | "Train Epoch: 9 [51200/60000 (85%)]\tTrain Loss: 0.069822\n",
360 | "Train Epoch: 9 [57600/60000 (96%)]\tTrain Loss: 0.073448\n",
361 | "\n",
362 | "[EPOCH: 9], \tTest Loss: 0.0021, \tTest Accuracy: 97.98 % \n",
363 | "\n",
364 | "Train Epoch: 10 [0/60000 (0%)]\tTrain Loss: 0.060481\n",
365 | "Train Epoch: 10 [6400/60000 (11%)]\tTrain Loss: 0.127830\n",
366 | "Train Epoch: 10 [12800/60000 (21%)]\tTrain Loss: 0.053453\n",
367 | "Train Epoch: 10 [19200/60000 (32%)]\tTrain Loss: 0.273952\n",
368 | "Train Epoch: 10 [25600/60000 (43%)]\tTrain Loss: 0.150731\n",
369 | "Train Epoch: 10 [32000/60000 (53%)]\tTrain Loss: 0.040127\n",
370 | "Train Epoch: 10 [38400/60000 (64%)]\tTrain Loss: 0.267848\n",
371 | "Train Epoch: 10 [44800/60000 (75%)]\tTrain Loss: 0.110777\n",
372 | "Train Epoch: 10 [51200/60000 (85%)]\tTrain Loss: 0.167811\n",
373 | "Train Epoch: 10 [57600/60000 (96%)]\tTrain Loss: 0.026505\n",
374 | "\n",
375 | "[EPOCH: 10], \tTest Loss: 0.0021, \tTest Accuracy: 97.92 % \n",
376 | "\n"
377 | ]
378 | }
379 | ],
380 | "source": [
381 | "''' 10. MLP 학습 실행하며 Train, Test set의 Loss 및 Test set Accuracy 확인하기 '''\n",
382 | "for epoch in range(1, EPOCHS + 1):\n",
383 | " train(model, train_loader, optimizer, log_interval = 200)\n",
384 | " test_loss, test_accuracy = evaluate(model, test_loader)\n",
385 | " print(\"\\n[EPOCH: {}], \\tTest Loss: {:.4f}, \\tTest Accuracy: {:.2f} % \\n\".format(\n",
386 | " epoch, test_loss, test_accuracy))"
387 | ]
388 | }
389 | ],
390 | "metadata": {
391 | "kernelspec": {
392 | "display_name": "Python 3",
393 | "language": "python",
394 | "name": "python3"
395 | },
396 | "language_info": {
397 | "codemirror_mode": {
398 | "name": "ipython",
399 | "version": 3
400 | },
401 | "file_extension": ".py",
402 | "mimetype": "text/x-python",
403 | "name": "python",
404 | "nbconvert_exporter": "python",
405 | "pygments_lexer": "ipython3",
406 | "version": "3.6.8"
407 | }
408 | },
409 | "nbformat": 4,
410 | "nbformat_minor": 4
411 | }
412 |
--------------------------------------------------------------------------------