├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── README.md ├── dldemos ├── AdvancedOptimizer │ ├── README.md │ ├── compare_optimizer.py │ ├── main.py │ ├── model.py │ ├── optimizer.py │ └── single_step.py ├── BasicCNN │ ├── README.md │ ├── dataset.py │ ├── np_conv.py │ ├── np_conv_backward.py │ ├── pt_main.py │ └── tf_main.py ├── BasicRNN │ ├── README.md │ ├── constant.py │ ├── main.py │ ├── models.py │ └── read_imdb.py ├── DeepNetwork │ ├── README.md │ ├── dataset.py │ ├── main.py │ └── model.py ├── FourierFeature │ ├── image_mlp.ipynb │ ├── kernel_regression.ipynb │ └── misuzu.png ├── Initialization │ ├── README.md │ ├── main.py │ └── points_classification.py ├── LogisticRegression │ ├── README.md │ └── main.py ├── MulticlassClassification │ ├── README.md │ ├── points_classification.py │ ├── pt_main.py │ └── tf_main.py ├── MyYOLO │ └── load_coco.py ├── PyTorchDistributed │ └── main.py ├── Regularization │ ├── README.md │ ├── main.py │ └── points_classification.py ├── ResNet │ ├── README.md │ └── tf_main.py ├── SentimentAnalysis │ ├── README.md │ ├── glove.py │ ├── main.py │ └── read_imdb.py ├── ShallowNetwork │ ├── README.md │ ├── genereate_points.py │ ├── model.py │ ├── plot_activation_func.py │ └── points_classification.py ├── StyleTransfer │ ├── README.md │ ├── combine_img.py │ ├── copy_img.py │ ├── dancing.jpg │ ├── picasso.jpg │ └── style_transfer.py ├── Transformer │ ├── data_load.py │ ├── model.py │ ├── outdated │ │ ├── dataset.py │ │ ├── dist_train.py │ │ ├── dist_train.sh │ │ ├── preprocess_data.py │ │ ├── test.py │ │ └── train.py │ ├── train.py │ └── translate.py ├── VAE │ ├── README.md │ ├── load_celebA.py │ ├── main.py │ └── model.py ├── VQVAE │ ├── configs.py │ ├── dataset.py │ ├── dist_train_pixelcnn.py │ ├── main.py │ ├── model.py │ └── pixelcnn_model.py ├── attention │ ├── README.md │ ├── dataset.py │ └── main.py ├── ddim │ ├── configs.py │ ├── dataset.py │ ├── ddim.py │ ├── ddpm.py │ ├── dist_sample.py │ ├── dist_train.py │ ├── main.py │ ├── network.py │ └── network_my.py ├── ddpm │ ├── dataset.py │ ├── ddpm.py │ ├── ddpm_simple.py │ ├── main.py │ └── network.py ├── lmdb_loader.py ├── nms │ ├── bboxes.pt │ ├── iou.py │ ├── nms.py │ └── show_bbox.py ├── pixelcnn │ ├── dataset.py │ ├── main.py │ └── model.py └── utils │ ├── __init__.py │ └── function.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | **/*.pyc 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/en/_build/ 69 | docs/zh_cn/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | # custom 109 | .vscode 110 | .idea 111 | *.pkl 112 | *.pkl.json 113 | *.log.json 114 | work_dirs/ 115 | 116 | # Pytorch 117 | *.pth 118 | 119 | # onnx and tensorrt 120 | *.onnx 121 | *.trt 122 | 123 | # local history 124 | .history/** 125 | 126 | # Pytorch Server 127 | *.mar 128 | .DS_Store 129 | 130 | /data/ 131 | /data 132 | data 133 | .vector_cache 134 | 135 | dldemos/*/*.txt 136 | 137 | nohup.out 138 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | known_third_party = PIL,babel,cv2,einops,faker,matplotlib,numpy,pytest,setuptools,tensorflow,torch,torchtext,torchvision 3 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/PyCQA/flake8 3 | rev: 4.0.1 4 | hooks: 5 | - id: flake8 6 | - repo: https://github.com/PyCQA/isort 7 | rev: 5.11.5 8 | hooks: 9 | - id: isort 10 | - repo: https://github.com/pre-commit/mirrors-yapf 11 | rev: v0.32.0 12 | hooks: 13 | - id: yapf 14 | - repo: https://github.com/pre-commit/pre-commit-hooks 15 | rev: v4.2.0 16 | hooks: 17 | - id: trailing-whitespace 18 | - id: check-yaml 19 | - id: end-of-file-fixer 20 | - id: requirements-txt-fixer 21 | - id: double-quote-string-fixer 22 | - id: check-merge-conflict 23 | - id: fix-encoding-pragma 24 | args: ["--remove"] 25 | - id: mixed-line-ending 26 | args: ["--fix=lf"] 27 | - repo: https://github.com/executablebooks/mdformat 28 | rev: 0.7.9 29 | hooks: 30 | - id: mdformat 31 | args: ["--number", "--disable-escape", "link-enclosure"] 32 | additional_dependencies: 33 | - mdformat-openmmlab 34 | - mdformat_frontmatter 35 | - linkify-it-py 36 | - repo: https://github.com/myint/docformatter 37 | rev: v1.4 38 | hooks: 39 | - id: docformatter 40 | args: ["--in-place", "--wrap-descriptions", "79"] 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 1. Clone the git repo: 4 | 5 | ```shell 6 | git clone https://github.com/SingleZombie/DL-Demos.git 7 | ``` 8 | 9 | 2. 
Run the installation command: 10 | 11 | ```shell 12 | python setup.py develop 13 | pip install -r requirements.txt 14 | ``` 15 | 16 | It is recommended to create a directory named `work_dirs` and put temporary results into it. 17 | 18 | # Description 19 | 20 | Demos for deep learning. 21 | 22 | # Project 23 | 24 | ## Andrew Ng Deep Learning Specialization 25 | 26 | 01. Logistic Regression 27 | 02. Shallow Neural Network 28 | 03. Deep Neural Network (MLP) 29 | 04. Parameter Initialization 30 | 05. Regularization 31 | 06. Advanced Optimizer (mini-batch, momentum, Adam) 32 | 07. Multiclass Classification with TensorFlow and PyTorch 33 | 08. NumPy Convolution 2D 34 | 09. Basic CNN 35 | 10. ResNet 36 | 11. NMS 37 | 12. ~~My YOLO model~~ 38 | 13. Letter-level language model with PyTorch 39 | 14. Sentiment analysis using GloVe with PyTorch 40 | 15. Date translation attention model with PyTorch 41 | 16. Transformer cn-en translation with PyTorch 42 | 43 | ## Generative Model 44 | 45 | 1. VAE with PyTorch 46 | 2. DDPM with PyTorch 47 | 3. PixelCNN with PyTorch 48 | 4. VQVAE with PyTorch 49 | 5. DDIM with PyTorch 50 | 51 | ## Others 52 | 53 | 1. Style Transfer with PyTorch 54 | 2. PyTorch DDP Demo 55 | 3. Fourier Feature 56 | -------------------------------------------------------------------------------- /dldemos/AdvancedOptimizer/README.md: -------------------------------------------------------------------------------- 1 | 1. Install the repository 2 | 3 | ```shell 4 | python setup.py develop 5 | ``` 6 | 7 | 2. Download the dataset from https://www.kaggle.com/datasets/fusicfenta/cat-and-dog?resource=download and organize the directory as follows: 8 | 9 | ```plain text 10 | └─data 11 | └─archive 12 | └─dataset 13 | ├─single_prediction 14 | ├─test_set 15 | │ ├─cats 16 | │ └─dogs 17 | └─training_set 18 | ├─cats 19 | └─dogs 20 | ``` 21 | 22 | 3. Modify the path in `main.py`: 23 | 24 | ```Python 25 | train_X, train_Y, dev_X, dev_Y = get_cat_set( 26 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1000) 27 | ``` 28 | 29 | Replace 'dldemos/LogisticRegression/data/archive/dataset' with your path. 30 | 31 | 4. Run `main.py`. 32 | 33 | You can tune the hyper-parameters and try different optimizers.
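For intuition, here is what a single Adam update looks like in plain NumPy; momentum and RMSProp are exactly its two halves. This is an illustrative sketch only: the demo's real implementation lives in `optimizer.py`, and the function name and signature below are not this project's API.

```Python
import numpy as np

def adam_step(param, grad, m, v, t, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
    # illustrative helper, not part of this repository
    m = beta1 * m + (1 - beta1) * grad         # momentum: first-moment estimate
    v = beta2 * v + (1 - beta2) * grad ** 2    # RMSProp: second-moment estimate
    m_hat = m / (1 - beta1 ** t)               # bias correction; t starts at 1
    v_hat = v / (1 - beta2 ** t)
    param = param - lr * m_hat / (np.sqrt(v_hat) + eps)
    return param, m, v
```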
34 | -------------------------------------------------------------------------------- /dldemos/AdvancedOptimizer/compare_optimizer.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | LEN = 10 5 | 6 | result_dict = { 7 | 'batch_size_8': [ 8 | 0.6954, 0.6527, 0.5950, 0.5475, 0.4941, 0.6317, 0.4309, 0.4870, 0.4461, 9 | 0.2928 10 | ], 11 | 'batch_size_64': [ 12 | 0.6910, 0.6447, 0.6133, 0.5756, 0.5590, 0.5224, 0.5478, 0.4379, 0.4241, 13 | 0.3764 14 | ], 15 | 'batch_size_128': [ 16 | 0.6910, 0.6497, 0.6289, 0.6168, 0.5802, 0.5677, 0.5366, 0.5436, 0.5282, 17 | 0.5344 18 | ], 19 | 'batch_size_2000': [ 20 | 0.6966, 0.6840, 0.6770, 0.6780, 0.6675, 0.6572, 0.6605, 0.6482, 0.6719, 21 | 0.6392 22 | ], 23 | 'Momentum_64': [ 24 | 0.6917, 0.6581, 0.6212, 0.5774, 0.5123, 0.4700, 0.4162, 0.3581, 0.3168, 25 | 0.2996 26 | ], 27 | 'RMSProp_64': [ 28 | 0.6924, 0.6519, 0.6381, 0.6209, 0.6043, 0.5895, 0.5747, 0.5635, 0.5491, 29 | 0.5363 30 | ], 31 | 'Adam_64': [ 32 | 0.6781, 0.6150, 0.5801, 0.5466, 0.5163, 0.4881, 0.4617, 0.4365, 0.4154, 33 | 0.3959 34 | ], 35 | 'Adam_64_decay_0.2': [ 36 | 0.6861, 0.6021, 0.5783, 0.5644, 0.5544, 0.5471, 0.5409, 0.5357, 0.5314, 37 | 0.5276 38 | ], 39 | 'Adam_64_decay_0.005': [ 40 | 0.6900, 0.6047, 0.5558, 0.5283, 0.5068, 0.4843, 0.4462, 0.4307, 0.4145, 41 | 0.3974 42 | ] 43 | } 44 | 45 | 46 | def plot_curves(result_keys): 47 | x = np.linspace(0, 90, LEN) 48 | for k in result_keys: 49 | y = result_dict[k] 50 | plt.plot(x, y, label=k) 51 | plt.xlabel('Epoch') 52 | plt.ylabel('Training Cost') 53 | plt.legend() 54 | 55 | plt.show() 56 | 57 | 58 | plot_curves( 59 | ['batch_size_8', 'batch_size_64', 'batch_size_128', 'batch_size_2000']) 60 | plot_curves(['batch_size_64', 'Momentum_64', 'RMSProp_64', 'Adam_64']) 61 | plot_curves(['Adam_64', 'Adam_64_decay_0.2', 'Adam_64_decay_0.005']) 62 | -------------------------------------------------------------------------------- /dldemos/AdvancedOptimizer/main.py: -------------------------------------------------------------------------------- 1 | from dldemos.AdvancedOptimizer.model import DeepNetwork, train 2 | from dldemos.AdvancedOptimizer.optimizer import (Adam, GradientDescent, 3 | Momentum, RMSProp, 4 | get_hyperbola_func) 5 | from dldemos.DeepNetwork.dataset import get_cat_set 6 | 7 | 8 | def main(): 9 | train_X, train_Y, dev_X, dev_Y = get_cat_set( 10 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1000) 11 | n_x = train_X.shape[0] 12 | 13 | # train_X: [224*224*3, 2000] 14 | model = DeepNetwork([n_x, 30, 20, 20, 1], 15 | ['relu', 'relu', 'relu', 'sigmoid']) 16 | 17 | # Comment out all but the optimizer you want to use; as written, the last Adam takes effect 18 | 19 | optimizer = GradientDescent(model.save(), learning_rate=0.001) 20 | optimizer = Momentum(model.save(), learning_rate=0.001, from_scratch=True) 21 | optimizer = RMSProp(model.save(), learning_rate=0.00001, from_scratch=True) 22 | optimizer = Adam(model.save(), learning_rate=0.00001, from_scratch=True) 23 | 24 | lr_scheduler_1 = get_hyperbola_func(0.2) 25 | lr_scheduler_2 = get_hyperbola_func(0.005) 26 | 27 | optimizer = Adam(model.save(), 28 | learning_rate=0.00001, 29 | from_scratch=True, 30 | lr_scheduler=lr_scheduler_1) 31 | 32 | optimizer = Adam(model.save(), 33 | learning_rate=0.00001, 34 | from_scratch=True, 35 | lr_scheduler=lr_scheduler_2) 36 | 37 | train(model, 38 | optimizer, 39 | train_X, 40 | train_Y, 41 | 100, 42 | model_name='model_64', 43 | save_dir='work_dirs',
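          # assumption from the argument names (train() lives in model.py):
          # recover_from resumes from a saved checkpoint when not None;
          # plot_mini_batch toggles plotting of per-mini-batch costs.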
recover_from=None, 45 | batch_size=64, 46 | print_interval=10, 47 | dev_X=dev_X, 48 | dev_Y=dev_Y, 49 | plot_mini_batch=False) 50 | 51 | 52 | if __name__ == '__main__': 53 | main() 54 | -------------------------------------------------------------------------------- /dldemos/AdvancedOptimizer/single_step.py: -------------------------------------------------------------------------------- 1 | from dldemos.DeepNetwork.dataset import get_cat_set 2 | from dldemos.DeepNetwork.model import DeepNetwork, train 3 | 4 | 5 | def main(): 6 | train_X, train_Y, test_X, test_Y = get_cat_set( 7 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1500) 8 | n_x = train_X.shape[0] 9 | model = DeepNetwork([n_x, 30, 30, 20, 20, 1], 10 | ['relu', 'relu', 'relu', 'relu', 'sigmoid']) 11 | train(model, 12 | train_X, 13 | train_Y, 14 | 1, 15 | learning_rate=0.01, 16 | print_interval=10, 17 | test_X=test_X, 18 | test_Y=test_Y) 19 | 20 | 21 | if __name__ == '__main__': 22 | main() 23 | -------------------------------------------------------------------------------- /dldemos/BasicCNN/README.md: -------------------------------------------------------------------------------- 1 | 1. Install the repository 2 | 3 | ```shell 4 | python setup.py develop 5 | ``` 6 | 7 | 2. Download the dataset from https://www.kaggle.com/datasets/fusicfenta/cat-and-dog?resource=download and organize the directory as follows: 8 | 9 | ```plain text 10 | └─data 11 | └─archive 12 | └─dataset 13 | ├─single_prediction 14 | ├─test_set 15 | │ ├─cats 16 | │ └─dogs 17 | └─training_set 18 | ├─cats 19 | └─dogs 20 | ``` 21 | 22 | 3. Modify the path in the "main" scripts: 23 | 24 | ```Python 25 | train_X, train_Y, test_X, test_Y = get_cat_set( 26 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1500) 27 | ``` 28 | 29 | Replace 'dldemos/LogisticRegression/data/archive/dataset' with your path. 30 | 31 | 4. Run `tf_main.py` or `pt_main.py`.
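Before training, you can sanity-check the data pipeline with `get_cat_set` from `dataset.py`; the path below is a placeholder for your own dataset location:

```Python
from dldemos.BasicCNN.dataset import get_cat_set

# placeholder path; point it at your extracted dataset
train_X, train_Y, test_X, test_Y = get_cat_set(
    'data/archive/dataset', train_size=100, test_size=50, format='nhwc')
print(train_X.shape)  # (200, 224, 224, 3): 100 cats + 100 dogs
print(train_Y.shape)  # (200, 1)
```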
32 | 33 | The NumPy implementation of convolution is in `np_conv.py` and `np_conv_backward.py`. -------------------------------------------------------------------------------- /dldemos/BasicCNN/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Tuple 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | 8 | def load_set(data_path: str, cnt: int, img_shape: Tuple[int, int]): 9 | cat_dirs = sorted(os.listdir(os.path.join(data_path, 'cats'))) 10 | dog_dirs = sorted(os.listdir(os.path.join(data_path, 'dogs'))) 11 | images = [] 12 | for i, cat_dir in enumerate(cat_dirs): 13 | if i >= cnt: 14 | break 15 | name = os.path.join(data_path, 'cats', cat_dir) 16 | cat = cv2.imread(name) 17 | images.append(cat) 18 | 19 | for i, dog_dir in enumerate(dog_dirs): 20 | if i >= cnt: 21 | break 22 | name = os.path.join(data_path, 'dogs', dog_dir) 23 | dog = cv2.imread(name) 24 | images.append(dog) 25 | 26 | for i in range(len(images)): 27 | images[i] = cv2.resize(images[i], img_shape) 28 | images[i] = images[i].astype(np.float32) / 255.0 29 | 30 | return np.array(images) 31 | 32 | 33 | def get_cat_set( 34 | data_root: str, 35 | img_shape: Tuple[int, int] = (224, 224), 36 | train_size=1000, 37 | test_size=200, 38 | format='nhwc' 39 | ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: 40 | 41 | train_X = load_set(os.path.join(data_root, 'training_set'), train_size, 42 | img_shape) 43 | test_X = load_set(os.path.join(data_root, 'test_set'), test_size, 44 | img_shape) 45 | 46 | train_Y = np.array([1] * train_size + [0] * train_size) 47 | test_Y = np.array([1] * test_size + [0] * test_size) 48 | 49 | if format == 'nhwc': 50 | return train_X, np.expand_dims(train_Y, 51 | 1), test_X, np.expand_dims(test_Y, 1) 52 | elif format == 'nchw': 53 | train_X = np.transpose(train_X, (0, 3, 1, 2))  # NHWC -> NCHW 54 | test_X = np.transpose(test_X, (0, 3, 1, 2))  # a reshape would scramble pixels 55 | return train_X, np.expand_dims(train_Y, 56 | 1), test_X, np.expand_dims(test_Y, 1) 57 | else: 58 | raise NotImplementedError('Format must be "nhwc" or "nchw". ') 59 | -------------------------------------------------------------------------------- /dldemos/BasicCNN/np_conv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import torch 4 | 5 | 6 | def conv2d(input: np.ndarray, 7 | weight: np.ndarray, 8 | stride: int, 9 | padding: int, 10 | dilation: int, 11 | groups: int, 12 | bias: np.ndarray = None) -> np.ndarray: 13 | """2D Convolution Implemented with NumPy. 14 | 15 | Args: 16 | input (np.ndarray): The input NumPy array of shape (H, W, C). 17 | weight (np.ndarray): The weight NumPy array of shape 18 | (C', F, F, C / groups). 19 | stride (int): Stride for convolution. 20 | padding (int): The count of zeros to pad on both sides. 21 | dilation (int): The space between kernel elements. 22 | groups (int): Split the input to groups. 23 | bias (np.ndarray | None): The bias NumPy array of shape (C'). 24 | Default: None.
25 | 26 | Outputs: 27 | np.ndarray: The output NumPy array of shape (H', W', C') 28 | """ 29 | h_i, w_i, c_i = input.shape 30 | c_o, f, f_2, c_k = weight.shape 31 | 32 | assert (f == f_2) 33 | assert (c_i % groups == 0) 34 | assert (c_o % groups == 0) 35 | assert (c_i // groups == c_k) 36 | if bias is not None: 37 | assert (bias.shape[0] == c_o) 38 | 39 | f_new = f + (f - 1) * (dilation - 1) 40 | weight_new = np.zeros((c_o, f_new, f_new, c_k), dtype=weight.dtype) 41 | for i_c_o in range(c_o): 42 | for i_c_k in range(c_k): 43 | for i_f in range(f): 44 | for j_f in range(f): 45 | i_f_new = i_f * dilation 46 | j_f_new = j_f * dilation 47 | weight_new[i_c_o, i_f_new, j_f_new, i_c_k] = \ 48 | weight[i_c_o, i_f, j_f, i_c_k] 49 | 50 | input_pad = np.pad(input, [(padding, padding), (padding, padding), (0, 0)]) 51 | 52 | def cal_new_sidelngth(sl, s, f, p): 53 | return (sl + 2 * p - f) // s + 1 54 | 55 | h_o = cal_new_sidelngth(h_i, stride, f_new, padding) 56 | w_o = cal_new_sidelngth(w_i, stride, f_new, padding) 57 | 58 | output = np.empty((h_o, w_o, c_o), dtype=input.dtype) 59 | 60 | c_o_per_group = c_o // groups 61 | 62 | for i_h in range(h_o): 63 | for i_w in range(w_o): 64 | for i_c in range(c_o): 65 | i_g = i_c // c_o_per_group 66 | h_lower = i_h * stride 67 | h_upper = i_h * stride + f_new 68 | w_lower = i_w * stride 69 | w_upper = i_w * stride + f_new 70 | c_lower = i_g * c_k 71 | c_upper = (i_g + 1) * c_k 72 | input_slice = input_pad[h_lower:h_upper, w_lower:w_upper, 73 | c_lower:c_upper] 74 | kernel_slice = weight_new[i_c] 75 | output[i_h, i_w, i_c] = np.sum(input_slice * kernel_slice) 76 | if bias is not None: 77 | output[i_h, i_w, i_c] += bias[i_c] 78 | return output 79 | 80 | 81 | @pytest.mark.parametrize('c_i, c_o', [(3, 6), (2, 2)]) 82 | @pytest.mark.parametrize('kernel_size', [3, 5]) 83 | @pytest.mark.parametrize('stride', [1, 2]) 84 | @pytest.mark.parametrize('padding', [0, 1]) 85 | @pytest.mark.parametrize('dilation', [1, 2]) 86 | @pytest.mark.parametrize('groups', ['1', 'all']) 87 | @pytest.mark.parametrize('bias', [False]) 88 | def test_conv(c_i: int, c_o: int, kernel_size: int, stride: int, padding: int, 89 | dilation: int, groups: str, bias: bool): 90 | if groups == '1': 91 | groups = 1 92 | elif groups == 'all': 93 | groups = c_i 94 | 95 | if bias: 96 | bias = np.random.randn(c_o) 97 | torch_bias = torch.from_numpy(bias) 98 | else: 99 | bias = None 100 | torch_bias = None 101 | 102 | input = np.random.randn(20, 20, c_i) 103 | weight = np.random.randn(c_o, kernel_size, kernel_size, c_i // groups) 104 | 105 | torch_input = torch.from_numpy(np.transpose(input, (2, 0, 1))).unsqueeze(0) 106 | torch_weight = torch.from_numpy(np.transpose(weight, (0, 3, 1, 2))) 107 | torch_output = torch.conv2d(torch_input, torch_weight, torch_bias, stride, 108 | padding, dilation, groups).numpy() 109 | torch_output = np.transpose(torch_output.squeeze(0), (1, 2, 0)) 110 | 111 | numpy_output = conv2d(input, weight, stride, padding, dilation, groups, 112 | bias) 113 | 114 | assert np.allclose(torch_output, numpy_output) 115 | -------------------------------------------------------------------------------- /dldemos/BasicCNN/np_conv_backward.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Tuple 2 | 3 | import numpy as np 4 | import pytest 5 | import torch 6 | 7 | 8 | def conv2d_forward(input: np.ndarray, weight: np.ndarray, bias: np.ndarray, 9 | stride: int, padding: int) -> Dict[str, np.ndarray]: 10 | """2D Convolution Forward Implemented with NumPy.
11 | 12 | Args: 13 | input (np.ndarray): The input NumPy array of shape (H, W, C). 14 | weight (np.ndarray): The weight NumPy array of shape 15 | (C', F, F, C). 16 | bias (np.ndarray | None): The bias NumPy array of shape (C'). 17 | Default: None. 18 | stride (int): Stride for convolution. 19 | padding (int): The count of zeros to pad on both sides. 20 | 21 | Outputs: 22 | Dict[str, np.ndarray]: Cached data for backward prop. 23 | """ 24 | h_i, w_i, c_i = input.shape 25 | c_o, f, f_2, c_k = weight.shape 26 | 27 | assert (f == f_2) 28 | assert (c_i == c_k) 29 | assert (bias.shape[0] == c_o) 30 | 31 | input_pad = np.pad(input, [(padding, padding), (padding, padding), (0, 0)]) 32 | 33 | def cal_new_sidelngth(sl, s, f, p): 34 | return (sl + 2 * p - f) // s + 1 35 | 36 | h_o = cal_new_sidelngth(h_i, stride, f, padding) 37 | w_o = cal_new_sidelngth(w_i, stride, f, padding) 38 | 39 | output = np.empty((h_o, w_o, c_o), dtype=input.dtype) 40 | 41 | for i_h in range(h_o): 42 | for i_w in range(w_o): 43 | for i_c in range(c_o): 44 | h_lower = i_h * stride 45 | h_upper = i_h * stride + f 46 | w_lower = i_w * stride 47 | w_upper = i_w * stride + f 48 | input_slice = input_pad[h_lower:h_upper, w_lower:w_upper, :] 49 | kernel_slice = weight[i_c] 50 | output[i_h, i_w, i_c] = np.sum(input_slice * kernel_slice) 51 | output[i_h, i_w, i_c] += bias[i_c] 52 | 53 | cache = dict() 54 | cache['Z'] = output 55 | cache['W'] = weight 56 | cache['b'] = bias 57 | cache['A_prev'] = input 58 | return cache 59 | 60 | 61 | def conv2d_backward(dZ: np.ndarray, cache: Dict[str, np.ndarray], stride: int, 62 | padding: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: 63 | """2D Convolution Backward Implemented with NumPy. 64 | 65 | Args: 66 | dZ: (np.ndarray): The derivative of the output of conv. 67 | cache (Dict[str, np.ndarray]): Record output 'Z', weight 'W', bias 'b' 68 | and input 'A_prev' of forward function. 69 | stride (int): Stride for convolution. 70 | padding (int): The count of zeros to pad on both sides. 71 | 72 | Outputs: 73 | Tuple[np.ndarray, np.ndarray, np.ndarray]: The derivative of W, b, 74 | A_prev. 
75 | """ 76 | W = cache['W'] 77 | b = cache['b'] 78 | A_prev = cache['A_prev'] 79 | dW = np.zeros(W.shape) 80 | db = np.zeros(b.shape) 81 | dA_prev = np.zeros(A_prev.shape) 82 | 83 | _, _, c_i = A_prev.shape 84 | c_o, f, f_2, c_k = W.shape 85 | h_o, w_o, c_o_2 = dZ.shape 86 | 87 | assert (f == f_2) 88 | assert (c_i == c_k) 89 | assert (c_o == c_o_2) 90 | 91 | A_prev_pad = np.pad(A_prev, [(padding, padding), (padding, padding), 92 | (0, 0)]) 93 | dA_prev_pad = np.pad(dA_prev, [(padding, padding), (padding, padding), 94 | (0, 0)]) 95 | 96 | for i_h in range(h_o): 97 | for i_w in range(w_o): 98 | for i_c in range(c_o): 99 | h_lower = i_h * stride 100 | h_upper = i_h * stride + f 101 | w_lower = i_w * stride 102 | w_upper = i_w * stride + f 103 | 104 | input_slice = A_prev_pad[h_lower:h_upper, w_lower:w_upper, :] 105 | # forward 106 | # kernel_slice = W[i_c] 107 | # Z[i_h, i_w, i_c] = np.sum(input_slice * kernel_slice) 108 | # Z[i_h, i_w, i_c] += b[i_c] 109 | 110 | # backward 111 | dW[i_c] += input_slice * dZ[i_h, i_w, i_c] 112 | dA_prev_pad[h_lower:h_upper, 113 | w_lower:w_upper, :] += W[i_c] * dZ[i_h, i_w, i_c] 114 | db[i_c] += dZ[i_h, i_w, i_c] 115 | 116 | if padding > 0: 117 | dA_prev = dA_prev_pad[padding:-padding, padding:-padding, :] 118 | else: 119 | dA_prev = dA_prev_pad 120 | return dW, db, dA_prev 121 | 122 | 123 | @pytest.mark.parametrize('c_i, c_o', [(3, 6), (2, 2)]) 124 | @pytest.mark.parametrize('kernel_size', [3, 5]) 125 | @pytest.mark.parametrize('stride', [1, 2]) 126 | @pytest.mark.parametrize('padding', [0, 1]) 127 | def test_conv(c_i: int, c_o: int, kernel_size: int, stride: int, padding: str): 128 | 129 | # Preprocess 130 | input = np.random.randn(20, 20, c_i) 131 | weight = np.random.randn(c_o, kernel_size, kernel_size, c_i) 132 | bias = np.random.randn(c_o) 133 | 134 | torch_input = torch.from_numpy(np.transpose( 135 | input, (2, 0, 1))).unsqueeze(0).requires_grad_() 136 | torch_weight = torch.from_numpy(np.transpose( 137 | weight, (0, 3, 1, 2))).requires_grad_() 138 | torch_bias = torch.from_numpy(bias).requires_grad_() 139 | 140 | # forward 141 | torch_output_tensor = torch.conv2d(torch_input, torch_weight, torch_bias, 142 | stride, padding) 143 | torch_output = np.transpose( 144 | torch_output_tensor.detach().numpy().squeeze(0), (1, 2, 0)) 145 | 146 | cache = conv2d_forward(input, weight, bias, stride, padding) 147 | numpy_output = cache['Z'] 148 | 149 | assert np.allclose(torch_output, numpy_output) 150 | 151 | # backward 152 | torch_sum = torch.sum(torch_output_tensor) 153 | torch_sum.backward() 154 | torch_dW = np.transpose(torch_weight.grad.numpy(), (0, 2, 3, 1)) 155 | torch_db = torch_bias.grad.numpy() 156 | torch_dA_prev = np.transpose(torch_input.grad.numpy().squeeze(0), 157 | (1, 2, 0)) 158 | 159 | dZ = np.ones(numpy_output.shape) 160 | dW, db, dA_prev = conv2d_backward(dZ, cache, stride, padding) 161 | 162 | assert np.allclose(dW, torch_dW) 163 | assert np.allclose(db, torch_db) 164 | assert np.allclose(dA_prev, torch_dA_prev) 165 | -------------------------------------------------------------------------------- /dldemos/BasicCNN/pt_main.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from dldemos.BasicCNN.dataset import get_cat_set 8 | 9 | 10 | def init_model(device='cpu'): 11 | model = nn.Sequential(nn.Conv2d(3, 16, 11, 3), nn.BatchNorm2d(16), 12 | nn.ReLU(True), nn.MaxPool2d(2, 2), 13 | nn.Conv2d(16, 32, 5), nn.BatchNorm2d(32), 
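                           # shape trace, assuming 224x224 inputs:
                           # conv 11x11/s3 -> 72x72x16, pool -> 36x36x16,
                           # conv 5x5 -> 32x32x32, pool -> 16x16x32,
                           # conv 3x3 pad 1 -> 16x16x64, conv 3x3 -> 14x14x64,
                           # pool -> 7x7x64, flatten -> 3136 = Linear input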
14 | nn.ReLU(True), nn.MaxPool2d(2, 2), 15 | nn.Conv2d(32, 64, 3, padding=1), nn.BatchNorm2d(64), 16 | nn.ReLU(True), nn.Conv2d(64, 64, 3), 17 | nn.BatchNorm2d(64), nn.ReLU(True), 18 | nn.MaxPool2d(2, 2), nn.Flatten(), 19 | nn.Linear(3136, 2048), nn.ReLU(True), 20 | nn.Linear(2048, 1), nn.Sigmoid()).to(device) 21 | 22 | def weights_init(m): 23 | if isinstance(m, nn.Conv2d): 24 | torch.nn.init.xavier_normal_(m.weight) 25 | m.bias.data.fill_(0) 26 | elif isinstance(m, nn.BatchNorm2d): 27 | m.weight.data.normal_(1.0, 0.02) 28 | m.bias.data.fill_(0) 29 | elif isinstance(m, nn.Linear): 30 | torch.nn.init.xavier_normal_(m.weight) 31 | m.bias.data.fill_(0) 32 | 33 | model.apply(weights_init) 34 | 35 | print(model) 36 | return model 37 | 38 | 39 | def train(model: nn.Module, 40 | train_X: np.ndarray, 41 | train_Y: np.ndarray, 42 | optimizer: torch.optim.Optimizer, 43 | loss_fn: nn.Module, 44 | batch_size: int, 45 | num_epoch: int, 46 | device: str = 'cpu'): 47 | m = train_X.shape[0] 48 | indices = np.random.permutation(m) 49 | shuffle_X = train_X[indices, ...] 50 | shuffle_Y = train_Y[indices, ...] 51 | num_mini_batch = math.ceil(m / batch_size) 52 | mini_batch_XYs = [] 53 | for i in range(num_mini_batch): 54 | if i == num_mini_batch - 1: 55 | mini_batch_X = shuffle_X[i * batch_size:, ...] 56 | mini_batch_Y = shuffle_Y[i * batch_size:, ...] 57 | else: 58 | mini_batch_X = shuffle_X[i * batch_size:(i + 1) * batch_size, ...] 59 | mini_batch_Y = shuffle_Y[i * batch_size:(i + 1) * batch_size, ...] 60 | mini_batch_X = torch.from_numpy(mini_batch_X) 61 | mini_batch_Y = torch.from_numpy(mini_batch_Y).float() 62 | mini_batch_XYs.append((mini_batch_X, mini_batch_Y)) 63 | print(f'Num mini-batch: {num_mini_batch}') 64 | 65 | for e in range(num_epoch): 66 | for mini_batch_X, mini_batch_Y in mini_batch_XYs: 67 | mini_batch_X = mini_batch_X.to(device) 68 | mini_batch_Y = mini_batch_Y.to(device) 69 | mini_batch_Y_hat = model(mini_batch_X) 70 | loss: torch.Tensor = loss_fn(mini_batch_Y_hat, mini_batch_Y) 71 | 72 | optimizer.zero_grad() 73 | loss.backward() 74 | optimizer.step() 75 | 76 | print(f'Epoch {e}. 
loss: {loss}') 77 | 78 | 79 | def evaluate(model: nn.Module, 80 | test_X: np.ndarray, 81 | test_Y: np.ndarray, 82 | device='cpu'): 83 | test_X = torch.from_numpy(test_X).to(device) 84 | test_Y = torch.from_numpy(test_Y).to(device) 85 | test_Y_hat = model(test_X) 86 | predicts = torch.where(test_Y_hat > 0.5, 1, 0) 87 | score = torch.where(predicts == test_Y, 1.0, 0.0) 88 | acc = torch.mean(score) 89 | print(f'Accuracy: {acc}') 90 | 91 | 92 | def main(): 93 | train_X, train_Y, test_X, test_Y = get_cat_set( 94 | 'dldemos/LogisticRegression/data/archive/dataset', 95 | train_size=1500, 96 | format='nchw') 97 | print(train_X.shape) # (m, 3, 224, 224) 98 | print(train_Y.shape) # (m, 1) 99 | 100 | device = 'cuda:0' 101 | num_epoch = 20 102 | batch_size = 16 103 | model = init_model(device) 104 | optimizer = torch.optim.Adam(model.parameters(), 5e-4) 105 | loss_fn = torch.nn.BCELoss() 106 | train(model, train_X, train_Y, optimizer, loss_fn, batch_size, num_epoch, 107 | device) 108 | evaluate(model, test_X, test_Y, device) 109 | 110 | 111 | if __name__ == '__main__': 112 | main() 113 | -------------------------------------------------------------------------------- /dldemos/BasicCNN/tf_main.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from dldemos.BasicCNN.dataset import get_cat_set 4 | 5 | 6 | def init_model(input_shape=(224, 224, 3)): 7 | model = tf.keras.Sequential([ 8 | tf.keras.layers.Conv2D(16, 11, (3, 3), input_shape=input_shape), 9 | tf.keras.layers.BatchNormalization(3), 10 | tf.keras.layers.ReLU(), 11 | tf.keras.layers.MaxPool2D(), 12 | tf.keras.layers.Conv2D(32, 5), 13 | tf.keras.layers.BatchNormalization(3), 14 | tf.keras.layers.ReLU(), 15 | tf.keras.layers.MaxPool2D(), 16 | tf.keras.layers.Conv2D(64, 3, padding='same'), 17 | tf.keras.layers.BatchNormalization(3), 18 | tf.keras.layers.ReLU(), 19 | tf.keras.layers.Conv2D(64, 3), 20 | tf.keras.layers.BatchNormalization(3), 21 | tf.keras.layers.ReLU(), 22 | tf.keras.layers.MaxPool2D(), 23 | tf.keras.layers.Flatten(), 24 | tf.keras.layers.Dense(2048, 'relu'), 25 | tf.keras.layers.Dense(1, 'sigmoid') 26 | ]) 27 | 28 | model.summary() 29 | 30 | return model 31 | 32 | 33 | def main(): 34 | train_X, train_Y, test_X, test_Y = get_cat_set( 35 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1500) 36 | print(train_X.shape) # (m, 224, 224, 3) 37 | print(train_Y.shape) # (m, 1) 38 | 39 | model = init_model() 40 | model.compile(optimizer='adam', 41 | loss='binary_crossentropy', 42 | metrics=['accuracy']) 43 | 44 | model.fit(train_X, train_Y, epochs=20, batch_size=16) 45 | model.evaluate(test_X, test_Y) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /dldemos/BasicRNN/README.md: -------------------------------------------------------------------------------- 1 | 1. Download [IMDb dataset](https://ai.stanford.edu/~amaas/data/sentiment/). 2 | 3 | 2. Modify the directory in `read_imdb`. 4 | 5 | 3. Run `main.py` to train and test the language model. You can: 6 | 7 | - Use `rnn1` or `rnn2` 8 | - Switch the dataset by modifying `is_vocab` parameter of `get_dataloader_and_max_length` 9 | - Tune the hyperparameters 10 | 11 | to do more experiments. 
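Once a model is trained, generating new words is a short loop. A minimal sketch (the checkpoint path is hypothetical; how you save and load weights depends on your `main.py`):

```Python
import torch

from dldemos.BasicRNN.models import RNN2

model = RNN2().to('cuda:0')
# hypothetical checkpoint path; adapt to however main.py saves weights
model.load_state_dict(torch.load('work_dirs/rnn2.pth'))
model.eval()  # disable dropout before sampling
for _ in range(5):
    print(model.sample_word())  # emits letters until a space or 10 letters
```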
12 | -------------------------------------------------------------------------------- /dldemos/BasicRNN/constant.py: -------------------------------------------------------------------------------- 1 | EMBEDDING_LENGTH = 27 2 | LETTER_MAP = {' ': 0} 3 | ENCODING_MAP = [' '] 4 | for i in range(26): 5 | LETTER_MAP[chr(ord('a') + i)] = i + 1 6 | ENCODING_MAP.append(chr(ord('a') + i)) 7 | LETTER_LIST = list(LETTER_MAP.keys()) 8 | -------------------------------------------------------------------------------- /dldemos/BasicRNN/models.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from dldemos.BasicRNN.constant import EMBEDDING_LENGTH, LETTER_LIST, LETTER_MAP 7 | 8 | 9 | class RNN1(nn.Module): 10 | 11 | def __init__(self, hidden_units=32): 12 | super().__init__() 13 | self.hidden_units = hidden_units 14 | self.linear_a = nn.Linear(hidden_units + EMBEDDING_LENGTH, 15 | hidden_units) 16 | self.linear_y = nn.Linear(hidden_units, EMBEDDING_LENGTH) 17 | self.tanh = nn.Tanh() 18 | 19 | def forward(self, word: torch.Tensor): 20 | # word shape: [batch, max_word_length, embedding_length] 21 | batch, Tx = word.shape[0:2] 22 | 23 | # word shape: [max_word_length, batch, embedding_length] 24 | word = torch.transpose(word, 0, 1) 25 | 26 | # output shape: [max_word_length, batch, embedding_length] 27 | output = torch.empty_like(word) 28 | 29 | a = torch.zeros(batch, self.hidden_units, device=word.device) 30 | x = torch.zeros(batch, EMBEDDING_LENGTH, device=word.device) 31 | for i in range(Tx): 32 | next_a = self.tanh(self.linear_a(torch.cat((a, x), 1))) 33 | hat_y = self.linear_y(next_a) 34 | output[i] = hat_y 35 | x = word[i] 36 | a = next_a 37 | 38 | # output shape: [batch, max_word_length, embedding_length] 39 | return torch.transpose(output, 0, 1) 40 | 41 | @torch.no_grad() 42 | def language_model(self, word: torch.Tensor): 43 | # word shape: [batch, max_word_length, embedding_length] 44 | batch, Tx = word.shape[0:2] 45 | 46 | # word shape: [max_word_length, batch, embedding_length] 47 | # word_label shape: [max_word_length, batch] 48 | word = torch.transpose(word, 0, 1) 49 | word_label = torch.argmax(word, 2) 50 | 51 | # output shape: [batch] 52 | output = torch.ones(batch, device=word.device) 53 | 54 | a = torch.zeros(batch, self.hidden_units, device=word.device) 55 | x = torch.zeros(batch, EMBEDDING_LENGTH, device=word.device) 56 | for i in range(Tx): 57 | next_a = self.tanh(self.linear_a(torch.cat((a, x), 1))) 58 | tmp = self.linear_y(next_a) 59 | hat_y = F.softmax(tmp, 1) 60 | probs = hat_y[torch.arange(batch), word_label[i]] 61 | output *= probs 62 | x = word[i] 63 | a = next_a 64 | 65 | return output 66 | 67 | @torch.no_grad() 68 | def sample_word(self, device='cuda:0'): 69 | batch = 1 70 | output = '' 71 | 72 | a = torch.zeros(batch, self.hidden_units, device=device) 73 | x = torch.zeros(batch, EMBEDDING_LENGTH, device=device) 74 | for i in range(10): 75 | next_a = self.tanh(self.linear_a(torch.cat((a, x), 1))) 76 | tmp = self.linear_y(next_a) 77 | hat_y = F.softmax(tmp, 1) 78 | 79 | np_prob = hat_y[0].detach().cpu().numpy() 80 | letter = np.random.choice(LETTER_LIST, p=np_prob) 81 | output += letter 82 | 83 | if letter == ' ': 84 | break 85 | 86 | x = torch.zeros(batch, EMBEDDING_LENGTH, device=device) 87 | x[0][LETTER_MAP[letter]] = 1 88 | a = next_a 89 | 90 | return output 91 | 92 | 93 | class RNN2(torch.nn.Module): 94 | 95 | def __init__(self, 
hidden_units=64, embeding_dim=64, dropout_rate=0.2): 96 | super().__init__() 97 | self.drop = nn.Dropout(dropout_rate) 98 | self.encoder = nn.Embedding(EMBEDDING_LENGTH, embeding_dim) 99 | self.rnn = nn.GRU(embeding_dim, hidden_units, 1, batch_first=True) 100 | self.decoder = torch.nn.Linear(hidden_units, EMBEDDING_LENGTH) 101 | self.hidden_units = hidden_units 102 | 103 | self.init_weights() 104 | 105 | def init_weights(self): 106 | initrange = 0.1 107 | nn.init.uniform_(self.encoder.weight, -initrange, initrange) 108 | nn.init.zeros_(self.decoder.bias) 109 | nn.init.uniform_(self.decoder.weight, -initrange, initrange) 110 | 111 | def forward(self, word: torch.Tensor): 112 | # word shape: [batch, max_word_length] 113 | batch, Tx = word.shape[0:2] 114 | first_letter = word.new_zeros(batch, 1) 115 | x = torch.cat((first_letter, word[:, 0:-1]), 1) 116 | hidden = torch.zeros(1, batch, self.hidden_units, device=word.device) 117 | emb = self.drop(self.encoder(x)) 118 | output, hidden = self.rnn(emb, hidden) 119 | y = self.decoder(output.reshape(batch * Tx, -1)) 120 | 121 | return y.reshape(batch, Tx, -1) 122 | 123 | @torch.no_grad() 124 | def language_model(self, word: torch.Tensor): 125 | batch, Tx = word.shape[0:2] 126 | hat_y = self.forward(word) 127 | hat_y = F.softmax(hat_y, 2) 128 | output = torch.ones(batch, device=word.device) 129 | for i in range(Tx): 130 | probs = hat_y[torch.arange(batch), i, word[:, i]] 131 | output *= probs 132 | 133 | return output 134 | 135 | @torch.no_grad() 136 | def sample_word(self, device='cuda:0'): 137 | batch = 1 138 | output = '' 139 | 140 | hidden = torch.zeros(1, batch, self.hidden_units, device=device) 141 | x = torch.zeros(batch, 1, device=device, dtype=torch.long) 142 | for _ in range(10): 143 | emb = self.drop(self.encoder(x)) 144 | rnn_output, hidden = self.rnn(emb, hidden) 145 | hat_y = self.decoder(rnn_output) 146 | hat_y = F.softmax(hat_y, 2) 147 | 148 | np_prob = hat_y[0, 0].detach().cpu().numpy() 149 | letter = np.random.choice(LETTER_LIST, p=np_prob) 150 | output += letter 151 | 152 | if letter == ' ': 153 | break 154 | 155 | x = torch.zeros(batch, 1, device=device, dtype=torch.long) 156 | x[0] = LETTER_MAP[letter] 157 | 158 | return output 159 | -------------------------------------------------------------------------------- /dldemos/BasicRNN/read_imdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | 5 | def read_imdb(dir='data/aclImdb', split='pos', is_train=True): 6 | subdir = 'train' if is_train else 'test' 7 | dir = os.path.join(dir, subdir, split) 8 | lines = [] 9 | for file in os.listdir(dir): 10 | with open(os.path.join(dir, file), 'rb') as f: 11 | line = f.read().decode('utf-8') 12 | lines.append(line) 13 | return lines 14 | 15 | 16 | def read_imdb_words(dir='data/aclImdb', 17 | split='pos', 18 | is_train=True, 19 | n_files=1000): 20 | subdir = 'train' if is_train else 'test' 21 | dir = os.path.join(dir, subdir, split) 22 | all_str = '' 23 | for file in os.listdir(dir): 24 | if n_files <= 0: 25 | break 26 | with open(os.path.join(dir, file), 'rb') as f: 27 | line = f.read().decode('utf-8') 28 | all_str += line 29 | n_files -= 1 30 | 31 | words = re.sub(u'([^\u0020\u0061-\u007a])', '', all_str.lower()).split(' ') 32 | 33 | return words 34 | 35 | 36 | def read_imdb_vocab(dir='data/aclImdb'): 37 | fn = os.path.join(dir, 'imdb.vocab') 38 | with open(fn, 'rb') as f: 39 | word = f.read().decode('utf-8').replace('\n', ' ') 40 | words = 
re.sub(u'([^\u0020\u0061-\u007a])', '', 41 | word.lower()).split(' ') 42 | filtered_words = [w for w in words if len(w) > 0] 43 | 44 | return filtered_words 45 | 46 | 47 | def main(): 48 | vocab = read_imdb_vocab() 49 | print(vocab[0]) 50 | print(vocab[1]) 51 | 52 | lines = read_imdb() 53 | print('Length of the file:', len(lines)) 54 | print('lines[0]:', lines[0]) 55 | words = read_imdb_words(n_files=100) 56 | print('Length of the words:', len(words)) 57 | for i in range(5): 58 | print(words[i]) 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /dldemos/DeepNetwork/README.md: -------------------------------------------------------------------------------- 1 | 1. Install the repository 2 | 3 | ```shell 4 | python setup.py develop 5 | ``` 6 | 7 | 2. Download the dataset from https://www.kaggle.com/datasets/fusicfenta/cat-and-dog?resource=download and organize the directory as follows: 8 | 9 | ```plain text 10 | └─data 11 | └─archive 12 | └─dataset 13 | ├─single_prediction 14 | ├─test_set 15 | │ ├─cats 16 | │ └─dogs 17 | └─training_set 18 | ├─cats 19 | └─dogs 20 | ``` 21 | 22 | 3. Modify the path in `main.py`: 23 | 24 | ```Python 25 | train_X, train_Y, test_X, test_Y = get_cat_set( 26 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1500) 27 | ``` 28 | 29 | Replace 'dldemos/LogisticRegression/data/archive/dataset' with your path. 30 | 31 | 4. Run `main.py`. (You can open and close `save()` and `load()` using comment) 32 | 33 | You can edit the model hyper-parameters and see what will happen. 34 | -------------------------------------------------------------------------------- /dldemos/DeepNetwork/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Tuple 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | 8 | def load_set(data_path: str, cnt: int, img_shape): 9 | cat_dirs = sorted(os.listdir(os.path.join(data_path, 'cats'))) 10 | dog_dirs = sorted(os.listdir(os.path.join(data_path, 'dogs'))) 11 | images = [] 12 | for i, cat_dir in enumerate(cat_dirs): 13 | if i >= cnt: 14 | break 15 | name = os.path.join(data_path, 'cats', cat_dir) 16 | cat = cv2.imread(name) 17 | images.append(cat) 18 | 19 | for i, dog_dir in enumerate(dog_dirs): 20 | if i >= cnt: 21 | break 22 | name = os.path.join(data_path, 'dogs', dog_dir) 23 | dog = cv2.imread(name) 24 | images.append(dog) 25 | 26 | for i in range(len(images)): 27 | images[i] = cv2.resize(images[i], img_shape) 28 | images[i] = np.reshape(images[i], (-1)) 29 | images[i] = images[i].astype(np.float32) / 255.0 30 | 31 | return np.array(images) 32 | 33 | 34 | def get_cat_set( 35 | data_root: str, 36 | img_shape: Tuple[int, int] = (224, 224), 37 | train_size=1000, 38 | test_size=200, 39 | ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: 40 | 41 | train_X = load_set(os.path.join(data_root, 'training_set'), train_size, 42 | img_shape) 43 | test_X = load_set(os.path.join(data_root, 'test_set'), test_size, 44 | img_shape) 45 | 46 | train_Y = np.array([1] * train_size + [0] * train_size) 47 | test_Y = np.array([1] * test_size + [0] * test_size) 48 | 49 | return train_X.T, np.expand_dims(train_Y, 50 | 0), test_X.T, np.expand_dims(test_Y.T, 0) 51 | -------------------------------------------------------------------------------- /dldemos/DeepNetwork/main.py: -------------------------------------------------------------------------------- 1 | from dldemos.DeepNetwork.dataset 
import get_cat_set 2 | from dldemos.DeepNetwork.model import DeepNetwork, train 3 | 4 | 5 | def main(): 6 | train_X, train_Y, test_X, test_Y = get_cat_set( 7 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1500) 8 | n_x = train_X.shape[0] 9 | model = DeepNetwork([n_x, 30, 30, 20, 20, 1], 10 | ['relu', 'relu', 'relu', 'relu', 'sigmoid']) 11 | model.load('work_dirs/model.npz') 12 | train(model, 13 | train_X, 14 | train_Y, 15 | 500, 16 | learning_rate=0.01, 17 | print_interval=10, 18 | test_X=test_X, 19 | test_Y=test_Y) 20 | model.save('work_dirs/model.npz') 21 | 22 | 23 | if __name__ == '__main__': 24 | main() 25 | -------------------------------------------------------------------------------- /dldemos/DeepNetwork/model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import List 3 | 4 | import numpy as np 5 | 6 | from dldemos.utils import get_activation_de_func, get_activation_func 7 | 8 | 9 | class BaseRegressionModel(metaclass=abc.ABCMeta): 10 | 11 | def __init__(self): 12 | pass 13 | 14 | @abc.abstractmethod 15 | def forward(self, X: np.ndarray, train_mode=True) -> np.ndarray: 16 | pass 17 | 18 | @abc.abstractmethod 19 | def backward(self, Y: np.ndarray) -> np.ndarray: 20 | pass 21 | 22 | @abc.abstractmethod 23 | def gradient_descent(self, learning_rate: float) -> np.ndarray: 24 | pass 25 | 26 | @abc.abstractmethod 27 | def save(self, filename: str): 28 | pass 29 | 30 | @abc.abstractmethod 31 | def load(self, filename: str): 32 | pass 33 | 34 | def loss(self, Y: np.ndarray, Y_hat: np.ndarray) -> np.ndarray: 35 | return np.mean(-(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat))) 36 | 37 | def evaluate(self, X: np.ndarray, Y: np.ndarray, return_loss=False): 38 | Y_hat = self.forward(X, train_mode=False) 39 | Y_hat_predict = np.where(Y_hat > 0.5, 1, 0) 40 | accuracy = np.mean(np.where(Y_hat_predict == Y, 1, 0)) 41 | if return_loss: 42 | loss = self.loss(Y, Y_hat) 43 | return accuracy, loss 44 | else: 45 | return accuracy 46 | 47 | 48 | class DeepNetwork(BaseRegressionModel): 49 | 50 | def __init__(self, neuron_cnt: List[int], activation_func: List[str]): 51 | assert len(neuron_cnt) - 1 == len(activation_func) 52 | self.num_layer = len(neuron_cnt) - 1 53 | self.neuron_cnt = neuron_cnt 54 | self.activation_func = activation_func 55 | self.W: List[np.ndarray] = [] 56 | self.b: List[np.ndarray] = [] 57 | for i in range(self.num_layer): 58 | self.W.append( 59 | np.random.randn(neuron_cnt[i + 1], neuron_cnt[i]) * 0.2) 60 | self.b.append(np.zeros((neuron_cnt[i + 1], 1))) 61 | 62 | self.Z_cache = [None] * self.num_layer 63 | self.A_cache = [None] * (self.num_layer + 1) 64 | self.dW_cache = [None] * self.num_layer 65 | self.db_cache = [None] * self.num_layer 66 | 67 | def forward(self, X, train_mode=True): 68 | if train_mode: 69 | self.m = X.shape[1] 70 | A = X 71 | self.A_cache[0] = A 72 | for i in range(self.num_layer): 73 | Z = np.dot(self.W[i], A) + self.b[i] 74 | A = get_activation_func(self.activation_func[i])(Z) 75 | if train_mode: 76 | self.Z_cache[i] = Z 77 | self.A_cache[i + 1] = A 78 | return A 79 | 80 | def backward(self, Y): 81 | dA = -Y / self.A_cache[-1] + (1 - Y) / (1 - self.A_cache[-1]) 82 | assert (self.m == Y.shape[1]) 83 | 84 | for i in range(self.num_layer - 1, -1, -1): 85 | dZ = dA * get_activation_de_func(self.activation_func[i])( 86 | self.Z_cache[i]) 87 | dW = np.dot(dZ, self.A_cache[i].T) / self.m 88 | db = np.mean(dZ, axis=1, keepdims=True) 89 | dA = np.dot(self.W[i].T, dZ) 90 | 
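            # dA now holds dA_prev for layer i - 1; note it is computed from
            # W[i] before gradient_descent() updates the weights, as required.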
self.dW_cache[i] = dW 91 | self.db_cache[i] = db 92 | 93 | def gradient_descent(self, learning_rate): 94 | for i in range(self.num_layer): 95 | self.W[i] -= learning_rate * self.dW_cache[i] 96 | self.b[i] -= learning_rate * self.db_cache[i] 97 | 98 | def save(self, filename: str): 99 | save_dict = {} 100 | for i in range(len(self.W)): 101 | save_dict['W' + str(i)] = self.W[i] 102 | for i in range(len(self.b)): 103 | save_dict['b' + str(i)] = self.b[i] 104 | np.savez(filename, **save_dict) 105 | 106 | def load(self, filename: str): 107 | params = np.load(filename) 108 | for i in range(len(self.W)): 109 | self.W[i] = params['W' + str(i)] 110 | for i in range(len(self.b)): 111 | self.b[i] = params['b' + str(i)] 112 | 113 | 114 | def train(model: BaseRegressionModel, 115 | X, 116 | Y, 117 | step, 118 | learning_rate, 119 | print_interval=100, 120 | test_X=None, 121 | test_Y=None): 122 | for s in range(step): 123 | Y_hat = model.forward(X) 124 | model.backward(Y) 125 | model.gradient_descent(learning_rate) 126 | if s % print_interval == 0: 127 | loss = model.loss(Y, Y_hat) 128 | print(f'Step: {s}') 129 | print(f'Train loss: {loss}') 130 | if test_X is not None and test_Y is not None: 131 | accuracy, loss = model.evaluate(test_X, 132 | test_Y, 133 | return_loss=True) 134 | print(f'Test loss: {loss}') 135 | print(f'Test accuracy: {accuracy}') 136 | -------------------------------------------------------------------------------- /dldemos/FourierFeature/kernel_regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "\n", 11 | "import numpy as np\n", 12 | "import matplotlib.pyplot as plt" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "def func(x):\n", 22 | " return np.sin(np.pi * x) * (1 - x)\n", 23 | "\n", 24 | "xs = np.linspace(-1, 1, 100)\n", 25 | "ys = func(xs)\n", 26 | "plt.plot(xs, ys)\n", 27 | "plt.show()" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "sample_x = np.linspace(-1, 1, 10)\n", 37 | "sample_y = func(sample_x)\n", 38 | "plt.scatter(sample_x, sample_y)\n", 39 | "plt.show()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "def kernel_func(x_ref, x_input, sigma=1):\n", 49 | " return np.exp(-(x_input-x_ref)**2 / (2 * sigma**2))\n", 50 | "\n", 51 | "xs = np.linspace(-1, 1, 100)\n", 52 | "ys = kernel_func(0, xs)\n", 53 | "plt.plot(xs, ys)\n", 54 | "plt.show()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "def kernel_regression(xs, ys, x_input, sigma=1):\n", 64 | " # xs: [n, ]\n", 65 | " # ys: [n, ]\n", 66 | " # x_input: [m, ]\n", 67 | " N = xs.shape[0]\n", 68 | " xs = np.expand_dims(xs, 1)\n", 69 | " ys = np.expand_dims(ys, 1)\n", 70 | " x_input = np.expand_dims(x_input, 0)\n", 71 | " x_input = np.repeat(x_input, N, 0)\n", 72 | " weight = kernel_func(xs, x_input, sigma) # [n, m]\n", 73 | " weight_sum = np.sum(weight, 0)\n", 74 | " weight_dot = weight.T @ ys\n", 75 | " weight_dot = np.squeeze(weight_dot, 1)\n", 76 | " res = weight_dot / weight_sum\n", 77 | " return res\n", 78 | "\n", 79 | "sigma = 0.5\n", 80 
| "xs = np.linspace(-1, 1, 100)\n", 81 | "ys = kernel_regression(sample_x, sample_y, xs, sigma)\n", 82 | "plt.title(f'sigma = {sigma}')\n", 83 | "plt.plot(xs, ys)\n", 84 | "plt.show()\n" 85 | ] 86 | } 87 | ], 88 | "metadata": { 89 | "kernelspec": { 90 | "display_name": "pt", 91 | "language": "python", 92 | "name": "python3" 93 | }, 94 | "language_info": { 95 | "codemirror_mode": { 96 | "name": "ipython", 97 | "version": 3 98 | }, 99 | "file_extension": ".py", 100 | "mimetype": "text/x-python", 101 | "name": "python", 102 | "nbconvert_exporter": "python", 103 | "pygments_lexer": "ipython3", 104 | "version": "3.7.13" 105 | } 106 | }, 107 | "nbformat": 4, 108 | "nbformat_minor": 2 109 | } 110 | -------------------------------------------------------------------------------- /dldemos/FourierFeature/misuzu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SingleZombie/DL-Demos/739a21ff90f411c318e098823581afb3f8a1d010/dldemos/FourierFeature/misuzu.png -------------------------------------------------------------------------------- /dldemos/Initialization/README.md: -------------------------------------------------------------------------------- 1 | Run the `main.py`: 2 | 3 | ```shell 4 | python dldemos/Initialization/main.py 5 | ``` 6 | -------------------------------------------------------------------------------- /dldemos/Initialization/main.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import List 3 | 4 | import numpy as np 5 | 6 | from dldemos.Initialization.points_classification import (generate_plot_set, 7 | generate_points, 8 | plot_points, 9 | visualize) 10 | from dldemos.utils import get_activation_de_func, get_activation_func, sigmoid 11 | 12 | 13 | class BaseRegressionModel(metaclass=abc.ABCMeta): 14 | 15 | def __init__(self): 16 | pass 17 | 18 | @abc.abstractmethod 19 | def forward(self, X: np.ndarray, train_mode=True) -> np.ndarray: 20 | pass 21 | 22 | @abc.abstractmethod 23 | def backward(self, Y: np.ndarray) -> np.ndarray: 24 | pass 25 | 26 | @abc.abstractmethod 27 | def gradient_descent(self, learning_rate: float) -> np.ndarray: 28 | pass 29 | 30 | def loss(self, Y: np.ndarray, Y_hat: np.ndarray) -> np.ndarray: 31 | return np.mean(-(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat))) 32 | 33 | def evaluate(self, X: np.ndarray, Y: np.ndarray, return_loss=False): 34 | Y_hat = self.forward(X, train_mode=False) 35 | Y_hat_predict = np.where(Y_hat > 0.5, 1, 0) 36 | accuracy = np.mean(np.where(Y_hat_predict == Y, 1, 0)) 37 | if return_loss: 38 | loss = self.loss(Y, Y_hat) 39 | return accuracy, loss 40 | else: 41 | return accuracy 42 | 43 | 44 | class DeepNetwork(BaseRegressionModel): 45 | 46 | def __init__(self, 47 | neuron_cnt: List[int], 48 | activation_func: List[str], 49 | initialization='zeros'): 50 | assert len(neuron_cnt) - 2 == len(activation_func) 51 | self.num_layer = len(neuron_cnt) - 1 52 | self.neuron_cnt = neuron_cnt 53 | self.activation_func = activation_func 54 | self.W: List[np.ndarray] = [] 55 | self.b: List[np.ndarray] = [] 56 | for i in range(self.num_layer): 57 | if initialization == 'zeros': 58 | self.W.append(np.zeros((neuron_cnt[i + 1], neuron_cnt[i]))) 59 | elif initialization == 'random': 60 | self.W.append( 61 | np.random.randn(neuron_cnt[i + 1], neuron_cnt[i]) * 5) 62 | elif initialization == 'he': 63 | self.W.append( 64 | np.random.randn(neuron_cnt[i + 1], neuron_cnt[i]) * 65 | np.sqrt(2 / neuron_cnt[i])) 66 | 
self.b.append(np.zeros((neuron_cnt[i + 1], 1))) 67 | 68 | self.Z_cache = [None] * self.num_layer 69 | self.A_cache = [None] * (self.num_layer + 1) 70 | self.dW_cache = [None] * self.num_layer 71 | self.db_cache = [None] * self.num_layer 72 | 73 | def forward(self, X, train_mode=True): 74 | if train_mode: 75 | self.m = X.shape[1] 76 | A = X 77 | self.A_cache[0] = A 78 | for i in range(self.num_layer): 79 | Z = np.dot(self.W[i], A) + self.b[i] 80 | if i == self.num_layer - 1: 81 | A = sigmoid(Z) 82 | else: 83 | A = get_activation_func(self.activation_func[i])(Z) 84 | if train_mode: 85 | self.Z_cache[i] = Z 86 | self.A_cache[i + 1] = A 87 | return A 88 | 89 | def backward(self, Y): 90 | assert (self.m == Y.shape[1]) 91 | 92 | dA = 0 93 | for i in range(self.num_layer - 1, -1, -1): 94 | if i == self.num_layer - 1: 95 | dZ = self.A_cache[-1] - Y 96 | else: 97 | dZ = dA * get_activation_de_func(self.activation_func[i])( 98 | self.Z_cache[i]) 99 | dW = np.dot(dZ, self.A_cache[i].T) / self.m 100 | db = np.mean(dZ, axis=1, keepdims=True) 101 | dA = np.dot(self.W[i].T, dZ) 102 | self.dW_cache[i] = dW 103 | self.db_cache[i] = db 104 | 105 | def gradient_descent(self, learning_rate): 106 | for i in range(self.num_layer): 107 | self.W[i] -= learning_rate * self.dW_cache[i] 108 | self.b[i] -= learning_rate * self.db_cache[i] 109 | 110 | 111 | def train(model: BaseRegressionModel, 112 | X, 113 | Y, 114 | step, 115 | learning_rate, 116 | print_interval=100, 117 | test_X=None, 118 | test_Y=None): 119 | for s in range(step): 120 | Y_hat = model.forward(X) 121 | model.backward(Y) 122 | model.gradient_descent(learning_rate) 123 | if s % print_interval == 0: 124 | loss = model.loss(Y, Y_hat) 125 | print(f'Step: {s}') 126 | print(f'Train loss: {loss}') 127 | if test_X is not None and test_Y is not None: 128 | accuracy, loss = model.evaluate(test_X, 129 | test_Y, 130 | return_loss=True) 131 | print(f'Test loss: {loss}') 132 | print(f'Test accuracy: {accuracy}') 133 | 134 | 135 | def main(): 136 | train_X, train_Y = generate_points(400) 137 | plot_points(train_X, train_Y) 138 | plot_X = generate_plot_set() 139 | 140 | n_x = train_X.shape[0] 141 | neuron_list = [n_x, 10, 5, 1] 142 | activation_list = ['relu', 'relu'] 143 | model1 = DeepNetwork(neuron_list, activation_list, 'zeros') 144 | model2 = DeepNetwork(neuron_list, activation_list, 'random') 145 | model3 = DeepNetwork(neuron_list, activation_list, 'he') 146 | train(model1, train_X, train_Y, 20000, 0.01, 1000) 147 | train(model2, train_X, train_Y, 20000, 0.01, 1000) 148 | train(model3, train_X, train_Y, 20000, 0.01, 1000) 149 | 150 | plot_result1 = model1.forward(plot_X, False) 151 | plot_result2 = model2.forward(plot_X, False) 152 | plot_result3 = model3.forward(plot_X, False) 153 | 154 | visualize(train_X, train_Y, plot_result1) 155 | visualize(train_X, train_Y, plot_result2) 156 | visualize(train_X, train_Y, plot_result3) 157 | 158 | 159 | if __name__ == '__main__': 160 | main() 161 | -------------------------------------------------------------------------------- /dldemos/Initialization/points_classification.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | 5 | def generate_points(cnt): 6 | 7 | def draw_ring(cnt, inner_radius, outer_radius): 8 | angle_arr = np.random.rand(cnt) * np.pi * 2 9 | length_arr = np.random.rand(cnt) * (outer_radius - 10 | inner_radius) + inner_radius 11 | return length_arr * np.cos(angle_arr), length_arr * np.sin(angle_arr) 12 | 
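    # Note: drawing the radius uniformly makes points slightly denser near the
    # inner edge of each ring (density ~ 1/r), which is fine for this demo.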
13 | red_cnt = cnt // 2 14 | blue_cnt = cnt - red_cnt 15 | 16 | red_x, red_y = draw_ring(red_cnt, 5, 6) 17 | blue_x, blue_y = draw_ring(blue_cnt, 6, 7) 18 | X = np.stack((np.concatenate( 19 | (red_x, blue_x)), np.concatenate((red_y, blue_y))), 1) 20 | Y = np.array([0] * red_cnt + [1] * blue_cnt) 21 | return X.T, Y[..., np.newaxis].T 22 | 23 | 24 | def plot_points(X, Y): 25 | new_X = X.T 26 | Y = np.squeeze(Y, 0) 27 | c = np.where(Y == 0, 'r', 'b') 28 | new_x = new_X[:, 0] 29 | new_y = new_X[:, 1] 30 | plt.scatter(new_x, new_y, color=c) 31 | plt.show() 32 | 33 | 34 | def generate_plot_set(): 35 | x = np.linspace(-10, 10, 100) 36 | y = np.linspace(-10, 10, 100) 37 | xx, yy = np.meshgrid(x, y) 38 | xx = xx.reshape(-1) 39 | yy = yy.reshape(-1) 40 | return np.stack((xx, yy), axis=1).T 41 | 42 | 43 | def visualize(X, Y, plot_set_result: np.ndarray): 44 | x = np.linspace(-10, 10, 100) 45 | y = np.linspace(-10, 10, 100) 46 | xx, yy = np.meshgrid(x, y) 47 | color = plot_set_result.squeeze() 48 | c = np.where(color < 0.5, 'r', 'b') 49 | plt.scatter(xx, yy, c=c, marker='s') 50 | 51 | plt.xlim(-10, 10) 52 | plt.ylim(-10, 10) 53 | 54 | origin_x = X.T[:, 0] 55 | origin_y = X.T[:, 1] 56 | origin_color = np.where(Y.squeeze() < 0.5, '#AA0000', '#0000AA') 57 | 58 | plt.scatter(origin_x, origin_y, c=origin_color) 59 | 60 | plt.show() 61 | 62 | 63 | def main(): 64 | plot_points(*generate_points(400)) 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /dldemos/LogisticRegression/README.md: -------------------------------------------------------------------------------- 1 | 1. Download the dataset from https://www.kaggle.com/datasets/fusicfenta/cat-and-dog?resource=download and organize the directory as follows: 2 | 3 | ```plain text 4 | └─data 5 | └─archive 6 | └─dataset 7 | ├─single_prediction 8 | ├─test_set 9 | │ ├─cats 10 | │ └─dogs 11 | └─training_set 12 | ├─cats 13 | └─dogs 14 | ``` 15 | 16 | 2. Run `python main.py` in the `./LogisticRegression` directory (or modify the data path in `main.py`).
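For reference, the update implemented in `main.py` falls straight out of the binary cross-entropy loss. With $A = \sigma(w^T X + b)$ over a batch of $m$ examples:

$$
L = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log a^{(i)} + (1 - y^{(i)}) \log(1 - a^{(i)}) \right]
$$

$$
dZ = A - Y, \qquad dw = \frac{1}{m} X \, dZ^{T}, \qquad db = \frac{1}{m} \sum_{i} dZ^{(i)}
$$

which is exactly what `train_step` computes before applying $w \leftarrow w - \alpha \, dw$ and $b \leftarrow b - \alpha \, db$.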
17 | -------------------------------------------------------------------------------- /dldemos/LogisticRegression/main.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | from glob import glob 3 | from random import shuffle 4 | 5 | import cv2 6 | import numpy as np 7 | 8 | 9 | def generate_data(dir='data/archive/dataset', input_shape=(224, 224)): 10 | 11 | def load_dataset(dir, data_num): 12 | cat_images = glob(osp.join(dir, 'cats', '*.jpg')) 13 | dog_images = glob(osp.join(dir, 'dogs', '*.jpg')) 14 | cat_tensor = [] 15 | dog_tensor = [] 16 | 17 | for idx, image in enumerate(cat_images): 18 | if idx >= data_num: 19 | break 20 | i = cv2.imread(image) / 255 21 | i = cv2.resize(i, input_shape) 22 | cat_tensor.append(i) 23 | 24 | for idx, image in enumerate(dog_images): 25 | if idx >= data_num: 26 | break 27 | i = cv2.imread(image) / 255 28 | i = cv2.resize(i, input_shape) 29 | dog_tensor.append(i) 30 | 31 | X = cat_tensor + dog_tensor 32 | Y = [1] * len(cat_tensor) + [0] * len(dog_tensor) 33 | X_Y = list(zip(X, Y)) 34 | shuffle(X_Y) 35 | X, Y = zip(*X_Y) 36 | return X, Y 37 | 38 | train_X, train_Y = load_dataset(osp.join(dir, 'training_set'), 400) 39 | test_X, test_Y = load_dataset(osp.join(dir, 'test_set'), 100) 40 | return train_X, train_Y, test_X, test_Y 41 | 42 | 43 | def resize_input(a: np.ndarray): 44 | h, w, c = a.shape 45 | a.resize((h * w * c)) 46 | return a 47 | 48 | 49 | def init_weights(n_x=224 * 224 * 3): 50 | w = np.zeros((n_x, 1)) 51 | b = 0.0 52 | return w, b 53 | 54 | 55 | def sigmoid(x): 56 | return 1 / (1 + np.exp(-x)) 57 | 58 | 59 | def predict(w, b, X): 60 | return sigmoid(np.dot(w.T, X) + b) 61 | 62 | 63 | def loss(y_hat, y): 64 | return np.mean(-(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))) 65 | 66 | 67 | def train_step(w, b, X, Y, lr): 68 | m = X.shape[1] 69 | Z = np.dot(w.T, X) + b 70 | A = sigmoid(Z) 71 | d_Z = A - Y 72 | d_w = np.dot(X, d_Z.T) / m 73 | d_b = np.mean(d_Z) 74 | return w - lr * d_w, b - lr * d_b 75 | 76 | 77 | def train(train_X, train_Y, step=1000, learning_rate=0.00001): 78 | w, b = init_weights() 79 | print(f'learning rate: {learning_rate}') 80 | for i in range(step): 81 | w, b = train_step(w, b, train_X, train_Y, learning_rate) 82 | if i % 10 == 0: 83 | y_hat = predict(w, b, train_X) 84 | ls = loss(y_hat, train_Y) 85 | print(f'step {i} loss: {ls}') 86 | return w, b 87 | 88 | 89 | def test(w, b, test_X, test_Y): 90 | y_hat = predict(w, b, test_X) 91 | predicts = np.where(y_hat > 0.5, 1, 0) 92 | score = np.mean(np.where(predicts == test_Y, 1, 0)) 93 | print(f'Accuracy: {score}') 94 | 95 | 96 | def main(): 97 | train_X, train_Y, test_X, test_Y = generate_data() 98 | 99 | train_X = [resize_input(x) for x in train_X] 100 | test_X = [resize_input(x) for x in test_X] 101 | train_X = np.array(train_X).T 102 | train_Y = np.array(train_Y) 103 | train_Y = train_Y.reshape((1, -1)) 104 | test_X = np.array(test_X).T 105 | test_Y = np.array(test_Y) 106 | test_Y = test_Y.reshape((1, -1)) 107 | print(f'Training set size: {train_X.shape[1]}') 108 | print(f'Test set size: {test_X.shape[1]}') 109 | 110 | w, b = train(train_X, train_Y, learning_rate=0.0002) 111 | 112 | test(w, b, test_X, test_Y) 113 | 114 | 115 | if __name__ == '__main__': 116 | main() 117 | -------------------------------------------------------------------------------- /dldemos/MulticlassClassification/README.md: -------------------------------------------------------------------------------- 1 | 1. 
Install the repository 2 | 3 | ```shell 4 | python setup.py develop 5 | ``` 6 | 7 | 2. Install TensorFlow or PyTorch 8 | 9 | 3. Run `tf_main.py` or `pt_main.py`. 10 | -------------------------------------------------------------------------------- /dldemos/MulticlassClassification/points_classification.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | LLIM = 0 5 | RLIM = 1 6 | 7 | 8 | def generate_points(cnt): 9 | x = np.random.rand(cnt) 10 | y = np.random.rand(cnt) 11 | X = np.stack([x, y], 1) 12 | Y = np.where(y > x * x, np.where(y > x**0.5, 0, 1), 2) 13 | return X.T, Y[..., np.newaxis].T 14 | 15 | 16 | def plot_points(X, Y): 17 | new_X = X.T 18 | Y = np.squeeze(Y, 0) 19 | color_map = np.array(['r', 'g', 'b']) 20 | c = color_map[Y] 21 | new_x = new_X[:, 0] 22 | new_y = new_X[:, 1] 23 | plt.scatter(new_x, new_y, color=c) 24 | plt.show() 25 | 26 | 27 | def generate_plot_set(): 28 | x = np.linspace(LLIM, RLIM, 100) 29 | y = np.linspace(LLIM, RLIM, 100) 30 | xx, yy = np.meshgrid(x, y) 31 | xx = xx.reshape(-1) 32 | yy = yy.reshape(-1) 33 | return np.stack((xx, yy), axis=1).T 34 | 35 | 36 | def visualize(X, Y, plot_set_result: np.ndarray): 37 | x = np.linspace(LLIM, RLIM, 100) 38 | y = np.linspace(LLIM, RLIM, 100) 39 | xx, yy = np.meshgrid(x, y) 40 | color = plot_set_result.squeeze() 41 | color_map_1 = np.array(['r', 'g', 'b']) 42 | color_map_2 = ['#AA0000', '#00AA00', '#0000AA'] 43 | 44 | c = color_map_1[color] 45 | plt.scatter(xx, yy, c=c, marker='s') 46 | 47 | plt.xlim(LLIM, RLIM) 48 | plt.ylim(LLIM, RLIM) 49 | 50 | origin_x = X.T[:, 0] 51 | origin_y = X.T[:, 1] 52 | origin_color = Y.squeeze(0) 53 | origin_color = [color_map_2[oc] for oc in origin_color] 54 | 55 | plt.scatter(origin_x, origin_y, c=origin_color) 56 | 57 | plt.show() 58 | 59 | 60 | def main(): 61 | plot_points(*generate_points(400)) 62 | 63 | 64 | if __name__ == '__main__': 65 | main() 66 | -------------------------------------------------------------------------------- /dldemos/MulticlassClassification/pt_main.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn.functional as F 6 | 7 | from dldemos.MulticlassClassification.points_classification import ( 8 | generate_plot_set, generate_points, plot_points, visualize) 9 | 10 | 11 | class MulticlassClassificationNet(): 12 | 13 | def __init__(self, neuron_cnt: List[int]): 14 | self.num_layer = len(neuron_cnt) - 1 15 | self.neuron_cnt = neuron_cnt 16 | self.W = [] 17 | self.b = [] 18 | for i in range(self.num_layer): 19 | new_W = torch.empty(neuron_cnt[i + 1], neuron_cnt[i]) 20 | new_b = torch.empty(neuron_cnt[i + 1], 1) 21 | torch.nn.init.kaiming_normal_(new_W, nonlinearity='relu') 22 | torch.nn.init.kaiming_normal_(new_b, nonlinearity='relu') 23 | self.W.append(torch.nn.Parameter(new_W)) 24 | self.b.append(torch.nn.Parameter(new_b)) 25 | self.trainable_vars = self.W + self.b 26 | self.loss_fn = torch.nn.CrossEntropyLoss() 27 | 28 | def forward(self, X): 29 | A = X 30 | for i in range(self.num_layer): 31 | Z = torch.matmul(self.W[i], A) + self.b[i] 32 | if i == self.num_layer - 1: 33 | A = F.softmax(Z, 0) 34 | else: 35 | A = F.relu(Z) 36 | 37 | return A 38 | 39 | def loss(self, Y, Y_hat): 40 | return self.loss_fn(Y_hat.T, Y) 41 | 42 | def evaluate(self, X, Y, return_loss=False): 43 | Y_hat = self.forward(X) 44 | Y_predict = Y 45 | Y_hat_predict = 
torch.argmax(Y_hat, 0) 46 | res = (Y_predict == Y_hat_predict).float() 47 | accuracy = torch.mean(res) 48 | if return_loss: 49 | loss = self.loss(Y, Y_hat) 50 | return accuracy, loss 51 | else: 52 | return accuracy 53 | 54 | 55 | def train(model: MulticlassClassificationNet, 56 | X, 57 | Y, 58 | step, 59 | learning_rate, 60 | print_interval=100): 61 | optimizer = torch.optim.Adam(model.trainable_vars, learning_rate) 62 | for s in range(step): 63 | Y_hat = model.forward(X) 64 | cost = model.loss(Y, Y_hat) 65 | optimizer.zero_grad() 66 | cost.backward() 67 | optimizer.step() 68 | if s % print_interval == 0: 69 | accuracy, loss = model.evaluate(X, Y, return_loss=True) 70 | print(f'Step: {s}') 71 | print(f'Accuracy: {accuracy}') 72 | print(f'Train loss: {loss}') 73 | 74 | 75 | def main(): 76 | train_X, train_Y = generate_points(400) 77 | plot_points(train_X, train_Y) 78 | plot_X = generate_plot_set() 79 | 80 | # X: [2, m] 81 | # Y: [1, m] 82 | 83 | train_X_pt = torch.tensor(train_X, dtype=torch.float32) 84 | train_Y_pt = torch.tensor(train_Y.squeeze(0), dtype=torch.long) 85 | 86 | print(train_X_pt.shape) 87 | print(train_Y_pt.shape) 88 | 89 | # X: [2, m] 90 | # Y: [m] 91 | 92 | n_x = 2 93 | neuron_list = [n_x, 10, 10, 3] 94 | model = MulticlassClassificationNet(neuron_list) 95 | train(model, train_X_pt, train_Y_pt, 5000, 0.001, 1000) 96 | 97 | plot_result = model.forward(torch.Tensor(plot_X)) 98 | plot_result = torch.argmax(plot_result, 0).numpy() 99 | plot_result = np.expand_dims(plot_result, 0) 100 | 101 | visualize(train_X, train_Y, plot_result) 102 | 103 | 104 | if __name__ == '__main__': 105 | main() 106 | -------------------------------------------------------------------------------- /dldemos/MulticlassClassification/tf_main.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from dldemos.MulticlassClassification.points_classification import ( 7 | generate_plot_set, generate_points, plot_points, visualize) 8 | 9 | 10 | class MulticlassClassificationNet(): 11 | 12 | def __init__(self, neuron_cnt: List[int]): 13 | self.num_layer = len(neuron_cnt) - 1 14 | self.neuron_cnt = neuron_cnt 15 | self.W = [] 16 | self.b = [] 17 | initializer = tf.keras.initializers.HeNormal(seed=1) 18 | for i in range(self.num_layer): 19 | self.W.append( 20 | tf.Variable( 21 | initializer(shape=(neuron_cnt[i + 1], neuron_cnt[i])))) 22 | self.b.append( 23 | tf.Variable(initializer(shape=(neuron_cnt[i + 1], 1)))) 24 | self.trainable_vars = self.W + self.b 25 | 26 | def forward(self, X): 27 | A = X 28 | for i in range(self.num_layer): 29 | Z = tf.matmul(self.W[i], A) + self.b[i] 30 | if i == self.num_layer - 1: 31 | A = tf.keras.activations.softmax(Z) 32 | else: 33 | A = tf.keras.activations.relu(Z) 34 | 35 | return A 36 | 37 | def loss(self, Y, Y_hat): 38 | return tf.reduce_mean( 39 | tf.keras.losses.categorical_crossentropy(tf.transpose(Y), 40 | tf.transpose(Y_hat))) 41 | 42 | def evaluate(self, X, Y, return_loss=False): 43 | Y_hat = self.forward(X) 44 | Y_predict = tf.argmax(Y, 0) 45 | Y_hat_predict = tf.argmax(Y_hat, 0) 46 | res = tf.cast(Y_predict == Y_hat_predict, tf.float32) 47 | accuracy = tf.reduce_mean(res) 48 | if return_loss: 49 | loss = self.loss(Y, Y_hat) 50 | return accuracy, loss 51 | else: 52 | return accuracy 53 | 54 | 55 | def train(model: MulticlassClassificationNet, 56 | X, 57 | Y, 58 | step, 59 | learning_rate, 60 | print_interval=100): 61 | optimizer = 
tf.keras.optimizers.Adam(learning_rate) 62 | for s in range(step): 63 | with tf.GradientTape() as tape: 64 | Y_hat = model.forward(X) 65 | cost = model.loss(Y, Y_hat) 66 | grads = tape.gradient(cost, model.trainable_vars) 67 | optimizer.apply_gradients(zip(grads, model.trainable_vars)) 68 | if s % print_interval == 0: 69 | accuracy, loss = model.evaluate(X, Y, return_loss=True) 70 | print(f'Step: {s}') 71 | print(f'Accuracy: {accuracy}') 72 | print(f'Train loss: {loss}') 73 | 74 | 75 | def main(): 76 | train_X, train_Y = generate_points(400) 77 | plot_points(train_X, train_Y) 78 | plot_X = generate_plot_set() 79 | 80 | # X: [2, m] 81 | # Y: [1, m] 82 | 83 | train_X_tf = tf.constant(train_X, dtype=tf.float32) 84 | train_Y_tf = tf.transpose(tf.one_hot(train_Y.squeeze(0), 3)) 85 | 86 | # X: [2, m] 87 | # Y: [3, m] 88 | 89 | n_x = 2 90 | neuron_list = [n_x, 10, 10, 3] 91 | model = MulticlassClassificationNet(neuron_list) 92 | train(model, train_X_tf, train_Y_tf, 5000, 0.001, 1000) 93 | 94 | plot_result = model.forward(plot_X) 95 | plot_result = tf.argmax(plot_result, 0).numpy() 96 | plot_result = np.expand_dims(plot_result, 0) 97 | 98 | visualize(train_X, train_Y, plot_result) 99 | 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /dldemos/MyYOLO/load_coco.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | def print_json(): 6 | with open('data/coco/annotations/instances_val2014.json') as fp: 7 | root = json.load(fp) 8 | print('info:') 9 | print(root['info']) 10 | print('categories:') 11 | print(root['categories']) 12 | print('Length of images:', len(root['images'])) 13 | print(root['images'][0]) 14 | print('Length of annotations:', len(root['annotations'])) 15 | print(root['annotations'][0]) 16 | 17 | 18 | def load_img_ann(): 19 | """return [{img_name, [{x, y, h, w, label}]}]""" 20 | with open('data/coco/annotations/instances_val2014.json') as fp: 21 | root = json.load(fp) 22 | img_dict = {} 23 | for img_info in root['images']: 24 | img_dict[img_info['id']] = {'name': img_info['file_name'], 'anns': []} 25 | for ann_info in root['annotations']: 26 | img_dict[ann_info['image_id']]['anns'].append( 27 | ann_info['bbox'] + [ann_info['category_id']]) 28 | 29 | return img_dict 30 | 31 | 32 | def show_img_ann(img_info): 33 | from PIL import Image 34 | 35 | from dldemos.nms.show_bbox import draw_bbox 36 | print(img_info) 37 | 38 | with open('data/coco/annotations/instances_val2014.json') as fp: 39 | root = json.load(fp) 40 | categories = root['categories'] 41 | category_dict = {int(c['id']): c['name'] for c in categories} 42 | 43 | img_path = os.path.join('data/coco/val2014', img_info['name']) 44 | img = Image.open(img_path) 45 | for ann in img_info['anns']: 46 | x, y, w, h = ann[0:4] 47 | x1, y1, x2, y2 = x, y, x + w, y + h 48 | draw_bbox(img, (x1, y1, x2, y2), 1.0, text=category_dict[ann[4]]) 49 | 50 | img.save('work_dirs/tmp.jpg') 51 | 52 | 53 | def main(): 54 | print_json() 55 | img_dict = load_img_ann() 56 | keys = list(img_dict.keys()) 57 | show_img_ann(img_dict[keys[1]]) 58 | 59 | 60 | if __name__ == '__main__': 61 | main() 62 | -------------------------------------------------------------------------------- /dldemos/PyTorchDistributed/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.distributed as dist 5 | import torch.nn as nn 6 | import 
torch.optim as optim 7 | from torch.nn.parallel import DistributedDataParallel 8 | from torch.utils.data import DataLoader, Dataset 9 | from torch.utils.data.distributed import DistributedSampler 10 | 11 | 12 | def setup(): 13 | dist.init_process_group('nccl') 14 | 15 | 16 | def cleanup(): 17 | dist.destroy_process_group() 18 | 19 | 20 | class ToyModel(nn.Module): 21 | 22 | def __init__(self) -> None: 23 | super().__init__() 24 | self.layer = nn.Linear(1, 1) 25 | 26 | def forward(self, x): 27 | return self.layer(x) 28 | 29 | 30 | class MyDataset(Dataset): 31 | 32 | def __init__(self): 33 | super().__init__() 34 | self.data = torch.tensor([1, 2, 3, 4], dtype=torch.float32) 35 | 36 | def __len__(self): 37 | return len(self.data) 38 | 39 | def __getitem__(self, index): 40 | return self.data[index:index + 1] 41 | 42 | 43 | ckpt_path = 'tmp.pth' 44 | 45 | 46 | def main(): 47 | setup() 48 | rank = dist.get_rank() 49 | pid = os.getpid() 50 | print(f'current pid: {pid}') 51 | print(f'Current rank {rank}') 52 | device_id = rank % torch.cuda.device_count() 53 | 54 | dataset = MyDataset() 55 | sampler = DistributedSampler(dataset) 56 | dataloader = DataLoader(dataset, batch_size=2, sampler=sampler) 57 | 58 | model = ToyModel().to(device_id) 59 | ddp_model = DistributedDataParallel(model, device_ids=[device_id]) 60 | loss_fn = nn.MSELoss() 61 | optimizer = optim.SGD(ddp_model.parameters(), lr=0.001) 62 | 63 | if rank == 0: 64 | torch.save(ddp_model.state_dict(), ckpt_path) 65 | 66 | dist.barrier() 67 | 68 | map_location = {'cuda:0': f'cuda:{device_id}'} 69 | state_dict = torch.load(ckpt_path, map_location=map_location) 70 | print(f'rank {rank}: {state_dict}') 71 | ddp_model.load_state_dict(state_dict) 72 | 73 | for epoch in range(2): 74 | sampler.set_epoch(epoch) 75 | for x in dataloader: 76 | print(f'epoch {epoch}, rank {rank} data: {x}') 77 | x = x.to(device_id) 78 | y = ddp_model(x) 79 | optimizer.zero_grad() 80 | loss = loss_fn(x, y) 81 | loss.backward() 82 | optimizer.step() 83 | 84 | cleanup() 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /dldemos/Regularization/README.md: -------------------------------------------------------------------------------- 1 | Run the `main.py`: 2 | 3 | ```shell 4 | python dldemos/Regularization/main.py 5 | ``` 6 | -------------------------------------------------------------------------------- /dldemos/Regularization/points_classification.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | LLIM = 0 5 | RLIM = 1 6 | 7 | 8 | def generate_points(cnt): 9 | 10 | PERCENTAGE = 0.2 11 | 12 | X = np.random.rand(int(cnt * (1 - PERCENTAGE)), 2) 13 | x_1 = X[:, 0] 14 | x_2 = X[:, 1] 15 | Y = np.where(x_1 > x_2, 1, 0) 16 | 17 | noise_x = np.random.rand(int(cnt * PERCENTAGE)) / 2 18 | noise_y = noise_x + np.random.rand(int(cnt * PERCENTAGE)) / 2 19 | noise_label = np.array([1] * len(noise_x)) 20 | noise_X = np.stack((noise_x, noise_y), axis=1) 21 | X = np.concatenate((X, noise_X), 0) 22 | Y = np.concatenate((Y, noise_label), 0) 23 | 24 | return X.T, Y[:, np.newaxis].T 25 | 26 | 27 | def plot_points(X, Y): 28 | new_X = X.T 29 | Y = np.squeeze(Y, 0) 30 | c = np.where(Y == 0, 'r', 'b') 31 | new_x = new_X[:, 0] 32 | new_y = new_X[:, 1] 33 | plt.scatter(new_x, new_y, color=c) 34 | plt.show() 35 | 36 | 37 | def generate_plot_set(): 38 | x = np.linspace(LLIM, RLIM, 100) 39 | y = 
np.linspace(LLIM, RLIM, 100) 40 | xx, yy = np.meshgrid(x, y) 41 | xx = xx.reshape(-1) 42 | yy = yy.reshape(-1) 43 | return np.stack((xx, yy), axis=1).T 44 | 45 | 46 | def visualize(X, Y, plot_set_result: np.ndarray): 47 | x = np.linspace(LLIM, RLIM, 100) 48 | y = np.linspace(LLIM, RLIM, 100) 49 | xx, yy = np.meshgrid(x, y) 50 | color = plot_set_result.squeeze() 51 | c = np.where(color < 0.5, 'r', 'b') 52 | plt.scatter(xx, yy, c=c, marker='s') 53 | 54 | plt.xlim(LLIM, RLIM) 55 | plt.ylim(LLIM, RLIM) 56 | 57 | origin_x = X.T[:, 0] 58 | origin_y = X.T[:, 1] 59 | origin_color = np.where(Y.squeeze() < 0.5, '#AA0000', '#0000AA') 60 | 61 | plt.scatter(origin_x, origin_y, c=origin_color) 62 | 63 | plt.show() 64 | 65 | 66 | def main(): 67 | plot_points(*generate_points(200)) 68 | 69 | 70 | if __name__ == '__main__': 71 | main() 72 | -------------------------------------------------------------------------------- /dldemos/ResNet/README.md: -------------------------------------------------------------------------------- 1 | 1. Install the repository 2 | 3 | ```shell 4 | python setup.py develop 5 | ``` 6 | 7 | 2. Download the dataset from https://www.kaggle.com/datasets/fusicfenta/cat-and-dog?resource=download and organize the directory as follows: 8 | 9 | ```plain text 10 | └─data 11 | └─archive 12 | └─dataset 13 | ├─single_prediction 14 | ├─test_set 15 | │ ├─cats 16 | │ └─dogs 17 | └─training_set 18 | ├─cats 19 | └─dogs 20 | ``` 21 | 22 | 3. Modify the path in "main" scripts: 23 | 24 | ```Python 25 | train_X, train_Y, test_X, test_Y = get_cat_set( 26 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1500) 27 | ``` 28 | 29 | Replace 'dldemos/LogisticRegression/data/archive/dataset' with your path. 30 | 31 | 4. Run `tf_main.py`. 32 | -------------------------------------------------------------------------------- /dldemos/ResNet/tf_main.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import layers, models 3 | 4 | from dldemos.BasicCNN.dataset import get_cat_set 5 | 6 | 7 | def identity_block_2(x, f, use_shortcut=True): 8 | _, _, _, C = x.shape 9 | x_shortcut = x 10 | x = layers.Conv2D(C, f, padding='same')(x) 11 | x = layers.BatchNormalization(axis=3)(x) 12 | x = layers.ReLU()(x) 13 | x = layers.Conv2D(C, f, padding='same')(x) 14 | x = layers.BatchNormalization(axis=3)(x) 15 | if use_shortcut: 16 | x = x + x_shortcut 17 | x = layers.ReLU()(x) 18 | return x 19 | 20 | 21 | def convolution_block_2(x, f, filters, s: int, use_shortcut=True): 22 | x_shortcut = x 23 | x = layers.Conv2D(filters, f, strides=(s, s), padding='same')(x) 24 | x = layers.BatchNormalization(axis=3)(x) 25 | x = layers.ReLU()(x) 26 | x = layers.Conv2D(filters, f, padding='same')(x) 27 | x = layers.BatchNormalization(axis=3)(x) 28 | if use_shortcut: 29 | x_shortcut = layers.Conv2D(filters, 1, strides=(s, s), 30 | padding='valid')(x_shortcut) 31 | x_shortcut = layers.BatchNormalization(axis=3)(x_shortcut) 32 | x = x + x_shortcut 33 | x = layers.ReLU()(x) 34 | return x 35 | 36 | 37 | def identity_block_3(x, f, filters1, filters2, use_shortcut=True): 38 | x_shortcut = x 39 | x = layers.Conv2D(filters1, 1, padding='valid')(x) 40 | x = layers.BatchNormalization(axis=3)(x) 41 | x = layers.Conv2D(filters1, f, padding='same')(x) 42 | x = layers.BatchNormalization(axis=3)(x) 43 | x = layers.ReLU()(x) 44 | x = layers.Conv2D(filters2, 1, padding='valid')(x) 45 | x = layers.BatchNormalization(axis=3)(x) 46 | if use_shortcut: 47 | x = x + 
x_shortcut 48 | x = layers.ReLU()(x) 49 | return x 50 | 51 | 52 | def convolution_block_3(x, f, filters1, filters2, s: int, use_shortcut=True): 53 | x_shortcut = x 54 | x = layers.Conv2D(filters1, 1, strides=(s, s), padding='valid')(x) 55 | x = layers.BatchNormalization(axis=3)(x) 56 | x = layers.Conv2D(filters1, f, padding='same')(x) 57 | x = layers.BatchNormalization(axis=3)(x) 58 | x = layers.ReLU()(x) 59 | x = layers.Conv2D(filters2, 1, padding='valid')(x) 60 | x = layers.BatchNormalization(axis=3)(x) 61 | if use_shortcut: 62 | x_shortcut = layers.Conv2D(filters2, 63 | 1, 64 | strides=(s, s), 65 | padding='valid')(x_shortcut) 66 | x_shortcut = layers.BatchNormalization(axis=3)(x_shortcut) 67 | x = x + x_shortcut 68 | x = layers.ReLU()(x) 69 | return x 70 | 71 | 72 | def init_model(input_shape=(224, 224, 3), 73 | model_name='ResNet18', 74 | use_shortcut=True) -> tf.keras.models.Model: 75 | # Initialize input 76 | input = layers.Input(input_shape) 77 | 78 | # Get output 79 | x = layers.Conv2D(64, 7, (2, 2), padding='same')(input) 80 | x = layers.MaxPool2D((3, 3), (2, 2))(x) 81 | 82 | if model_name == 'ResNet18': 83 | x = identity_block_2(x, 3, use_shortcut) 84 | x = identity_block_2(x, 3, use_shortcut) 85 | x = convolution_block_2(x, 3, 128, 2, use_shortcut) 86 | x = identity_block_2(x, 3, use_shortcut) 87 | x = convolution_block_2(x, 3, 256, 2, use_shortcut) 88 | x = identity_block_2(x, 3, use_shortcut) 89 | x = convolution_block_2(x, 3, 512, 2, use_shortcut) 90 | x = identity_block_2(x, 3, use_shortcut) 91 | elif model_name == 'ResNet50': 92 | 93 | def block_group(x, fs1, fs2, count): 94 | x = convolution_block_3(x, 3, fs1, fs2, 2, use_shortcut) 95 | for i in range(count - 1): 96 | x = identity_block_3(x, 3, fs1, fs2, use_shortcut) 97 | return x 98 | 99 | x = block_group(x, 64, 256, 3) 100 | x = block_group(x, 128, 512, 4) 101 | x = block_group(x, 256, 1024, 6) 102 | x = block_group(x, 512, 2048, 3) 103 | else: 104 | raise NotImplementedError(f'No such model {model_name}') 105 | 106 | x = layers.AveragePooling2D((2, 2), (2, 2))(x) 107 | x = layers.Flatten()(x) 108 | output = layers.Dense(1, 'sigmoid')(x) 109 | 110 | # Build model 111 | model = models.Model(inputs=input, outputs=output) 112 | print(model.summary()) 113 | return model 114 | 115 | 116 | def main(): 117 | train_X, train_Y, test_X, test_Y = get_cat_set( 118 | 'dldemos/LogisticRegression/data/archive/dataset', 119 | train_size=500, 120 | test_size=50) 121 | print(train_X.shape) # (m, 224, 224, 3) 122 | print(train_Y.shape) # (m , 1) 123 | 124 | # model = init_model() 125 | # model = init_model(use_shortcut=False) 126 | model = init_model(model_name='ResNet50') 127 | # model = init_model(model_name='ResNet50', use_shortcut=False) 128 | model.compile(optimizer='adam', 129 | loss='binary_crossentropy', 130 | metrics=['accuracy']) 131 | 132 | model.fit(train_X, train_Y, epochs=20, batch_size=16) 133 | model.evaluate(test_X, test_Y) 134 | 135 | 136 | if __name__ == '__main__': 137 | main() 138 | -------------------------------------------------------------------------------- /dldemos/SentimentAnalysis/README.md: -------------------------------------------------------------------------------- 1 | 1. Download [IMDb dataset](https://ai.stanford.edu/~amaas/data/sentiment/). 2 | 3 | 2. Modify the directory in `read_imdb`. 4 | 5 | 3. Run `main.py` . 
(Pretrained GloVe will download to default directory) 6 | -------------------------------------------------------------------------------- /dldemos/SentimentAnalysis/glove.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchtext.vocab import GloVe 3 | 4 | glove = GloVe(name='6B', dim=100) 5 | 6 | # Get vectors 7 | tensor = glove.get_vecs_by_tokens(['', '1998', '199999998', ',', 'cat'], True) 8 | print(tensor) 9 | 10 | # Iterate the vocab 11 | myvocab = glove.itos 12 | print(len(myvocab)) 13 | print(myvocab[0], myvocab[1], myvocab[2], myvocab[3]) 14 | 15 | 16 | def get_counterpart(x1, y1, x2): 17 | """Find y2 that makes x1-y1=x2-y2.""" 18 | x1_id = glove.stoi[x1] 19 | y1_id = glove.stoi[y1] 20 | x2_id = glove.stoi[x2] 21 | x1, y1, x2 = glove.get_vecs_by_tokens([x1, y1, x2], True) 22 | target = x2 - x1 + y1 23 | max_sim = 0 24 | max_id = -1 25 | for i in range(len(myvocab)): 26 | vector = glove.get_vecs_by_tokens([myvocab[i]], True)[0] 27 | cossim = torch.dot(target, vector) 28 | if cossim > max_sim and i not in {x1_id, y1_id, x2_id}: 29 | max_sim = cossim 30 | max_id = i 31 | return myvocab[max_id] 32 | 33 | 34 | print(get_counterpart('man', 'woman', 'king')) 35 | print(get_counterpart('more', 'less', 'long')) 36 | print(get_counterpart('apple', 'red', 'banana')) 37 | -------------------------------------------------------------------------------- /dldemos/SentimentAnalysis/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.utils.rnn import pad_sequence 4 | from torch.utils.data import DataLoader, Dataset 5 | from torchtext.data import get_tokenizer 6 | from torchtext.vocab import GloVe 7 | 8 | from dldemos.SentimentAnalysis.read_imdb import read_imdb 9 | 10 | GLOVE_DIM = 100 11 | GLOVE = GloVe(name='6B', dim=GLOVE_DIM) 12 | 13 | 14 | class IMDBDataset(Dataset): 15 | 16 | def __init__(self, is_train=True, dir='data/aclImdb'): 17 | super().__init__() 18 | self.tokenizer = get_tokenizer('basic_english') 19 | pos_lines = read_imdb(dir, 'pos', is_train) 20 | neg_lines = read_imdb(dir, 'neg', is_train) 21 | self.lines = pos_lines + neg_lines 22 | self.pos_length = len(pos_lines) 23 | self.neg_length = len(neg_lines) 24 | 25 | def __len__(self): 26 | return self.pos_length + self.neg_length 27 | 28 | def __getitem__(self, index): 29 | sentence = self.tokenizer(self.lines[index]) 30 | x = GLOVE.get_vecs_by_tokens(sentence) 31 | label = 1 if index < self.pos_length else 0 32 | return x, label 33 | 34 | 35 | def get_dataloader(dir='data/aclImdb'): 36 | 37 | def collate_fn(batch): 38 | x, y = zip(*batch) 39 | x_pad = pad_sequence(x, batch_first=True) 40 | y = torch.Tensor(y) 41 | return x_pad, y 42 | 43 | train_dataloader = DataLoader(IMDBDataset(True, dir), 44 | batch_size=32, 45 | shuffle=True, 46 | collate_fn=collate_fn) 47 | test_dataloader = DataLoader(IMDBDataset(False, dir), 48 | batch_size=32, 49 | shuffle=True, 50 | collate_fn=collate_fn) 51 | return train_dataloader, test_dataloader 52 | 53 | 54 | class RNN(torch.nn.Module): 55 | 56 | def __init__(self, hidden_units=64, dropout_rate=0.5): 57 | super().__init__() 58 | self.drop = nn.Dropout(dropout_rate) 59 | self.rnn = nn.GRU(GLOVE_DIM, hidden_units, 1, batch_first=True) 60 | self.linear = nn.Linear(hidden_units, 1) 61 | self.sigmoid = nn.Sigmoid() 62 | 63 | def forward(self, x: torch.Tensor): 64 | # x shape: [batch, max_word_length, embedding_length] 65 | emb = self.drop(x) 66 | 
output, _ = self.rnn(emb) 67 | output = output[:, -1] 68 | output = self.linear(output) 69 | output = self.sigmoid(output) 70 | 71 | return output 72 | 73 | 74 | def main(): 75 | device = 'cuda:0' 76 | train_dataloader, test_dataloader = get_dataloader() 77 | model = RNN().to(device) 78 | 79 | # train 80 | 81 | optimizer = torch.optim.Adam(model.parameters(), lr=0.001) 82 | citerion = torch.nn.BCELoss() 83 | for epoch in range(100): 84 | 85 | loss_sum = 0 86 | dataset_len = len(train_dataloader.dataset) 87 | 88 | for x, y in train_dataloader: 89 | batchsize = y.shape[0] 90 | x = x.to(device) 91 | y = y.to(device) 92 | hat_y = model(x) 93 | hat_y = hat_y.squeeze(-1) 94 | loss = citerion(hat_y, y) 95 | 96 | optimizer.zero_grad() 97 | loss.backward() 98 | torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) 99 | optimizer.step() 100 | 101 | loss_sum += loss * batchsize 102 | 103 | print(f'Epoch {epoch}. loss: {loss_sum / dataset_len}') 104 | 105 | torch.save(model.state_dict(), 'dldemos/SentimentAnalysis/rnn.pth') 106 | 107 | # test 108 | 109 | # model.load_state_dict( 110 | # torch.load('dldemos/SentimentAnalysis/rnn.pth', 'cuda:0')) 111 | 112 | accuracy = 0 113 | dataset_len = len(test_dataloader.dataset) 114 | model.eval() 115 | for x, y in test_dataloader: 116 | x = x.to(device) 117 | y = y.to(device) 118 | with torch.no_grad(): 119 | hat_y = model(x) 120 | hat_y.squeeze_(1) 121 | predictions = torch.where(hat_y > 0.5, 1, 0) 122 | score = torch.sum(torch.where(predictions == y, 1, 0)) 123 | accuracy += score.item() 124 | accuracy /= dataset_len 125 | 126 | print(f'Accuracy: {accuracy}') 127 | 128 | # Inference 129 | tokenizer = get_tokenizer('basic_english') 130 | article = 'U.S. stock indexes fell Tuesday, driven by expectations for ' \ 131 | 'tighter Federal Reserve policy and an energy crisis in Europe. ' \ 132 | 'Stocks around the globe have come under pressure in recent weeks ' \ 133 | 'as worries about tighter monetary policy in the U.S. and a '\ 134 | 'darkening economic outlook in Europe have led investors to '\ 135 | 'sell riskier assets.' 136 | 137 | x = GLOVE.get_vecs_by_tokens(tokenizer(article)).unsqueeze(0).to(device) 138 | with torch.no_grad(): 139 | hat_y = model(x) 140 | hat_y = hat_y.squeeze_().item() 141 | result = 'positive' if hat_y > 0.5 else 'negative' 142 | print(result) 143 | 144 | 145 | if __name__ == '__main__': 146 | main() 147 | -------------------------------------------------------------------------------- /dldemos/SentimentAnalysis/read_imdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchtext.data import get_tokenizer 4 | 5 | 6 | def read_imdb(dir='data/aclImdb', split='pos', is_train=True): 7 | subdir = 'train' if is_train else 'test' 8 | dir = os.path.join(dir, subdir, split) 9 | lines = [] 10 | for file in os.listdir(dir): 11 | with open(os.path.join(dir, file), 'rb') as f: 12 | line = f.read().decode('utf-8') 13 | lines.append(line) 14 | return lines 15 | 16 | 17 | def main(): 18 | lines = read_imdb() 19 | print('Length of the file:', len(lines)) 20 | print('lines[0]:', lines[0]) 21 | tokenizer = get_tokenizer('basic_english') 22 | tokens = tokenizer(lines[0]) 23 | print('lines[0] tokens:', tokens) 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /dldemos/ShallowNetwork/README.md: -------------------------------------------------------------------------------- 1 | 1. 
Install the repository 2 | 3 | ```shell 4 | python setup.py develop 5 | ``` 6 | 7 | 2. Run the following command in the repository root dir: 8 | 9 | ```shell 10 | python dldemos/ShallowNetwork/model.py 11 | ``` 12 | 13 | 3. You can read the source code in `dldemos/ShallowNetwork/model.py`, `dldemos\ShallowNetwork\genereate_points.py` and modify the hyper-parameters and the dataset. 14 | -------------------------------------------------------------------------------- /dldemos/ShallowNetwork/genereate_points.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from numpy.random import default_rng 4 | 5 | 6 | def vertical_flip(): 7 | return np.array([[1, 0], [0, -1]]) 8 | 9 | 10 | def rotate(theta): 11 | return np.array([[np.cos(theta), -np.sin(theta)], 12 | [np.sin(theta), np.cos(theta)]]) 13 | 14 | 15 | def half_oval(cnt, h=10, w=20): 16 | x = np.linspace(-w, w, cnt) 17 | y = np.sqrt(h * h * (1 - x * x / w / w)) 18 | return np.stack((x, y), 1) 19 | 20 | 21 | def generate_point_set(): 22 | petal1 = half_oval(20) 23 | petal2 = np.dot(half_oval(20), vertical_flip().T) 24 | petal = np.concatenate((petal1, petal2), 0) 25 | petal += [25, 0] 26 | flower = petal.copy() 27 | for i in range(5): 28 | new_petal = np.dot(petal.copy(), rotate(np.radians(60) * (i + 1)).T) 29 | flower = np.concatenate((flower, new_petal), 0) 30 | 31 | label = np.zeros([40 * 6]) 32 | label[0:40] = 1 33 | label[40:80] = 1 34 | label[120:160] = 1 35 | 36 | rng = default_rng() 37 | noise_indice1 = rng.choice(40 * 6, 10, replace=False) 38 | label[noise_indice1] = 1 - label[noise_indice1] 39 | 40 | x = flower[:, 0] 41 | y = flower[:, 1] 42 | return x, y, label 43 | 44 | 45 | def generate_plot_set(): 46 | x = np.linspace(-50, 50, 100) 47 | y = np.linspace(-50, 50, 100) 48 | xx, yy = np.meshgrid(x, y) 49 | xx = xx.reshape(-1) 50 | yy = yy.reshape(-1) 51 | return np.stack((xx, yy), axis=1).T 52 | 53 | 54 | def visualize(X, Y, plot_set_result: np.ndarray): 55 | x = np.linspace(-50, 50, 100) 56 | y = np.linspace(-50, 50, 100) 57 | xx, yy = np.meshgrid(x, y) 58 | color = plot_set_result.squeeze() 59 | c = np.where(color < 0.5, 'r', 'g') 60 | plt.scatter(xx, yy, c=c, marker='s') 61 | 62 | plt.xlim(-50, 50) 63 | plt.ylim(-50, 50) 64 | 65 | origin_x = X[:, 0] 66 | origin_y = X[:, 1] 67 | origin_color = np.where(Y.squeeze() < 0.5, '#AA0000', '#00AA00') 68 | 69 | plt.scatter(origin_x, origin_y, c=origin_color) 70 | 71 | plt.show() 72 | 73 | 74 | if __name__ == '__main__': 75 | x, y, label = generate_point_set() 76 | c = np.where(label == 0, 'r', 'g') 77 | plt.scatter(x, y, c=c) 78 | 79 | plt.xlim(-50, 50) 80 | plt.ylim(-50, 50) 81 | 82 | plt.show() 83 | -------------------------------------------------------------------------------- /dldemos/ShallowNetwork/model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | import numpy as np 4 | 5 | from dldemos.utils import relu, relu_de, sigmoid 6 | 7 | 8 | class BaseRegressionModel(metaclass=abc.ABCMeta): 9 | # Use Cross Entropy as the cost function 10 | 11 | def __init__(self): 12 | pass 13 | 14 | @abc.abstractmethod 15 | def forward(self, X, train_mode=True): 16 | # if self.train_mode: 17 | # forward_train() 18 | # else: 19 | # forward_test() 20 | pass 21 | 22 | @abc.abstractmethod 23 | def backward(self, Y): 24 | pass 25 | 26 | @abc.abstractmethod 27 | def gradient_descent(self, learning_rate=0.001): 28 | pass 29 | 30 | def loss(self, Y_hat, Y): 
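        # Binary cross-entropy, averaged over every output and sample. Note
        # that np.log returns -inf if Y_hat saturates at exactly 0 or 1; a
        # more robust variant would first clip Y_hat, e.g. with
        # np.clip(Y_hat, 1e-7, 1 - 1e-7).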
31 | return np.mean(-(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat))) 32 | 33 | def evaluate(self, X, Y): 34 | Y_hat = self.forward(X, train_mode=False) 35 | predicts = np.where(Y_hat > 0.5, 1, 0) 36 | score = np.mean(np.where(predicts == Y, 1, 0)) 37 | print(f'Accuracy: {score}') 38 | 39 | 40 | class LogisticRegression(BaseRegressionModel): 41 | 42 | def __init__(self, n_x): 43 | super().__init__() 44 | self.n_x = n_x 45 | self.w = np.zeros((n_x, 1)) 46 | self.b = 0 47 | 48 | def forward(self, X, train_mode=True): 49 | Z = np.dot(self.w.T, X) + self.b 50 | A = sigmoid(Z) # hat_Y = A 51 | if train_mode: 52 | self.m_cache = X.shape[1] 53 | self.X_cache = X 54 | self.A_cache = A 55 | return A 56 | 57 | def backward(self, Y): 58 | d_Z = self.A_cache - Y 59 | d_w = np.dot(self.X_cache, d_Z.T) / self.m_cache 60 | d_b = np.mean(d_Z) 61 | self.d_w_cache = d_w 62 | self.d_b_cache = d_b 63 | 64 | def gradient_descent(self, learning_rate=0.001): 65 | self.w -= learning_rate * self.d_w_cache 66 | self.b -= learning_rate * self.d_b_cache 67 | 68 | 69 | class ShallowNetwork(BaseRegressionModel): 70 | # x -> hidden layer -> output layer -> y 71 | # hidden layer (n_1 relu) 72 | # output layer (1 sigmoid) 73 | def __init__(self, n_x, n_1): 74 | super().__init__() 75 | self.n_x = n_x 76 | self.n_1 = n_1 77 | self.W1 = np.random.randn(n_1, n_x) * 0.01 78 | self.b1 = np.zeros((n_1, 1)) 79 | self.W2 = np.random.randn(1, n_1) * 0.01 80 | self.b2 = np.zeros((1, 1)) 81 | 82 | def forward(self, X, train_mode=True): 83 | Z1 = np.dot(self.W1, X) + self.b1 84 | A1 = relu(Z1) 85 | Z2 = np.dot(self.W2, A1) + self.b2 86 | A2 = sigmoid(Z2) 87 | if train_mode: 88 | self.m_cache = X.shape[1] 89 | self.X_cache = X 90 | self.Z1_cache = Z1 91 | self.A1_cache = A1 92 | self.A2_cache = A2 93 | return A2 94 | 95 | def backward(self, Y): 96 | dZ2 = self.A2_cache - Y 97 | dW2 = np.dot(dZ2, self.A1_cache.T) / self.m_cache 98 | db2 = np.sum(dZ2, axis=1, keepdims=True) / self.m_cache 99 | dA1 = np.dot(self.W2.T, dZ2) 100 | 101 | dZ1 = dA1 * relu_de(self.Z1_cache) 102 | dW1 = np.dot(dZ1, self.X_cache.T) / self.m_cache 103 | db1 = np.sum(dZ1, axis=1, keepdims=True) / self.m_cache 104 | 105 | self.dW2_cache = dW2 106 | self.dW1_cache = dW1 107 | self.db2_cache = db2 108 | self.db1_cache = db1 109 | 110 | def gradient_descent(self, learning_rate=0.001): 111 | self.W1 -= learning_rate * self.dW1_cache 112 | self.b1 -= learning_rate * self.db1_cache 113 | self.W2 -= learning_rate * self.dW2_cache 114 | self.b2 -= learning_rate * self.db2_cache 115 | 116 | 117 | def train_model(model: BaseRegressionModel, 118 | X_train, 119 | Y_train, 120 | X_test, 121 | Y_test, 122 | steps=1000, 123 | learning_rate=0.001, 124 | print_interval=100): 125 | for step in range(steps): 126 | Y_hat = model.forward(X_train) 127 | model.backward(Y_train) 128 | model.gradient_descent(learning_rate) 129 | if step % print_interval == 0: 130 | train_loss = model.loss(Y_hat, Y_train) 131 | print(f'Step {step}') 132 | print(f'Train loss: {train_loss}') 133 | model.evaluate(X_test, Y_test) 134 | -------------------------------------------------------------------------------- /dldemos/ShallowNetwork/plot_activation_func.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | 5 | def sigmoid(x): 6 | return 1 / (1 + np.exp(-x)) 7 | 8 | 9 | def tanh(x): 10 | return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x)) 11 | 12 | 13 | def relu(x): 14 | return np.maximum(x, 0) 15 | 
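# leaky_relu below keeps a small positive slope (0.1 here; 0.01 is another
# common choice) for negative inputs, so unlike relu its gradient does not
# vanish on the negative half-axis.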
16 | 17 | def leaky_relu(x): 18 | return np.maximum(x, 0.1 * x) 19 | 20 | 21 | x = np.linspace(-3, 3, 100) 22 | y1 = sigmoid(x) 23 | y2 = tanh(x) 24 | y3 = relu(x) 25 | y4 = leaky_relu(x) 26 | 27 | plt.subplot(2, 2, 1) 28 | plt.axvline(x=0, color='k') 29 | plt.axhline(y=0, color='k') 30 | plt.plot(x, y1) 31 | plt.title('sigmoid') 32 | 33 | plt.subplot(2, 2, 2) 34 | plt.axhline(y=0, color='k') 35 | plt.axvline(x=0, color='k') 36 | plt.plot(x, y2) 37 | plt.title('tanh') 38 | 39 | plt.subplot(2, 2, 3) 40 | plt.axhline(y=0, color='k') 41 | plt.axvline(x=0, color='k') 42 | plt.plot(x, y3) 43 | plt.title('relu') 44 | 45 | plt.subplot(2, 2, 4) 46 | plt.axhline(y=0, color='k') 47 | plt.axvline(x=0, color='k') 48 | plt.plot(x, y4) 49 | plt.title('leaky_relu') 50 | 51 | plt.show() 52 | -------------------------------------------------------------------------------- /dldemos/ShallowNetwork/points_classification.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from dldemos.ShallowNetwork.genereate_points import (generate_plot_set, 4 | generate_point_set, 5 | visualize) 6 | from dldemos.ShallowNetwork.model import (LogisticRegression, ShallowNetwork, 7 | train_model) 8 | 9 | 10 | def main(): 11 | x, y, label = generate_point_set() 12 | # x: [240] 13 | # y: [240] 14 | # label: [240] 15 | 16 | X = np.stack((x, y), axis=1) 17 | Y = np.expand_dims(label, axis=1) 18 | # X: [240, 2] 19 | # Y: [240, 1] 20 | 21 | indices = np.random.permutation(X.shape[0]) 22 | X_train = X[indices[0:200], :].T 23 | Y_train = Y[indices[0:200], :].T 24 | X_test = X[indices[200:], :].T 25 | Y_test = Y[indices[200:], :].T 26 | # X_train: [2, 200] 27 | # Y_train: [1, 200] 28 | # X_test: [2, 40] 29 | # Y_test: [1, 40] 30 | 31 | n_x = 2 32 | 33 | model1 = LogisticRegression(n_x) 34 | model2 = ShallowNetwork(n_x, 2) 35 | model3 = ShallowNetwork(n_x, 4) 36 | model4 = ShallowNetwork(n_x, 10) 37 | train_model(model1, X_train, Y_train, X_test, Y_test, 500, 0.0001, 50) 38 | train_model(model2, X_train, Y_train, X_test, Y_test, 2000, 0.01, 100) 39 | train_model(model3, X_train, Y_train, X_test, Y_test, 5000, 0.01, 500) 40 | train_model(model4, X_train, Y_train, X_test, Y_test, 5000, 0.01, 500) 41 | 42 | visualize_X = generate_plot_set() 43 | plot_result = model4.forward(visualize_X, train_mode=False) 44 | visualize(X, Y, plot_result) 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /dldemos/StyleTransfer/README.md: -------------------------------------------------------------------------------- 1 | Install PyTorch and run the scripts `copy_img.py`, `combine_img.py`, `style_transfer.py`. 
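For example, with the repository root as the working directory (the scripts load `picasso.jpg` and `dancing.jpg` through `dldemos/StyleTransfer/...` paths and write their results into `work_dirs/`, which they assume already exists):

```shell
mkdir -p work_dirs
python dldemos/StyleTransfer/copy_img.py       # reconstruct one image from random noise
python dldemos/StyleTransfer/combine_img.py    # naive pixel-space blend of both images
python dldemos/StyleTransfer/style_transfer.py # VGG-19 feature- and Gram-matrix-based transfer
```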
2 | -------------------------------------------------------------------------------- /dldemos/StyleTransfer/combine_img.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.optim as optim 4 | import torchvision.transforms as transforms 5 | from PIL import Image 6 | 7 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 8 | 9 | img_size = (256, 256) 10 | 11 | 12 | def read_image(image_path): 13 | pipeline = transforms.Compose( 14 | [transforms.Resize((img_size)), 15 | transforms.ToTensor()]) 16 | 17 | img = Image.open(image_path) 18 | img = pipeline(img).unsqueeze(0) 19 | return img.to(device, torch.float) 20 | 21 | 22 | def save_image(tensor, image_path): 23 | toPIL = transforms.ToPILImage() 24 | img = tensor.detach().cpu().clone() 25 | img = img.squeeze(0) 26 | img = toPIL(img) 27 | img.save(image_path) 28 | 29 | 30 | style_img = read_image('dldemos/StyleTransfer/picasso.jpg') 31 | content_img = read_image('dldemos/StyleTransfer/dancing.jpg') 32 | 33 | input_img = torch.randn(1, 3, *img_size, device=device) 34 | input_img.requires_grad_(True) 35 | optimizer = optim.LBFGS([input_img]) 36 | steps = 0 37 | while steps <= 10: 38 | 39 | def closure(): 40 | global steps 41 | optimizer.zero_grad() 42 | loss = F.mse_loss(input_img, style_img) + F.mse_loss( 43 | input_img, content_img) 44 | loss.backward() 45 | steps += 1 46 | if steps % 1 == 0: 47 | print(f'Step {steps}:') 48 | print(f'Loss: {loss}') 49 | 50 | return loss 51 | 52 | optimizer.step(closure) 53 | 54 | save_image(input_img, 'work_dirs/output.jpg') 55 | -------------------------------------------------------------------------------- /dldemos/StyleTransfer/copy_img.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.optim as optim 4 | import torchvision.transforms as transforms 5 | from PIL import Image 6 | 7 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 8 | 9 | img_size = (256, 256) 10 | 11 | 12 | def read_image(image_path): 13 | pipeline = transforms.Compose( 14 | [transforms.Resize((img_size)), 15 | transforms.ToTensor()]) 16 | 17 | img = Image.open(image_path) 18 | img = pipeline(img).unsqueeze(0) 19 | return img.to(device, torch.float) 20 | 21 | 22 | def save_image(tensor, image_path): 23 | toPIL = transforms.ToPILImage() 24 | img = tensor.detach().cpu().clone() 25 | img = img.squeeze(0) 26 | img = toPIL(img) 27 | img.save(image_path) 28 | 29 | 30 | style_img = read_image('dldemos/StyleTransfer/picasso.jpg') 31 | content_img = read_image('dldemos/StyleTransfer/dancing.jpg') 32 | 33 | input_img = torch.randn(1, 3, *img_size, device=device) 34 | input_img.requires_grad_(True) 35 | optimizer = optim.LBFGS([input_img]) 36 | steps = 0 37 | while steps <= 10: 38 | 39 | def closure(): 40 | global steps 41 | optimizer.zero_grad() 42 | loss = F.mse_loss(input_img, style_img) 43 | loss.backward() 44 | steps += 1 45 | if steps % 5 == 0: 46 | print(f'Step {steps}:') 47 | print(f'Loss: {loss}') 48 | 49 | return loss 50 | 51 | optimizer.step(closure) 52 | 53 | save_image(input_img, 'work_dirs/output.jpg') 54 | -------------------------------------------------------------------------------- /dldemos/StyleTransfer/dancing.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SingleZombie/DL-Demos/739a21ff90f411c318e098823581afb3f8a1d010/dldemos/StyleTransfer/dancing.jpg -------------------------------------------------------------------------------- /dldemos/StyleTransfer/picasso.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SingleZombie/DL-Demos/739a21ff90f411c318e098823581afb3f8a1d010/dldemos/StyleTransfer/picasso.jpg -------------------------------------------------------------------------------- /dldemos/StyleTransfer/style_transfer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.optim as optim 4 | import torchvision.models as models 5 | import torchvision.transforms as transforms 6 | from PIL import Image 7 | 8 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 9 | 10 | img_size = (256, 256) 11 | 12 | 13 | def read_image(image_path): 14 | pipeline = transforms.Compose( 15 | [transforms.Resize((img_size)), 16 | transforms.ToTensor()]) 17 | 18 | img = Image.open(image_path).convert('RGB') 19 | img = pipeline(img).unsqueeze(0) 20 | return img.to(device, torch.float) 21 | 22 | 23 | def save_image(tensor, image_path): 24 | toPIL = transforms.ToPILImage() 25 | img = tensor.detach().cpu().clone() 26 | img = img.squeeze(0) 27 | img = toPIL(img) 28 | img.save(image_path) 29 | 30 | 31 | # Hyperparameters 32 | style_img = read_image('dldemos/StyleTransfer/picasso.jpg') 33 | content_img = read_image('dldemos/StyleTransfer/dancing.jpg') 34 | 35 | default_content_layers = ['conv_4'] 36 | default_style_layers = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5'] 37 | style_weight = 1e4 38 | content_weight = 1 39 | 40 | 41 | class ContentLoss(torch.nn.Module): 42 | 43 | def __init__(self, target: torch.Tensor): 44 | super().__init__() 45 | self.target = target.detach() 46 | 47 | def forward(self, input): 48 | self.loss = F.mse_loss(input, self.target) 49 | return input 50 | 51 | 52 | def gram(x: torch.Tensor): 53 | # x is a [n, c, h, w] array 54 | n, c, h, w = x.shape 55 | 56 | features = x.reshape(n * c, h * w) 57 | features = torch.mm(features, features.T) / n / c / h / w 58 | return features 59 | 60 | 61 | class StyleLoss(torch.nn.Module): 62 | 63 | def __init__(self, target: torch.Tensor): 64 | super().__init__() 65 | self.target = gram(target.detach()).detach() 66 | 67 | def forward(self, input): 68 | G = gram(input) 69 | self.loss = F.mse_loss(G, self.target) 70 | return input 71 | 72 | 73 | class Normalization(torch.nn.Module): 74 | 75 | def __init__(self, mean, std): 76 | super().__init__() 77 | self.mean = torch.tensor(mean).to(device).reshape(-1, 1, 1) 78 | self.std = torch.tensor(std).to(device).reshape(-1, 1, 1) 79 | 80 | def forward(self, img): 81 | return (img - self.mean) / self.std 82 | 83 | 84 | def get_model_and_losses(content_img, style_img, content_layers, style_layers): 85 | num_loss = 0 86 | expected_num_loss = len(content_layers) + len(style_layers) 87 | content_losses = [] 88 | style_losses = [] 89 | 90 | model = torch.nn.Sequential( 91 | Normalization([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])) 92 | cnn = models.vgg19(pretrained=True).features.to(device).eval() 93 | i = 0 94 | for layer in cnn.children(): 95 | if isinstance(layer, torch.nn.Conv2d): 96 | i += 1 97 | name = f'conv_{i}' 98 | elif isinstance(layer, torch.nn.ReLU): 99 | name = f'relu_{i}' 100 | layer = torch.nn.ReLU(inplace=False) 101 | elif 
isinstance(layer, torch.nn.MaxPool2d): 102 | name = f'pool_{i}' 103 | elif isinstance(layer, torch.nn.BatchNorm2d): 104 | name = f'bn_{i}' 105 | else: 106 | raise RuntimeError( 107 | f'Unrecognized layer: {layer.__class__.__name__}') 108 | 109 | model.add_module(name, layer) 110 | 111 | if name in content_layers: 112 | # add content loss: 113 | target = model(content_img) 114 | content_loss = ContentLoss(target) 115 | model.add_module(f'content_loss_{i}', content_loss) 116 | content_losses.append(content_loss) 117 | num_loss += 1 118 | 119 | if name in style_layers: 120 | target_feature = model(style_img) 121 | style_loss = StyleLoss(target_feature) 122 | model.add_module(f'style_loss_{i}', style_loss) 123 | style_losses.append(style_loss) 124 | num_loss += 1 125 | 126 | if num_loss >= expected_num_loss: 127 | break 128 | 129 | return model, content_losses, style_losses 130 | 131 | 132 | input_img = torch.randn(1, 3, *img_size, device=device) 133 | model, content_losses, style_losses = get_model_and_losses( 134 | content_img, style_img, default_content_layers, default_style_layers) 135 | 136 | input_img.requires_grad_(True) 137 | model.requires_grad_(False) 138 | 139 | optimizer = optim.LBFGS([input_img]) 140 | steps = 0 141 | prev_loss = 0 142 | while steps <= 1000 and prev_loss < 100: 143 | 144 | def closure(): 145 | with torch.no_grad(): 146 | input_img.clamp_(0, 1) 147 | global steps 148 | global prev_loss 149 | optimizer.zero_grad() 150 | model(input_img) 151 | content_loss = 0 152 | style_loss = 0 153 | for ls in content_losses: 154 | content_loss += ls.loss 155 | for ls in style_losses: 156 | style_loss += ls.loss 157 | loss = content_weight * content_loss + style_weight * style_loss 158 | loss.backward() 159 | steps += 1 160 | if steps % 50 == 0: 161 | print(f'Step {steps}:') 162 | print(f'Loss: {loss}') 163 | save_image(input_img, f'work_dirs/output_{steps}.jpg') 164 | prev_loss = loss 165 | return loss 166 | 167 | optimizer.step(closure) 168 | with torch.no_grad(): 169 | input_img.clamp_(0, 1) 170 | save_image(input_img, 'work_dirs/output.jpg') 171 | -------------------------------------------------------------------------------- /dldemos/Transformer/data_load.py: -------------------------------------------------------------------------------- 1 | # Modified from 2 | # https://github.com/P3n9W31/transformer-pytorch/master/data_load.py 3 | 4 | import codecs 5 | import os 6 | import random 7 | 8 | import numpy as np 9 | import regex 10 | import requests 11 | 12 | # Words that occur less than min_cnt times are encoded as <UNK>. 13 | min_cnt = 0 14 | # Maximum number of words in a sentence.
15 | maxlen = 50 16 | 17 | source_train = 'dldemos/Transformer/data/cn.txt' 18 | target_train = 'dldemos/Transformer/data/en.txt' 19 | source_test = 'dldemos/Transformer/data/cn.test.txt' 20 | target_test = 'dldemos/Transformer/data/en.test.txt' 21 | 22 | 23 | def load_vocab(language): 24 | assert language in ['cn', 'en'] 25 | vocab = [ 26 | line.split()[0] for line in codecs.open( 27 | 'dldemos/Transformer/data/{}.txt.vocab.tsv'.format(language), 'r', 28 | 'utf-8').read().splitlines() if int(line.split()[1]) >= min_cnt 29 | ] 30 | word2idx = {word: idx for idx, word in enumerate(vocab)} 31 | idx2word = {idx: word for idx, word in enumerate(vocab)} 32 | return word2idx, idx2word 33 | 34 | 35 | def load_cn_vocab(): 36 | word2idx, idx2word = load_vocab('cn') 37 | return word2idx, idx2word 38 | 39 | 40 | def load_en_vocab(): 41 | word2idx, idx2word = load_vocab('en') 42 | return word2idx, idx2word 43 | 44 | 45 | def create_data(source_sents, target_sents): 46 | cn2idx, idx2cn = load_cn_vocab() 47 | en2idx, idx2en = load_en_vocab() 48 | 49 | # Index 50 | x_list, y_list, Sources, Targets = [], [], [], [] 51 | for source_sent, target_sent in zip(source_sents, target_sents): 52 | x = [ 53 | cn2idx.get(word, 1) 54 | for word in ('<S> ' + source_sent + ' </S>').split() 55 | ] # 1: OOV, </S>: End of Text 56 | y = [ 57 | en2idx.get(word, 1) 58 | for word in ('<S> ' + target_sent + ' </S>').split() 59 | ] 60 | if max(len(x), len(y)) <= maxlen: 61 | x_list.append(np.array(x)) 62 | y_list.append(np.array(y)) 63 | Sources.append(source_sent) 64 | Targets.append(target_sent) 65 | 66 | # Pad 67 | X = np.zeros([len(x_list), maxlen], np.int32) 68 | Y = np.zeros([len(y_list), maxlen], np.int32) 69 | for i, (x, y) in enumerate(zip(x_list, y_list)): 70 | X[i] = np.lib.pad(x, [0, maxlen - len(x)], 71 | 'constant', 72 | constant_values=(0, 0)) 73 | Y[i] = np.lib.pad(y, [0, maxlen - len(y)], 74 | 'constant', 75 | constant_values=(0, 0)) 76 | 77 | return X, Y, Sources, Targets 78 | 79 | 80 | def load_data(data_type): 81 | assert data_type in ['train', 'test'] 82 | if data_type == 'train': 83 | source, target = source_train, target_train 84 | elif data_type == 'test': 85 | source, target = source_test, target_test 86 | cn_sents = [ 87 | regex.sub("[^\s\p{L}']", '', line) # noqa W605 88 | for line in codecs.open(source, 'r', 'utf-8').read().split('\n') 89 | if line and line[0] != '<' 90 | ] 91 | en_sents = [ 92 | regex.sub("[^\s\p{L}']", '', line) # noqa W605 93 | for line in codecs.open(target, 'r', 'utf-8').read().split('\n') 94 | if line and line[0] != '<' 95 | ] 96 | 97 | X, Y, Sources, Targets = create_data(cn_sents, en_sents) 98 | return X, Y, Sources, Targets 99 | 100 | 101 | def load_train_data(): 102 | X, Y, _, _ = load_data('train') 103 | return X, Y 104 | 105 | 106 | def load_test_data(): 107 | X, Y, _, _ = load_data('test') 108 | return X, Y 109 | 110 | 111 | def get_batch_indices(total_length, batch_size): 112 | assert (batch_size <= 113 | total_length), ('Batch size is larger than total data length. '
114 | 'Check your data or change batch size.') 115 | current_index = 0 116 | indexs = [i for i in range(total_length)] 117 | random.shuffle(indexs) 118 | while 1: 119 | if current_index + batch_size >= total_length: 120 | break 121 | yield indexs[current_index:current_index + batch_size], current_index 122 | current_index += batch_size 123 | 124 | 125 | def idx_to_sentence(arr, vocab, insert_space=False): 126 | res = '' 127 | first_word = True 128 | for id in arr: 129 | word = vocab[id.item()] 130 | 131 | if insert_space and not first_word: 132 | res += ' ' 133 | first_word = False 134 | 135 | res += word 136 | 137 | return res 138 | 139 | 140 | def download(url, dir, name=None): 141 | os.makedirs(dir, exist_ok=True) 142 | if name is None: 143 | name = url.split('/')[-1] 144 | path = os.path.join(dir, name) 145 | if not os.path.exists(path): 146 | print(f'Downloading {name} ...') 147 | open(path, 'wb').write(requests.get(url).content) 148 | print('Download complete.') 149 | 150 | 151 | def download_data(): 152 | data_dir = 'dldemos/Transformer/data' 153 | urls = [('https://raw.githubusercontent.com/P3n9W31/transformer-pytorch/' 154 | 'master/corpora/cn.txt'), 155 | ('https://raw.githubusercontent.com/P3n9W31/transformer-pytorch/' 156 | 'master/corpora/en.txt'), 157 | ('https://raw.githubusercontent.com/P3n9W31/transformer-pytorch/' 158 | 'master/preprocessed/cn.txt.vocab.tsv'), 159 | ('https://raw.githubusercontent.com/P3n9W31/transformer-pytorch/' 160 | 'master/preprocessed/en.txt.vocab.tsv')] 161 | for url in urls: 162 | download(url, data_dir) 163 | 164 | 165 | if __name__ == '__main__': 166 | download_data() 167 | -------------------------------------------------------------------------------- /dldemos/Transformer/outdated/dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import Counter 3 | 4 | import numpy as np 5 | from torchtext.data import get_tokenizer 6 | 7 | SOS_ID = 0 8 | EOS_ID = 1 9 | UNK_ID = 2 10 | PAD_ID = 3 11 | MAX_SEQ_LEN = 200 12 | 13 | 14 | def read_file(json_path): 15 | import jieba 16 | english_sentences = [] 17 | chinese_sentences = [] 18 | tokenizer = get_tokenizer('basic_english') 19 | with open(json_path, 'r') as fp: 20 | for line in fp: 21 | line = json.loads(line) 22 | english, chinese = line['english'], line['chinese'] 23 | # Correct mislabeled data 24 | if not english.isascii(): 25 | english, chinese = chinese, english 26 | # Tokenize 27 | english = tokenizer(english) 28 | chinese = list(jieba.cut(chinese)) 29 | chinese = [x for x in chinese if x not in {' ', '\t'}] 30 | english_sentences.append(english) 31 | chinese_sentences.append(chinese) 32 | return english_sentences, chinese_sentences 33 | 34 | 35 | def create_vocab(sentences, max_element=None): 36 | """Note that max_element includes special characters.""" 37 | 38 | default_list = ['<sos>', '<eos>', '<unk>', '<pad>'] 39 | 40 | char_set = Counter() 41 | for sentence in sentences: 42 | c_set = Counter(sentence) 43 | char_set.update(c_set) 44 | 45 | if max_element is None: 46 | return default_list + list(char_set.keys()) 47 | else: 48 | max_element -= 4 49 | words_freq = char_set.most_common(max_element) 50 | # unzip the (word, freq) pairs into two parallel arrays 51 | words, freq = zip(*words_freq) 52 | return default_list + list(words) 53 | 54 | 55 | def sentence_to_tensor(sentences, vocab): 56 | vocab_map = {k: i for i, k in enumerate(vocab)} 57 | 58 | def process_word(word): 59 | return vocab_map.get(word, UNK_ID) 60 | 61 | res = [] 62 | for sentence in sentences: 63 |
sentence = np.array(list(map(process_word, sentence)), dtype=np.int32) 64 | res.append(sentence) 65 | 66 | return np.array(res, dtype=object) 67 | 68 | 69 | def tensor_to_sentence(tensor, mapping, insert_space=False): 70 | res = '' 71 | first_word = True 72 | for id in tensor: 73 | word = mapping[int(id.item())] 74 | 75 | if insert_space and not first_word: 76 | res += ' ' 77 | first_word = False 78 | 79 | res += word 80 | 81 | return res 82 | 83 | 84 | def main(): 85 | en_sens, zh_sens = read_file( 86 | 'data/translation2019zh/translation2019zh_valid.json') 87 | print(*en_sens[0:3]) 88 | print(*zh_sens[0:3]) 89 | en_vocab = create_vocab(en_sens, 10000) 90 | zh_vocab = create_vocab(zh_sens, 30000) 91 | print(list(en_vocab)[0:10]) 92 | print(list(zh_vocab)[0:10]) 93 | # np.save('data/translation2019zh/en_vocab.npy', en_vocab) 94 | # np.save('data/translation2019zh/zh_vocab.npy', zh_vocab) 95 | 96 | # en_vocab = np.load('data/translation2019zh/en_dict.npy') 97 | # zh_vocab = np.load('data/translation2019zh/zh_dict.npy') 98 | 99 | en_tensors = sentence_to_tensor(en_sens, en_vocab) 100 | zh_tensors = sentence_to_tensor(zh_sens, zh_vocab) 101 | 102 | print(tensor_to_sentence(en_tensors[0], en_vocab, True)) 103 | print(tensor_to_sentence(zh_tensors[0], zh_vocab)) 104 | 105 | # np.save('data/translation2019zh/en_sentences.npy', en_tensors) 106 | # np.save('data/translation2019zh/zh_sentences.npy', zh_tensors) 107 | 108 | # en_tensors = np.load('data/translation2019zh/en_sentences.npy', 109 | # allow_pickle=True) 110 | # zh_tensors = np.load('data/translation2019zh/zh_sentences.npy', 111 | # allow_pickle=True) 112 | 113 | 114 | if __name__ == '__main__': 115 | main() 116 | -------------------------------------------------------------------------------- /dldemos/Transformer/outdated/dist_train.py: -------------------------------------------------------------------------------- 1 | # import os 2 | # import time 3 | 4 | # import torch 5 | # import torch.distributed as dist 6 | # import torch.nn as nn 7 | # from torch.nn.parallel import DistributedDataParallel 8 | 9 | # from dldemos.Transformer.model import Transformer 10 | # from dldemos.Transformer.preprocess_data import (PAD_ID, get_dataloader, 11 | # load_sentences, load_vocab) 12 | 13 | # # Config 14 | # batch_size = 64 15 | # lr = 0.0001 16 | # d_model = 512 17 | # d_ff = 2048 18 | # n_layers = 6 19 | # heads = 8 20 | 21 | # n_epochs = 40 22 | 23 | # def reduce_mean(tensor, nprocs): 24 | # rt = tensor.clone() 25 | # dist.all_reduce(rt, op=dist.ReduceOp.SUM) 26 | # rt /= nprocs 27 | # return rt 28 | 29 | # def main(): 30 | # dist.init_process_group('nccl') 31 | # rank = dist.get_rank() 32 | # device_id = rank % torch.cuda.device_count() 33 | 34 | # en_vocab, zh_vocab = load_vocab() 35 | 36 | # en_train, zh_train, en_valid, zh_valid = load_sentences() 37 | # dataloader_train, sampler = get_dataloader(en_train, zh_train, 38 | # batch_size, 39 | # True) 40 | # dataloader_valid = get_dataloader(en_valid, zh_valid) 41 | 42 | # print_interval = 1000 43 | 44 | # model = Transformer(len(en_vocab), len(zh_vocab), PAD_ID, d_model, d_ff, 45 | # n_layers, heads) 46 | # model.to(device_id) 47 | 48 | # model = DistributedDataParallel(model, device_ids=[device_id]) 49 | # optimizer = torch.optim.Adam(model.parameters(), lr) 50 | 51 | # # Optional: load model 52 | # ckpt_path = 'dldemos/Transformer/model_latest.pth' 53 | # optim_path = 'dldemos/Transformer/optimizer_latest.pth' 54 | # if os.path.exists(ckpt_path) and os.path.exists(optim_path): 55 | # 
map_location = {'cuda:0': f'cuda:{device_id}'} 56 | # state_dict = torch.load(ckpt_path, map_location=map_location) 57 | # model.module.load_state_dict(state_dict) 58 | # state_dict = torch.load(optim_path, map_location=map_location) 59 | # optimizer.load_state_dict(state_dict) 60 | # begin_epoch = int( 61 | # os.path.split( 62 | # os.readlink(ckpt_path))[-1].split('.')[0].split('_')[1]) + 1 63 | # else: 64 | # begin_epoch = 0 65 | 66 | # citerion = nn.CrossEntropyLoss(ignore_index=PAD_ID) 67 | # tic = time.time() 68 | # cnter = 0 69 | # dataset_len = len(dataloader_train.dataset) 70 | # if device_id == 0: 71 | # print('Dataset size:', dataset_len) 72 | # for epoch in range(begin_epoch, n_epochs): 73 | # sampler.set_epoch(epoch) 74 | 75 | # for x, y in dataloader_train: 76 | # x, y = x.to(device_id), y.to(device_id) 77 | # x_mask = x == PAD_ID 78 | # y_mask = y == PAD_ID 79 | # y_input = y[:, :-1] 80 | # y_label = y[:, 1:] 81 | # y_mask = y_mask[:, :-1] 82 | # y_hat = model(x, y_input, x_mask, y_mask) 83 | # n, seq_len = y_label.shape 84 | # y_hat = torch.reshape(y_hat, (n * seq_len, -1)) 85 | # y_label = torch.reshape(y_label, (n * seq_len, )) 86 | # loss = citerion(y_hat, y_label) 87 | 88 | # y_label_mask = y_label != PAD_ID 89 | # preds = torch.argmax(y_hat, -1) 90 | # correct = preds == y_label 91 | # acc = torch.sum(y_label_mask * correct) / torch.sum(y_label_mask) 92 | 93 | # optimizer.zero_grad() 94 | # loss.backward() 95 | # torch.nn.utils.clip_grad_norm_(model.parameters(), 1) 96 | # optimizer.step() 97 | # loss = reduce_mean(loss, dist.get_world_size()) 98 | # if device_id == 0: 99 | # toc = time.time() 100 | # interval = toc - tic 101 | # minutes = int(interval // 60) 102 | # seconds = int(interval % 60) 103 | # if cnter % print_interval == 0: 104 | # print(f'{cnter:08d} {minutes:02d}:{seconds:02d}' 105 | # f' loss: {loss.item()} acc: {acc.item()}') 106 | # cnter += 1 107 | 108 | # if device_id == 0: 109 | # latest_model = 'dldemos/Transformer/model_latest.pth' 110 | # latest_optimizer = 'dldemos/Transformer/optimizer_latest.pth' 111 | # model_file = f'dldemos/Transformer/model_{epoch}.pth' 112 | # optim_file = f'dldemos/Transformer/optimizer_{epoch}.pth' 113 | # torch.save(model.module.state_dict(), model_file) 114 | # torch.save(optimizer.state_dict(), optim_file) 115 | 116 | # if os.path.exists(latest_model): 117 | # os.remove(latest_model) 118 | # if os.path.exists(latest_optimizer): 119 | # os.remove(latest_optimizer) 120 | 121 | # os.symlink(os.path.abspath(model_file), latest_model) 122 | # os.symlink(os.path.abspath(optim_file), latest_optimizer) 123 | 124 | # print(f'Model saved to {model_file}') 125 | 126 | # dist.barrier() 127 | 128 | # # if valid_period 129 | 130 | # print('Done.') 131 | 132 | # dist.destroy_process_group() 133 | 134 | # if __name__ == '__main__': 135 | # main() 136 | 137 | # # nohup bash dldemos/Transformer/dist_train.sh & 138 | -------------------------------------------------------------------------------- /dldemos/Transformer/outdated/dist_train.sh: -------------------------------------------------------------------------------- 1 | torchrun --nproc_per_node=2 dldemos/Transformer/dist_train.py 2 | -------------------------------------------------------------------------------- /dldemos/Transformer/outdated/preprocess_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.nn.utils.rnn import pad_sequence 4 | from torch.utils.data import DataLoader, Dataset 5 
| from torch.utils.data.distributed import DistributedSampler 6 | 7 | from dldemos.Transformer.dataset import (EOS_ID, PAD_ID, SOS_ID, create_vocab, 8 | read_file, sentence_to_tensor, 9 | tensor_to_sentence) 10 | 11 | 12 | def load_vocab(filename='data/translation2019zh/vocab_30k_80k.npy'): 13 | vocab = np.load(filename, allow_pickle=True).item() 14 | en_vocab = vocab['en'] 15 | zh_vocab = vocab['zh'] 16 | return en_vocab, zh_vocab 17 | 18 | 19 | def load_sentences(filename='data/translation2019zh/sentences.npy'): 20 | tensors = np.load(filename, allow_pickle=True).item() 21 | en_tensors_train = tensors['en_train'] 22 | zh_tensors_train = tensors['zh_train'] 23 | en_tensors_valid = tensors['en_valid'] 24 | zh_tensors_valid = tensors['zh_valid'] 25 | return (en_tensors_train, zh_tensors_train, en_tensors_valid, 26 | zh_tensors_valid) 27 | 28 | 29 | class TranslationDataset(Dataset): 30 | 31 | def __init__(self, en_tensor: np.ndarray, zh_tensor: np.ndarray): 32 | super().__init__() 33 | assert len(en_tensor) == len(zh_tensor) 34 | self.length = len(en_tensor) 35 | self.en_tensor = en_tensor 36 | self.zh_tensor = zh_tensor 37 | 38 | def __len__(self): 39 | return self.length 40 | 41 | def __getitem__(self, index): 42 | x = np.concatenate(([SOS_ID], self.en_tensor[index], [EOS_ID])) 43 | x = torch.from_numpy(x) 44 | y = np.concatenate(([SOS_ID], self.zh_tensor[index], [EOS_ID])) 45 | y = torch.from_numpy(y) 46 | return x, y 47 | 48 | 49 | def get_dataloader(en_tensor: np.ndarray, 50 | zh_tensor: np.ndarray, 51 | batch_size=16, 52 | dist_train=False): 53 | 54 | def collate_fn(batch): 55 | x, y = zip(*batch) 56 | x_pad = pad_sequence(x, batch_first=True, padding_value=PAD_ID) 57 | y_pad = pad_sequence(y, batch_first=True, padding_value=PAD_ID) 58 | 59 | return x_pad, y_pad 60 | 61 | dataset = TranslationDataset(en_tensor, zh_tensor) 62 | if dist_train: 63 | sampler = DistributedSampler(dataset) 64 | dataloader = DataLoader(dataset, 65 | batch_size=batch_size, 66 | sampler=sampler, 67 | collate_fn=collate_fn) 68 | return dataloader, sampler 69 | else: 70 | dataloader = DataLoader(dataset, 71 | batch_size=batch_size, 72 | shuffle=True, 73 | collate_fn=collate_fn) 74 | return dataloader 75 | 76 | 77 | def test1(): 78 | # en_sens_train, zh_sens_train = read_file( 79 | # 'data/translation2019zh/translation2019zh_train.json') 80 | en_sens_valid, zh_sens_valid = read_file( 81 | 'data/translation2019zh/translation2019zh_valid.json') 82 | en_vocab = create_vocab(en_sens_valid, 10000) 83 | zh_vocab = create_vocab(zh_sens_valid, 30000) 84 | 85 | en_tensors_valid = sentence_to_tensor(en_sens_valid, en_vocab) 86 | zh_tensors_valid = sentence_to_tensor(zh_sens_valid, zh_vocab) 87 | print(tensor_to_sentence(en_tensors_valid[1], en_vocab, True)) 88 | print(tensor_to_sentence(zh_tensors_valid[1], zh_vocab)) 89 | ds = TranslationDataset(en_tensors_valid, zh_tensors_valid) 90 | print(tensor_to_sentence(ds[1][0], en_vocab, True)) 91 | print(tensor_to_sentence(ds[1][1], zh_vocab)) 92 | dl = get_dataloader(en_tensors_valid, zh_tensors_valid) 93 | e, z = next(iter(dl)) 94 | print(tensor_to_sentence(e[0], en_vocab, True)) 95 | print(tensor_to_sentence(z[0], zh_vocab)) 96 | 97 | 98 | def test2(): 99 | en_vocab, zh_vocab = load_vocab() 100 | 101 | en_train, zh_train, en_valid, zh_valid = load_sentences() 102 | dataloader_train = get_dataloader(en_train, zh_train) 103 | dataloader_valid = get_dataloader(en_valid, zh_valid) 104 | 105 | en_batch, zh_batch = next(iter(dataloader_train)) 106 | 
print(tensor_to_sentence(en_batch[2], en_vocab, True)) 107 | print(tensor_to_sentence(zh_batch[2], zh_vocab, False)) 108 | 109 | en_batch, zh_batch = next(iter(dataloader_valid)) 110 | print(tensor_to_sentence(en_batch[2], en_vocab, True)) 111 | print(tensor_to_sentence(zh_batch[2], zh_vocab, False)) 112 | 113 | 114 | def main(): 115 | 116 | en_sens_train, zh_sens_train = read_file( 117 | 'data/translation2019zh/translation2019zh_train.json') 118 | en_sens_valid, zh_sens_valid = read_file( 119 | 'data/translation2019zh/translation2019zh_valid.json') 120 | en_vocab = create_vocab(en_sens_train, 30000) 121 | zh_vocab = create_vocab(zh_sens_train, 80000) 122 | vocab = {'en': en_vocab, 'zh': zh_vocab} 123 | np.save('data/translation2019zh/vocab_30k_80k.npy', vocab) 124 | 125 | en_tensors_train = sentence_to_tensor(en_sens_train, en_vocab) 126 | zh_tensors_train = sentence_to_tensor(zh_sens_train, zh_vocab) 127 | en_tensors_valid = sentence_to_tensor(en_sens_valid, en_vocab) 128 | zh_tensors_valid = sentence_to_tensor(zh_sens_valid, zh_vocab) 129 | tensors = { 130 | 'en_train': en_tensors_train, 131 | 'zh_train': zh_tensors_train, 132 | 'en_valid': en_tensors_valid, 133 | 'zh_valid': zh_tensors_valid 134 | } 135 | np.save('data/translation2019zh/sentences.npy', tensors) 136 | 137 | 138 | if __name__ == '__main__': 139 | # test1() 140 | # test2() 141 | main() 142 | -------------------------------------------------------------------------------- /dldemos/Transformer/outdated/test.py: -------------------------------------------------------------------------------- 1 | # import torch 2 | 3 | # from dldemos.Transformer.dataset import MAX_SEQ_LEN, tensor_to_sentence 4 | # from dldemos.Transformer.model import Transformer 5 | # from dldemos.Transformer.outdated.preprocess_data import (EOS_ID, PAD_ID, 6 | # SOS_ID, 7 | # get_dataloader, 8 | # load_sentences, 9 | # load_vocab) 10 | 11 | # # Config 12 | # batch_size = 64 13 | # lr = 0.0001 14 | # d_model = 512 15 | # d_ff = 2048 16 | # n_layers = 6 17 | # heads = 8 18 | 19 | # def main(): 20 | # model_path = 'dldemos/Transformer/model_latest.pth' 21 | 22 | # device = 'cuda' 23 | # en_vocab, zh_vocab = load_vocab() 24 | 25 | # en_train, zh_train, en_valid, zh_valid = load_sentences() 26 | # dataloader_valid = get_dataloader(en_train, zh_train, 1) 27 | 28 | # model = Transformer(len(en_vocab), 29 | # len(zh_vocab), 30 | # PAD_ID, 31 | # d_model, 32 | # d_ff, 33 | # n_layers, 34 | # heads, 35 | # max_seq_len=MAX_SEQ_LEN) 36 | # model.to(device) 37 | # model.load_state_dict(torch.load(model_path)) 38 | 39 | # cnt = 0 40 | # for x, y in dataloader_valid: 41 | # x, y = x.to(device), y.to(device) 42 | # x_mask = x == PAD_ID 43 | # n = x.shape[0] 44 | # sample = torch.ones(n, MAX_SEQ_LEN, 45 | # dtype=torch.long).to(device) * PAD_ID 46 | # sample[:, 0] = SOS_ID 47 | # print(tensor_to_sentence(x[0], en_vocab, True)) 48 | # print(tensor_to_sentence(y[0], zh_vocab)) 49 | # for i in range(50): 50 | # sample_mask = sample == PAD_ID 51 | # y_predict = model(x, sample, x_mask, sample_mask) 52 | # y_predict = y_predict[:, i] 53 | # prob_dist = torch.softmax(y_predict, 1) 54 | # #new_word = torch.multinomial(prob_dist, 1) 55 | # _, new_word = torch.max(prob_dist, 1) 56 | # sample[:, i + 1] = new_word 57 | # print(tensor_to_sentence(sample[0], zh_vocab)) 58 | # cnt += 1 59 | # if cnt == 5: 60 | # break 61 | 62 | # print('Done.') 63 | 64 | # if __name__ == '__main__': 65 | # main() 66 | -------------------------------------------------------------------------------- 
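Both the archived training script below and the current `train.py` derive the decoder input and the loss target by shifting the padded target batch one position, so the model predicts the next token at every position. A minimal sketch of this teacher-forcing shift (toy token ids, values hypothetical):

```python
import torch

y = torch.tensor([[1, 5, 7, 9, 2]])  # e.g. <sos> w1 w2 w3 <eos>
y_input = y[:, :-1]  # decoder input: <sos> w1 w2 w3
y_label = y[:, 1:]   # loss target:   w1 w2 w3 <eos>
assert y_input.shape == y_label.shape  # one prediction per input position
```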
/dldemos/Transformer/outdated/train.py: -------------------------------------------------------------------------------- 1 | # import time 2 | 3 | # import numpy as np 4 | # import torch 5 | # import torch.nn as nn 6 | 7 | # from dldemos.Transformer.dataset import tensor_to_sentence 8 | # from dldemos.Transformer.model import Transformer 9 | # from dldemos.Transformer.preprocess_data import (PAD_ID, get_dataloader, 10 | # load_sentences, load_vocab) 11 | 12 | # # Config 13 | # batch_size = 64 14 | # lr = 0.0001 15 | # d_model = 512 16 | # d_ff = 1024 17 | # n_layers = 6 18 | # heads = 8 19 | 20 | # def main(): 21 | # en_vocab, zh_vocab = load_vocab() 22 | 23 | # en_train, zh_train, en_valid, zh_valid = load_sentences() 24 | # dataloader_train = get_dataloader(en_train, zh_train, batch_size) 25 | 26 | # print_interval = 1000 27 | # device_id = 0 28 | 29 | # model = Transformer(len(en_vocab), len(zh_vocab), d_model, d_ff, 30 | # n_layers, 31 | # heads) 32 | # model.to(device_id) 33 | 34 | # model.init_weights() 35 | 36 | # optimizer = torch.optim.Adam(model.parameters(), lr) 37 | # citerion = nn.CrossEntropyLoss(ignore_index=PAD_ID) 38 | # tic = time.time() 39 | # cnter = 0 40 | # dataset_len = len(dataloader_train.dataset) 41 | # print('Dataset size:', dataset_len) 42 | # for epoch in range(10): 43 | # loss_sum = 0 44 | 45 | # for x, y in dataloader_train: 46 | # x, y = x.to(device_id), y.to(device_id) 47 | # x_mask = x == PAD_ID 48 | # y_mask = y == PAD_ID 49 | # y_input = y[:, :-1] 50 | # y_label = y[:, 1:] 51 | # y_mask = y_mask[:, :-1] 52 | # y_hat = model(x, y_input, x_mask, y_mask) 53 | # n, seq_len = y_label.shape 54 | # y_hat = torch.reshape(y_hat, (n * seq_len, -1)) 55 | # y_label = torch.reshape(y_label, (n * seq_len, )) 56 | # loss = citerion(y_hat, y_label) 57 | 58 | # optimizer.zero_grad() 59 | # loss.backward() 60 | # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) 61 | # optimizer.step() 62 | 63 | # loss_sum += loss.item() 64 | 65 | # toc = time.time() 66 | # interval = toc - tic 67 | # minutes = int(interval // 60) 68 | # seconds = int(interval % 60) 69 | # if cnter % print_interval == 0: 70 | # print(f'{cnter:08d} {minutes:02d}:{seconds:02d}' 71 | # f' loss: {loss.item()}') 72 | # cnter += 1 73 | 74 | # print(f'Epoch {epoch}. 
loss: {loss_sum / dataset_len}') 75 | 76 | # torch.save(model.state_dict(), 'dldemos/Transformer/model.pth') 77 | # print('Done.') 78 | -------------------------------------------------------------------------------- /dldemos/Transformer/train.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from dldemos.Transformer.data_load import (get_batch_indices, load_cn_vocab, 7 | load_en_vocab, load_train_data, 8 | maxlen) 9 | from dldemos.Transformer.model import Transformer 10 | 11 | # Config 12 | batch_size = 64 13 | lr = 0.0001 14 | d_model = 512 15 | d_ff = 2048 16 | n_layers = 6 17 | heads = 8 18 | dropout_rate = 0.2 19 | n_epochs = 60 20 | PAD_ID = 0 21 | 22 | 23 | def main(): 24 | device = 'cuda' 25 | cn2idx, idx2cn = load_cn_vocab() 26 | en2idx, idx2en = load_en_vocab() 27 | # X: en 28 | # Y: cn 29 | Y, X = load_train_data() 30 | 31 | print_interval = 100 32 | 33 | model = Transformer(len(en2idx), len(cn2idx), PAD_ID, d_model, d_ff, 34 | n_layers, heads, dropout_rate, maxlen) 35 | model.to(device) 36 | 37 | optimizer = torch.optim.Adam(model.parameters(), lr) 38 | 39 | citerion = nn.CrossEntropyLoss(ignore_index=PAD_ID) 40 | tic = time.time() 41 | cnter = 0 42 | for epoch in range(n_epochs): 43 | for index, _ in get_batch_indices(len(X), batch_size): 44 | x_batch = torch.LongTensor(X[index]).to(device) 45 | y_batch = torch.LongTensor(Y[index]).to(device) 46 | y_input = y_batch[:, :-1] 47 | y_label = y_batch[:, 1:] 48 | y_hat = model(x_batch, y_input) 49 | 50 | y_label_mask = y_label != PAD_ID 51 | preds = torch.argmax(y_hat, -1) 52 | correct = preds == y_label 53 | acc = torch.sum(y_label_mask * correct) / torch.sum(y_label_mask) 54 | 55 | n, seq_len = y_label.shape 56 | y_hat = torch.reshape(y_hat, (n * seq_len, -1)) 57 | y_label = torch.reshape(y_label, (n * seq_len, )) 58 | loss = citerion(y_hat, y_label) 59 | 60 | optimizer.zero_grad() 61 | loss.backward() 62 | torch.nn.utils.clip_grad_norm_(model.parameters(), 1) 63 | optimizer.step() 64 | 65 | if cnter % print_interval == 0: 66 | toc = time.time() 67 | interval = toc - tic 68 | minutes = int(interval // 60) 69 | seconds = int(interval % 60) 70 | print(f'{cnter:08d} {minutes:02d}:{seconds:02d}' 71 | f' loss: {loss.item()} acc: {acc.item()}') 72 | cnter += 1 73 | 74 | model_path = 'dldemos/Transformer/model.pth' 75 | torch.save(model.state_dict(), model_path) 76 | 77 | print(f'Model saved to {model_path}') 78 | 79 | 80 | if __name__ == '__main__': 81 | main() 82 | -------------------------------------------------------------------------------- /dldemos/Transformer/translate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from dldemos.Transformer.data_load import (idx_to_sentence, load_cn_vocab, 4 | load_en_vocab, maxlen) 5 | from dldemos.Transformer.model import Transformer 6 | 7 | # Config 8 | batch_size = 1 9 | lr = 0.0001 10 | d_model = 512 11 | d_ff = 2048 12 | n_layers = 6 13 | heads = 8 14 | dropout_rate = 0.2 15 | n_epochs = 60 16 | 17 | PAD_ID = 0 18 | 19 | 20 | def main(): 21 | device = 'cuda' 22 | cn2idx, idx2cn = load_cn_vocab() 23 | en2idx, idx2en = load_en_vocab() 24 | 25 | model = Transformer(len(en2idx), len(cn2idx), 0, d_model, d_ff, n_layers, 26 | heads, dropout_rate, maxlen) 27 | model.to(device) 28 | model.eval() 29 | 30 | model_path = 'dldemos/Transformer/model.pth' 31 | model.load_state_dict(torch.load(model_path)) 32 | 33 | my_input = ['we', 
'should', 'protect', 'environment']
34 | x_batch = torch.LongTensor([[en2idx[x] for x in my_input]]).to(device)
35 |
36 | en_sentence = idx_to_sentence(x_batch[0], idx2en, True)
37 | print(en_sentence)
38 |
39 | y_input = torch.ones(batch_size, maxlen,
40 | dtype=torch.long).to(device) * PAD_ID
41 | y_input[0] = en2idx['<S>']
42 | # y_input = y_batch
43 | with torch.no_grad():
44 | for i in range(1, y_input.shape[1]):
45 | y_hat = model(x_batch, y_input)
46 | for j in range(batch_size):
47 | y_input[j, i] = torch.argmax(y_hat[j, i - 1])
48 | output_sentence = idx_to_sentence(y_input[0], idx2cn, True)
49 | print(output_sentence)
50 |
51 |
52 | if __name__ == '__main__':
53 | main()
54 |
--------------------------------------------------------------------------------
/dldemos/VAE/README.md:
--------------------------------------------------------------------------------
1 | 1. Download [CelebA](https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) Align&Cropped Images.
2 | 2. Modify the dataset path used by the `get_dataloader` function in `main.py`.
3 | 3. Run `main.py`. You can choose which of the `train`, `reconstruct`, and `generate` functions to run.
4 |
5 | Acknowledgement: The code is inspired by [PyTorch-VAE](https://github.com/AntixK/PyTorch-VAE).
6 |
--------------------------------------------------------------------------------
/dldemos/VAE/load_celebA.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import torch
4 | from PIL import Image
5 | from torch.utils.data import DataLoader, Dataset
6 | from torchvision import transforms
7 |
8 |
9 | class CelebADataset(Dataset):
10 |
11 | def __init__(self, root, img_shape=(64, 64)) -> None:
12 | super().__init__()
13 | self.root = root
14 | self.img_shape = img_shape
15 | self.filenames = sorted(os.listdir(root))
16 |
17 | def __len__(self) -> int:
18 | return len(self.filenames)
19 |
20 | def __getitem__(self, index: int):
21 | path = os.path.join(self.root, self.filenames[index])
22 | img = Image.open(path).convert('RGB')
23 | pipeline = transforms.Compose([
24 | transforms.CenterCrop(168),
25 | transforms.Resize(self.img_shape),
26 | transforms.ToTensor()
27 | ])
28 | return pipeline(img)
29 |
30 |
31 | def get_dataloader(root='data/celebA/img_align_celeba', **kwargs):
32 | dataset = CelebADataset(root, **kwargs)
33 | return DataLoader(dataset, 16, shuffle=True)
34 |
35 |
36 | if __name__ == '__main__':
37 | dataloader = get_dataloader()
38 | img = next(iter(dataloader))
39 | print(img.shape)
40 | # Concat 4x4 images
41 | N, C, H, W = img.shape
42 | assert N == 16
43 | img = torch.permute(img, (1, 0, 2, 3))
44 | img = torch.reshape(img, (C, 4, 4 * H, W))
45 | img = torch.permute(img, (0, 2, 1, 3))
46 | img = torch.reshape(img, (C, 4 * H, 4 * W))
47 | img = transforms.ToPILImage()(img)
48 | img.save('work_dirs/tmp.jpg')
49 |
--------------------------------------------------------------------------------
/dldemos/VAE/main.py:
--------------------------------------------------------------------------------
1 | from time import time
2 |
3 | import torch
4 | import torch.nn.functional as F
5 | from torchvision.transforms import ToPILImage
6 |
7 | from dldemos.VAE.load_celebA import get_dataloader
8 | from dldemos.VAE.model import VAE
9 |
10 | # Hyperparameters
11 | n_epochs = 10
12 | kl_weight = 0.00025
13 | lr = 0.005
14 |
15 |
16 | def loss_fn(y, y_hat, mean, logvar):
17 | recons_loss = F.mse_loss(y_hat, y)
18 | kl_loss = torch.mean(
19 | -0.5 * torch.sum(1 + logvar - mean**2 - torch.exp(logvar), 1), 0)
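# The `kl_loss` above is the closed-form KL divergence between the
# approximate posterior N(mean, diag(exp(logvar))) and the standard normal
# prior, KL = -0.5 * sum_j(1 + logvar_j - mean_j**2 - exp(logvar_j)),
# summed over latent dimensions and averaged over the batch. `kl_weight`
# below balances it against the reconstruction term.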
20 | loss = recons_loss + kl_loss * kl_weight
21 | return loss
22 |
23 |
24 | def train(device, dataloader, model):
25 | optimizer = torch.optim.Adam(model.parameters(), lr)
26 | dataset_len = len(dataloader.dataset)
27 |
28 | begin_time = time()
29 | # train
30 | for i in range(n_epochs):
31 | loss_sum = 0
32 | for x in dataloader:
33 | x = x.to(device)
34 | y_hat, mean, logvar = model(x)
35 | loss = loss_fn(x, y_hat, mean, logvar)
36 | optimizer.zero_grad()
37 | loss.backward()
38 | optimizer.step()
39 | loss_sum += loss.item() * x.shape[0]
40 | loss_sum /= dataset_len
41 | training_time = time() - begin_time
42 | minute = int(training_time // 60)
43 | second = int(training_time % 60)
44 | print(f'epoch {i}: loss {loss_sum} {minute}:{second}')
45 | torch.save(model.state_dict(), 'dldemos/VAE/model.pth')
46 |
47 |
48 | def reconstruct(device, dataloader, model):
49 | model.eval()
50 | batch = next(iter(dataloader))
51 | x = batch[0:1, ...].to(device)
52 | output = model(x)[0]
53 | output = output[0].detach().cpu()
54 | input_img = batch[0].detach().cpu()
55 | combined = torch.cat((output, input_img), 1)
56 | img = ToPILImage()(combined)
57 | img.save('work_dirs/tmp.jpg')
58 |
59 |
60 | def generate(device, model):
61 | model.eval()
62 | output = model.sample(device)
63 | output = output[0].detach().cpu()
64 | img = ToPILImage()(output)
65 | img.save('work_dirs/tmp.jpg')
66 |
67 |
68 | def main():
69 | device = 'cuda:0'
70 | dataloader = get_dataloader()
71 |
72 | model = VAE().to(device)
73 |
74 | # If you already have a checkpoint, load it (comment out to train from scratch)
75 | model.load_state_dict(torch.load('dldemos/VAE/model.pth', 'cuda:0'))
76 |
77 | # Choose which functions to run
78 | train(device, dataloader, model)
79 | reconstruct(device, dataloader, model)
80 | generate(device, model)
81 |
82 |
83 | if __name__ == '__main__':
84 | main()
85 |
--------------------------------------------------------------------------------
/dldemos/VAE/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class VAE(nn.Module):
6 | """VAE for 64x64 face generation.
7 |
8 | The hidden dimensions can be tuned.
9 | """ 10 | 11 | def __init__(self, hiddens=[16, 32, 64, 128, 256], latent_dim=128) -> None: 12 | super().__init__() 13 | 14 | # encoder 15 | prev_channels = 3 16 | modules = [] 17 | img_length = 64 18 | for cur_channels in hiddens: 19 | modules.append( 20 | nn.Sequential( 21 | nn.Conv2d(prev_channels, 22 | cur_channels, 23 | kernel_size=3, 24 | stride=2, 25 | padding=1), nn.BatchNorm2d(cur_channels), 26 | nn.ReLU())) 27 | prev_channels = cur_channels 28 | img_length //= 2 29 | self.encoder = nn.Sequential(*modules) 30 | self.mean_linear = nn.Linear(prev_channels * img_length * img_length, 31 | latent_dim) 32 | self.var_linear = nn.Linear(prev_channels * img_length * img_length, 33 | latent_dim) 34 | self.latent_dim = latent_dim 35 | # decoder 36 | modules = [] 37 | self.decoder_projection = nn.Linear( 38 | latent_dim, prev_channels * img_length * img_length) 39 | self.decoder_input_chw = (prev_channels, img_length, img_length) 40 | for i in range(len(hiddens) - 1, 0, -1): 41 | modules.append( 42 | nn.Sequential( 43 | nn.ConvTranspose2d(hiddens[i], 44 | hiddens[i - 1], 45 | kernel_size=3, 46 | stride=2, 47 | padding=1, 48 | output_padding=1), 49 | nn.BatchNorm2d(hiddens[i - 1]), nn.ReLU())) 50 | modules.append( 51 | nn.Sequential( 52 | nn.ConvTranspose2d(hiddens[0], 53 | hiddens[0], 54 | kernel_size=3, 55 | stride=2, 56 | padding=1, 57 | output_padding=1), 58 | nn.BatchNorm2d(hiddens[0]), nn.ReLU(), 59 | nn.Conv2d(hiddens[0], 3, kernel_size=3, stride=1, padding=1), 60 | nn.ReLU())) 61 | self.decoder = nn.Sequential(*modules) 62 | 63 | def forward(self, x): 64 | encoded = self.encoder(x) 65 | encoded = torch.flatten(encoded, 1) 66 | mean = self.mean_linear(encoded) 67 | logvar = self.var_linear(encoded) 68 | eps = torch.randn_like(logvar) 69 | std = torch.exp(logvar / 2) 70 | z = eps * std + mean 71 | x = self.decoder_projection(z) 72 | x = torch.reshape(x, (-1, *self.decoder_input_chw)) 73 | decoded = self.decoder(x) 74 | 75 | return decoded, mean, logvar 76 | 77 | def sample(self, device='cuda'): 78 | z = torch.randn(1, self.latent_dim).to(device) 79 | x = self.decoder_projection(z) 80 | x = torch.reshape(x, (-1, *self.decoder_input_chw)) 81 | decoded = self.decoder(x) 82 | return decoded 83 | -------------------------------------------------------------------------------- /dldemos/VQVAE/configs.py: -------------------------------------------------------------------------------- 1 | mnist_cfg1 = dict(dataset_type='MNIST', 2 | img_shape=(1, 28, 28), 3 | dim=32, 4 | n_embedding=32, 5 | batch_size=256, 6 | n_epochs=20, 7 | l_w_embedding=1, 8 | l_w_commitment=0.25, 9 | lr=2e-4, 10 | n_epochs_2=50, 11 | batch_size_2=256, 12 | pixelcnn_n_blocks=15, 13 | pixelcnn_dim=128, 14 | pixelcnn_linear_dim=32, 15 | vqvae_path='dldemos/VQVAE/model_mnist.pth', 16 | gen_model_path='dldemos/VQVAE/gen_model_mnist.pth') 17 | 18 | celebahq_cfg1 = dict(dataset_type='CelebAHQ', 19 | img_shape=(3, 128, 128), 20 | dim=128, 21 | n_embedding=64, 22 | batch_size=64, 23 | n_epochs=30, 24 | l_w_embedding=1, 25 | l_w_commitment=0.25, 26 | lr=2e-4, 27 | n_epochs_2=200, 28 | batch_size_2=32, 29 | pixelcnn_n_blocks=15, 30 | pixelcnn_dim=384, 31 | pixelcnn_linear_dim=256, 32 | vqvae_path='dldemos/VQVAE/model_celebahq_1.pth', 33 | gen_model_path='dldemos/VQVAE/gen_model_celebahq_1.pth') 34 | 35 | celebahq_cfg2 = dict(dataset_type='CelebAHQ', 36 | img_shape=(3, 128, 128), 37 | dim=128, 38 | n_embedding=128, 39 | batch_size=64, 40 | n_epochs=30, 41 | l_w_embedding=1, 42 | l_w_commitment=0.25, 43 | lr=2e-4, 44 | n_epochs_2=200, 
45 | batch_size_2=32, 46 | pixelcnn_n_blocks=15, 47 | pixelcnn_dim=384, 48 | pixelcnn_linear_dim=256, 49 | vqvae_path='dldemos/VQVAE/model_celebahq_2.pth', 50 | gen_model_path='dldemos/VQVAE/gen_model_celebahq_2.pth') 51 | 52 | celebahq_cfg3 = dict(dataset_type='CelebAHQ', 53 | img_shape=(3, 64, 64), 54 | dim=128, 55 | n_embedding=64, 56 | batch_size=64, 57 | n_epochs=20, 58 | l_w_embedding=1, 59 | l_w_commitment=0.25, 60 | lr=2e-4, 61 | n_epochs_2=200, 62 | batch_size_2=32, 63 | pixelcnn_n_blocks=15, 64 | pixelcnn_dim=384, 65 | pixelcnn_linear_dim=256, 66 | vqvae_path='dldemos/VQVAE/model_celebahq_3.pth', 67 | gen_model_path='dldemos/VQVAE/gen_model_celebahq_3.pth') 68 | 69 | celebahq_cfg4 = dict(dataset_type='CelebAHQ', 70 | img_shape=(3, 64, 64), 71 | dim=128, 72 | n_embedding=32, 73 | batch_size=64, 74 | n_epochs=20, 75 | l_w_embedding=1, 76 | l_w_commitment=0.25, 77 | lr=2e-4, 78 | n_epochs_2=100, 79 | batch_size_2=32, 80 | pixelcnn_n_blocks=15, 81 | pixelcnn_dim=384, 82 | pixelcnn_linear_dim=256, 83 | vqvae_path='dldemos/VQVAE/model_celebahq_4.pth', 84 | gen_model_path='dldemos/VQVAE/gen_model_celebahq_4.pth') 85 | 86 | cfgs = [mnist_cfg1, celebahq_cfg1, celebahq_cfg2, celebahq_cfg3, celebahq_cfg4] 87 | 88 | 89 | def get_cfg(id: int): 90 | return cfgs[id] 91 | -------------------------------------------------------------------------------- /dldemos/VQVAE/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import einops 4 | import torchvision 5 | from PIL import Image 6 | from torch.utils.data import DataLoader, Dataset 7 | from torch.utils.data.distributed import DistributedSampler 8 | from torchvision import transforms 9 | 10 | # Set this tp `True` and run this script to convert dataset to LMDB format 11 | TO_LMDB = False 12 | 13 | CELEBA_DIR = 'data/celebA/img_align_celeba' 14 | CELEBA_LMDB_PATH = 'data/celebA/img_align_celeba.lmdb' 15 | CELEBA_HQ_DIR = 'data/celebA/celeba_hq_256' 16 | CELEBA_HQ_LMDB_PATH = 'data/celebA/celeba_hq_256.lmdb' 17 | 18 | 19 | def download_mnist(): 20 | mnist = torchvision.datasets.MNIST(root='./data/mnist', download=True) 21 | print('length of MNIST', len(mnist)) 22 | id = 4 23 | img, label = mnist[id] 24 | print(img) 25 | print(label) 26 | 27 | # On computer with monitor 28 | # img.show() 29 | 30 | img.save('work_dirs/tmp_mnist.jpg') 31 | tensor = transforms.ToTensor()(img) 32 | print(tensor.shape) 33 | print(tensor.max()) 34 | print(tensor.min()) 35 | 36 | 37 | class CelebADataset(Dataset): 38 | 39 | def __init__(self, root, img_shape=(64, 64)): 40 | super().__init__() 41 | self.root = root 42 | self.img_shape = img_shape 43 | self.filenames = sorted(os.listdir(root)) 44 | 45 | def __len__(self) -> int: 46 | return len(self.filenames) 47 | 48 | def __getitem__(self, index: int): 49 | path = os.path.join(self.root, self.filenames[index]) 50 | img = Image.open(path) 51 | pipeline = transforms.Compose([ 52 | transforms.CenterCrop(168), 53 | transforms.Resize(self.img_shape), 54 | transforms.ToTensor() 55 | ]) 56 | return pipeline(img) 57 | 58 | 59 | if TO_LMDB: 60 | from dldemos.lmdb_loader import ImageFolderLMDB 61 | 62 | class CelebALMDBDataset(ImageFolderLMDB): 63 | 64 | def __init__(self, path, img_shape=(64, 64)): 65 | pipeline = transforms.Compose([ 66 | transforms.CenterCrop(168), 67 | transforms.Resize(img_shape), 68 | transforms.ToTensor() 69 | ]) 70 | super().__init__(path, pipeline) 71 | 72 | 73 | class MNISTImageDataset(Dataset): 74 | 75 | def __init__(self, img_shape=(28, 28)): 
76 | super().__init__() 77 | self.img_shape = img_shape 78 | self.mnist = torchvision.datasets.MNIST(root='./data/mnist') 79 | 80 | def __len__(self): 81 | return len(self.mnist) 82 | 83 | def __getitem__(self, index: int): 84 | img = self.mnist[index][0] 85 | pipeline = transforms.Compose( 86 | [transforms.Resize(self.img_shape), 87 | transforms.ToTensor()]) 88 | return pipeline(img) 89 | 90 | 91 | def get_dataloader(type, 92 | batch_size, 93 | img_shape=None, 94 | dist_train=False, 95 | num_workers=4, 96 | use_lmdb=False, 97 | **kwargs): 98 | if type == 'CelebA': 99 | if img_shape is not None: 100 | kwargs['img_shape'] = img_shape 101 | if use_lmdb: 102 | dataset = CelebALMDBDataset(CELEBA_LMDB_PATH, **kwargs) 103 | else: 104 | dataset = CelebADataset(CELEBA_DIR, **kwargs) 105 | elif type == 'CelebAHQ': 106 | if img_shape is not None: 107 | kwargs['img_shape'] = img_shape 108 | if use_lmdb: 109 | dataset = CelebALMDBDataset(CELEBA_HQ_LMDB_PATH, **kwargs) 110 | else: 111 | dataset = CelebADataset(CELEBA_HQ_DIR, **kwargs) 112 | elif type == 'MNIST': 113 | if img_shape is not None: 114 | dataset = MNISTImageDataset(img_shape) 115 | else: 116 | dataset = MNISTImageDataset() 117 | if dist_train: 118 | sampler = DistributedSampler(dataset) 119 | dataloader = DataLoader(dataset, 120 | batch_size=batch_size, 121 | sampler=sampler, 122 | num_workers=num_workers) 123 | return dataloader, sampler 124 | else: 125 | dataloader = DataLoader(dataset, 126 | batch_size=batch_size, 127 | shuffle=True, 128 | num_workers=num_workers) 129 | return dataloader 130 | 131 | 132 | if __name__ == '__main__': 133 | os.makedirs('work_dirs', exist_ok=True) 134 | 135 | if os.path.exists(CELEBA_DIR): 136 | dataloader = get_dataloader('CelebA', 16) 137 | img = next(iter(dataloader)) 138 | print(img.shape) 139 | N = img.shape[0] 140 | img = einops.rearrange(img, 141 | '(n1 n2) c h w -> c (n1 h) (n2 w)', 142 | n1=int(N**0.5)) 143 | print(img.shape) 144 | print(img.max()) 145 | print(img.min()) 146 | img = transforms.ToPILImage()(img) 147 | img.save('work_dirs/tmp_celeba.jpg') 148 | if TO_LMDB: 149 | from dldemos.lmdb_loader import folder2lmdb 150 | folder2lmdb(CELEBA_DIR, CELEBA_LMDB_PATH) 151 | 152 | if os.path.exists(CELEBA_HQ_DIR): 153 | dataloader = get_dataloader('CelebAHQ', 16) 154 | img = next(iter(dataloader)) 155 | print(img.shape) 156 | N = img.shape[0] 157 | img = einops.rearrange(img, 158 | '(n1 n2) c h w -> c (n1 h) (n2 w)', 159 | n1=int(N**0.5)) 160 | print(img.shape) 161 | print(img.max()) 162 | print(img.min()) 163 | img = transforms.ToPILImage()(img) 164 | img.save('work_dirs/tmp_celebahq.jpg') 165 | if TO_LMDB: 166 | from dldemos.lmdb_loader import folder2lmdb 167 | folder2lmdb(CELEBA_HQ_DIR, CELEBA_HQ_LMDB_PATH) 168 | 169 | download_mnist() 170 | -------------------------------------------------------------------------------- /dldemos/VQVAE/dist_train_pixelcnn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | 5 | import torch 6 | import torch.distributed as dist 7 | import torch.nn as nn 8 | from torch.nn.parallel import DistributedDataParallel 9 | 10 | from dldemos.VQVAE.configs import get_cfg 11 | from dldemos.VQVAE.dataset import get_dataloader 12 | from dldemos.VQVAE.model import VQVAE 13 | from dldemos.VQVAE.pixelcnn_model import PixelCNNWithEmbedding 14 | 15 | USE_LMDB = True 16 | 17 | 18 | def reduce_sum(tensor): 19 | rt = tensor.clone() 20 | dist.all_reduce(rt, op=dist.ReduceOp.SUM) 21 | return rt 22 | 23 | 
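# `reduce_sum` adds a tensor across all ranks. In the training loop below,
# the rank-summed batch-mean loss is weighted by the local batch size and
# accumulated; dividing by len(dataset) at the end of the epoch then gives
# (approximately) the dataset-average loss, since each rank only visits
# len(dataset) / world_size samples per epoch (assuming equal per-rank
# batch sizes, which DistributedSampler provides).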
24 | def train_generative_model(vqvae: VQVAE, 25 | model, 26 | img_shape=None, 27 | device='cuda', 28 | ckpt_path='dldemos/VQVAE/gen_model.pth', 29 | dataset_type='MNIST', 30 | batch_size=64, 31 | n_epochs=50): 32 | print('batch size:', batch_size) 33 | dataloader, sampler = get_dataloader(dataset_type, 34 | batch_size, 35 | img_shape=img_shape, 36 | dist_train=True, 37 | use_lmdb=USE_LMDB) 38 | vqvae.to(device) 39 | vqvae.eval() 40 | model.to(device) 41 | model.train() 42 | optimizer = torch.optim.Adam(model.parameters(), 1e-3) 43 | loss_fn = nn.CrossEntropyLoss() 44 | tic = time.time() 45 | for e in range(n_epochs): 46 | total_loss = 0 47 | sampler.set_epoch(e) 48 | for x in dataloader: 49 | current_batch_size = x.shape[0] 50 | with torch.no_grad(): 51 | x = x.to(device) 52 | x = vqvae.encode(x) 53 | 54 | predict_x = model(x) 55 | loss = loss_fn(predict_x, x) 56 | optimizer.zero_grad() 57 | loss.backward() 58 | optimizer.step() 59 | loss = reduce_sum(loss) 60 | total_loss += loss * current_batch_size 61 | total_loss /= len(dataloader.dataset) 62 | toc = time.time() 63 | if device == 0: 64 | torch.save(model.module.state_dict(), ckpt_path) 65 | print(f'epoch {e} loss: {total_loss} elapsed {(toc - tic):.2f}s') 66 | dist.barrier() 67 | 68 | print('Done') 69 | 70 | 71 | if __name__ == '__main__': 72 | dist.init_process_group('nccl') 73 | 74 | os.makedirs('work_dirs', exist_ok=True) 75 | 76 | parser = argparse.ArgumentParser() 77 | parser.add_argument('-c', type=int, default=0) 78 | args = parser.parse_args() 79 | cfg = get_cfg(args.c) 80 | 81 | img_shape = cfg['img_shape'] 82 | rank = dist.get_rank() 83 | device = rank % torch.cuda.device_count() 84 | 85 | vqvae = VQVAE(img_shape[0], cfg['dim'], cfg['n_embedding']) 86 | gen_model = PixelCNNWithEmbedding(cfg['pixelcnn_n_blocks'], 87 | cfg['pixelcnn_dim'], 88 | cfg['pixelcnn_linear_dim'], True, 89 | cfg['n_embedding']) 90 | 91 | # 3. 
Train Generative model (Gated PixelCNN in our project) 92 | vqvae.load_state_dict(torch.load(cfg['vqvae_path'])) 93 | vqvae.to(device) 94 | gen_model.to(device) 95 | gen_model = DistributedDataParallel(gen_model, device_ids=[device]) 96 | 97 | # Optional: resume 98 | # map_location = {'cuda:0': f'cuda:{device}'} 99 | # state_dict = torch.load(cfg['gen_model_path'], map_location=map_location) 100 | # gen_model.module.load_state_dict(state_dict) 101 | 102 | train_generative_model(vqvae, 103 | gen_model, 104 | img_shape=(img_shape[1], img_shape[2]), 105 | device=device, 106 | ckpt_path=cfg['gen_model_path'], 107 | dataset_type=cfg['dataset_type'], 108 | batch_size=cfg['batch_size_2'], 109 | n_epochs=cfg['n_epochs_2']) 110 | 111 | dist.destroy_process_group() 112 | 113 | # torchrun --nproc_per_node=4 dldemos/VQVAE/dist_train_pixelcnn.py -c 1 114 | -------------------------------------------------------------------------------- /dldemos/VQVAE/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class ResidualBlock(nn.Module): 6 | 7 | def __init__(self, dim): 8 | super().__init__() 9 | self.relu = nn.ReLU() 10 | self.conv1 = nn.Conv2d(dim, dim, 3, 1, 1) 11 | self.conv2 = nn.Conv2d(dim, dim, 1) 12 | 13 | def forward(self, x): 14 | tmp = self.relu(x) 15 | tmp = self.conv1(tmp) 16 | tmp = self.relu(tmp) 17 | tmp = self.conv2(tmp) 18 | return x + tmp 19 | 20 | 21 | class VQVAE(nn.Module): 22 | 23 | def __init__(self, input_dim, dim, n_embedding): 24 | super().__init__() 25 | self.encoder = nn.Sequential(nn.Conv2d(input_dim, dim, 4, 2, 1), 26 | nn.ReLU(), nn.Conv2d(dim, dim, 4, 2, 1), 27 | nn.ReLU(), nn.Conv2d(dim, dim, 3, 1, 1), 28 | ResidualBlock(dim), ResidualBlock(dim)) 29 | self.vq_embedding = nn.Embedding(n_embedding, dim) 30 | self.vq_embedding.weight.data.uniform_(-1.0 / n_embedding, 31 | 1.0 / n_embedding) 32 | self.decoder = nn.Sequential( 33 | nn.Conv2d(dim, dim, 3, 1, 1), 34 | ResidualBlock(dim), ResidualBlock(dim), 35 | nn.ConvTranspose2d(dim, dim, 4, 2, 1), nn.ReLU(), 36 | nn.ConvTranspose2d(dim, input_dim, 4, 2, 1)) 37 | self.n_downsample = 2 38 | 39 | def forward(self, x): 40 | # encode 41 | ze = self.encoder(x) 42 | 43 | # ze: [N, C, H, W] 44 | # embedding [K, C] 45 | embedding = self.vq_embedding.weight.data 46 | N, C, H, W = ze.shape 47 | K, _ = embedding.shape 48 | embedding_broadcast = embedding.reshape(1, K, C, 1, 1) 49 | ze_broadcast = ze.reshape(N, 1, C, H, W) 50 | distance = torch.sum((embedding_broadcast - ze_broadcast)**2, 2) 51 | nearest_neighbor = torch.argmin(distance, 1) 52 | # make C to the second dim 53 | zq = self.vq_embedding(nearest_neighbor).permute(0, 3, 1, 2) 54 | # stop gradient 55 | decoder_input = ze + (zq - ze).detach() 56 | 57 | # decode 58 | x_hat = self.decoder(decoder_input) 59 | return x_hat, ze, zq 60 | 61 | @torch.no_grad() 62 | def encode(self, x): 63 | ze = self.encoder(x) 64 | embedding = self.vq_embedding.weight.data 65 | 66 | # ze: [N, C, H, W] 67 | # embedding [K, C] 68 | N, C, H, W = ze.shape 69 | K, _ = embedding.shape 70 | embedding_broadcast = embedding.reshape(1, K, C, 1, 1) 71 | ze_broadcast = ze.reshape(N, 1, C, H, W) 72 | distance = torch.sum((embedding_broadcast - ze_broadcast)**2, 2) 73 | nearest_neighbor = torch.argmin(distance, 1) 74 | return nearest_neighbor 75 | 76 | @torch.no_grad() 77 | def decode(self, discrete_latent): 78 | zq = self.vq_embedding(discrete_latent).permute(0, 3, 1, 2) 79 | x_hat = self.decoder(zq) 80 | return x_hat 81 | 
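# Note on `decoder_input = ze + (zq - ze).detach()` in forward() above:
# the expression equals zq in the forward pass, but its gradient w.r.t. ze
# is the identity, so encoder gradients bypass the non-differentiable
# argmin (the straight-through estimator). A minimal illustration:
#   ze = torch.tensor([1.0], requires_grad=True)
#   zq = torch.tensor([3.0])
#   out = ze + (zq - ze).detach()  # forward value: 3.0
#   out.backward()                 # ze.grad == tensor([1.])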
82 | # Shape: [C, H, W] 83 | def get_latent_HW(self, input_shape): 84 | C, H, W = input_shape 85 | return (H // 2**self.n_downsample, W // 2**self.n_downsample) 86 | -------------------------------------------------------------------------------- /dldemos/VQVAE/pixelcnn_model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from dldemos.pixelcnn.model import GatedBlock, GatedPixelCNN 4 | 5 | 6 | class PixelCNNWithEmbedding(GatedPixelCNN): 7 | 8 | def __init__(self, n_blocks, p, linear_dim, bn=True, color_level=256): 9 | super().__init__(n_blocks, p, linear_dim, bn, color_level) 10 | self.embedding = nn.Embedding(color_level, p) 11 | self.block1 = GatedBlock('A', p, p, bn) 12 | 13 | def forward(self, x): 14 | x = self.embedding(x) 15 | x = x.permute(0, 3, 1, 2).contiguous() 16 | return super().forward(x) 17 | -------------------------------------------------------------------------------- /dldemos/attention/README.md: -------------------------------------------------------------------------------- 1 | 1. Install `babel`, `faker`. 2 | 3 | ```shell 4 | pip install babel faker 5 | ``` 6 | 7 | 2. Run `main.py` . 8 | -------------------------------------------------------------------------------- /dldemos/attention/dataset.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from babel.dates import format_date 4 | from faker import Faker 5 | 6 | faker = Faker() 7 | format_list = [ 8 | 'short', 'medium', 'long', 'full', 'd MMM YYY', 'd MMMM YYY', 'dd/MM/YYY', 9 | 'dd-MM-YYY', 'EE d, MMM YYY', 'EEEE d, MMMM YYY' 10 | ] 11 | 12 | if __name__ == '__main__': 13 | for format in format_list: 14 | date_obj = faker.date_object() 15 | print(f'{format}:', date_obj, 16 | format_date(date_obj, format=format, locale='en')) 17 | 18 | 19 | def generate_date(): 20 | format = random.choice(format_list) 21 | date_obj = faker.date_object() 22 | formated_date = format_date(date_obj, format=format, locale='en') 23 | return formated_date, date_obj 24 | 25 | 26 | def generate_date_data(count, filename): 27 | with open(filename, 'w') as fp: 28 | for _ in range(count): 29 | formated_date, date_obj = generate_date() 30 | fp.write(f'{formated_date}\t{date_obj}\n') 31 | 32 | 33 | def load_date_data(filename): 34 | with open(filename, 'r') as fp: 35 | lines = fp.readlines() 36 | return [line.strip('\n').split('\t') for line in lines] 37 | 38 | 39 | # generate_date_data(50000, 'dldemos/attention/train.txt') 40 | # generate_date_data(10000, 'dldemos/attention/test.txt') 41 | -------------------------------------------------------------------------------- /dldemos/attention/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.utils.rnn import pad_sequence 4 | from torch.utils.data import DataLoader, Dataset 5 | 6 | from dldemos.attention.dataset import generate_date, load_date_data 7 | 8 | EMBEDDING_LENGTH = 128 9 | OUTPUT_LENGTH = 10 10 | 11 | 12 | def stoi(str): 13 | return torch.LongTensor([ord(char) for char in str]) 14 | 15 | 16 | def itos(arr): 17 | return ''.join([chr(x) for x in arr]) 18 | 19 | 20 | class DateDataset(Dataset): 21 | 22 | def __init__(self, lines): 23 | self.lines = lines 24 | 25 | def __len__(self): 26 | return len(self.lines) 27 | 28 | def __getitem__(self, index): 29 | line = self.lines[index] 30 | 31 | return stoi(line[0]), stoi(line[1]) 32 | 33 | 34 | def 
get_dataloader(filename): 35 | 36 | def collate_fn(batch): 37 | x, y = zip(*batch) 38 | x_pad = pad_sequence(x, batch_first=True) 39 | y_pad = pad_sequence(y, batch_first=True) 40 | return x_pad, y_pad 41 | 42 | lines = load_date_data(filename) 43 | dataset = DateDataset(lines) 44 | return DataLoader(dataset, 32, collate_fn=collate_fn) 45 | 46 | 47 | class AttentionModel(nn.Module): 48 | 49 | def __init__(self, 50 | embeding_dim=32, 51 | encoder_dim=32, 52 | decoder_dim=32, 53 | dropout_rate=0.5): 54 | super().__init__() 55 | self.drop = nn.Dropout(dropout_rate) 56 | self.embedding = nn.Embedding(EMBEDDING_LENGTH, embeding_dim) 57 | self.attention_linear = nn.Linear(2 * encoder_dim + decoder_dim, 1) 58 | self.softmax = nn.Softmax(-1) 59 | self.encoder = nn.LSTM(embeding_dim, 60 | encoder_dim, 61 | 1, 62 | batch_first=True, 63 | bidirectional=True) 64 | self.decoder = nn.LSTM(EMBEDDING_LENGTH + 2 * encoder_dim, 65 | decoder_dim, 66 | 1, 67 | batch_first=True) 68 | self.output_linear = nn.Linear(decoder_dim, EMBEDDING_LENGTH) 69 | self.decoder_dim = decoder_dim 70 | 71 | def forward(self, x: torch.Tensor, n_output: int = OUTPUT_LENGTH): 72 | # x: [batch, n_sequence, EMBEDDING_LENGTH] 73 | batch, n_squence = x.shape[0:2] 74 | 75 | # x: [batch, n_sequence, embeding_dim] 76 | x = self.drop(self.embedding(x)) 77 | 78 | # a: [batch, n_sequence, hidden] 79 | a, _ = self.encoder(x) 80 | 81 | # prev_s: [batch, n_squence=1, hidden] 82 | # prev_y: [batch, n_squence=1, EMBEDDING_LENGTH] 83 | # y: [batch, n_output, EMBEDDING_LENGTH] 84 | prev_s = x.new_zeros(batch, 1, self.decoder_dim) 85 | prev_y = x.new_zeros(batch, 1, EMBEDDING_LENGTH) 86 | y = x.new_empty(batch, n_output, EMBEDDING_LENGTH) 87 | tmp_states = None 88 | for i_output in range(n_output): 89 | # repeat_s: [batch, n_squence, hidden] 90 | repeat_s = prev_s.repeat(1, n_squence, 1) 91 | # attention_input: [batch * n_sequence, hidden_s + hidden_a] 92 | attention_input = torch.cat((repeat_s, a), 93 | 2).reshape(batch * n_squence, -1) 94 | # x: [batch * n_sequence, 1] 95 | x = self.attention_linear(attention_input) 96 | # x: [batch, n_sequence] 97 | x = x.reshape(batch, n_squence) 98 | alpha = self.softmax(x) 99 | c = torch.sum(a * alpha.reshape(batch, n_squence, 1), 1) 100 | c = c.unsqueeze(1) 101 | decoder_input = torch.cat((prev_y, c), 2) 102 | 103 | if tmp_states is None: 104 | prev_s, tmp_states = self.decoder(decoder_input) 105 | else: 106 | prev_s, tmp_states = self.decoder(decoder_input, tmp_states) 107 | 108 | prev_y = self.output_linear(prev_s) 109 | y[:, i_output] = prev_y.squeeze(1) 110 | return y 111 | 112 | 113 | def main(): 114 | device = 'cuda:0' 115 | train_dataloader = get_dataloader('dldemos/attention/train.txt') 116 | test_dataloader = get_dataloader('dldemos/attention/test.txt') 117 | 118 | model = AttentionModel().to(device) 119 | 120 | # Please close or open the codes with # 121 | # train 122 | 123 | optimizer = torch.optim.Adam(model.parameters(), lr=0.001) 124 | citerion = torch.nn.CrossEntropyLoss() 125 | for epoch in range(30): 126 | loss_sum = 0 127 | dataset_len = len(train_dataloader.dataset) 128 | 129 | for x, y in train_dataloader: 130 | x = x.to(device) 131 | y = y.to(device) 132 | hat_y = model(x) 133 | n, Tx, _ = hat_y.shape 134 | hat_y = torch.reshape(hat_y, (n * Tx, -1)) 135 | label_y = torch.reshape(y, (n * Tx, )) 136 | loss = citerion(hat_y, label_y) 137 | 138 | optimizer.zero_grad() 139 | loss.backward() 140 | torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) 141 | optimizer.step() 142 | 143 | 
loss_sum += loss * n 144 | 145 | print(f'Epoch {epoch}. loss: {loss_sum / dataset_len}') 146 | 147 | torch.save(model.state_dict(), 'dldemos/attention/model.pth') 148 | 149 | # test 150 | model.load_state_dict(torch.load('dldemos/attention/model.pth')) 151 | 152 | accuracy = 0 153 | dataset_len = len(test_dataloader.dataset) 154 | 155 | for x, y in test_dataloader: 156 | x = x.to(device) 157 | y = y.to(device) 158 | hat_y = model(x) 159 | prediction = torch.argmax(hat_y, 2) 160 | score = torch.where(torch.sum(prediction - y, -1) == 0, 1, 0) 161 | accuracy += torch.sum(score) 162 | 163 | print(f'Accuracy: {accuracy / dataset_len}') 164 | 165 | # inference 166 | for _ in range(5): 167 | x, y = generate_date() 168 | origin_x = x 169 | x = stoi(x).unsqueeze(0).to(device) 170 | hat_y = model(x) 171 | hat_y = hat_y.squeeze(0).argmax(1) 172 | hat_y = itos(hat_y) 173 | print(f'input: {origin_x}, prediction: {hat_y}, gt: {y}') 174 | 175 | 176 | if __name__ == '__main__': 177 | main() 178 | -------------------------------------------------------------------------------- /dldemos/ddim/configs.py: -------------------------------------------------------------------------------- 1 | mnist_cfg = { 2 | 'dataset_type': 'MNIST', 3 | 'img_shape': [1, 28, 28], 4 | 'model_path': 'dldemos/ddim/mnist.pth', 5 | 'batch_size': 512, 6 | 'n_epochs': 50, 7 | 'channels': [10, 20, 40, 80], 8 | 'pe_dim': 128 9 | } 10 | 11 | # Deprecated config. It's for model in `network_my.py` 12 | celebahq_cfg1 = { 13 | 'dataset_type': 'CelebAHQ', 14 | 'img_shape': [3, 128, 128], 15 | 'model_path': 'dldemos/ddim/celebahq1.pth', 16 | 'batch_size': 64, 17 | 'n_epochs': 1000, 18 | 'channels': [64, 128, 256, 512, 512], 19 | 'pe_dim': 128, 20 | 'with_attn': [False, False, False, True, False] 21 | } 22 | celebahq_cfg2 = { 23 | 'dataset_type': 'CelebAHQ', 24 | 'img_shape': [3, 64, 64], 25 | 'model_path': 'dldemos/ddim/celebahq2.pth', 26 | 'batch_size': 128, 27 | 'n_epochs': 2500, 28 | 'scheduler_cfg': { 29 | 'lr': 5e-4, 30 | 'milestones': [1500, 2100], 31 | 'gamma': 0.1, 32 | }, 33 | 'channels': [128, 256, 512, 512], 34 | 'pe_dim': 128, 35 | 'with_attn': [False, False, True, True], 36 | 'norm_type': 'gn' 37 | } 38 | celebahq_cfg3 = { 39 | 'dataset_type': 'CelebAHQ', 40 | 'img_shape': [3, 128, 128], 41 | 'model_path': 'dldemos/ddim/celebahq3.pth', 42 | 'batch_size': 32, 43 | 'n_epochs': 1500, 44 | 'scheduler_cfg': { 45 | 'lr': 2e-4, 46 | 'milestones': [800, 1300], 47 | 'gamma': 0.1, 48 | }, 49 | 'channels': [128, 256, 256, 512, 512], 50 | 'pe_dim': 128, 51 | 'with_attn': [False, False, False, True, True], 52 | 'norm_type': 'gn' 53 | } 54 | celebahq_cfg4 = { 55 | 'dataset_type': 'CelebAHQ', 56 | 'img_shape': [3, 256, 256], 57 | 'model_path': 'dldemos/ddim/celebahq4.pth', 58 | 'batch_size': 8, 59 | 'n_epochs': 1000, 60 | 'scheduler_cfg': { 61 | 'lr': 2e-5, 62 | 'milestones': [800], 63 | 'gamma': 0.1, 64 | }, 65 | 'channels': [128, 128, 256, 256, 512, 512], 66 | 'pe_dim': 128, 67 | 'with_attn': [False, False, False, False, True, True], 68 | 'norm_type': 'gn' 69 | } 70 | 71 | configs = [ 72 | mnist_cfg, celebahq_cfg1, celebahq_cfg2, celebahq_cfg3, celebahq_cfg4 73 | ] 74 | -------------------------------------------------------------------------------- /dldemos/ddim/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torchvision 4 | from PIL import Image 5 | from torch.utils.data import DataLoader, Dataset 6 | from torch.utils.data.distributed import DistributedSampler 7 | from 
torchvision import transforms 8 | 9 | CELEBA_HQ_DIR = 'data/celebA/celeba_hq_256' 10 | 11 | 12 | def download_dataset(): 13 | mnist = torchvision.datasets.MNIST(root='./data/mnist', download=True) 14 | print('length of MNIST', len(mnist)) 15 | id = 4 16 | img, label = mnist[id] 17 | print(img) 18 | print(label) 19 | 20 | # On computer with monitor 21 | # img.show() 22 | 23 | img.save('work_dirs/tmp.jpg') 24 | tensor = transforms.ToTensor()(img) 25 | print(tensor.shape) 26 | print(tensor.max()) 27 | print(tensor.min()) 28 | 29 | 30 | class MNISTImageDataset(Dataset): 31 | 32 | def __init__(self): 33 | super().__init__() 34 | self.mnist = torchvision.datasets.MNIST(root='./data/mnist') 35 | 36 | def __len__(self): 37 | return len(self.mnist) 38 | 39 | def __getitem__(self, index: int): 40 | img = self.mnist[index][0] 41 | pipeline = transforms.Compose([ 42 | transforms.ToTensor(), 43 | transforms.Lambda(lambda x: (x - 0.5) * 2) 44 | ]) 45 | return pipeline(img) 46 | 47 | 48 | class CelebADataset(Dataset): 49 | 50 | def __init__(self, root, resolution=(64, 64)): 51 | super().__init__() 52 | self.root = root 53 | self.filenames = sorted(os.listdir(root)) 54 | self.resolution = resolution 55 | 56 | def __len__(self) -> int: 57 | return len(self.filenames) 58 | 59 | def __getitem__(self, index: int): 60 | path = os.path.join(self.root, self.filenames[index]) 61 | img = Image.open(path) 62 | pipeline = transforms.Compose([ 63 | transforms.Resize(self.resolution), 64 | transforms.ToTensor(), 65 | transforms.Lambda(lambda x: (x - 0.5) * 2) 66 | ]) 67 | return pipeline(img) 68 | 69 | 70 | def get_dataloader(type, 71 | batch_size, 72 | dist_train=False, 73 | num_workers=4, 74 | resolution=None): 75 | if type == 'CelebAHQ': 76 | if resolution is not None: 77 | dataset = CelebADataset(CELEBA_HQ_DIR, resolution) 78 | else: 79 | dataset = CelebADataset(CELEBA_HQ_DIR) 80 | elif type == 'MNIST': 81 | dataset = MNISTImageDataset() 82 | if dist_train: 83 | sampler = DistributedSampler(dataset) 84 | dataloader = DataLoader(dataset, 85 | batch_size=batch_size, 86 | sampler=sampler, 87 | num_workers=num_workers) 88 | return dataloader, sampler 89 | else: 90 | dataloader = DataLoader(dataset, 91 | batch_size=batch_size, 92 | shuffle=True, 93 | num_workers=num_workers) 94 | return dataloader 95 | 96 | 97 | if __name__ == '__main__': 98 | os.makedirs('work_dirs', exist_ok=True) 99 | download_dataset() 100 | -------------------------------------------------------------------------------- /dldemos/ddim/ddim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from tqdm import tqdm 3 | 4 | from dldemos.ddim.ddpm import DDPM 5 | 6 | 7 | class DDIM(DDPM): 8 | 9 | def __init__(self, 10 | device, 11 | n_steps: int, 12 | min_beta: float = 0.0001, 13 | max_beta: float = 0.02): 14 | super().__init__(device, n_steps, min_beta, max_beta) 15 | 16 | def sample_backward(self, 17 | img_or_shape, 18 | net, 19 | device, 20 | simple_var=True, 21 | ddim_step=20, 22 | eta=1): 23 | if simple_var: 24 | eta = 1 25 | ts = torch.linspace(self.n_steps, 0, 26 | (ddim_step + 1)).to(device).to(torch.long) 27 | if isinstance(img_or_shape, torch.Tensor): 28 | x = img_or_shape 29 | else: 30 | x = torch.randn(img_or_shape).to(device) 31 | batch_size = x.shape[0] 32 | net = net.to(device) 33 | for i in tqdm(range(1, ddim_step + 1), 34 | f'DDIM sampling with eta {eta} simple_var {simple_var}'): 35 | cur_t = ts[i - 1] - 1 36 | prev_t = ts[i] - 1 37 | 38 | ab_cur = self.alpha_bars[cur_t] 39 | 
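# The update below implements the DDIM sampling equation from Song et al.,
# "Denoising Diffusion Implicit Models". Writing a_t for alpha_bar_t and
# sigma^2 for `var`:
#   x_prev = sqrt(a_prev / a_cur) * x
#            + (sqrt(1 - a_prev - sigma^2)
#               - sqrt(a_prev * (1 - a_cur) / a_cur)) * eps
#            + sigma * noise
# eta = 0 yields the deterministic DDIM sampler; eta = 1 recovers the
# DDPM-like stochastic sampler.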
ab_prev = self.alpha_bars[prev_t] if prev_t >= 0 else 1 40 | 41 | t_tensor = torch.tensor([cur_t] * batch_size, 42 | dtype=torch.long).to(device).unsqueeze(1) 43 | eps = net(x, t_tensor) 44 | var = eta * (1 - ab_prev) / (1 - ab_cur) * (1 - ab_cur / ab_prev) 45 | noise = torch.randn_like(x) 46 | 47 | first_term = (ab_prev / ab_cur)**0.5 * x 48 | second_term = ((1 - ab_prev - var)**0.5 - 49 | (ab_prev * (1 - ab_cur) / ab_cur)**0.5) * eps 50 | if simple_var: 51 | third_term = (1 - ab_cur / ab_prev)**0.5 * noise 52 | else: 53 | third_term = var**0.5 * noise 54 | x = first_term + second_term + third_term 55 | 56 | return x 57 | -------------------------------------------------------------------------------- /dldemos/ddim/ddpm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from tqdm import tqdm 3 | 4 | 5 | class DDPM(): 6 | 7 | def __init__(self, 8 | device, 9 | n_steps: int, 10 | min_beta: float = 0.0001, 11 | max_beta: float = 0.02): 12 | betas = torch.linspace(min_beta, max_beta, n_steps).to(device) 13 | alphas = 1 - betas 14 | alpha_bars = torch.empty_like(alphas) 15 | product = 1 16 | for i, alpha in enumerate(alphas): 17 | product *= alpha 18 | alpha_bars[i] = product 19 | self.betas = betas 20 | self.n_steps = n_steps 21 | self.alphas = alphas 22 | self.alpha_bars = alpha_bars 23 | 24 | def sample_forward(self, x, t, eps=None): 25 | alpha_bar = self.alpha_bars[t].reshape(-1, 1, 1, 1) 26 | if eps is None: 27 | eps = torch.randn_like(x) 28 | res = eps * torch.sqrt(1 - alpha_bar) + torch.sqrt(alpha_bar) * x 29 | return res 30 | 31 | def sample_backward(self, img_or_shape, net, device, simple_var=True): 32 | if isinstance(img_or_shape, torch.Tensor): 33 | x = img_or_shape 34 | else: 35 | x = torch.randn(img_or_shape).to(device) 36 | net = net.to(device) 37 | for t in tqdm(range(self.n_steps - 1, -1, -1), 'DDPM sampling'): 38 | x = self.sample_backward_step(x, t, net, simple_var) 39 | 40 | return x 41 | 42 | def sample_backward_step(self, x_t, t, net, simple_var=True): 43 | 44 | n = x_t.shape[0] 45 | t_tensor = torch.tensor([t] * n, 46 | dtype=torch.long).to(x_t.device).unsqueeze(1) 47 | eps = net(x_t, t_tensor) 48 | 49 | if t == 0: 50 | noise = 0 51 | else: 52 | if simple_var: 53 | var = self.betas[t] 54 | else: 55 | var = (1 - self.alpha_bars[t - 1]) / ( 56 | 1 - self.alpha_bars[t]) * self.betas[t] 57 | noise = torch.randn_like(x_t) 58 | noise *= torch.sqrt(var) 59 | 60 | mean = (x_t - 61 | (1 - self.alphas[t]) / torch.sqrt(1 - self.alpha_bars[t]) * 62 | eps) / torch.sqrt(self.alphas[t]) 63 | x_t = mean + noise 64 | 65 | return x_t 66 | -------------------------------------------------------------------------------- /dldemos/ddim/dist_sample.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import cv2 5 | import einops 6 | import torch 7 | import torch.distributed as dist 8 | from torch.nn.parallel import DistributedDataParallel 9 | 10 | from dldemos.ddim.configs import configs 11 | from dldemos.ddim.ddim import DDIM 12 | from dldemos.ddim.ddpm import DDPM 13 | from dldemos.ddim.network import UNet 14 | 15 | 16 | def sample_imgs(ddpm, 17 | net, 18 | output_dir, 19 | img_shape, 20 | n_sample=30000, 21 | device=0, 22 | simple_var=True, 23 | to_bgr=False, 24 | **kwargs): 25 | if img_shape[1] >= 256: 26 | max_batch_size = 16 27 | elif img_shape[1] >= 128: 28 | max_batch_size = 64 29 | else: 30 | max_batch_size = 256 31 | n_devices = dist.get_world_size() 32 | 33 
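# Sharding: each outer iteration advances the global cursor by
# max_batch_size * n_devices, and rank r renders the index range
# [index + r * max_batch_size, index + (r + 1) * max_batch_size), so the
# ranks write disjoint chunks that together cover all n_sample images.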
| net = net.to(device) 34 | net = net.eval() 35 | 36 | os.makedirs(output_dir, exist_ok=True) 37 | 38 | index = 0 39 | with torch.no_grad(): 40 | while index < n_sample: 41 | start_index = index + device * max_batch_size 42 | end_index = min(n_sample, index + (device + 1) * max_batch_size) 43 | 44 | local_batch_size = end_index - start_index 45 | if local_batch_size > 0: 46 | shape = (local_batch_size, *img_shape) 47 | imgs = ddpm.sample_backward(shape, 48 | net, 49 | device=device, 50 | simple_var=simple_var, 51 | **kwargs).detach().cpu() 52 | imgs = (imgs + 1) / 2 * 255 53 | imgs = imgs.clamp(0, 255).to(torch.uint8) 54 | 55 | img_list = einops.rearrange(imgs, 'n c h w -> n h w c').numpy() 56 | for i, img in enumerate(img_list): 57 | if to_bgr: 58 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 59 | cv2.imwrite(f'{output_dir}/{i+start_index}.jpg', img) 60 | 61 | index += max_batch_size * n_devices 62 | 63 | 64 | if __name__ == '__main__': 65 | dist.init_process_group('nccl') 66 | 67 | os.makedirs('work_dirs', exist_ok=True) 68 | 69 | parser = argparse.ArgumentParser() 70 | parser.add_argument('-c', type=int, default=0) 71 | args = parser.parse_args() 72 | cfg = configs[args.c] 73 | 74 | n_steps = 1000 75 | rank = dist.get_rank() 76 | device = rank % torch.cuda.device_count() 77 | model_path = cfg['model_path'] 78 | img_shape = cfg['img_shape'] 79 | to_bgr = cfg['dataset_type'] != 'MNIST' 80 | 81 | net = UNet(n_steps, img_shape, cfg['channels'], cfg['pe_dim'], 82 | cfg.get('with_attn', False), cfg.get('norm_type', 'ln')) 83 | net.to(device) 84 | net = DistributedDataParallel(net, device_ids=[device]) 85 | ddpm = DDPM(device, n_steps) 86 | 87 | # Load the trained checkpoint (required before sampling) 88 | map_location = {'cuda:0': f'cuda:{device}'} 89 | resume_path = model_path 90 | state_dict = torch.load(resume_path, map_location=map_location) 91 | net.module.load_state_dict(state_dict) 92 | 93 | ddim = DDIM(device, n_steps) 94 | sample_imgs(ddpm, 95 | net, 96 | 'work_dirs/diffusion_ddpm_sigma_hat', 97 | img_shape, 98 | device=device, 99 | to_bgr=to_bgr) 100 | dist.barrier() 101 | sample_imgs(ddim, 102 | net, 103 | 'work_dirs/diffusion_ddpm_eta_0', 104 | img_shape, 105 | device=device, 106 | to_bgr=to_bgr, 107 | ddim_step=1000, 108 | simple_var=False, 109 | eta=0) 110 | dist.barrier() 111 | 112 | dist.destroy_process_group() 113 | 114 | # torchrun --nproc_per_node=8 dldemos/ddim/dist_sample.py -c 2 \ 115 | # > work_dirs/tmp.txt 116 | -------------------------------------------------------------------------------- /dldemos/ddim/dist_train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | 5 | import torch 6 | import torch.distributed as dist 7 | import torch.nn as nn 8 | from torch.nn.parallel import DistributedDataParallel 9 | 10 | from dldemos.ddim.configs import configs 11 | from dldemos.ddim.dataset import get_dataloader 12 | from dldemos.ddim.ddpm import DDPM 13 | from dldemos.ddim.network import UNet 14 | 15 | 16 | def reduce_sum(tensor): 17 | rt = tensor.clone() 18 | dist.all_reduce(rt, op=dist.ReduceOp.SUM) 19 | return rt 20 | 21 | 22 | def train(ddpm: DDPM, 23 | net, 24 | dataset_type, 25 | resolution=None, 26 | batch_size=512, 27 | n_epochs=50, 28 | scheduler_cfg=None, 29 | device='cuda', 30 | ckpt_path='dldemos/ddpm/model.pth'): 31 | 32 | n_steps = ddpm.n_steps 33 | dataloader, sampler = get_dataloader(dataset_type, 34 | batch_size, 35 | True, 36 | resolution=resolution) 37 | if device == 0: 38 | print('batch 
size: ', batch_size * dist.get_world_size()) 39 | print('batch size per device: ', batch_size) 40 | 41 | net = net.to(device) 42 | loss_fn = nn.MSELoss() 43 | 44 | if scheduler_cfg is not None: 45 | optimizer = torch.optim.Adam(net.parameters(), scheduler_cfg['lr']) 46 | scheduler = torch.optim.lr_scheduler.MultiStepLR( 47 | optimizer, scheduler_cfg['milestones'], scheduler_cfg['gamma']) 48 | else: 49 | optimizer = torch.optim.Adam(net.parameters(), 2e-4) 50 | scheduler = None 51 | 52 | tic = time.time() 53 | for e in range(n_epochs): 54 | total_loss = 0 55 | sampler.set_epoch(e) 56 | for x in dataloader: 57 | current_batch_size = x.shape[0] 58 | x = x.to(device) 59 | t = torch.randint(0, n_steps, (current_batch_size, )).to(device) 60 | eps = torch.randn_like(x).to(device) 61 | x_t = ddpm.sample_forward(x, t, eps) 62 | eps_theta = net(x_t, t.reshape(current_batch_size, 1)) 63 | loss = loss_fn(eps_theta, eps) 64 | optimizer.zero_grad() 65 | loss.backward() 66 | optimizer.step() 67 | loss = reduce_sum(loss) 68 | total_loss += loss.item() * current_batch_size 69 | if scheduler is not None: 70 | scheduler.step() 71 | total_loss /= len(dataloader.dataset) 72 | toc = time.time() 73 | if device == 0: 74 | torch.save(net.module.state_dict(), ckpt_path) 75 | print(f'epoch {e} loss: {total_loss} elapsed {(toc - tic):.2f}s') 76 | dist.barrier() 77 | 78 | if device == 0: 79 | print('Done') 80 | 81 | 82 | if __name__ == '__main__': 83 | dist.init_process_group('nccl') 84 | 85 | os.makedirs('work_dirs', exist_ok=True) 86 | 87 | parser = argparse.ArgumentParser() 88 | parser.add_argument('-c', type=int, default=0) 89 | args = parser.parse_args() 90 | cfg = configs[args.c] 91 | 92 | n_steps = 1000 93 | rank = dist.get_rank() 94 | device = rank % torch.cuda.device_count() 95 | model_path = cfg['model_path'] 96 | img_shape = cfg['img_shape'] 97 | to_bgr = cfg['dataset_type'] != 'MNIST' 98 | 99 | net = UNet(n_steps, img_shape, cfg['channels'], cfg['pe_dim'], 100 | cfg.get('with_attn', False), cfg.get('norm_type', 'ln')) 101 | net.to(device) 102 | net = DistributedDataParallel(net, device_ids=[device]) 103 | ddpm = DDPM(device, n_steps) 104 | 105 | # Optional: resume 106 | # map_location = {'cuda:0': f'cuda:{device}'} 107 | # resume_path = model_path 108 | # state_dict = torch.load(resume_path, map_location=map_location) 109 | # net.module.load_state_dict(state_dict) 110 | 111 | train(ddpm, 112 | net, 113 | cfg['dataset_type'], 114 | resolution=(img_shape[1], img_shape[2]), 115 | batch_size=cfg['batch_size'], 116 | n_epochs=cfg['n_epochs'], 117 | scheduler_cfg=cfg.get('scheduler_cfg', None), 118 | device=device, 119 | ckpt_path=model_path) 120 | 121 | dist.destroy_process_group() 122 | 123 | # torchrun --nproc_per_node=8 dldemos/ddim/dist_train.py -c 1 124 | -------------------------------------------------------------------------------- /dldemos/ddpm/dataset.py: -------------------------------------------------------------------------------- 1 | import torchvision 2 | from torch.utils.data import DataLoader 3 | from torchvision.transforms import Compose, Lambda, ToTensor 4 | 5 | 6 | def download_dataset(): 7 | mnist = torchvision.datasets.MNIST(root='./data/mnist', download=True) 8 | print('length of MNIST', len(mnist)) 9 | id = 4 10 | img, label = mnist[id] 11 | print(img) 12 | print(label) 13 | 14 | # On a computer with a monitor 15 | # img.show() 16 | 17 | img.save('work_dirs/tmp.jpg') 18 | tensor = ToTensor()(img) 19 | print(tensor.shape) 20 | print(tensor.max()) 21 | print(tensor.min()) 
22 | 23 | 24 | def get_dataloader(batch_size: int): 25 | transform = Compose([ToTensor(), Lambda(lambda x: (x - 0.5) * 2)]) 26 | dataset = torchvision.datasets.MNIST(root='./data/mnist', 27 | transform=transform) 28 | return DataLoader(dataset, batch_size=batch_size, shuffle=True) 29 | 30 | 31 | def get_img_shape(): 32 | return (1, 28, 28) 33 | 34 | 35 | if __name__ == '__main__': 36 | import os 37 | os.makedirs('work_dirs', exist_ok=True) 38 | download_dataset() 39 | -------------------------------------------------------------------------------- /dldemos/ddpm/ddpm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class DDPM(): 5 | 6 | def __init__(self, 7 | device, 8 | n_steps: int, 9 | min_beta: float = 0.0001, 10 | max_beta: float = 0.02): 11 | betas = torch.linspace(min_beta, max_beta, n_steps).to(device) 12 | alphas = 1 - betas 13 | alpha_bars = torch.empty_like(alphas) 14 | product = 1 15 | for i, alpha in enumerate(alphas): 16 | product *= alpha 17 | alpha_bars[i] = product 18 | self.betas = betas 19 | self.n_steps = n_steps 20 | self.alphas = alphas 21 | self.alpha_bars = alpha_bars 22 | alpha_prev = torch.empty_like(alpha_bars) 23 | alpha_prev[1:] = alpha_bars[0:n_steps - 1] 24 | alpha_prev[0] = 1 25 | self.coef1 = torch.sqrt(alphas) * (1 - alpha_prev) / (1 - alpha_bars) 26 | self.coef2 = torch.sqrt(alpha_prev) * self.betas / (1 - alpha_bars) 27 | 28 | def sample_forward(self, x, t, eps=None): 29 | alpha_bar = self.alpha_bars[t].reshape(-1, 1, 1, 1) 30 | if eps is None: 31 | eps = torch.randn_like(x) 32 | res = eps * torch.sqrt(1 - alpha_bar) + torch.sqrt(alpha_bar) * x 33 | return res 34 | 35 | def sample_backward(self, 36 | img_shape, 37 | net, 38 | device, 39 | simple_var=True, 40 | clip_x0=True): 41 | x = torch.randn(img_shape).to(device) 42 | net = net.to(device) 43 | for t in range(self.n_steps - 1, -1, -1): 44 | x = self.sample_backward_step(x, t, net, simple_var, clip_x0) 45 | return x 46 | 47 | def sample_backward_step(self, x_t, t, net, simple_var=True, clip_x0=True): 48 | 49 | n = x_t.shape[0] 50 | t_tensor = torch.tensor([t] * n, 51 | dtype=torch.long).to(x_t.device).unsqueeze(1) 52 | eps = net(x_t, t_tensor) 53 | 54 | if t == 0: 55 | noise = 0 56 | else: 57 | if simple_var: 58 | var = self.betas[t] 59 | else: 60 | var = (1 - self.alpha_bars[t - 1]) / ( 61 | 1 - self.alpha_bars[t]) * self.betas[t] 62 | noise = torch.randn_like(x_t) 63 | noise *= torch.sqrt(var) 64 | 65 | if clip_x0: 66 | x_0 = (x_t - torch.sqrt(1 - self.alpha_bars[t]) * 67 | eps) / torch.sqrt(self.alpha_bars[t]) 68 | x_0 = torch.clip(x_0, -1, 1) 69 | mean = self.coef1[t] * x_t + self.coef2[t] * x_0 70 | else: 71 | mean = (x_t - 72 | (1 - self.alphas[t]) / torch.sqrt(1 - self.alpha_bars[t]) * 73 | eps) / torch.sqrt(self.alphas[t]) 74 | x_t = mean + noise 75 | 76 | return x_t 77 | 78 | 79 | def visualize_forward(): 80 | import cv2 81 | import einops 82 | import numpy as np 83 | 84 | from dldemos.ddpm.dataset import get_dataloader 85 | 86 | n_steps = 100 87 | device = 'cuda' 88 | dataloader = get_dataloader(5) 89 | x, _ = next(iter(dataloader)) 90 | x = x.to(device) 91 | 92 | ddpm = DDPM(device, n_steps) 93 | xts = [] 94 | percents = torch.linspace(0, 0.99, 10) 95 | for percent in percents: 96 | t = torch.tensor([int(n_steps * percent)]) 97 | t = t.unsqueeze(1) 98 | x_t = ddpm.sample_forward(x, t) 99 | xts.append(x_t) 100 | res = torch.stack(xts, 0) 101 | res = einops.rearrange(res, 'n1 n2 c h w -> (n2 h) (n1 w) c') 102 | res = (res.clip(-1, 
1) + 1) / 2 * 255 103 | res = res.cpu().numpy().astype(np.uint8) 104 | 105 | cv2.imwrite('work_dirs/diffusion_forward.jpg', res) 106 | 107 | 108 | def main(): 109 | visualize_forward() 110 | 111 | 112 | if __name__ == '__main__': 113 | main() 114 | -------------------------------------------------------------------------------- /dldemos/ddpm/ddpm_simple.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class DDPM(): 5 | 6 | def __init__(self, 7 | device, 8 | n_steps: int, 9 | min_beta: float = 0.0001, 10 | max_beta: float = 0.02): 11 | betas = torch.linspace(min_beta, max_beta, n_steps).to(device) 12 | alphas = 1 - betas 13 | alpha_bars = torch.empty_like(alphas) 14 | product = 1 15 | for i, alpha in enumerate(alphas): 16 | product *= alpha 17 | alpha_bars[i] = product 18 | self.betas = betas 19 | self.n_steps = n_steps 20 | self.alphas = alphas 21 | self.alpha_bars = alpha_bars 22 | 23 | def sample_forward(self, x, t, eps=None): 24 | alpha_bar = self.alpha_bars[t].reshape(-1, 1, 1, 1) 25 | if eps is None: 26 | eps = torch.randn_like(x) 27 | res = eps * torch.sqrt(1 - alpha_bar) + torch.sqrt(alpha_bar) * x 28 | return res 29 | 30 | def sample_backward(self, img_shape, net, device, simple_var=True): 31 | x = torch.randn(img_shape).to(device) 32 | net = net.to(device) 33 | for t in range(self.n_steps - 1, -1, -1): 34 | x = self.sample_backward_step(x, t, net, simple_var) 35 | return x 36 | 37 | def sample_backward_step(self, x_t, t, net, simple_var=True): 38 | 39 | n = x_t.shape[0] 40 | t_tensor = torch.tensor([t] * n, 41 | dtype=torch.long).to(x_t.device).unsqueeze(1) 42 | eps = net(x_t, t_tensor) 43 | 44 | if t == 0: 45 | noise = 0 46 | else: 47 | if simple_var: 48 | var = self.betas[t] 49 | else: 50 | var = (1 - self.alpha_bars[t - 1]) / ( 51 | 1 - self.alpha_bars[t]) * self.betas[t] 52 | noise = torch.randn_like(x_t) 53 | noise *= torch.sqrt(var) 54 | 55 | mean = (x_t - 56 | (1 - self.alphas[t]) / torch.sqrt(1 - self.alpha_bars[t]) * 57 | eps) / torch.sqrt(self.alphas[t]) 58 | x_t = mean + noise 59 | 60 | return x_t 61 | -------------------------------------------------------------------------------- /dldemos/ddpm/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import cv2 5 | import einops 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | 10 | from dldemos.ddpm.dataset import get_dataloader, get_img_shape 11 | from dldemos.ddpm.ddpm_simple import DDPM 12 | from dldemos.ddpm.network import (build_network, convnet_big_cfg, 13 | convnet_medium_cfg, convnet_small_cfg, 14 | unet_1_cfg, unet_res_cfg) 15 | 16 | batch_size = 512 17 | n_epochs = 100 18 | 19 | 20 | def train(ddpm: DDPM, net, device='cuda', ckpt_path='dldemos/ddpm/model.pth'): 21 | print('batch size:', batch_size) 22 | n_steps = ddpm.n_steps 23 | dataloader = get_dataloader(batch_size) 24 | net = net.to(device) 25 | loss_fn = nn.MSELoss() 26 | optimizer = torch.optim.Adam(net.parameters(), 1e-3) 27 | 28 | tic = time.time() 29 | for e in range(n_epochs): 30 | total_loss = 0 31 | 32 | for x, _ in dataloader: 33 | current_batch_size = x.shape[0] 34 | x = x.to(device) 35 | t = torch.randint(0, n_steps, (current_batch_size, )).to(device) 36 | eps = torch.randn_like(x).to(device) 37 | x_t = ddpm.sample_forward(x, t, eps) 38 | eps_theta = net(x_t, t.reshape(current_batch_size, 1)) 39 | loss = loss_fn(eps_theta, eps) 40 | optimizer.zero_grad() 41 | loss.backward() 42 | 
optimizer.step() 43 | total_loss += loss.item() * current_batch_size 44 | total_loss /= len(dataloader.dataset) 45 | toc = time.time() 46 | torch.save(net.state_dict(), ckpt_path) 47 | print(f'epoch {e} loss: {total_loss} elapsed {(toc - tic):.2f}s') 48 | print('Done') 49 | 50 | 51 | def sample_imgs(ddpm, 52 | net, 53 | output_path, 54 | n_sample=81, 55 | device='cuda', 56 | simple_var=True): 57 | net = net.to(device) 58 | net = net.eval() 59 | with torch.no_grad(): 60 | shape = (n_sample, *get_img_shape()) # (n_sample, 1, 28, 28) 61 | imgs = ddpm.sample_backward(shape, 62 | net, 63 | device=device, 64 | simple_var=simple_var).detach().cpu() 65 | imgs = (imgs + 1) / 2 * 255 66 | imgs = imgs.clamp(0, 255) 67 | imgs = einops.rearrange(imgs, 68 | '(b1 b2) c h w -> (b1 h) (b2 w) c', 69 | b1=int(n_sample**0.5)) 70 | 71 | imgs = imgs.numpy().astype(np.uint8) 72 | 73 | cv2.imwrite(output_path, imgs) 74 | 75 | 76 | configs = [ 77 | convnet_small_cfg, convnet_medium_cfg, convnet_big_cfg, unet_1_cfg, 78 | unet_res_cfg 79 | ] 80 | 81 | if __name__ == '__main__': 82 | os.makedirs('work_dirs', exist_ok=True) 83 | 84 | n_steps = 1000 85 | config_id = 4 86 | device = 'cuda' 87 | model_path = 'dldemos/ddpm/model_unet_res.pth' 88 | 89 | config = configs[config_id] 90 | net = build_network(config, n_steps) 91 | ddpm = DDPM(device, n_steps) 92 | 93 | train(ddpm, net, device=device, ckpt_path=model_path) 94 | 95 | net.load_state_dict(torch.load(model_path)) 96 | sample_imgs(ddpm, net, 'work_dirs/diffusion.jpg', device=device) 97 | -------------------------------------------------------------------------------- /dldemos/lmdb_loader.py: -------------------------------------------------------------------------------- 1 | # Modified from 2 | # https://github.com/xunge/pytorch_lmdb_imagenet/blob/master/folder2lmdb.py 3 | 4 | import os 5 | import os.path as osp 6 | import pickle 7 | 8 | import lmdb 9 | import six 10 | from PIL import Image 11 | from torch.utils.data import DataLoader, Dataset 12 | 13 | 14 | def raw_reader(path): 15 | with open(path, 'rb') as f: 16 | bin_data = f.read() 17 | return bin_data 18 | 19 | 20 | def dumps_data(obj): 21 | """Serialize an object. 
22 | 23 | Returns: 24 | Implementation-dependent bytes-like object 25 | """ 26 | return pickle.dumps(obj) 27 | 28 | 29 | class MyImageFolder(Dataset): 30 | 31 | def __init__(self, root): 32 | super().__init__() 33 | self.root = root 34 | self.filenames = sorted(os.listdir(root)) 35 | 36 | def __len__(self) -> int: 37 | return len(self.filenames) 38 | 39 | def __getitem__(self, index: int): 40 | path = os.path.join(self.root, self.filenames[index]) 41 | return raw_reader(path) 42 | 43 | 44 | def folder2lmdb(img_dir, output_path, write_frequency=5000): 45 | directory = img_dir 46 | print('Loading dataset from %s' % directory) 47 | dataset = MyImageFolder(directory) 48 | data_loader = DataLoader(dataset, num_workers=16, collate_fn=lambda x: x) 49 | 50 | lmdb_path = output_path 51 | isdir = os.path.isdir(lmdb_path) 52 | 53 | print('Generating LMDB at %s' % lmdb_path) 54 | db = lmdb.open(lmdb_path, 55 | subdir=isdir, 56 | map_size=1099511627776 * 2, 57 | readonly=False, 58 | meminit=False, 59 | map_async=True) 60 | 61 | txn = db.begin(write=True) 62 | for idx, data in enumerate(data_loader): 63 | image = data[0] 64 | 65 | txn.put(u'{}'.format(idx).encode('ascii'), dumps_data(image)) 66 | if idx % write_frequency == 0: 67 | print('[%d/%d]' % (idx, len(data_loader))) 68 | txn.commit() 69 | txn = db.begin(write=True) 70 | 71 | # finish iterating through dataset 72 | txn.commit() 73 | keys = [u'{}'.format(k).encode('ascii') for k in range(idx + 1)] 74 | with db.begin(write=True) as txn: 75 | txn.put(b'__keys__', dumps_data(keys)) 76 | txn.put(b'__len__', dumps_data(len(keys))) 77 | 78 | print('Flushing database ...') 79 | db.sync() 80 | db.close() 81 | 82 | 83 | def loads_data(buf): 84 | """ 85 | Args: 86 | buf: the output of `dumps_data`. 87 | """ 88 | return pickle.loads(buf) 89 | 90 | 91 | class ImageFolderLMDB(Dataset): 92 | 93 | def __init__(self, db_path, transform=None): 94 | self.db_path = db_path 95 | self.env = lmdb.open(db_path, 96 | subdir=osp.isdir(db_path), 97 | readonly=True, 98 | lock=False, 99 | readahead=False, 100 | meminit=False) 101 | with self.env.begin(write=False) as txn: 102 | self.length = loads_data(txn.get(b'__len__')) 103 | self.keys = loads_data(txn.get(b'__keys__')) 104 | 105 | self.transform = transform 106 | 107 | def __getitem__(self, index): 108 | env = self.env 109 | with env.begin(write=False) as txn: 110 | byteflow = txn.get(self.keys[index]) 111 | 112 | unpacked = loads_data(byteflow) 113 | 114 | # load img 115 | imgbuf = unpacked 116 | buf = six.BytesIO() 117 | buf.write(imgbuf) 118 | buf.seek(0) 119 | img = Image.open(buf).convert('RGB') 120 | 121 | if self.transform is not None: 122 | img = self.transform(img) 123 | 124 | return img 125 | 126 | def __len__(self): 127 | return self.length 128 | 129 | def __repr__(self): 130 | return self.__class__.__name__ + ' (' + self.db_path + ')' 131 | -------------------------------------------------------------------------------- /dldemos/nms/bboxes.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SingleZombie/DL-Demos/739a21ff90f411c318e098823581afb3f8a1d010/dldemos/nms/bboxes.pt -------------------------------------------------------------------------------- /dldemos/nms/iou.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | from PIL import Image, ImageDraw, ImageFont 4 | 5 | 6 | def box_intersection( 7 | b1: Tuple[int, int, int, int], 8 | b2: Tuple[int, int, int, int]) -> Tuple[int, 
int, int, int]: 9 | x11, y11, x12, y12 = b1 10 | x21, y21, x22, y22 = b2 11 | 12 | xl = max(x11, x21) 13 | xr = min(x12, x22) 14 | yt = max(y11, y21) 15 | yb = min(y12, y22) 16 | return (xl, yt, xr, yb) 17 | 18 | 19 | def area(box: Tuple[int, int, int, int]) -> float: 20 | x1, y1, x2, y2 = box 21 | width = max(x2 - x1, 0) 22 | height = max(y2 - y1, 0) 23 | return width * height 24 | 25 | 26 | def iou(b1: Tuple[int, int, int, int], b2: Tuple[int, int, int, int]) -> float: 27 | intersection = box_intersection(b1, b2) 28 | inter_area = area(intersection) 29 | union_area = area(b1) + area(b2) - inter_area 30 | return inter_area / union_area 31 | 32 | 33 | def main(): 34 | img0 = Image.new('RGB', (400, 200), 'white') 35 | imgs = [] 36 | durations = [200] 37 | img = img0.copy() 38 | image_draw = ImageDraw.Draw(img) 39 | bbox1 = (70, 70, 160, 150) 40 | bbox2 = (40, 60, 140, 130) 41 | text_x = 170 42 | text_y = 30 43 | font = ImageFont.truetype( 44 | 'times.ttf', 45 | 16, 46 | ) 47 | 48 | def draw_line_of_text(text: str): 49 | nonlocal text_y, image_draw 50 | tw, th = font.getsize(text) 51 | image_draw.text((text_x, text_y), text, 'black') 52 | text_y += th 53 | 54 | image_draw.rectangle(bbox1, outline='orange', width=2) 55 | imgs.append(img.copy()) 56 | durations.append(500) 57 | image_draw.rectangle(bbox2, outline='purple', width=2) 58 | imgs.append(img.copy()) 59 | durations.append(500) 60 | 61 | image_draw.rectangle(bbox1, outline='orange', fill='orange', width=2) 62 | draw_line_of_text(f'a1 = {area(bbox1)}') 63 | imgs.append(img.copy()) 64 | durations.append(800) 65 | 66 | image_draw.rectangle(bbox2, outline='purple', fill='purple', width=2) 67 | draw_line_of_text(f'a2 = {area(bbox2)}') 68 | imgs.append(img.copy()) 69 | durations.append(800) 70 | 71 | ibox = box_intersection(bbox1, bbox2) 72 | image_draw.rectangle(ibox, outline='red', fill='red', width=2) 73 | draw_line_of_text(f'i = {area(ibox)}') 74 | imgs.append(img.copy()) 75 | durations.append(1000) 76 | 77 | image_draw.rectangle(bbox1, outline='green', fill='green', width=2) 78 | image_draw.rectangle(bbox2, outline='green', fill='green', width=2) 79 | draw_line_of_text( 80 | f'u = a1 + a2 - i = {area(bbox1) + area(bbox2) - area(ibox)}') 81 | imgs.append(img.copy()) 82 | durations.append(1500) 83 | 84 | image_draw.rectangle(ibox, outline='red', fill='red', width=2) 85 | draw_line_of_text(f'iou = i / u = {iou(bbox1, bbox2)}') 86 | imgs.append(img.copy()) 87 | durations.append(2000) 88 | 89 | img0.save('work_dirs/NMS/1.gif', 90 | save_all=True, 91 | append_images=imgs, 92 | duration=durations, 93 | loop=0) 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | -------------------------------------------------------------------------------- /dldemos/nms/show_bbox.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple 2 | 3 | from PIL import Image, ImageDraw, ImageFont 4 | 5 | 6 | def draw_bbox(img: Image.Image, 7 | bbox: Tuple[float, float, float, float], 8 | prob: float, 9 | rect_color: Tuple[int, int, int] = (255, 0, 0), 10 | text: Optional[str] = None, 11 | better_font: Optional[str] = None): 12 | img_draw = ImageDraw.Draw(img, 'RGBA') 13 | x1, y1, x2, y2 = bbox 14 | if better_font is not None: 15 | font = ImageFont.truetype( 16 | better_font, 17 | 12, 18 | ) 19 | else: 20 | font = ImageFont.load_default() 21 | 22 | img_draw.rectangle((x1 - 2, y1 - 2, x2 + 2, y2 + 2), 23 | outline=rect_color, 24 | width=2) 25 | 26 | # Show the class label in the top-right corner 27 | if 
text is not None: 28 | tw, th = font.getsize(text) 29 | img_draw.rectangle((x2 - tw, y1, x2, y1 + th), fill='black') 30 | img_draw.text((x2 - tw, y1), text, font=font, anchor='rt') 31 | 32 | # Show the probability in the top-left corner 33 | tw, th = font.getsize(f'{prob:.2f}') 34 | img_draw.rectangle((x1, y1, x1 + tw, y1 + th), fill='black') 35 | img_draw.text((x1, y1), f'{prob:.2f}', font=font) 36 | 37 | 38 | def main(): 39 | img = Image.open('work_dirs/detection_demo.jpg') 40 | draw_bbox(img, (191, 105, 294, 157), 0.95) 41 | draw_bbox(img, (168, 111, 280, 150), 0.8) 42 | draw_bbox(img, (218, 113, 284, 159), 0.7) 43 | draw_bbox(img, (193, 140, 231, 153), 0.3) 44 | 45 | draw_bbox(img, (323, 112, 380, 145), 0.7) 46 | draw_bbox(img, (305, 107, 364, 134), 0.8) 47 | draw_bbox(img, (294, 114, 376, 151), 0.9) 48 | draw_bbox(img, (319, 138, 358, 155), 0.3) 49 | img.save('work_dirs/NMS/1.jpg') 50 | 51 | 52 | if __name__ == '__main__': 53 | main() 54 | -------------------------------------------------------------------------------- /dldemos/pixelcnn/dataset.py: -------------------------------------------------------------------------------- 1 | import torchvision 2 | from torch.utils.data import DataLoader 3 | from torchvision.transforms import ToTensor 4 | 5 | 6 | def download_dataset(): 7 | mnist = torchvision.datasets.MNIST(root='./data/mnist', download=True) 8 | print('length of MNIST', len(mnist)) 9 | id = 4 10 | img, label = mnist[id] 11 | print(img) 12 | print(label) 13 | 14 | # On a computer with a monitor 15 | # img.show() 16 | 17 | img.save('work_dirs/tmp.jpg') 18 | tensor = ToTensor()(img) 19 | print(tensor.shape) 20 | print(tensor.max()) 21 | print(tensor.min()) 22 | 23 | 24 | def get_dataloader(batch_size: int): 25 | dataset = torchvision.datasets.MNIST(root='./data/mnist', 26 | transform=ToTensor()) 27 | return DataLoader(dataset, batch_size=batch_size, shuffle=True) 28 | 29 | 30 | def get_img_shape(): 31 | return (1, 28, 28) 32 | 33 | 34 | if __name__ == '__main__': 35 | import os 36 | os.makedirs('work_dirs', exist_ok=True) 37 | download_dataset() 38 | -------------------------------------------------------------------------------- /dldemos/pixelcnn/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import cv2 5 | import einops 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | from dldemos.pixelcnn.dataset import get_dataloader, get_img_shape 12 | from dldemos.pixelcnn.model import GatedPixelCNN, PixelCNN 13 | 14 | batch_size = 128 15 | # You can set color_level to any value between 2 and 256 16 | color_level = 8 17 | 18 | 19 | def train(model, device, model_path): 20 | dataloader = get_dataloader(batch_size) 21 | model = model.to(device) 22 | optimizer = torch.optim.Adam(model.parameters(), 1e-3) 23 | loss_fn = nn.CrossEntropyLoss() 24 | n_epochs = 40 25 | tic = time.time() 26 | for e in range(n_epochs): 27 | total_loss = 0 28 | for x, _ in dataloader: 29 | current_batch_size = x.shape[0] 30 | x = x.to(device) 31 | y = torch.ceil(x * (color_level - 1)).long() 32 | y = y.squeeze(1) 33 | predict_y = model(x) 34 | loss = loss_fn(predict_y, y) 35 | optimizer.zero_grad() 36 | loss.backward() 37 | optimizer.step() 38 | total_loss += loss.item() * current_batch_size 39 | total_loss /= len(dataloader.dataset) 40 | toc = time.time() 41 | torch.save(model.state_dict(), model_path) 42 | print(f'epoch {e} loss: {total_loss} elapsed {(toc - tic):.2f}s') 43 | 
print('Done') 44 | 45 | 46 | def sample(model, device, model_path, output_path, n_sample=81): 47 | 48 | model.eval() 49 | model.load_state_dict(torch.load(model_path)) 50 | model = model.to(device) 51 | C, H, W = get_img_shape() # (1, 28, 28) 52 | x = torch.zeros((n_sample, C, H, W)).to(device) 53 | with torch.no_grad(): 54 | for i in range(H): 55 | for j in range(W): 56 | output = model(x) 57 | prob_dist = F.softmax(output[:, :, i, j], -1) 58 | pixel = torch.multinomial(prob_dist, 59 | 1).float() / (color_level - 1) 60 | x[:, :, i, j] = pixel 61 | 62 | imgs = x * 255 63 | imgs = imgs.clamp(0, 255) 64 | imgs = einops.rearrange(imgs, 65 | '(b1 b2) c h w -> (b1 h) (b2 w) c', 66 | b1=int(n_sample**0.5)) 67 | 68 | imgs = imgs.detach().cpu().numpy().astype(np.uint8) 69 | 70 | cv2.imwrite(output_path, imgs) 71 | 72 | 73 | models = [ 74 | PixelCNN(15, 128, 32, True, color_level), 75 | GatedPixelCNN(15, 128, 32, True, color_level) 76 | ] 77 | 78 | if __name__ == '__main__': 79 | os.makedirs('work_dirs', exist_ok=True) 80 | model_id = 1 81 | model = models[model_id] 82 | device = 'cuda' 83 | model_path = f'dldemos/pixelcnn/model_{model_id}_{color_level}.pth' 84 | train(model, device, model_path) 85 | sample(model, device, model_path, 86 | f'work_dirs/pixelcnn_{model_id}_{color_level}.jpg') 87 | -------------------------------------------------------------------------------- /dldemos/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .function import (get_activation_de_func, get_activation_func, relu, 2 | relu_de, sigmoid) 3 | 4 | __all__ = [ 5 | 'sigmoid', 'relu', 'relu_de', 'get_activation_de_func', 6 | 'get_activation_func' 7 | ] 8 | -------------------------------------------------------------------------------- /dldemos/utils/function.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def sigmoid(x): 5 | return 1 / (1 + np.exp(-x)) 6 | 7 | 8 | def sigmoid_de(x): 9 | tmp = sigmoid(x) 10 | return tmp * (1 - tmp) 11 | 12 | 13 | def relu(x): 14 | return np.maximum(x, 0) 15 | 16 | 17 | def relu_de(x): 18 | return np.where(x > 0, 1, 0) 19 | 20 | 21 | def get_activation_func(name): 22 | if name == 'sigmoid': 23 | return sigmoid 24 | elif name == 'relu': 25 | return relu 26 | else: 27 | raise KeyError(f'No such activation function {name}') 28 | 29 | 30 | def get_activation_de_func(name): 31 | if name == 'sigmoid': 32 | return sigmoid_de 33 | elif name == 'relu': 34 | return relu_de 35 | else: 36 | raise KeyError(f'No such activation function {name}') 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | lmdb 3 | six 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | if __name__ == '__main__': 4 | setup( 5 | name='dldemos', 6 | version='0.1', 7 | packages=find_packages(), 8 | ) 9 | --------------------------------------------------------------------------------
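Two of the files above are library-style modules whose behavior is easy to check by hand. First, the IoU helpers in dldemos/nms/iou.py: the sketch below uses hypothetical boxes (not taken from the repo) and assumes it is run from the repository root, the same way the other demo scripts import `dldemos.*`. Two 10x10 boxes that overlap in a 5x5 region have intersection area 25 and union 100 + 100 - 25 = 175, so the IoU should be 25/175 ≈ 0.143.

```python
# Hypothetical sanity check for dldemos/nms/iou.py; run from the repo root.
from dldemos.nms.iou import area, box_intersection, iou

b1 = (0, 0, 10, 10)  # a 10 x 10 box, area 100
b2 = (5, 5, 15, 15)  # overlaps b1 in a 5 x 5 region

assert area(b1) == 100 and area(b2) == 100
assert box_intersection(b1, b2) == (5, 5, 10, 10)  # intersection area 25
assert abs(iou(b1, b2) - 25 / 175) < 1e-9  # union = 100 + 100 - 25 = 175
print(f'IoU = {iou(b1, b2):.3f}')  # IoU = 0.143
```

Second, dldemos/lmdb_loader.py only defines `folder2lmdb` and `ImageFolderLMDB` without a driver. A minimal usage sketch would look like the following, where both paths are placeholders for a real image folder and the LMDB database to create:

```python
# Hypothetical usage of dldemos/lmdb_loader.py; the paths are placeholders.
from torchvision.transforms import ToTensor

from dldemos.lmdb_loader import ImageFolderLMDB, folder2lmdb

folder2lmdb('data/my_images', 'data/my_images_lmdb')  # pack a folder into LMDB
dataset = ImageFolderLMDB('data/my_images_lmdb', transform=ToTensor())
print(len(dataset), dataset[0].shape)  # e.g. N torch.Size([3, H, W])
```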