├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── README.md ├── dldemos ├── AdvancedOptimizer │ ├── README.md │ ├── compare_optimizer.py │ ├── main.py │ ├── model.py │ ├── optimizer.py │ └── single_step.py ├── BasicCNN │ ├── README.md │ ├── dataset.py │ ├── np_conv.py │ ├── np_conv_backward.py │ ├── pt_main.py │ └── tf_main.py ├── BasicRNN │ ├── README.md │ ├── constant.py │ ├── main.py │ ├── models.py │ └── read_imdb.py ├── DeepNetwork │ ├── README.md │ ├── dataset.py │ ├── main.py │ └── model.py ├── FourierFeature │ ├── image_mlp.ipynb │ ├── kernel_regression.ipynb │ └── misuzu.png ├── Initialization │ ├── README.md │ ├── main.py │ └── points_classification.py ├── LogisticRegression │ ├── README.md │ └── main.py ├── MulticlassClassification │ ├── README.md │ ├── points_classification.py │ ├── pt_main.py │ └── tf_main.py ├── MyYOLO │ └── load_coco.py ├── PyTorchDistributed │ └── main.py ├── Regularization │ ├── README.md │ ├── main.py │ └── points_classification.py ├── ResNet │ ├── README.md │ └── tf_main.py ├── SentimentAnalysis │ ├── README.md │ ├── glove.py │ ├── main.py │ └── read_imdb.py ├── ShallowNetwork │ ├── README.md │ ├── genereate_points.py │ ├── model.py │ ├── plot_activation_func.py │ └── points_classification.py ├── StyleTransfer │ ├── README.md │ ├── combine_img.py │ ├── copy_img.py │ ├── dancing.jpg │ ├── picasso.jpg │ └── style_transfer.py ├── Transformer │ ├── data_load.py │ ├── model.py │ ├── outdated │ │ ├── dataset.py │ │ ├── dist_train.py │ │ ├── dist_train.sh │ │ ├── preprocess_data.py │ │ ├── test.py │ │ └── train.py │ ├── train.py │ └── translate.py ├── VAE │ ├── README.md │ ├── load_celebA.py │ ├── main.py │ └── model.py ├── VQVAE │ ├── configs.py │ ├── dataset.py │ ├── dist_train_pixelcnn.py │ ├── main.py │ ├── model.py │ └── pixelcnn_model.py ├── attention │ ├── README.md │ ├── dataset.py │ └── main.py ├── ddim │ ├── configs.py │ ├── dataset.py │ ├── ddim.py │ ├── ddpm.py │ ├── dist_sample.py │ ├── dist_train.py │ ├── main.py │ ├── network.py │ └── network_my.py ├── ddpm │ ├── dataset.py │ ├── ddpm.py │ ├── ddpm_simple.py │ ├── main.py │ └── network.py ├── lmdb_loader.py ├── nms │ ├── bboxes.pt │ ├── iou.py │ ├── nms.py │ └── show_bbox.py ├── pixelcnn │ ├── dataset.py │ ├── main.py │ └── model.py └── utils │ ├── __init__.py │ └── function.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | **/*.pyc 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/en/_build/ 69 | docs/zh_cn/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | # custom 109 | .vscode 110 | .idea 111 | *.pkl 112 | *.pkl.json 113 | *.log.json 114 | work_dirs/ 115 | 116 | # Pytorch 117 | *.pth 118 | 119 | # onnx and tensorrt 120 | *.onnx 121 | *.trt 122 | 123 | # local history 124 | .history/** 125 | 126 | # Pytorch Server 127 | *.mar 128 | .DS_Store 129 | 130 | /data/ 131 | /data 132 | data 133 | .vector_cache 134 | 135 | dldemos/*/*.txt 136 | 137 | nohup.out 138 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | known_third_party = PIL,babel,cv2,einops,faker,matplotlib,numpy,pytest,setuptools,tensorflow,torch,torchtext,torchvision 3 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/PyCQA/flake8 3 | rev: 4.0.1 4 | hooks: 5 | - id: flake8 6 | - repo: https://github.com/PyCQA/isort 7 | rev: 5.11.5 8 | hooks: 9 | - id: isort 10 | - repo: https://github.com/pre-commit/mirrors-yapf 11 | rev: v0.32.0 12 | hooks: 13 | - id: yapf 14 | - repo: https://github.com/pre-commit/pre-commit-hooks 15 | rev: v4.2.0 16 | hooks: 17 | - id: trailing-whitespace 18 | - id: check-yaml 19 | - id: end-of-file-fixer 20 | - id: requirements-txt-fixer 21 | - id: double-quote-string-fixer 22 | - id: check-merge-conflict 23 | - id: fix-encoding-pragma 24 | args: ["--remove"] 25 | - id: mixed-line-ending 26 | args: ["--fix=lf"] 27 | - repo: https://github.com/executablebooks/mdformat 28 | rev: 0.7.9 29 | hooks: 30 | - id: mdformat 31 | args: ["--number", "--disable-escape", "link-enclosure"] 32 | additional_dependencies: 33 | - mdformat-openmmlab 34 | - mdformat_frontmatter 35 | - linkify-it-py 36 | - repo: https://github.com/myint/docformatter 37 | rev: v1.4 38 | hooks: 39 | - id: docformatter 40 | args: ["--in-place", "--wrap-descriptions", "79"] 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 1. Clone the git repo: 4 | 5 | ```shell 6 | git clone https://github.com/SingleZombie/DL-Demos.git 7 | ``` 8 | 9 | 2. 
Run the installation command: 10 | 11 | ```shell 12 | python setup.py develop 13 | pip install -r requirements.txt 14 | ``` 15 | 16 | It is recommended to create a directory named `work_dirs` and put temporary results into it. 17 | 18 | # Description 19 | 20 | Demos for deep learning. 21 | 22 | # Project 23 | 24 | ## Andrew Ng Deep Learning Specialization 25 | 26 | 01. Logistic Regression 27 | 02. Shallow Neural Network 28 | 03. Deep Neural Network (MLP) 29 | 04. Parameter Initialization 30 | 05. Regularization 31 | 06. Advanced Optimizer (mini-batch, momentum, Adam) 32 | 07. Multiclass Classification with TensorFlow and PyTorch 33 | 08. NumPy Convolution 2D 34 | 09. Basic CNN 35 | 10. ResNet 36 | 11. NMS 37 | 12. ~~My YOLO model~~ 38 | 13. Letter-level language model with PyTorch 39 | 14. Sentiment analysis using GloVe with PyTorch 40 | 15. Date translation attention model with PyTorch 41 | 16. Transformer cn-en translation with PyTorch 42 | 43 | ## Generative Model 44 | 45 | 1. VAE with PyTorch 46 | 2. DDPM with PyTorch 47 | 3. PixelCNN with PyTorch 48 | 4. VQVAE with PyTorch 49 | 5. DDIM with PyTorch 50 | 51 | ## Others 52 | 53 | 1. Style Transfer with PyTorch 54 | 2. PyTorch DDP Demo 55 | 3. Fourier Feature 56 | -------------------------------------------------------------------------------- /dldemos/AdvancedOptimizer/README.md: -------------------------------------------------------------------------------- 1 | 1. Install the repository 2 | 3 | ```shell 4 | python setup.py develop 5 | ``` 6 | 7 | 2. Download the dataset from https://www.kaggle.com/datasets/fusicfenta/cat-and-dog?resource=download and organize the directory as follows: 8 | 9 | ```plain text 10 | └─data 11 | └─archive 12 | └─dataset 13 | ├─single_prediction 14 | ├─test_set 15 | │ ├─cats 16 | │ └─dogs 17 | └─training_set 18 | ├─cats 19 | └─dogs 20 | ``` 21 | 22 | 3. Modify the path in `main.py`: 23 | 24 | ```Python 25 | train_X, train_Y, dev_X, dev_Y = get_cat_set( 26 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1000) 27 | ``` 28 | 29 | Replace 'dldemos/LogisticRegression/data/archive/dataset' with your path. 30 | 31 | 4. Run `main.py`. 32 | 33 | You can tune the hyper-parameters and try different optimizers.
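For intuition, here is what a single Adam update looks like in plain NumPy; momentum and RMSProp are exactly its two halves. This is an illustrative sketch only: the demo's real implementation lives in `optimizer.py`, and the function name and signature below are not this project's API.

```Python
import numpy as np

def adam_step(param, grad, m, v, t, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
    # illustrative helper, not part of this repository
    m = beta1 * m + (1 - beta1) * grad         # momentum: first-moment estimate
    v = beta2 * v + (1 - beta2) * grad ** 2    # RMSProp: second-moment estimate
    m_hat = m / (1 - beta1 ** t)               # bias correction; t starts at 1
    v_hat = v / (1 - beta2 ** t)
    param = param - lr * m_hat / (np.sqrt(v_hat) + eps)
    return param, m, v
```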
34 | -------------------------------------------------------------------------------- /dldemos/AdvancedOptimizer/compare_optimizer.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | LEN = 10 5 | 6 | result_dict = { 7 | 'batch_size_8': [ 8 | 0.6954, 0.6527, 0.5950, 0.5475, 0.4941, 0.6317, 0.4309, 0.4870, 0.4461, 9 | 0.2928 10 | ], 11 | 'batch_size_64': [ 12 | 0.6910, 0.6447, 0.6133, 0.5756, 0.5590, 0.5224, 0.5478, 0.4379, 0.4241, 13 | 0.3764 14 | ], 15 | 'batch_size_128': [ 16 | 0.6910, 0.6497, 0.6289, 0.6168, 0.5802, 0.5677, 0.5366, 0.5436, 0.5282, 17 | 0.5344 18 | ], 19 | 'batch_size_2000': [ 20 | 0.6966, 0.6840, 0.6770, 0.6780, 0.6675, 0.6572, 0.6605, 0.6482, 0.6719, 21 | 0.6392 22 | ], 23 | 'Momentum_64': [ 24 | 0.6917, 0.6581, 0.6212, 0.5774, 0.5123, 0.4700, 0.4162, 0.3581, 0.3168, 25 | 0.2996 26 | ], 27 | 'RMSProp_64': [ 28 | 0.6924, 0.6519, 0.6381, 0.6209, 0.6043, 0.5895, 0.5747, 0.5635, 0.5491, 29 | 0.5363 30 | ], 31 | 'Adam_64': [ 32 | 0.6781, 0.6150, 0.5801, 0.5466, 0.5163, 0.4881, 0.4617, 0.4365, 0.4154, 33 | 0.3959 34 | ], 35 | 'Adam_64_decay_0.2': [ 36 | 0.6861, 0.6021, 0.5783, 0.5644, 0.5544, 0.5471, 0.5409, 0.5357, 0.5314, 37 | 0.5276 38 | ], 39 | 'Adam_64_decay_0.005': [ 40 | 0.6900, 0.6047, 0.5558, 0.5283, 0.5068, 0.4843, 0.4462, 0.4307, 0.4145, 41 | 0.3974 42 | ] 43 | } 44 | 45 | 46 | def plot_curves(result_keys): 47 | x = np.linspace(0, 90, LEN) 48 | for k in result_keys: 49 | y = result_dict[k] 50 | plt.plot(x, y, label=k) 51 | plt.xlabel('Epoch') 52 | plt.ylabel('Training Cost') 53 | plt.legend() 54 | 55 | plt.show() 56 | 57 | 58 | plot_curves( 59 | ['batch_size_8', 'batch_size_64', 'batch_size_128', 'batch_size_2000']) 60 | plot_curves(['batch_size_64', 'Momentum_64', 'RMSProp_64', 'Adam_64']) 61 | plot_curves(['Adam_64', 'Adam_64_decay_0.2', 'Adam_64_decay_0.005']) 62 | -------------------------------------------------------------------------------- /dldemos/AdvancedOptimizer/main.py: -------------------------------------------------------------------------------- 1 | from dldemos.AdvancedOptimizer.model import DeepNetwork, train 2 | from dldemos.AdvancedOptimizer.optimizer import (Adam, GradientDescent, 3 | Momentum, RMSProp, 4 | get_hyperbola_func) 5 | from dldemos.DeepNetwork.dataset import get_cat_set 6 | 7 | 8 | def main(): 9 | train_X, train_Y, dev_X, dev_Y = get_cat_set( 10 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1000) 11 | n_x = train_X.shape[0] 12 | 13 | # train_X: [224*224*3, 2000] 14 | model = DeepNetwork([n_x, 30, 20, 20, 1], 15 | ['relu', 'relu', 'relu', 'sigmoid']) 16 | 17 | # Comment out all but the optimizer you want to use; as written, the last Adam takes effect 18 | 19 | optimizer = GradientDescent(model.save(), learning_rate=0.001) 20 | optimizer = Momentum(model.save(), learning_rate=0.001, from_scratch=True) 21 | optimizer = RMSProp(model.save(), learning_rate=0.00001, from_scratch=True) 22 | optimizer = Adam(model.save(), learning_rate=0.00001, from_scratch=True) 23 | 24 | lr_scheduler_1 = get_hyperbola_func(0.2) 25 | lr_scheduler_2 = get_hyperbola_func(0.005) 26 | 27 | optimizer = Adam(model.save(), 28 | learning_rate=0.00001, 29 | from_scratch=True, 30 | lr_scheduler=lr_scheduler_1) 31 | 32 | optimizer = Adam(model.save(), 33 | learning_rate=0.00001, 34 | from_scratch=True, 35 | lr_scheduler=lr_scheduler_2) 36 | 37 | train(model, 38 | optimizer, 39 | train_X, 40 | train_Y, 41 | 100, 42 | model_name='model_64', 43 | save_dir='work_dirs',
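          # assumption from the argument names (train() lives in model.py):
          # recover_from resumes from a saved checkpoint when not None;
          # plot_mini_batch toggles plotting of per-mini-batch costs.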
recover_from=None, 45 | batch_size=64, 46 | print_interval=10, 47 | dev_X=dev_X, 48 | dev_Y=dev_Y, 49 | plot_mini_batch=False) 50 | 51 | 52 | if __name__ == '__main__': 53 | main() 54 | -------------------------------------------------------------------------------- /dldemos/AdvancedOptimizer/single_step.py: -------------------------------------------------------------------------------- 1 | from dldemos.DeepNetwork.dataset import get_cat_set 2 | from dldemos.DeepNetwork.model import DeepNetwork, train 3 | 4 | 5 | def main(): 6 | train_X, train_Y, test_X, test_Y = get_cat_set( 7 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1500) 8 | n_x = train_X.shape[0] 9 | model = DeepNetwork([n_x, 30, 30, 20, 20, 1], 10 | ['relu', 'relu', 'relu', 'relu', 'sigmoid']) 11 | train(model, 12 | train_X, 13 | train_Y, 14 | 1, 15 | learning_rate=0.01, 16 | print_interval=10, 17 | test_X=test_X, 18 | test_Y=test_Y) 19 | 20 | 21 | if __name__ == '__main__': 22 | main() 23 | -------------------------------------------------------------------------------- /dldemos/BasicCNN/README.md: -------------------------------------------------------------------------------- 1 | 1. Install the repository 2 | 3 | ```shell 4 | python setup.py develop 5 | ``` 6 | 7 | 2. Download the dataset from https://www.kaggle.com/datasets/fusicfenta/cat-and-dog?resource=download and organize the directory as follows: 8 | 9 | ```plain text 10 | └─data 11 | └─archive 12 | └─dataset 13 | ├─single_prediction 14 | ├─test_set 15 | │ ├─cats 16 | │ └─dogs 17 | └─training_set 18 | ├─cats 19 | └─dogs 20 | ``` 21 | 22 | 3. Modify the path in the "main" scripts: 23 | 24 | ```Python 25 | train_X, train_Y, test_X, test_Y = get_cat_set( 26 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1500) 27 | ``` 28 | 29 | Replace 'dldemos/LogisticRegression/data/archive/dataset' with your path. 30 | 31 | 4. Run `tf_main.py` or `pt_main.py`.
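Before training, you can sanity-check the data pipeline with `get_cat_set` from `dataset.py`; the path below is a placeholder for your own dataset location:

```Python
from dldemos.BasicCNN.dataset import get_cat_set

# placeholder path; point it at your extracted dataset
train_X, train_Y, test_X, test_Y = get_cat_set(
    'data/archive/dataset', train_size=100, test_size=50, format='nhwc')
print(train_X.shape)  # (200, 224, 224, 3): 100 cats + 100 dogs
print(train_Y.shape)  # (200, 1)
```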
32 | 33 | The NumPy implementation of convolution is in `np_conv.py` and `np_conv_backward.py`. -------------------------------------------------------------------------------- /dldemos/BasicCNN/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Tuple 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | 8 | def load_set(data_path: str, cnt: int, img_shape: Tuple[int, int]): 9 | cat_dirs = sorted(os.listdir(os.path.join(data_path, 'cats'))) 10 | dog_dirs = sorted(os.listdir(os.path.join(data_path, 'dogs'))) 11 | images = [] 12 | for i, cat_dir in enumerate(cat_dirs): 13 | if i >= cnt: 14 | break 15 | name = os.path.join(data_path, 'cats', cat_dir) 16 | cat = cv2.imread(name) 17 | images.append(cat) 18 | 19 | for i, dog_dir in enumerate(dog_dirs): 20 | if i >= cnt: 21 | break 22 | name = os.path.join(data_path, 'dogs', dog_dir) 23 | dog = cv2.imread(name) 24 | images.append(dog) 25 | 26 | for i in range(len(images)): 27 | images[i] = cv2.resize(images[i], img_shape) 28 | images[i] = images[i].astype(np.float32) / 255.0 29 | 30 | return np.array(images) 31 | 32 | 33 | def get_cat_set( 34 | data_root: str, 35 | img_shape: Tuple[int, int] = (224, 224), 36 | train_size=1000, 37 | test_size=200, 38 | format='nhwc' 39 | ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: 40 | 41 | train_X = load_set(os.path.join(data_root, 'training_set'), train_size, 42 | img_shape) 43 | test_X = load_set(os.path.join(data_root, 'test_set'), test_size, 44 | img_shape) 45 | 46 | train_Y = np.array([1] * train_size + [0] * train_size) 47 | test_Y = np.array([1] * test_size + [0] * test_size) 48 | 49 | if format == 'nhwc': 50 | return train_X, np.expand_dims(train_Y, 51 | 1), test_X, np.expand_dims(test_Y, 1) 52 | elif format == 'nchw': 53 | train_X = np.transpose(train_X, (0, 3, 1, 2))  # NHWC -> NCHW 54 | test_X = np.transpose(test_X, (0, 3, 1, 2))  # a reshape would scramble pixels 55 | return train_X, np.expand_dims(train_Y, 56 | 1), test_X, np.expand_dims(test_Y, 1) 57 | else: 58 | raise NotImplementedError('Format must be "nhwc" or "nchw". ') 59 | -------------------------------------------------------------------------------- /dldemos/BasicCNN/np_conv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import torch 4 | 5 | 6 | def conv2d(input: np.ndarray, 7 | weight: np.ndarray, 8 | stride: int, 9 | padding: int, 10 | dilation: int, 11 | groups: int, 12 | bias: np.ndarray = None) -> np.ndarray: 13 | """2D Convolution Implemented with NumPy. 14 | 15 | Args: 16 | input (np.ndarray): The input NumPy array of shape (H, W, C). 17 | weight (np.ndarray): The weight NumPy array of shape 18 | (C', F, F, C / groups). 19 | stride (int): Stride for convolution. 20 | padding (int): The count of zeros to pad on both sides. 21 | dilation (int): The space between kernel elements. 22 | groups (int): Split the input to groups. 23 | bias (np.ndarray | None): The bias NumPy array of shape (C'). 24 | Default: None.
25 | 26 | Outputs: 27 | np.ndarray: The output NumPy array of shape (H', W', C') 28 | """ 29 | h_i, w_i, c_i = input.shape 30 | c_o, f, f_2, c_k = weight.shape 31 | 32 | assert (f == f_2) 33 | assert (c_i % groups == 0) 34 | assert (c_o % groups == 0) 35 | assert (c_i // groups == c_k) 36 | if bias is not None: 37 | assert (bias.shape[0] == c_o) 38 | 39 | f_new = f + (f - 1) * (dilation - 1) 40 | weight_new = np.zeros((c_o, f_new, f_new, c_k), dtype=weight.dtype) 41 | for i_c_o in range(c_o): 42 | for i_c_k in range(c_k): 43 | for i_f in range(f): 44 | for j_f in range(f): 45 | i_f_new = i_f * dilation 46 | j_f_new = j_f * dilation 47 | weight_new[i_c_o, i_f_new, j_f_new, i_c_k] = \ 48 | weight[i_c_o, i_f, j_f, i_c_k] 49 | 50 | input_pad = np.pad(input, [(padding, padding), (padding, padding), (0, 0)]) 51 | 52 | def cal_new_sidelngth(sl, s, f, p): 53 | return (sl + 2 * p - f) // s + 1 54 | 55 | h_o = cal_new_sidelngth(h_i, stride, f_new, padding) 56 | w_o = cal_new_sidelngth(w_i, stride, f_new, padding) 57 | 58 | output = np.empty((h_o, w_o, c_o), dtype=input.dtype) 59 | 60 | c_o_per_group = c_o // groups 61 | 62 | for i_h in range(h_o): 63 | for i_w in range(w_o): 64 | for i_c in range(c_o): 65 | i_g = i_c // c_o_per_group 66 | h_lower = i_h * stride 67 | h_upper = i_h * stride + f_new 68 | w_lower = i_w * stride 69 | w_upper = i_w * stride + f_new 70 | c_lower = i_g * c_k 71 | c_upper = (i_g + 1) * c_k 72 | input_slice = input_pad[h_lower:h_upper, w_lower:w_upper, 73 | c_lower:c_upper] 74 | kernel_slice = weight_new[i_c] 75 | output[i_h, i_w, i_c] = np.sum(input_slice * kernel_slice) 76 | if bias is not None: 77 | output[i_h, i_w, i_c] += bias[i_c] 78 | return output 79 | 80 | 81 | @pytest.mark.parametrize('c_i, c_o', [(3, 6), (2, 2)]) 82 | @pytest.mark.parametrize('kernel_size', [3, 5]) 83 | @pytest.mark.parametrize('stride', [1, 2]) 84 | @pytest.mark.parametrize('padding', [0, 1]) 85 | @pytest.mark.parametrize('dilation', [1, 2]) 86 | @pytest.mark.parametrize('groups', ['1', 'all']) 87 | @pytest.mark.parametrize('bias', [False]) 88 | def test_conv(c_i: int, c_o: int, kernel_size: int, stride: int, padding: int, 89 | dilation: int, groups: str, bias: bool): 90 | if groups == '1': 91 | groups = 1 92 | elif groups == 'all': 93 | groups = c_i 94 | 95 | if bias: 96 | bias = np.random.randn(c_o) 97 | torch_bias = torch.from_numpy(bias) 98 | else: 99 | bias = None 100 | torch_bias = None 101 | 102 | input = np.random.randn(20, 20, c_i) 103 | weight = np.random.randn(c_o, kernel_size, kernel_size, c_i // groups) 104 | 105 | torch_input = torch.from_numpy(np.transpose(input, (2, 0, 1))).unsqueeze(0) 106 | torch_weight = torch.from_numpy(np.transpose(weight, (0, 3, 1, 2))) 107 | torch_output = torch.conv2d(torch_input, torch_weight, torch_bias, stride, 108 | padding, dilation, groups).numpy() 109 | torch_output = np.transpose(torch_output.squeeze(0), (1, 2, 0)) 110 | 111 | numpy_output = conv2d(input, weight, stride, padding, dilation, groups, 112 | bias) 113 | 114 | assert np.allclose(torch_output, numpy_output) 115 | -------------------------------------------------------------------------------- /dldemos/BasicCNN/np_conv_backward.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Tuple 2 | 3 | import numpy as np 4 | import pytest 5 | import torch 6 | 7 | 8 | def conv2d_forward(input: np.ndarray, weight: np.ndarray, bias: np.ndarray, 9 | stride: int, padding: int) -> Dict[str, np.ndarray]: 10 | """2D Convolution Forward Implemented with NumPy.
11 | 12 | Args: 13 | input (np.ndarray): The input NumPy array of shape (H, W, C). 14 | weight (np.ndarray): The weight NumPy array of shape 15 | (C', F, F, C). 16 | bias (np.ndarray | None): The bias NumPy array of shape (C'). 17 | Default: None. 18 | stride (int): Stride for convolution. 19 | padding (int): The count of zeros to pad on both sides. 20 | 21 | Outputs: 22 | Dict[str, np.ndarray]: Cached data for backward prop. 23 | """ 24 | h_i, w_i, c_i = input.shape 25 | c_o, f, f_2, c_k = weight.shape 26 | 27 | assert (f == f_2) 28 | assert (c_i == c_k) 29 | assert (bias.shape[0] == c_o) 30 | 31 | input_pad = np.pad(input, [(padding, padding), (padding, padding), (0, 0)]) 32 | 33 | def cal_new_sidelngth(sl, s, f, p): 34 | return (sl + 2 * p - f) // s + 1 35 | 36 | h_o = cal_new_sidelngth(h_i, stride, f, padding) 37 | w_o = cal_new_sidelngth(w_i, stride, f, padding) 38 | 39 | output = np.empty((h_o, w_o, c_o), dtype=input.dtype) 40 | 41 | for i_h in range(h_o): 42 | for i_w in range(w_o): 43 | for i_c in range(c_o): 44 | h_lower = i_h * stride 45 | h_upper = i_h * stride + f 46 | w_lower = i_w * stride 47 | w_upper = i_w * stride + f 48 | input_slice = input_pad[h_lower:h_upper, w_lower:w_upper, :] 49 | kernel_slice = weight[i_c] 50 | output[i_h, i_w, i_c] = np.sum(input_slice * kernel_slice) 51 | output[i_h, i_w, i_c] += bias[i_c] 52 | 53 | cache = dict() 54 | cache['Z'] = output 55 | cache['W'] = weight 56 | cache['b'] = bias 57 | cache['A_prev'] = input 58 | return cache 59 | 60 | 61 | def conv2d_backward(dZ: np.ndarray, cache: Dict[str, np.ndarray], stride: int, 62 | padding: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: 63 | """2D Convolution Backward Implemented with NumPy. 64 | 65 | Args: 66 | dZ: (np.ndarray): The derivative of the output of conv. 67 | cache (Dict[str, np.ndarray]): Record output 'Z', weight 'W', bias 'b' 68 | and input 'A_prev' of forward function. 69 | stride (int): Stride for convolution. 70 | padding (int): The count of zeros to pad on both sides. 71 | 72 | Outputs: 73 | Tuple[np.ndarray, np.ndarray, np.ndarray]: The derivative of W, b, 74 | A_prev. 
75 | """ 76 | W = cache['W'] 77 | b = cache['b'] 78 | A_prev = cache['A_prev'] 79 | dW = np.zeros(W.shape) 80 | db = np.zeros(b.shape) 81 | dA_prev = np.zeros(A_prev.shape) 82 | 83 | _, _, c_i = A_prev.shape 84 | c_o, f, f_2, c_k = W.shape 85 | h_o, w_o, c_o_2 = dZ.shape 86 | 87 | assert (f == f_2) 88 | assert (c_i == c_k) 89 | assert (c_o == c_o_2) 90 | 91 | A_prev_pad = np.pad(A_prev, [(padding, padding), (padding, padding), 92 | (0, 0)]) 93 | dA_prev_pad = np.pad(dA_prev, [(padding, padding), (padding, padding), 94 | (0, 0)]) 95 | 96 | for i_h in range(h_o): 97 | for i_w in range(w_o): 98 | for i_c in range(c_o): 99 | h_lower = i_h * stride 100 | h_upper = i_h * stride + f 101 | w_lower = i_w * stride 102 | w_upper = i_w * stride + f 103 | 104 | input_slice = A_prev_pad[h_lower:h_upper, w_lower:w_upper, :] 105 | # forward 106 | # kernel_slice = W[i_c] 107 | # Z[i_h, i_w, i_c] = np.sum(input_slice * kernel_slice) 108 | # Z[i_h, i_w, i_c] += b[i_c] 109 | 110 | # backward 111 | dW[i_c] += input_slice * dZ[i_h, i_w, i_c] 112 | dA_prev_pad[h_lower:h_upper, 113 | w_lower:w_upper, :] += W[i_c] * dZ[i_h, i_w, i_c] 114 | db[i_c] += dZ[i_h, i_w, i_c] 115 | 116 | if padding > 0: 117 | dA_prev = dA_prev_pad[padding:-padding, padding:-padding, :] 118 | else: 119 | dA_prev = dA_prev_pad 120 | return dW, db, dA_prev 121 | 122 | 123 | @pytest.mark.parametrize('c_i, c_o', [(3, 6), (2, 2)]) 124 | @pytest.mark.parametrize('kernel_size', [3, 5]) 125 | @pytest.mark.parametrize('stride', [1, 2]) 126 | @pytest.mark.parametrize('padding', [0, 1]) 127 | def test_conv(c_i: int, c_o: int, kernel_size: int, stride: int, padding: str): 128 | 129 | # Preprocess 130 | input = np.random.randn(20, 20, c_i) 131 | weight = np.random.randn(c_o, kernel_size, kernel_size, c_i) 132 | bias = np.random.randn(c_o) 133 | 134 | torch_input = torch.from_numpy(np.transpose( 135 | input, (2, 0, 1))).unsqueeze(0).requires_grad_() 136 | torch_weight = torch.from_numpy(np.transpose( 137 | weight, (0, 3, 1, 2))).requires_grad_() 138 | torch_bias = torch.from_numpy(bias).requires_grad_() 139 | 140 | # forward 141 | torch_output_tensor = torch.conv2d(torch_input, torch_weight, torch_bias, 142 | stride, padding) 143 | torch_output = np.transpose( 144 | torch_output_tensor.detach().numpy().squeeze(0), (1, 2, 0)) 145 | 146 | cache = conv2d_forward(input, weight, bias, stride, padding) 147 | numpy_output = cache['Z'] 148 | 149 | assert np.allclose(torch_output, numpy_output) 150 | 151 | # backward 152 | torch_sum = torch.sum(torch_output_tensor) 153 | torch_sum.backward() 154 | torch_dW = np.transpose(torch_weight.grad.numpy(), (0, 2, 3, 1)) 155 | torch_db = torch_bias.grad.numpy() 156 | torch_dA_prev = np.transpose(torch_input.grad.numpy().squeeze(0), 157 | (1, 2, 0)) 158 | 159 | dZ = np.ones(numpy_output.shape) 160 | dW, db, dA_prev = conv2d_backward(dZ, cache, stride, padding) 161 | 162 | assert np.allclose(dW, torch_dW) 163 | assert np.allclose(db, torch_db) 164 | assert np.allclose(dA_prev, torch_dA_prev) 165 | -------------------------------------------------------------------------------- /dldemos/BasicCNN/pt_main.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from dldemos.BasicCNN.dataset import get_cat_set 8 | 9 | 10 | def init_model(device='cpu'): 11 | model = nn.Sequential(nn.Conv2d(3, 16, 11, 3), nn.BatchNorm2d(16), 12 | nn.ReLU(True), nn.MaxPool2d(2, 2), 13 | nn.Conv2d(16, 32, 5), nn.BatchNorm2d(32), 
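                           # shape trace, assuming 224x224 inputs:
                           # conv 11x11/s3 -> 72x72x16, pool -> 36x36x16,
                           # conv 5x5 -> 32x32x32, pool -> 16x16x32,
                           # conv 3x3 pad 1 -> 16x16x64, conv 3x3 -> 14x14x64,
                           # pool -> 7x7x64, flatten -> 3136 = Linear input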
14 | nn.ReLU(True), nn.MaxPool2d(2, 2), 15 | nn.Conv2d(32, 64, 3, padding=1), nn.BatchNorm2d(64), 16 | nn.ReLU(True), nn.Conv2d(64, 64, 3), 17 | nn.BatchNorm2d(64), nn.ReLU(True), 18 | nn.MaxPool2d(2, 2), nn.Flatten(), 19 | nn.Linear(3136, 2048), nn.ReLU(True), 20 | nn.Linear(2048, 1), nn.Sigmoid()).to(device) 21 | 22 | def weights_init(m): 23 | if isinstance(m, nn.Conv2d): 24 | torch.nn.init.xavier_normal_(m.weight) 25 | m.bias.data.fill_(0) 26 | elif isinstance(m, nn.BatchNorm2d): 27 | m.weight.data.normal_(1.0, 0.02) 28 | m.bias.data.fill_(0) 29 | elif isinstance(m, nn.Linear): 30 | torch.nn.init.xavier_normal_(m.weight) 31 | m.bias.data.fill_(0) 32 | 33 | model.apply(weights_init) 34 | 35 | print(model) 36 | return model 37 | 38 | 39 | def train(model: nn.Module, 40 | train_X: np.ndarray, 41 | train_Y: np.ndarray, 42 | optimizer: torch.optim.Optimizer, 43 | loss_fn: nn.Module, 44 | batch_size: int, 45 | num_epoch: int, 46 | device: str = 'cpu'): 47 | m = train_X.shape[0] 48 | indices = np.random.permutation(m) 49 | shuffle_X = train_X[indices, ...] 50 | shuffle_Y = train_Y[indices, ...] 51 | num_mini_batch = math.ceil(m / batch_size) 52 | mini_batch_XYs = [] 53 | for i in range(num_mini_batch): 54 | if i == num_mini_batch - 1: 55 | mini_batch_X = shuffle_X[i * batch_size:, ...] 56 | mini_batch_Y = shuffle_Y[i * batch_size:, ...] 57 | else: 58 | mini_batch_X = shuffle_X[i * batch_size:(i + 1) * batch_size, ...] 59 | mini_batch_Y = shuffle_Y[i * batch_size:(i + 1) * batch_size, ...] 60 | mini_batch_X = torch.from_numpy(mini_batch_X) 61 | mini_batch_Y = torch.from_numpy(mini_batch_Y).float() 62 | mini_batch_XYs.append((mini_batch_X, mini_batch_Y)) 63 | print(f'Num mini-batch: {num_mini_batch}') 64 | 65 | for e in range(num_epoch): 66 | for mini_batch_X, mini_batch_Y in mini_batch_XYs: 67 | mini_batch_X = mini_batch_X.to(device) 68 | mini_batch_Y = mini_batch_Y.to(device) 69 | mini_batch_Y_hat = model(mini_batch_X) 70 | loss: torch.Tensor = loss_fn(mini_batch_Y_hat, mini_batch_Y) 71 | 72 | optimizer.zero_grad() 73 | loss.backward() 74 | optimizer.step() 75 | 76 | print(f'Epoch {e}. 
loss: {loss}') 77 | 78 | 79 | def evaluate(model: nn.Module, 80 | test_X: np.ndarray, 81 | test_Y: np.ndarray, 82 | device='cpu'): 83 | test_X = torch.from_numpy(test_X).to(device) 84 | test_Y = torch.from_numpy(test_Y).to(device) 85 | test_Y_hat = model(test_X) 86 | predicts = torch.where(test_Y_hat > 0.5, 1, 0) 87 | score = torch.where(predicts == test_Y, 1.0, 0.0) 88 | acc = torch.mean(score) 89 | print(f'Accuracy: {acc}') 90 | 91 | 92 | def main(): 93 | train_X, train_Y, test_X, test_Y = get_cat_set( 94 | 'dldemos/LogisticRegression/data/archive/dataset', 95 | train_size=1500, 96 | format='nchw') 97 | print(train_X.shape) # (m, 3, 224, 224) 98 | print(train_Y.shape) # (m, 1) 99 | 100 | device = 'cuda:0' 101 | num_epoch = 20 102 | batch_size = 16 103 | model = init_model(device) 104 | optimizer = torch.optim.Adam(model.parameters(), 5e-4) 105 | loss_fn = torch.nn.BCELoss() 106 | train(model, train_X, train_Y, optimizer, loss_fn, batch_size, num_epoch, 107 | device) 108 | evaluate(model, test_X, test_Y, device) 109 | 110 | 111 | if __name__ == '__main__': 112 | main() 113 | -------------------------------------------------------------------------------- /dldemos/BasicCNN/tf_main.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from dldemos.BasicCNN.dataset import get_cat_set 4 | 5 | 6 | def init_model(input_shape=(224, 224, 3)): 7 | model = tf.keras.Sequential([ 8 | tf.keras.layers.Conv2D(16, 11, (3, 3), input_shape=input_shape), 9 | tf.keras.layers.BatchNormalization(3), 10 | tf.keras.layers.ReLU(), 11 | tf.keras.layers.MaxPool2D(), 12 | tf.keras.layers.Conv2D(32, 5), 13 | tf.keras.layers.BatchNormalization(3), 14 | tf.keras.layers.ReLU(), 15 | tf.keras.layers.MaxPool2D(), 16 | tf.keras.layers.Conv2D(64, 3, padding='same'), 17 | tf.keras.layers.BatchNormalization(3), 18 | tf.keras.layers.ReLU(), 19 | tf.keras.layers.Conv2D(64, 3), 20 | tf.keras.layers.BatchNormalization(3), 21 | tf.keras.layers.ReLU(), 22 | tf.keras.layers.MaxPool2D(), 23 | tf.keras.layers.Flatten(), 24 | tf.keras.layers.Dense(2048, 'relu'), 25 | tf.keras.layers.Dense(1, 'sigmoid') 26 | ]) 27 | 28 | model.summary() 29 | 30 | return model 31 | 32 | 33 | def main(): 34 | train_X, train_Y, test_X, test_Y = get_cat_set( 35 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1500) 36 | print(train_X.shape) # (m, 224, 224, 3) 37 | print(train_Y.shape) # (m, 1) 38 | 39 | model = init_model() 40 | model.compile(optimizer='adam', 41 | loss='binary_crossentropy', 42 | metrics=['accuracy']) 43 | 44 | model.fit(train_X, train_Y, epochs=20, batch_size=16) 45 | model.evaluate(test_X, test_Y) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /dldemos/BasicRNN/README.md: -------------------------------------------------------------------------------- 1 | 1. Download [IMDb dataset](https://ai.stanford.edu/~amaas/data/sentiment/). 2 | 3 | 2. Modify the directory in `read_imdb`. 4 | 5 | 3. Run `main.py` to train and test the language model. You can: 6 | 7 | - Use `rnn1` or `rnn2` 8 | - Switch the dataset by modifying `is_vocab` parameter of `get_dataloader_and_max_length` 9 | - Tune the hyperparameters 10 | 11 | to do more experiments. 
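Once a model is trained, generating new words is a short loop. A minimal sketch (the checkpoint path is hypothetical; how you save and load weights depends on your `main.py`):

```Python
import torch

from dldemos.BasicRNN.models import RNN2

model = RNN2().to('cuda:0')
# hypothetical checkpoint path; adapt to however main.py saves weights
model.load_state_dict(torch.load('work_dirs/rnn2.pth'))
model.eval()  # disable dropout before sampling
for _ in range(5):
    print(model.sample_word())  # emits letters until a space or 10 letters
```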
12 | -------------------------------------------------------------------------------- /dldemos/BasicRNN/constant.py: -------------------------------------------------------------------------------- 1 | EMBEDDING_LENGTH = 27 2 | LETTER_MAP = {' ': 0} 3 | ENCODING_MAP = [' '] 4 | for i in range(26): 5 | LETTER_MAP[chr(ord('a') + i)] = i + 1 6 | ENCODING_MAP.append(chr(ord('a') + i)) 7 | LETTER_LIST = list(LETTER_MAP.keys()) 8 | -------------------------------------------------------------------------------- /dldemos/BasicRNN/models.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from dldemos.BasicRNN.constant import EMBEDDING_LENGTH, LETTER_LIST, LETTER_MAP 7 | 8 | 9 | class RNN1(nn.Module): 10 | 11 | def __init__(self, hidden_units=32): 12 | super().__init__() 13 | self.hidden_units = hidden_units 14 | self.linear_a = nn.Linear(hidden_units + EMBEDDING_LENGTH, 15 | hidden_units) 16 | self.linear_y = nn.Linear(hidden_units, EMBEDDING_LENGTH) 17 | self.tanh = nn.Tanh() 18 | 19 | def forward(self, word: torch.Tensor): 20 | # word shape: [batch, max_word_length, embedding_length] 21 | batch, Tx = word.shape[0:2] 22 | 23 | # word shape: [max_word_length, batch, embedding_length] 24 | word = torch.transpose(word, 0, 1) 25 | 26 | # output shape: [max_word_length, batch, embedding_length] 27 | output = torch.empty_like(word) 28 | 29 | a = torch.zeros(batch, self.hidden_units, device=word.device) 30 | x = torch.zeros(batch, EMBEDDING_LENGTH, device=word.device) 31 | for i in range(Tx): 32 | next_a = self.tanh(self.linear_a(torch.cat((a, x), 1))) 33 | hat_y = self.linear_y(next_a) 34 | output[i] = hat_y 35 | x = word[i] 36 | a = next_a 37 | 38 | # output shape: [batch, max_word_length, embedding_length] 39 | return torch.transpose(output, 0, 1) 40 | 41 | @torch.no_grad() 42 | def language_model(self, word: torch.Tensor): 43 | # word shape: [batch, max_word_length, embedding_length] 44 | batch, Tx = word.shape[0:2] 45 | 46 | # word shape: [max_word_length, batch, embedding_length] 47 | # word_label shape: [max_word_length, batch] 48 | word = torch.transpose(word, 0, 1) 49 | word_label = torch.argmax(word, 2) 50 | 51 | # output shape: [batch] 52 | output = torch.ones(batch, device=word.device) 53 | 54 | a = torch.zeros(batch, self.hidden_units, device=word.device) 55 | x = torch.zeros(batch, EMBEDDING_LENGTH, device=word.device) 56 | for i in range(Tx): 57 | next_a = self.tanh(self.linear_a(torch.cat((a, x), 1))) 58 | tmp = self.linear_y(next_a) 59 | hat_y = F.softmax(tmp, 1) 60 | probs = hat_y[torch.arange(batch), word_label[i]] 61 | output *= probs 62 | x = word[i] 63 | a = next_a 64 | 65 | return output 66 | 67 | @torch.no_grad() 68 | def sample_word(self, device='cuda:0'): 69 | batch = 1 70 | output = '' 71 | 72 | a = torch.zeros(batch, self.hidden_units, device=device) 73 | x = torch.zeros(batch, EMBEDDING_LENGTH, device=device) 74 | for i in range(10): 75 | next_a = self.tanh(self.linear_a(torch.cat((a, x), 1))) 76 | tmp = self.linear_y(next_a) 77 | hat_y = F.softmax(tmp, 1) 78 | 79 | np_prob = hat_y[0].detach().cpu().numpy() 80 | letter = np.random.choice(LETTER_LIST, p=np_prob) 81 | output += letter 82 | 83 | if letter == ' ': 84 | break 85 | 86 | x = torch.zeros(batch, EMBEDDING_LENGTH, device=device) 87 | x[0][LETTER_MAP[letter]] = 1 88 | a = next_a 89 | 90 | return output 91 | 92 | 93 | class RNN2(torch.nn.Module): 94 | 95 | def __init__(self, 
hidden_units=64, embeding_dim=64, dropout_rate=0.2): 96 | super().__init__() 97 | self.drop = nn.Dropout(dropout_rate) 98 | self.encoder = nn.Embedding(EMBEDDING_LENGTH, embeding_dim) 99 | self.rnn = nn.GRU(embeding_dim, hidden_units, 1, batch_first=True) 100 | self.decoder = torch.nn.Linear(hidden_units, EMBEDDING_LENGTH) 101 | self.hidden_units = hidden_units 102 | 103 | self.init_weights() 104 | 105 | def init_weights(self): 106 | initrange = 0.1 107 | nn.init.uniform_(self.encoder.weight, -initrange, initrange) 108 | nn.init.zeros_(self.decoder.bias) 109 | nn.init.uniform_(self.decoder.weight, -initrange, initrange) 110 | 111 | def forward(self, word: torch.Tensor): 112 | # word shape: [batch, max_word_length] 113 | batch, Tx = word.shape[0:2] 114 | first_letter = word.new_zeros(batch, 1) 115 | x = torch.cat((first_letter, word[:, 0:-1]), 1) 116 | hidden = torch.zeros(1, batch, self.hidden_units, device=word.device) 117 | emb = self.drop(self.encoder(x)) 118 | output, hidden = self.rnn(emb, hidden) 119 | y = self.decoder(output.reshape(batch * Tx, -1)) 120 | 121 | return y.reshape(batch, Tx, -1) 122 | 123 | @torch.no_grad() 124 | def language_model(self, word: torch.Tensor): 125 | batch, Tx = word.shape[0:2] 126 | hat_y = self.forward(word) 127 | hat_y = F.softmax(hat_y, 2) 128 | output = torch.ones(batch, device=word.device) 129 | for i in range(Tx): 130 | probs = hat_y[torch.arange(batch), i, word[:, i]] 131 | output *= probs 132 | 133 | return output 134 | 135 | @torch.no_grad() 136 | def sample_word(self, device='cuda:0'): 137 | batch = 1 138 | output = '' 139 | 140 | hidden = torch.zeros(1, batch, self.hidden_units, device=device) 141 | x = torch.zeros(batch, 1, device=device, dtype=torch.long) 142 | for _ in range(10): 143 | emb = self.drop(self.encoder(x)) 144 | rnn_output, hidden = self.rnn(emb, hidden) 145 | hat_y = self.decoder(rnn_output) 146 | hat_y = F.softmax(hat_y, 2) 147 | 148 | np_prob = hat_y[0, 0].detach().cpu().numpy() 149 | letter = np.random.choice(LETTER_LIST, p=np_prob) 150 | output += letter 151 | 152 | if letter == ' ': 153 | break 154 | 155 | x = torch.zeros(batch, 1, device=device, dtype=torch.long) 156 | x[0] = LETTER_MAP[letter] 157 | 158 | return output 159 | -------------------------------------------------------------------------------- /dldemos/BasicRNN/read_imdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | 5 | def read_imdb(dir='data/aclImdb', split='pos', is_train=True): 6 | subdir = 'train' if is_train else 'test' 7 | dir = os.path.join(dir, subdir, split) 8 | lines = [] 9 | for file in os.listdir(dir): 10 | with open(os.path.join(dir, file), 'rb') as f: 11 | line = f.read().decode('utf-8') 12 | lines.append(line) 13 | return lines 14 | 15 | 16 | def read_imdb_words(dir='data/aclImdb', 17 | split='pos', 18 | is_train=True, 19 | n_files=1000): 20 | subdir = 'train' if is_train else 'test' 21 | dir = os.path.join(dir, subdir, split) 22 | all_str = '' 23 | for file in os.listdir(dir): 24 | if n_files <= 0: 25 | break 26 | with open(os.path.join(dir, file), 'rb') as f: 27 | line = f.read().decode('utf-8') 28 | all_str += line 29 | n_files -= 1 30 | 31 | words = re.sub(u'([^\u0020\u0061-\u007a])', '', all_str.lower()).split(' ') 32 | 33 | return words 34 | 35 | 36 | def read_imdb_vocab(dir='data/aclImdb'): 37 | fn = os.path.join(dir, 'imdb.vocab') 38 | with open(fn, 'rb') as f: 39 | word = f.read().decode('utf-8').replace('\n', ' ') 40 | words = 
re.sub(u'([^\u0020\u0061-\u007a])', '', 41 | word.lower()).split(' ') 42 | filtered_words = [w for w in words if len(w) > 0] 43 | 44 | return filtered_words 45 | 46 | 47 | def main(): 48 | vocab = read_imdb_vocab() 49 | print(vocab[0]) 50 | print(vocab[1]) 51 | 52 | lines = read_imdb() 53 | print('Length of the file:', len(lines)) 54 | print('lines[0]:', lines[0]) 55 | words = read_imdb_words(n_files=100) 56 | print('Length of the words:', len(words)) 57 | for i in range(5): 58 | print(words[i]) 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /dldemos/DeepNetwork/README.md: -------------------------------------------------------------------------------- 1 | 1. Install the repository 2 | 3 | ```shell 4 | python setup.py develop 5 | ``` 6 | 7 | 2. Download the dataset from https://www.kaggle.com/datasets/fusicfenta/cat-and-dog?resource=download and organize the directory as follows: 8 | 9 | ```plain text 10 | └─data 11 | └─archive 12 | └─dataset 13 | ├─single_prediction 14 | ├─test_set 15 | │ ├─cats 16 | │ └─dogs 17 | └─training_set 18 | ├─cats 19 | └─dogs 20 | ``` 21 | 22 | 3. Modify the path in `main.py`: 23 | 24 | ```Python 25 | train_X, train_Y, test_X, test_Y = get_cat_set( 26 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1500) 27 | ``` 28 | 29 | Replace 'dldemos/LogisticRegression/data/archive/dataset' with your path. 30 | 31 | 4. Run `main.py`. (You can open and close `save()` and `load()` using comment) 32 | 33 | You can edit the model hyper-parameters and see what will happen. 34 | -------------------------------------------------------------------------------- /dldemos/DeepNetwork/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Tuple 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | 8 | def load_set(data_path: str, cnt: int, img_shape): 9 | cat_dirs = sorted(os.listdir(os.path.join(data_path, 'cats'))) 10 | dog_dirs = sorted(os.listdir(os.path.join(data_path, 'dogs'))) 11 | images = [] 12 | for i, cat_dir in enumerate(cat_dirs): 13 | if i >= cnt: 14 | break 15 | name = os.path.join(data_path, 'cats', cat_dir) 16 | cat = cv2.imread(name) 17 | images.append(cat) 18 | 19 | for i, dog_dir in enumerate(dog_dirs): 20 | if i >= cnt: 21 | break 22 | name = os.path.join(data_path, 'dogs', dog_dir) 23 | dog = cv2.imread(name) 24 | images.append(dog) 25 | 26 | for i in range(len(images)): 27 | images[i] = cv2.resize(images[i], img_shape) 28 | images[i] = np.reshape(images[i], (-1)) 29 | images[i] = images[i].astype(np.float32) / 255.0 30 | 31 | return np.array(images) 32 | 33 | 34 | def get_cat_set( 35 | data_root: str, 36 | img_shape: Tuple[int, int] = (224, 224), 37 | train_size=1000, 38 | test_size=200, 39 | ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: 40 | 41 | train_X = load_set(os.path.join(data_root, 'training_set'), train_size, 42 | img_shape) 43 | test_X = load_set(os.path.join(data_root, 'test_set'), test_size, 44 | img_shape) 45 | 46 | train_Y = np.array([1] * train_size + [0] * train_size) 47 | test_Y = np.array([1] * test_size + [0] * test_size) 48 | 49 | return train_X.T, np.expand_dims(train_Y, 50 | 0), test_X.T, np.expand_dims(test_Y.T, 0) 51 | -------------------------------------------------------------------------------- /dldemos/DeepNetwork/main.py: -------------------------------------------------------------------------------- 1 | from dldemos.DeepNetwork.dataset 
import get_cat_set 2 | from dldemos.DeepNetwork.model import DeepNetwork, train 3 | 4 | 5 | def main(): 6 | train_X, train_Y, test_X, test_Y = get_cat_set( 7 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1500) 8 | n_x = train_X.shape[0] 9 | model = DeepNetwork([n_x, 30, 30, 20, 20, 1], 10 | ['relu', 'relu', 'relu', 'relu', 'sigmoid']) 11 | model.load('work_dirs/model.npz') 12 | train(model, 13 | train_X, 14 | train_Y, 15 | 500, 16 | learning_rate=0.01, 17 | print_interval=10, 18 | test_X=test_X, 19 | test_Y=test_Y) 20 | model.save('work_dirs/model.npz') 21 | 22 | 23 | if __name__ == '__main__': 24 | main() 25 | -------------------------------------------------------------------------------- /dldemos/DeepNetwork/model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import List 3 | 4 | import numpy as np 5 | 6 | from dldemos.utils import get_activation_de_func, get_activation_func 7 | 8 | 9 | class BaseRegressionModel(metaclass=abc.ABCMeta): 10 | 11 | def __init__(self): 12 | pass 13 | 14 | @abc.abstractmethod 15 | def forward(self, X: np.ndarray, train_mode=True) -> np.ndarray: 16 | pass 17 | 18 | @abc.abstractmethod 19 | def backward(self, Y: np.ndarray) -> np.ndarray: 20 | pass 21 | 22 | @abc.abstractmethod 23 | def gradient_descent(self, learning_rate: float) -> np.ndarray: 24 | pass 25 | 26 | @abc.abstractmethod 27 | def save(self, filename: str): 28 | pass 29 | 30 | @abc.abstractmethod 31 | def load(self, filename: str): 32 | pass 33 | 34 | def loss(self, Y: np.ndarray, Y_hat: np.ndarray) -> np.ndarray: 35 | return np.mean(-(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat))) 36 | 37 | def evaluate(self, X: np.ndarray, Y: np.ndarray, return_loss=False): 38 | Y_hat = self.forward(X, train_mode=False) 39 | Y_hat_predict = np.where(Y_hat > 0.5, 1, 0) 40 | accuracy = np.mean(np.where(Y_hat_predict == Y, 1, 0)) 41 | if return_loss: 42 | loss = self.loss(Y, Y_hat) 43 | return accuracy, loss 44 | else: 45 | return accuracy 46 | 47 | 48 | class DeepNetwork(BaseRegressionModel): 49 | 50 | def __init__(self, neuron_cnt: List[int], activation_func: List[str]): 51 | assert len(neuron_cnt) - 1 == len(activation_func) 52 | self.num_layer = len(neuron_cnt) - 1 53 | self.neuron_cnt = neuron_cnt 54 | self.activation_func = activation_func 55 | self.W: List[np.ndarray] = [] 56 | self.b: List[np.ndarray] = [] 57 | for i in range(self.num_layer): 58 | self.W.append( 59 | np.random.randn(neuron_cnt[i + 1], neuron_cnt[i]) * 0.2) 60 | self.b.append(np.zeros((neuron_cnt[i + 1], 1))) 61 | 62 | self.Z_cache = [None] * self.num_layer 63 | self.A_cache = [None] * (self.num_layer + 1) 64 | self.dW_cache = [None] * self.num_layer 65 | self.db_cache = [None] * self.num_layer 66 | 67 | def forward(self, X, train_mode=True): 68 | if train_mode: 69 | self.m = X.shape[1] 70 | A = X 71 | self.A_cache[0] = A 72 | for i in range(self.num_layer): 73 | Z = np.dot(self.W[i], A) + self.b[i] 74 | A = get_activation_func(self.activation_func[i])(Z) 75 | if train_mode: 76 | self.Z_cache[i] = Z 77 | self.A_cache[i + 1] = A 78 | return A 79 | 80 | def backward(self, Y): 81 | dA = -Y / self.A_cache[-1] + (1 - Y) / (1 - self.A_cache[-1]) 82 | assert (self.m == Y.shape[1]) 83 | 84 | for i in range(self.num_layer - 1, -1, -1): 85 | dZ = dA * get_activation_de_func(self.activation_func[i])( 86 | self.Z_cache[i]) 87 | dW = np.dot(dZ, self.A_cache[i].T) / self.m 88 | db = np.mean(dZ, axis=1, keepdims=True) 89 | dA = np.dot(self.W[i].T, dZ) 90 | 
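            # dA now holds dA_prev for layer i - 1; note it is computed from
            # W[i] before gradient_descent() updates the weights, as required.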
self.dW_cache[i] = dW 91 | self.db_cache[i] = db 92 | 93 | def gradient_descent(self, learning_rate): 94 | for i in range(self.num_layer): 95 | self.W[i] -= learning_rate * self.dW_cache[i] 96 | self.b[i] -= learning_rate * self.db_cache[i] 97 | 98 | def save(self, filename: str): 99 | save_dict = {} 100 | for i in range(len(self.W)): 101 | save_dict['W' + str(i)] = self.W[i] 102 | for i in range(len(self.b)): 103 | save_dict['b' + str(i)] = self.b[i] 104 | np.savez(filename, **save_dict) 105 | 106 | def load(self, filename: str): 107 | params = np.load(filename) 108 | for i in range(len(self.W)): 109 | self.W[i] = params['W' + str(i)] 110 | for i in range(len(self.b)): 111 | self.b[i] = params['b' + str(i)] 112 | 113 | 114 | def train(model: BaseRegressionModel, 115 | X, 116 | Y, 117 | step, 118 | learning_rate, 119 | print_interval=100, 120 | test_X=None, 121 | test_Y=None): 122 | for s in range(step): 123 | Y_hat = model.forward(X) 124 | model.backward(Y) 125 | model.gradient_descent(learning_rate) 126 | if s % print_interval == 0: 127 | loss = model.loss(Y, Y_hat) 128 | print(f'Step: {s}') 129 | print(f'Train loss: {loss}') 130 | if test_X is not None and test_Y is not None: 131 | accuracy, loss = model.evaluate(test_X, 132 | test_Y, 133 | return_loss=True) 134 | print(f'Test loss: {loss}') 135 | print(f'Test accuracy: {accuracy}') 136 | -------------------------------------------------------------------------------- /dldemos/FourierFeature/kernel_regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "\n", 11 | "import numpy as np\n", 12 | "import matplotlib.pyplot as plt" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "def func(x):\n", 22 | " return np.sin(np.pi * x) * (1 - x)\n", 23 | "\n", 24 | "xs = np.linspace(-1, 1, 100)\n", 25 | "ys = func(xs)\n", 26 | "plt.plot(xs, ys)\n", 27 | "plt.show()" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "sample_x = np.linspace(-1, 1, 10)\n", 37 | "sample_y = func(sample_x)\n", 38 | "plt.scatter(sample_x, sample_y)\n", 39 | "plt.show()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "def kernel_func(x_ref, x_input, sigma=1):\n", 49 | " return np.exp(-(x_input-x_ref)**2 / (2 * sigma**2))\n", 50 | "\n", 51 | "xs = np.linspace(-1, 1, 100)\n", 52 | "ys = kernel_func(0, xs)\n", 53 | "plt.plot(xs, ys)\n", 54 | "plt.show()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "def kernel_regression(xs, ys, x_input, sigma=1):\n", 64 | " # xs: [n, ]\n", 65 | " # ys: [n, ]\n", 66 | " # x_input: [m, ]\n", 67 | " N = xs.shape[0]\n", 68 | " xs = np.expand_dims(xs, 1)\n", 69 | " ys = np.expand_dims(ys, 1)\n", 70 | " x_input = np.expand_dims(x_input, 0)\n", 71 | " x_input = np.repeat(x_input, N, 0)\n", 72 | " weight = kernel_func(xs, x_input, sigma) # [n, m]\n", 73 | " weight_sum = np.sum(weight, 0)\n", 74 | " weight_dot = weight.T @ ys\n", 75 | " weight_dot = np.squeeze(weight_dot, 1)\n", 76 | " res = weight_dot / weight_sum\n", 77 | " return res\n", 78 | "\n", 79 | "sigma = 0.5\n", 80 
| "xs = np.linspace(-1, 1, 100)\n", 81 | "ys = kernel_regression(sample_x, sample_y, xs, sigma)\n", 82 | "plt.title(f'sigma = {sigma}')\n", 83 | "plt.plot(xs, ys)\n", 84 | "plt.show()\n" 85 | ] 86 | } 87 | ], 88 | "metadata": { 89 | "kernelspec": { 90 | "display_name": "pt", 91 | "language": "python", 92 | "name": "python3" 93 | }, 94 | "language_info": { 95 | "codemirror_mode": { 96 | "name": "ipython", 97 | "version": 3 98 | }, 99 | "file_extension": ".py", 100 | "mimetype": "text/x-python", 101 | "name": "python", 102 | "nbconvert_exporter": "python", 103 | "pygments_lexer": "ipython3", 104 | "version": "3.7.13" 105 | } 106 | }, 107 | "nbformat": 4, 108 | "nbformat_minor": 2 109 | } 110 | -------------------------------------------------------------------------------- /dldemos/FourierFeature/misuzu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SingleZombie/DL-Demos/739a21ff90f411c318e098823581afb3f8a1d010/dldemos/FourierFeature/misuzu.png -------------------------------------------------------------------------------- /dldemos/Initialization/README.md: -------------------------------------------------------------------------------- 1 | Run the `main.py`: 2 | 3 | ```shell 4 | python dldemos/Initialization/main.py 5 | ``` 6 | -------------------------------------------------------------------------------- /dldemos/Initialization/main.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import List 3 | 4 | import numpy as np 5 | 6 | from dldemos.Initialization.points_classification import (generate_plot_set, 7 | generate_points, 8 | plot_points, 9 | visualize) 10 | from dldemos.utils import get_activation_de_func, get_activation_func, sigmoid 11 | 12 | 13 | class BaseRegressionModel(metaclass=abc.ABCMeta): 14 | 15 | def __init__(self): 16 | pass 17 | 18 | @abc.abstractmethod 19 | def forward(self, X: np.ndarray, train_mode=True) -> np.ndarray: 20 | pass 21 | 22 | @abc.abstractmethod 23 | def backward(self, Y: np.ndarray) -> np.ndarray: 24 | pass 25 | 26 | @abc.abstractmethod 27 | def gradient_descent(self, learning_rate: float) -> np.ndarray: 28 | pass 29 | 30 | def loss(self, Y: np.ndarray, Y_hat: np.ndarray) -> np.ndarray: 31 | return np.mean(-(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat))) 32 | 33 | def evaluate(self, X: np.ndarray, Y: np.ndarray, return_loss=False): 34 | Y_hat = self.forward(X, train_mode=False) 35 | Y_hat_predict = np.where(Y_hat > 0.5, 1, 0) 36 | accuracy = np.mean(np.where(Y_hat_predict == Y, 1, 0)) 37 | if return_loss: 38 | loss = self.loss(Y, Y_hat) 39 | return accuracy, loss 40 | else: 41 | return accuracy 42 | 43 | 44 | class DeepNetwork(BaseRegressionModel): 45 | 46 | def __init__(self, 47 | neuron_cnt: List[int], 48 | activation_func: List[str], 49 | initialization='zeros'): 50 | assert len(neuron_cnt) - 2 == len(activation_func) 51 | self.num_layer = len(neuron_cnt) - 1 52 | self.neuron_cnt = neuron_cnt 53 | self.activation_func = activation_func 54 | self.W: List[np.ndarray] = [] 55 | self.b: List[np.ndarray] = [] 56 | for i in range(self.num_layer): 57 | if initialization == 'zeros': 58 | self.W.append(np.zeros((neuron_cnt[i + 1], neuron_cnt[i]))) 59 | elif initialization == 'random': 60 | self.W.append( 61 | np.random.randn(neuron_cnt[i + 1], neuron_cnt[i]) * 5) 62 | elif initialization == 'he': 63 | self.W.append( 64 | np.random.randn(neuron_cnt[i + 1], neuron_cnt[i]) * 65 | np.sqrt(2 / neuron_cnt[i])) 66 | 
self.b.append(np.zeros((neuron_cnt[i + 1], 1))) 67 | 68 | self.Z_cache = [None] * self.num_layer 69 | self.A_cache = [None] * (self.num_layer + 1) 70 | self.dW_cache = [None] * self.num_layer 71 | self.db_cache = [None] * self.num_layer 72 | 73 | def forward(self, X, train_mode=True): 74 | if train_mode: 75 | self.m = X.shape[1] 76 | A = X 77 | self.A_cache[0] = A 78 | for i in range(self.num_layer): 79 | Z = np.dot(self.W[i], A) + self.b[i] 80 | if i == self.num_layer - 1: 81 | A = sigmoid(Z) 82 | else: 83 | A = get_activation_func(self.activation_func[i])(Z) 84 | if train_mode: 85 | self.Z_cache[i] = Z 86 | self.A_cache[i + 1] = A 87 | return A 88 | 89 | def backward(self, Y): 90 | assert (self.m == Y.shape[1]) 91 | 92 | dA = 0 93 | for i in range(self.num_layer - 1, -1, -1): 94 | if i == self.num_layer - 1: 95 | dZ = self.A_cache[-1] - Y 96 | else: 97 | dZ = dA * get_activation_de_func(self.activation_func[i])( 98 | self.Z_cache[i]) 99 | dW = np.dot(dZ, self.A_cache[i].T) / self.m 100 | db = np.mean(dZ, axis=1, keepdims=True) 101 | dA = np.dot(self.W[i].T, dZ) 102 | self.dW_cache[i] = dW 103 | self.db_cache[i] = db 104 | 105 | def gradient_descent(self, learning_rate): 106 | for i in range(self.num_layer): 107 | self.W[i] -= learning_rate * self.dW_cache[i] 108 | self.b[i] -= learning_rate * self.db_cache[i] 109 | 110 | 111 | def train(model: BaseRegressionModel, 112 | X, 113 | Y, 114 | step, 115 | learning_rate, 116 | print_interval=100, 117 | test_X=None, 118 | test_Y=None): 119 | for s in range(step): 120 | Y_hat = model.forward(X) 121 | model.backward(Y) 122 | model.gradient_descent(learning_rate) 123 | if s % print_interval == 0: 124 | loss = model.loss(Y, Y_hat) 125 | print(f'Step: {s}') 126 | print(f'Train loss: {loss}') 127 | if test_X is not None and test_Y is not None: 128 | accuracy, loss = model.evaluate(test_X, 129 | test_Y, 130 | return_loss=True) 131 | print(f'Test loss: {loss}') 132 | print(f'Test accuracy: {accuracy}') 133 | 134 | 135 | def main(): 136 | train_X, train_Y = generate_points(400) 137 | plot_points(train_X, train_Y) 138 | plot_X = generate_plot_set() 139 | 140 | n_x = train_X.shape[0] 141 | neuron_list = [n_x, 10, 5, 1] 142 | activation_list = ['relu', 'relu'] 143 | model1 = DeepNetwork(neuron_list, activation_list, 'zeros') 144 | model2 = DeepNetwork(neuron_list, activation_list, 'random') 145 | model3 = DeepNetwork(neuron_list, activation_list, 'he') 146 | train(model1, train_X, train_Y, 20000, 0.01, 1000) 147 | train(model2, train_X, train_Y, 20000, 0.01, 1000) 148 | train(model3, train_X, train_Y, 20000, 0.01, 1000) 149 | 150 | plot_result1 = model1.forward(plot_X, False) 151 | plot_result2 = model2.forward(plot_X, False) 152 | plot_result3 = model3.forward(plot_X, False) 153 | 154 | visualize(train_X, train_Y, plot_result1) 155 | visualize(train_X, train_Y, plot_result2) 156 | visualize(train_X, train_Y, plot_result3) 157 | 158 | 159 | if __name__ == '__main__': 160 | main() 161 | -------------------------------------------------------------------------------- /dldemos/Initialization/points_classification.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | 5 | def generate_points(cnt): 6 | 7 | def draw_ring(cnt, inner_radius, outer_radius): 8 | angle_arr = np.random.rand(cnt) * np.pi * 2 9 | length_arr = np.random.rand(cnt) * (outer_radius - 10 | inner_radius) + inner_radius 11 | return length_arr * np.cos(angle_arr), length_arr * np.sin(angle_arr) 12 | 
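    # Note: drawing the radius uniformly makes points slightly denser near the
    # inner edge of each ring (density ~ 1/r), which is fine for this demo.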
13 | red_cnt = cnt // 2 14 | blue_cnt = cnt - red_cnt 15 | 16 | red_x, red_y = draw_ring(red_cnt, 5, 6) 17 | blue_x, blue_y = draw_ring(blue_cnt, 6, 7) 18 | X = np.stack((np.concatenate( 19 | (red_x, blue_x)), np.concatenate((red_y, blue_y))), 1) 20 | Y = np.array([0] * red_cnt + [1] * blue_cnt) 21 | return X.T, Y[..., np.newaxis].T 22 | 23 | 24 | def plot_points(X, Y): 25 | new_X = X.T 26 | Y = np.squeeze(Y, 0) 27 | c = np.where(Y == 0, 'r', 'b') 28 | new_x = new_X[:, 0] 29 | new_y = new_X[:, 1] 30 | plt.scatter(new_x, new_y, color=c) 31 | plt.show() 32 | 33 | 34 | def generate_plot_set(): 35 | x = np.linspace(-10, 10, 100) 36 | y = np.linspace(-10, 10, 100) 37 | xx, yy = np.meshgrid(x, y) 38 | xx = xx.reshape(-1) 39 | yy = yy.reshape(-1) 40 | return np.stack((xx, yy), axis=1).T 41 | 42 | 43 | def visualize(X, Y, plot_set_result: np.ndarray): 44 | x = np.linspace(-10, 10, 100) 45 | y = np.linspace(-10, 10, 100) 46 | xx, yy = np.meshgrid(x, y) 47 | color = plot_set_result.squeeze() 48 | c = np.where(color < 0.5, 'r', 'b') 49 | plt.scatter(xx, yy, c=c, marker='s') 50 | 51 | plt.xlim(-10, 10) 52 | plt.ylim(-10, 10) 53 | 54 | origin_x = X.T[:, 0] 55 | origin_y = X.T[:, 1] 56 | origin_color = np.where(Y.squeeze() < 0.5, '#AA0000', '#0000AA') 57 | 58 | plt.scatter(origin_x, origin_y, c=origin_color) 59 | 60 | plt.show() 61 | 62 | 63 | def main(): 64 | plot_points(*generate_points(400)) 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /dldemos/LogisticRegression/README.md: -------------------------------------------------------------------------------- 1 | 1. Download the dataset from https://www.kaggle.com/datasets/fusicfenta/cat-and-dog?resource=download and organize the directory as follows: 2 | 3 | ```plain text 4 | └─data 5 | └─archive 6 | └─dataset 7 | ├─single_prediction 8 | ├─test_set 9 | │ ├─cats 10 | │ └─dogs 11 | └─training_set 12 | ├─cats 13 | └─dogs 14 | ``` 15 | 16 | 2. Run `python main.py` in the `./LogisticRegression` directory (or modify the data path in `main.py`).
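For reference, the update implemented in `main.py` falls straight out of the binary cross-entropy loss. With $A = \sigma(w^T X + b)$ over a batch of $m$ examples:

$$
L = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log a^{(i)} + (1 - y^{(i)}) \log(1 - a^{(i)}) \right]
$$

$$
dZ = A - Y, \qquad dw = \frac{1}{m} X \, dZ^{T}, \qquad db = \frac{1}{m} \sum_{i} dZ^{(i)}
$$

which is exactly what `train_step` computes before applying $w \leftarrow w - \alpha \, dw$ and $b \leftarrow b - \alpha \, db$.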
17 | -------------------------------------------------------------------------------- /dldemos/LogisticRegression/main.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | from glob import glob 3 | from random import shuffle 4 | 5 | import cv2 6 | import numpy as np 7 | 8 | 9 | def generate_data(dir='data/archive/dataset', input_shape=(224, 224)): 10 | 11 | def load_dataset(dir, data_num): 12 | cat_images = glob(osp.join(dir, 'cats', '*.jpg')) 13 | dog_images = glob(osp.join(dir, 'dogs', '*.jpg')) 14 | cat_tensor = [] 15 | dog_tensor = [] 16 | 17 | for idx, image in enumerate(cat_images): 18 | if idx >= data_num: 19 | break 20 | i = cv2.imread(image) / 255 21 | i = cv2.resize(i, input_shape) 22 | cat_tensor.append(i) 23 | 24 | for idx, image in enumerate(dog_images): 25 | if idx >= data_num: 26 | break 27 | i = cv2.imread(image) / 255 28 | i = cv2.resize(i, input_shape) 29 | dog_tensor.append(i) 30 | 31 | X = cat_tensor + dog_tensor 32 | Y = [1] * len(cat_tensor) + [0] * len(dog_tensor) 33 | X_Y = list(zip(X, Y)) 34 | shuffle(X_Y) 35 | X, Y = zip(*X_Y) 36 | return X, Y 37 | 38 | train_X, train_Y = load_dataset(osp.join(dir, 'training_set'), 400) 39 | test_X, test_Y = load_dataset(osp.join(dir, 'test_set'), 100) 40 | return train_X, train_Y, test_X, test_Y 41 | 42 | 43 | def resize_input(a: np.ndarray): 44 | h, w, c = a.shape 45 | a.resize((h * w * c)) 46 | return a 47 | 48 | 49 | def init_weights(n_x=224 * 224 * 3): 50 | w = np.zeros((n_x, 1)) 51 | b = 0.0 52 | return w, b 53 | 54 | 55 | def sigmoid(x): 56 | return 1 / (1 + np.exp(-x)) 57 | 58 | 59 | def predict(w, b, X): 60 | return sigmoid(np.dot(w.T, X) + b) 61 | 62 | 63 | def loss(y_hat, y): 64 | return np.mean(-(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))) 65 | 66 | 67 | def train_step(w, b, X, Y, lr): 68 | m = X.shape[1] 69 | Z = np.dot(w.T, X) + b 70 | A = sigmoid(Z) 71 | d_Z = A - Y 72 | d_w = np.dot(X, d_Z.T) / m 73 | d_b = np.mean(d_Z) 74 | return w - lr * d_w, b - lr * d_b 75 | 76 | 77 | def train(train_X, train_Y, step=1000, learning_rate=0.00001): 78 | w, b = init_weights() 79 | print(f'learning rate: {learning_rate}') 80 | for i in range(step): 81 | w, b = train_step(w, b, train_X, train_Y, learning_rate) 82 | if i % 10 == 0: 83 | y_hat = predict(w, b, train_X) 84 | ls = loss(y_hat, train_Y) 85 | print(f'step {i} loss: {ls}') 86 | return w, b 87 | 88 | 89 | def test(w, b, test_X, test_Y): 90 | y_hat = predict(w, b, test_X) 91 | predicts = np.where(y_hat > 0.5, 1, 0) 92 | score = np.mean(np.where(predicts == test_Y, 1, 0)) 93 | print(f'Accuracy: {score}') 94 | 95 | 96 | def main(): 97 | train_X, train_Y, test_X, test_Y = generate_data() 98 | 99 | train_X = [resize_input(x) for x in train_X] 100 | test_X = [resize_input(x) for x in test_X] 101 | train_X = np.array(train_X).T 102 | train_Y = np.array(train_Y) 103 | train_Y = train_Y.reshape((1, -1)) 104 | test_X = np.array(test_X).T 105 | test_Y = np.array(test_Y) 106 | test_Y = test_Y.reshape((1, -1)) 107 | print(f'Training set size: {train_X.shape[1]}') 108 | print(f'Test set size: {test_X.shape[1]}') 109 | 110 | w, b = train(train_X, train_Y, learning_rate=0.0002) 111 | 112 | test(w, b, test_X, test_Y) 113 | 114 | 115 | if __name__ == '__main__': 116 | main() 117 | -------------------------------------------------------------------------------- /dldemos/MulticlassClassification/README.md: -------------------------------------------------------------------------------- 1 | 1. 
Install the repository 2 | 3 | ```shell 4 | python setup.py develop 5 | ``` 6 | 7 | 2. Install TensorFlow or PyTorch 8 | 9 | 3. Run `tf_main.py` or `pt_main.py`. 10 | -------------------------------------------------------------------------------- /dldemos/MulticlassClassification/points_classification.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | LLIM = 0 5 | RLIM = 1 6 | 7 | 8 | def generate_points(cnt): 9 | x = np.random.rand(cnt) 10 | y = np.random.rand(cnt) 11 | X = np.stack([x, y], 1) 12 | Y = np.where(y > x * x, np.where(y > x**0.5, 0, 1), 2) 13 | return X.T, Y[..., np.newaxis].T 14 | 15 | 16 | def plot_points(X, Y): 17 | new_X = X.T 18 | Y = np.squeeze(Y, 0) 19 | color_map = np.array(['r', 'g', 'b']) 20 | c = color_map[Y] 21 | new_x = new_X[:, 0] 22 | new_y = new_X[:, 1] 23 | plt.scatter(new_x, new_y, color=c) 24 | plt.show() 25 | 26 | 27 | def generate_plot_set(): 28 | x = np.linspace(LLIM, RLIM, 100) 29 | y = np.linspace(LLIM, RLIM, 100) 30 | xx, yy = np.meshgrid(x, y) 31 | xx = xx.reshape(-1) 32 | yy = yy.reshape(-1) 33 | return np.stack((xx, yy), axis=1).T 34 | 35 | 36 | def visualize(X, Y, plot_set_result: np.ndarray): 37 | x = np.linspace(LLIM, RLIM, 100) 38 | y = np.linspace(LLIM, RLIM, 100) 39 | xx, yy = np.meshgrid(x, y) 40 | color = plot_set_result.squeeze() 41 | color_map_1 = np.array(['r', 'g', 'b']) 42 | color_map_2 = ['#AA0000', '#00AA00', '#0000AA'] 43 | 44 | c = color_map_1[color] 45 | plt.scatter(xx, yy, c=c, marker='s') 46 | 47 | plt.xlim(LLIM, RLIM) 48 | plt.ylim(LLIM, RLIM) 49 | 50 | origin_x = X.T[:, 0] 51 | origin_y = X.T[:, 1] 52 | origin_color = Y.squeeze(0) 53 | origin_color = [color_map_2[oc] for oc in origin_color] 54 | 55 | plt.scatter(origin_x, origin_y, c=origin_color) 56 | 57 | plt.show() 58 | 59 | 60 | def main(): 61 | plot_points(*generate_points(400)) 62 | 63 | 64 | if __name__ == '__main__': 65 | main() 66 | -------------------------------------------------------------------------------- /dldemos/MulticlassClassification/pt_main.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn.functional as F 6 | 7 | from dldemos.MulticlassClassification.points_classification import ( 8 | generate_plot_set, generate_points, plot_points, visualize) 9 | 10 | 11 | class MulticlassClassificationNet(): 12 | 13 | def __init__(self, neuron_cnt: List[int]): 14 | self.num_layer = len(neuron_cnt) - 1 15 | self.neuron_cnt = neuron_cnt 16 | self.W = [] 17 | self.b = [] 18 | for i in range(self.num_layer): 19 | new_W = torch.empty(neuron_cnt[i + 1], neuron_cnt[i]) 20 | new_b = torch.empty(neuron_cnt[i + 1], 1) 21 | torch.nn.init.kaiming_normal_(new_W, nonlinearity='relu') 22 | torch.nn.init.kaiming_normal_(new_b, nonlinearity='relu') 23 | self.W.append(torch.nn.Parameter(new_W)) 24 | self.b.append(torch.nn.Parameter(new_b)) 25 | self.trainable_vars = self.W + self.b 26 | self.loss_fn = torch.nn.CrossEntropyLoss() 27 | 28 | def forward(self, X): 29 | A = X 30 | for i in range(self.num_layer): 31 | Z = torch.matmul(self.W[i], A) + self.b[i] 32 | if i == self.num_layer - 1: 33 | A = F.softmax(Z, 0) 34 | else: 35 | A = F.relu(Z) 36 | 37 | return A 38 | 39 | def loss(self, Y, Y_hat): 40 | return self.loss_fn(Y_hat.T, Y) 41 | 42 | def evaluate(self, X, Y, return_loss=False): 43 | Y_hat = self.forward(X) 44 | Y_predict = Y 45 | Y_hat_predict = 
torch.argmax(Y_hat, 0) 46 | res = (Y_predict == Y_hat_predict).float() 47 | accuracy = torch.mean(res) 48 | if return_loss: 49 | loss = self.loss(Y, Y_hat) 50 | return accuracy, loss 51 | else: 52 | return accuracy 53 | 54 | 55 | def train(model: MulticlassClassificationNet, 56 | X, 57 | Y, 58 | step, 59 | learning_rate, 60 | print_interval=100): 61 | optimizer = torch.optim.Adam(model.trainable_vars, learning_rate) 62 | for s in range(step): 63 | Y_hat = model.forward(X) 64 | cost = model.loss(Y, Y_hat) 65 | optimizer.zero_grad() 66 | cost.backward() 67 | optimizer.step() 68 | if s % print_interval == 0: 69 | accuracy, loss = model.evaluate(X, Y, return_loss=True) 70 | print(f'Step: {s}') 71 | print(f'Accuracy: {accuracy}') 72 | print(f'Train loss: {loss}') 73 | 74 | 75 | def main(): 76 | train_X, train_Y = generate_points(400) 77 | plot_points(train_X, train_Y) 78 | plot_X = generate_plot_set() 79 | 80 | # X: [2, m] 81 | # Y: [1, m] 82 | 83 | train_X_pt = torch.tensor(train_X, dtype=torch.float32) 84 | train_Y_pt = torch.tensor(train_Y.squeeze(0), dtype=torch.long) 85 | 86 | print(train_X_pt.shape) 87 | print(train_Y_pt.shape) 88 | 89 | # X: [2, m] 90 | # Y: [m] 91 | 92 | n_x = 2 93 | neuron_list = [n_x, 10, 10, 3] 94 | model = MulticlassClassificationNet(neuron_list) 95 | train(model, train_X_pt, train_Y_pt, 5000, 0.001, 1000) 96 | 97 | plot_result = model.forward(torch.Tensor(plot_X)) 98 | plot_result = torch.argmax(plot_result, 0).numpy() 99 | plot_result = np.expand_dims(plot_result, 0) 100 | 101 | visualize(train_X, train_Y, plot_result) 102 | 103 | 104 | if __name__ == '__main__': 105 | main() 106 | -------------------------------------------------------------------------------- /dldemos/MulticlassClassification/tf_main.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from dldemos.MulticlassClassification.points_classification import ( 7 | generate_plot_set, generate_points, plot_points, visualize) 8 | 9 | 10 | class MulticlassClassificationNet(): 11 | 12 | def __init__(self, neuron_cnt: List[int]): 13 | self.num_layer = len(neuron_cnt) - 1 14 | self.neuron_cnt = neuron_cnt 15 | self.W = [] 16 | self.b = [] 17 | initializer = tf.keras.initializers.HeNormal(seed=1) 18 | for i in range(self.num_layer): 19 | self.W.append( 20 | tf.Variable( 21 | initializer(shape=(neuron_cnt[i + 1], neuron_cnt[i])))) 22 | self.b.append( 23 | tf.Variable(initializer(shape=(neuron_cnt[i + 1], 1)))) 24 | self.trainable_vars = self.W + self.b 25 | 26 | def forward(self, X): 27 | A = X 28 | for i in range(self.num_layer): 29 | Z = tf.matmul(self.W[i], A) + self.b[i] 30 | if i == self.num_layer - 1: 31 | A = tf.keras.activations.softmax(Z) 32 | else: 33 | A = tf.keras.activations.relu(Z) 34 | 35 | return A 36 | 37 | def loss(self, Y, Y_hat): 38 | return tf.reduce_mean( 39 | tf.keras.losses.categorical_crossentropy(tf.transpose(Y), 40 | tf.transpose(Y_hat))) 41 | 42 | def evaluate(self, X, Y, return_loss=False): 43 | Y_hat = self.forward(X) 44 | Y_predict = tf.argmax(Y, 0) 45 | Y_hat_predict = tf.argmax(Y_hat, 0) 46 | res = tf.cast(Y_predict == Y_hat_predict, tf.float32) 47 | accuracy = tf.reduce_mean(res) 48 | if return_loss: 49 | loss = self.loss(Y, Y_hat) 50 | return accuracy, loss 51 | else: 52 | return accuracy 53 | 54 | 55 | def train(model: MulticlassClassificationNet, 56 | X, 57 | Y, 58 | step, 59 | learning_rate, 60 | print_interval=100): 61 | optimizer = 
tf.keras.optimizers.Adam(learning_rate) 62 | for s in range(step): 63 | with tf.GradientTape() as tape: 64 | Y_hat = model.forward(X) 65 | cost = model.loss(Y, Y_hat) 66 | grads = tape.gradient(cost, model.trainable_vars) 67 | optimizer.apply_gradients(zip(grads, model.trainable_vars)) 68 | if s % print_interval == 0: 69 | accuracy, loss = model.evaluate(X, Y, return_loss=True) 70 | print(f'Step: {s}') 71 | print(f'Accuracy: {accuracy}') 72 | print(f'Train loss: {loss}') 73 | 74 | 75 | def main(): 76 | train_X, train_Y = generate_points(400) 77 | plot_points(train_X, train_Y) 78 | plot_X = generate_plot_set() 79 | 80 | # X: [2, m] 81 | # Y: [1, m] 82 | 83 | train_X_tf = tf.constant(train_X, dtype=tf.float32) 84 | train_Y_tf = tf.transpose(tf.one_hot(train_Y.squeeze(0), 3)) 85 | 86 | # X: [2, m] 87 | # Y: [3, m] 88 | 89 | n_x = 2 90 | neuron_list = [n_x, 10, 10, 3] 91 | model = MulticlassClassificationNet(neuron_list) 92 | train(model, train_X_tf, train_Y_tf, 5000, 0.001, 1000) 93 | 94 | plot_result = model.forward(plot_X) 95 | plot_result = tf.argmax(plot_result, 0).numpy() 96 | plot_result = np.expand_dims(plot_result, 0) 97 | 98 | visualize(train_X, train_Y, plot_result) 99 | 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /dldemos/MyYOLO/load_coco.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | def print_json(): 6 | with open('data/coco/annotations/instances_val2014.json') as fp: 7 | root = json.load(fp) 8 | print('info:') 9 | print(root['info']) 10 | print('categories:') 11 | print(root['categories']) 12 | print('Length of images:', len(root['images'])) 13 | print(root['images'][0]) 14 | print('Length of annotations:', len(root['annotations'])) 15 | print(root['annotations'][0]) 16 | 17 | 18 | def load_img_ann(): 19 | """return [{img_name, [{x, y, h, w, label}]}]""" 20 | with open('data/coco/annotations/instances_val2014.json') as fp: 21 | root = json.load(fp) 22 | img_dict = {} 23 | for img_info in root['images']: 24 | img_dict[img_info['id']] = {'name': img_info['file_name'], 'anns': []} 25 | for ann_info in root['annotations']: 26 | img_dict[ann_info['image_id']]['anns'].append( 27 | ann_info['bbox'] + [ann_info['category_id']]) 28 | 29 | return img_dict 30 | 31 | 32 | def show_img_ann(img_info): 33 | from PIL import Image 34 | 35 | from dldemos.nms.show_bbox import draw_bbox 36 | print(img_info) 37 | 38 | with open('data/coco/annotations/instances_val2014.json') as fp: 39 | root = json.load(fp) 40 | categories = root['categories'] 41 | category_dict = {int(c['id']): c['name'] for c in categories} 42 | 43 | img_path = os.path.join('data/coco/val2014', img_info['name']) 44 | img = Image.open(img_path) 45 | for ann in img_info['anns']: 46 | x, y, w, h = ann[0:4] 47 | x1, y1, x2, y2 = x, y, x + w, y + h 48 | draw_bbox(img, (x1, y1, x2, y2), 1.0, text=category_dict[ann[4]]) 49 | 50 | img.save('work_dirs/tmp.jpg') 51 | 52 | 53 | def main(): 54 | print_json() 55 | img_dict = load_img_ann() 56 | keys = list(img_dict.keys()) 57 | show_img_ann(img_dict[keys[1]]) 58 | 59 | 60 | if __name__ == '__main__': 61 | main() 62 | -------------------------------------------------------------------------------- /dldemos/PyTorchDistributed/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.distributed as dist 5 | import torch.nn as nn 6 | import 
torch.optim as optim 7 | from torch.nn.parallel import DistributedDataParallel 8 | from torch.utils.data import DataLoader, Dataset 9 | from torch.utils.data.distributed import DistributedSampler 10 | 11 | 12 | def setup(): 13 | dist.init_process_group('nccl') 14 | 15 | 16 | def cleanup(): 17 | dist.destroy_process_group() 18 | 19 | 20 | class ToyModel(nn.Module): 21 | 22 | def __init__(self) -> None: 23 | super().__init__() 24 | self.layer = nn.Linear(1, 1) 25 | 26 | def forward(self, x): 27 | return self.layer(x) 28 | 29 | 30 | class MyDataset(Dataset): 31 | 32 | def __init__(self): 33 | super().__init__() 34 | self.data = torch.tensor([1, 2, 3, 4], dtype=torch.float32) 35 | 36 | def __len__(self): 37 | return len(self.data) 38 | 39 | def __getitem__(self, index): 40 | return self.data[index:index + 1] 41 | 42 | 43 | ckpt_path = 'tmp.pth' 44 | 45 | 46 | def main(): 47 | setup() 48 | rank = dist.get_rank() 49 | pid = os.getpid() 50 | print(f'current pid: {pid}') 51 | print(f'Current rank {rank}') 52 | device_id = rank % torch.cuda.device_count() 53 | 54 | dataset = MyDataset() 55 | sampler = DistributedSampler(dataset) 56 | dataloader = DataLoader(dataset, batch_size=2, sampler=sampler) 57 | 58 | model = ToyModel().to(device_id) 59 | ddp_model = DistributedDataParallel(model, device_ids=[device_id]) 60 | loss_fn = nn.MSELoss() 61 | optimizer = optim.SGD(ddp_model.parameters(), lr=0.001) 62 | 63 | if rank == 0: 64 | torch.save(ddp_model.state_dict(), ckpt_path) 65 | 66 | dist.barrier() 67 | 68 | map_location = {'cuda:0': f'cuda:{device_id}'} 69 | state_dict = torch.load(ckpt_path, map_location=map_location) 70 | print(f'rank {rank}: {state_dict}') 71 | ddp_model.load_state_dict(state_dict) 72 | 73 | for epoch in range(2): 74 | sampler.set_epoch(epoch) 75 | for x in dataloader: 76 | print(f'epoch {epoch}, rank {rank} data: {x}') 77 | x = x.to(device_id) 78 | y = ddp_model(x) 79 | optimizer.zero_grad() 80 | loss = loss_fn(x, y) 81 | loss.backward() 82 | optimizer.step() 83 | 84 | cleanup() 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /dldemos/Regularization/README.md: -------------------------------------------------------------------------------- 1 | Run the `main.py`: 2 | 3 | ```shell 4 | python dldemos/Regularization/main.py 5 | ``` 6 | -------------------------------------------------------------------------------- /dldemos/Regularization/points_classification.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | LLIM = 0 5 | RLIM = 1 6 | 7 | 8 | def generate_points(cnt): 9 | 10 | PERCENTAGE = 0.2 11 | 12 | X = np.random.rand(int(cnt * (1 - PERCENTAGE)), 2) 13 | x_1 = X[:, 0] 14 | x_2 = X[:, 1] 15 | Y = np.where(x_1 > x_2, 1, 0) 16 | 17 | noise_x = np.random.rand(int(cnt * PERCENTAGE)) / 2 18 | noise_y = noise_x + np.random.rand(int(cnt * PERCENTAGE)) / 2 19 | noise_label = np.array([1] * len(noise_x)) 20 | noise_X = np.stack((noise_x, noise_y), axis=1) 21 | X = np.concatenate((X, noise_X), 0) 22 | Y = np.concatenate((Y, noise_label), 0) 23 | 24 | return X.T, Y[:, np.newaxis].T 25 | 26 | 27 | def plot_points(X, Y): 28 | new_X = X.T 29 | Y = np.squeeze(Y, 0) 30 | c = np.where(Y == 0, 'r', 'b') 31 | new_x = new_X[:, 0] 32 | new_y = new_X[:, 1] 33 | plt.scatter(new_x, new_y, color=c) 34 | plt.show() 35 | 36 | 37 | def generate_plot_set(): 38 | x = np.linspace(LLIM, RLIM, 100) 39 | y = 
np.linspace(LLIM, RLIM, 100) 40 | xx, yy = np.meshgrid(x, y) 41 | xx = xx.reshape(-1) 42 | yy = yy.reshape(-1) 43 | return np.stack((xx, yy), axis=1).T 44 | 45 | 46 | def visualize(X, Y, plot_set_result: np.ndarray): 47 | x = np.linspace(LLIM, RLIM, 100) 48 | y = np.linspace(LLIM, RLIM, 100) 49 | xx, yy = np.meshgrid(x, y) 50 | color = plot_set_result.squeeze() 51 | c = np.where(color < 0.5, 'r', 'b') 52 | plt.scatter(xx, yy, c=c, marker='s') 53 | 54 | plt.xlim(LLIM, RLIM) 55 | plt.ylim(LLIM, RLIM) 56 | 57 | origin_x = X.T[:, 0] 58 | origin_y = X.T[:, 1] 59 | origin_color = np.where(Y.squeeze() < 0.5, '#AA0000', '#0000AA') 60 | 61 | plt.scatter(origin_x, origin_y, c=origin_color) 62 | 63 | plt.show() 64 | 65 | 66 | def main(): 67 | plot_points(*generate_points(200)) 68 | 69 | 70 | if __name__ == '__main__': 71 | main() 72 | -------------------------------------------------------------------------------- /dldemos/ResNet/README.md: -------------------------------------------------------------------------------- 1 | 1. Install the repository 2 | 3 | ```shell 4 | python setup.py develop 5 | ``` 6 | 7 | 2. Download the dataset from https://www.kaggle.com/datasets/fusicfenta/cat-and-dog?resource=download and organize the directory as follows: 8 | 9 | ```plain text 10 | └─data 11 | └─archive 12 | └─dataset 13 | ├─single_prediction 14 | ├─test_set 15 | │ ├─cats 16 | │ └─dogs 17 | └─training_set 18 | ├─cats 19 | └─dogs 20 | ``` 21 | 22 | 3. Modify the path in "main" scripts: 23 | 24 | ```Python 25 | train_X, train_Y, test_X, test_Y = get_cat_set( 26 | 'dldemos/LogisticRegression/data/archive/dataset', train_size=1500) 27 | ``` 28 | 29 | Replace 'dldemos/LogisticRegression/data/archive/dataset' with your path. 30 | 31 | 4. Run `tf_main.py`. 32 | -------------------------------------------------------------------------------- /dldemos/ResNet/tf_main.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import layers, models 3 | 4 | from dldemos.BasicCNN.dataset import get_cat_set 5 | 6 | 7 | def identity_block_2(x, f, use_shortcut=True): 8 | _, _, _, C = x.shape 9 | x_shortcut = x 10 | x = layers.Conv2D(C, f, padding='same')(x) 11 | x = layers.BatchNormalization(axis=3)(x) 12 | x = layers.ReLU()(x) 13 | x = layers.Conv2D(C, f, padding='same')(x) 14 | x = layers.BatchNormalization(axis=3)(x) 15 | if use_shortcut: 16 | x = x + x_shortcut 17 | x = layers.ReLU()(x) 18 | return x 19 | 20 | 21 | def convolution_block_2(x, f, filters, s: int, use_shortcut=True): 22 | x_shortcut = x 23 | x = layers.Conv2D(filters, f, strides=(s, s), padding='same')(x) 24 | x = layers.BatchNormalization(axis=3)(x) 25 | x = layers.ReLU()(x) 26 | x = layers.Conv2D(filters, f, padding='same')(x) 27 | x = layers.BatchNormalization(axis=3)(x) 28 | if use_shortcut: 29 | x_shortcut = layers.Conv2D(filters, 1, strides=(s, s), 30 | padding='valid')(x_shortcut) 31 | x_shortcut = layers.BatchNormalization(axis=3)(x_shortcut) 32 | x = x + x_shortcut 33 | x = layers.ReLU()(x) 34 | return x 35 | 36 | 37 | def identity_block_3(x, f, filters1, filters2, use_shortcut=True): 38 | x_shortcut = x 39 | x = layers.Conv2D(filters1, 1, padding='valid')(x) 40 | x = layers.BatchNormalization(axis=3)(x) 41 | x = layers.Conv2D(filters1, f, padding='same')(x) 42 | x = layers.BatchNormalization(axis=3)(x) 43 | x = layers.ReLU()(x) 44 | x = layers.Conv2D(filters2, 1, padding='valid')(x) 45 | x = layers.BatchNormalization(axis=3)(x) 46 | if use_shortcut: 47 | x = x + 
x_shortcut 48 | x = layers.ReLU()(x) 49 | return x 50 | 51 | 52 | def convolution_block_3(x, f, filters1, filters2, s: int, use_shortcut=True): 53 | x_shortcut = x 54 | x = layers.Conv2D(filters1, 1, strides=(s, s), padding='valid')(x) 55 | x = layers.BatchNormalization(axis=3)(x) 56 | x = layers.Conv2D(filters1, f, padding='same')(x) 57 | x = layers.BatchNormalization(axis=3)(x) 58 | x = layers.ReLU()(x) 59 | x = layers.Conv2D(filters2, 1, padding='valid')(x) 60 | x = layers.BatchNormalization(axis=3)(x) 61 | if use_shortcut: 62 | x_shortcut = layers.Conv2D(filters2, 63 | 1, 64 | strides=(s, s), 65 | padding='valid')(x_shortcut) 66 | x_shortcut = layers.BatchNormalization(axis=3)(x_shortcut) 67 | x = x + x_shortcut 68 | x = layers.ReLU()(x) 69 | return x 70 | 71 | 72 | def init_model(input_shape=(224, 224, 3), 73 | model_name='ResNet18', 74 | use_shortcut=True) -> tf.keras.models.Model: 75 | # Initialize input 76 | input = layers.Input(input_shape) 77 | 78 | # Get output 79 | x = layers.Conv2D(64, 7, (2, 2), padding='same')(input) 80 | x = layers.MaxPool2D((3, 3), (2, 2))(x) 81 | 82 | if model_name == 'ResNet18': 83 | x = identity_block_2(x, 3, use_shortcut) 84 | x = identity_block_2(x, 3, use_shortcut) 85 | x = convolution_block_2(x, 3, 128, 2, use_shortcut) 86 | x = identity_block_2(x, 3, use_shortcut) 87 | x = convolution_block_2(x, 3, 256, 2, use_shortcut) 88 | x = identity_block_2(x, 3, use_shortcut) 89 | x = convolution_block_2(x, 3, 512, 2, use_shortcut) 90 | x = identity_block_2(x, 3, use_shortcut) 91 | elif model_name == 'ResNet50': 92 | 93 | def block_group(x, fs1, fs2, count): 94 | x = convolution_block_3(x, 3, fs1, fs2, 2, use_shortcut) 95 | for i in range(count - 1): 96 | x = identity_block_3(x, 3, fs1, fs2, use_shortcut) 97 | return x 98 | 99 | x = block_group(x, 64, 256, 3) 100 | x = block_group(x, 128, 512, 4) 101 | x = block_group(x, 256, 1024, 6) 102 | x = block_group(x, 512, 2048, 3) 103 | else: 104 | raise NotImplementedError(f'No such model {model_name}') 105 | 106 | x = layers.AveragePooling2D((2, 2), (2, 2))(x) 107 | x = layers.Flatten()(x) 108 | output = layers.Dense(1, 'sigmoid')(x) 109 | 110 | # Build model 111 | model = models.Model(inputs=input, outputs=output) 112 | print(model.summary()) 113 | return model 114 | 115 | 116 | def main(): 117 | train_X, train_Y, test_X, test_Y = get_cat_set( 118 | 'dldemos/LogisticRegression/data/archive/dataset', 119 | train_size=500, 120 | test_size=50) 121 | print(train_X.shape) # (m, 224, 224, 3) 122 | print(train_Y.shape) # (m , 1) 123 | 124 | # model = init_model() 125 | # model = init_model(use_shortcut=False) 126 | model = init_model(model_name='ResNet50') 127 | # model = init_model(model_name='ResNet50', use_shortcut=False) 128 | model.compile(optimizer='adam', 129 | loss='binary_crossentropy', 130 | metrics=['accuracy']) 131 | 132 | model.fit(train_X, train_Y, epochs=20, batch_size=16) 133 | model.evaluate(test_X, test_Y) 134 | 135 | 136 | if __name__ == '__main__': 137 | main() 138 | -------------------------------------------------------------------------------- /dldemos/SentimentAnalysis/README.md: -------------------------------------------------------------------------------- 1 | 1. Download [IMDb dataset](https://ai.stanford.edu/~amaas/data/sentiment/). 2 | 3 | 2. Modify the directory in `read_imdb`. 4 | 5 | 3. Run `main.py` . 
(Pretrained GloVe will download to default directory) 6 | -------------------------------------------------------------------------------- /dldemos/SentimentAnalysis/glove.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchtext.vocab import GloVe 3 | 4 | glove = GloVe(name='6B', dim=100) 5 | 6 | # Get vectors 7 | tensor = glove.get_vecs_by_tokens(['', '1998', '199999998', ',', 'cat'], True) 8 | print(tensor) 9 | 10 | # Iterate the vocab 11 | myvocab = glove.itos 12 | print(len(myvocab)) 13 | print(myvocab[0], myvocab[1], myvocab[2], myvocab[3]) 14 | 15 | 16 | def get_counterpart(x1, y1, x2): 17 | """Find y2 that makes x1-y1=x2-y2.""" 18 | x1_id = glove.stoi[x1] 19 | y1_id = glove.stoi[y1] 20 | x2_id = glove.stoi[x2] 21 | x1, y1, x2 = glove.get_vecs_by_tokens([x1, y1, x2], True) 22 | target = x2 - x1 + y1 23 | max_sim = 0 24 | max_id = -1 25 | for i in range(len(myvocab)): 26 | vector = glove.get_vecs_by_tokens([myvocab[i]], True)[0] 27 | cossim = torch.dot(target, vector) 28 | if cossim > max_sim and i not in {x1_id, y1_id, x2_id}: 29 | max_sim = cossim 30 | max_id = i 31 | return myvocab[max_id] 32 | 33 | 34 | print(get_counterpart('man', 'woman', 'king')) 35 | print(get_counterpart('more', 'less', 'long')) 36 | print(get_counterpart('apple', 'red', 'banana')) 37 | -------------------------------------------------------------------------------- /dldemos/SentimentAnalysis/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.utils.rnn import pad_sequence 4 | from torch.utils.data import DataLoader, Dataset 5 | from torchtext.data import get_tokenizer 6 | from torchtext.vocab import GloVe 7 | 8 | from dldemos.SentimentAnalysis.read_imdb import read_imdb 9 | 10 | GLOVE_DIM = 100 11 | GLOVE = GloVe(name='6B', dim=GLOVE_DIM) 12 | 13 | 14 | class IMDBDataset(Dataset): 15 | 16 | def __init__(self, is_train=True, dir='data/aclImdb'): 17 | super().__init__() 18 | self.tokenizer = get_tokenizer('basic_english') 19 | pos_lines = read_imdb(dir, 'pos', is_train) 20 | neg_lines = read_imdb(dir, 'neg', is_train) 21 | self.lines = pos_lines + neg_lines 22 | self.pos_length = len(pos_lines) 23 | self.neg_length = len(neg_lines) 24 | 25 | def __len__(self): 26 | return self.pos_length + self.neg_length 27 | 28 | def __getitem__(self, index): 29 | sentence = self.tokenizer(self.lines[index]) 30 | x = GLOVE.get_vecs_by_tokens(sentence) 31 | label = 1 if index < self.pos_length else 0 32 | return x, label 33 | 34 | 35 | def get_dataloader(dir='data/aclImdb'): 36 | 37 | def collate_fn(batch): 38 | x, y = zip(*batch) 39 | x_pad = pad_sequence(x, batch_first=True) 40 | y = torch.Tensor(y) 41 | return x_pad, y 42 | 43 | train_dataloader = DataLoader(IMDBDataset(True, dir), 44 | batch_size=32, 45 | shuffle=True, 46 | collate_fn=collate_fn) 47 | test_dataloader = DataLoader(IMDBDataset(False, dir), 48 | batch_size=32, 49 | shuffle=True, 50 | collate_fn=collate_fn) 51 | return train_dataloader, test_dataloader 52 | 53 | 54 | class RNN(torch.nn.Module): 55 | 56 | def __init__(self, hidden_units=64, dropout_rate=0.5): 57 | super().__init__() 58 | self.drop = nn.Dropout(dropout_rate) 59 | self.rnn = nn.GRU(GLOVE_DIM, hidden_units, 1, batch_first=True) 60 | self.linear = nn.Linear(hidden_units, 1) 61 | self.sigmoid = nn.Sigmoid() 62 | 63 | def forward(self, x: torch.Tensor): 64 | # x shape: [batch, max_word_length, embedding_length] 65 | emb = self.drop(x) 66 | 
output, _ = self.rnn(emb) 67 | output = output[:, -1] 68 | output = self.linear(output) 69 | output = self.sigmoid(output) 70 | 71 | return output 72 | 73 | 74 | def main(): 75 | device = 'cuda:0' 76 | train_dataloader, test_dataloader = get_dataloader() 77 | model = RNN().to(device) 78 | 79 | # train 80 | 81 | optimizer = torch.optim.Adam(model.parameters(), lr=0.001) 82 | citerion = torch.nn.BCELoss() 83 | for epoch in range(100): 84 | 85 | loss_sum = 0 86 | dataset_len = len(train_dataloader.dataset) 87 | 88 | for x, y in train_dataloader: 89 | batchsize = y.shape[0] 90 | x = x.to(device) 91 | y = y.to(device) 92 | hat_y = model(x) 93 | hat_y = hat_y.squeeze(-1) 94 | loss = citerion(hat_y, y) 95 | 96 | optimizer.zero_grad() 97 | loss.backward() 98 | torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) 99 | optimizer.step() 100 | 101 | loss_sum += loss * batchsize 102 | 103 | print(f'Epoch {epoch}. loss: {loss_sum / dataset_len}') 104 | 105 | torch.save(model.state_dict(), 'dldemos/SentimentAnalysis/rnn.pth') 106 | 107 | # test 108 | 109 | # model.load_state_dict( 110 | # torch.load('dldemos/SentimentAnalysis/rnn.pth', 'cuda:0')) 111 | 112 | accuracy = 0 113 | dataset_len = len(test_dataloader.dataset) 114 | model.eval() 115 | for x, y in test_dataloader: 116 | x = x.to(device) 117 | y = y.to(device) 118 | with torch.no_grad(): 119 | hat_y = model(x) 120 | hat_y.squeeze_(1) 121 | predictions = torch.where(hat_y > 0.5, 1, 0) 122 | score = torch.sum(torch.where(predictions == y, 1, 0)) 123 | accuracy += score.item() 124 | accuracy /= dataset_len 125 | 126 | print(f'Accuracy: {accuracy}') 127 | 128 | # Inference 129 | tokenizer = get_tokenizer('basic_english') 130 | article = 'U.S. stock indexes fell Tuesday, driven by expectations for ' \ 131 | 'tighter Federal Reserve policy and an energy crisis in Europe. ' \ 132 | 'Stocks around the globe have come under pressure in recent weeks ' \ 133 | 'as worries about tighter monetary policy in the U.S. and a '\ 134 | 'darkening economic outlook in Europe have led investors to '\ 135 | 'sell riskier assets.' 136 | 137 | x = GLOVE.get_vecs_by_tokens(tokenizer(article)).unsqueeze(0).to(device) 138 | with torch.no_grad(): 139 | hat_y = model(x) 140 | hat_y = hat_y.squeeze_().item() 141 | result = 'positive' if hat_y > 0.5 else 'negative' 142 | print(result) 143 | 144 | 145 | if __name__ == '__main__': 146 | main() 147 | -------------------------------------------------------------------------------- /dldemos/SentimentAnalysis/read_imdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchtext.data import get_tokenizer 4 | 5 | 6 | def read_imdb(dir='data/aclImdb', split='pos', is_train=True): 7 | subdir = 'train' if is_train else 'test' 8 | dir = os.path.join(dir, subdir, split) 9 | lines = [] 10 | for file in os.listdir(dir): 11 | with open(os.path.join(dir, file), 'rb') as f: 12 | line = f.read().decode('utf-8') 13 | lines.append(line) 14 | return lines 15 | 16 | 17 | def main(): 18 | lines = read_imdb() 19 | print('Length of the file:', len(lines)) 20 | print('lines[0]:', lines[0]) 21 | tokenizer = get_tokenizer('basic_english') 22 | tokens = tokenizer(lines[0]) 23 | print('lines[0] tokens:', tokens) 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /dldemos/ShallowNetwork/README.md: -------------------------------------------------------------------------------- 1 | 1. 
Install the repository 2 | 3 | ```shell 4 | python setup.py develop 5 | ``` 6 | 7 | 2. Run the following command in the repository root dir: 8 | 9 | ```shell 10 | python dldemos/ShallowNetwork/model.py 11 | ``` 12 | 13 | 3. You can read the source code in `dldemos/ShallowNetwork/model.py`, `dldemos\ShallowNetwork\genereate_points.py` and modify the hyper-parameters and the dataset. 14 | -------------------------------------------------------------------------------- /dldemos/ShallowNetwork/genereate_points.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from numpy.random import default_rng 4 | 5 | 6 | def vertical_flip(): 7 | return np.array([[1, 0], [0, -1]]) 8 | 9 | 10 | def rotate(theta): 11 | return np.array([[np.cos(theta), -np.sin(theta)], 12 | [np.sin(theta), np.cos(theta)]]) 13 | 14 | 15 | def half_oval(cnt, h=10, w=20): 16 | x = np.linspace(-w, w, cnt) 17 | y = np.sqrt(h * h * (1 - x * x / w / w)) 18 | return np.stack((x, y), 1) 19 | 20 | 21 | def generate_point_set(): 22 | petal1 = half_oval(20) 23 | petal2 = np.dot(half_oval(20), vertical_flip().T) 24 | petal = np.concatenate((petal1, petal2), 0) 25 | petal += [25, 0] 26 | flower = petal.copy() 27 | for i in range(5): 28 | new_petal = np.dot(petal.copy(), rotate(np.radians(60) * (i + 1)).T) 29 | flower = np.concatenate((flower, new_petal), 0) 30 | 31 | label = np.zeros([40 * 6]) 32 | label[0:40] = 1 33 | label[40:80] = 1 34 | label[120:160] = 1 35 | 36 | rng = default_rng() 37 | noise_indice1 = rng.choice(40 * 6, 10, replace=False) 38 | label[noise_indice1] = 1 - label[noise_indice1] 39 | 40 | x = flower[:, 0] 41 | y = flower[:, 1] 42 | return x, y, label 43 | 44 | 45 | def generate_plot_set(): 46 | x = np.linspace(-50, 50, 100) 47 | y = np.linspace(-50, 50, 100) 48 | xx, yy = np.meshgrid(x, y) 49 | xx = xx.reshape(-1) 50 | yy = yy.reshape(-1) 51 | return np.stack((xx, yy), axis=1).T 52 | 53 | 54 | def visualize(X, Y, plot_set_result: np.ndarray): 55 | x = np.linspace(-50, 50, 100) 56 | y = np.linspace(-50, 50, 100) 57 | xx, yy = np.meshgrid(x, y) 58 | color = plot_set_result.squeeze() 59 | c = np.where(color < 0.5, 'r', 'g') 60 | plt.scatter(xx, yy, c=c, marker='s') 61 | 62 | plt.xlim(-50, 50) 63 | plt.ylim(-50, 50) 64 | 65 | origin_x = X[:, 0] 66 | origin_y = X[:, 1] 67 | origin_color = np.where(Y.squeeze() < 0.5, '#AA0000', '#00AA00') 68 | 69 | plt.scatter(origin_x, origin_y, c=origin_color) 70 | 71 | plt.show() 72 | 73 | 74 | if __name__ == '__main__': 75 | x, y, label = generate_point_set() 76 | c = np.where(label == 0, 'r', 'g') 77 | plt.scatter(x, y, c=c) 78 | 79 | plt.xlim(-50, 50) 80 | plt.ylim(-50, 50) 81 | 82 | plt.show() 83 | -------------------------------------------------------------------------------- /dldemos/ShallowNetwork/model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | import numpy as np 4 | 5 | from dldemos.utils import relu, relu_de, sigmoid 6 | 7 | 8 | class BaseRegressionModel(metaclass=abc.ABCMeta): 9 | # Use Cross Entropy as the cost function 10 | 11 | def __init__(self): 12 | pass 13 | 14 | @abc.abstractmethod 15 | def forward(self, X, train_mode=True): 16 | # if self.train_mode: 17 | # forward_train() 18 | # else: 19 | # forward_test() 20 | pass 21 | 22 | @abc.abstractmethod 23 | def backward(self, Y): 24 | pass 25 | 26 | @abc.abstractmethod 27 | def gradient_descent(self, learning_rate=0.001): 28 | pass 29 | 30 | def loss(self, Y_hat, Y): 
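        # Binary cross-entropy, averaged over every output and sample. Note
        # that np.log returns -inf if Y_hat saturates at exactly 0 or 1; a
        # more robust variant would first clip Y_hat, e.g. with
        # np.clip(Y_hat, 1e-7, 1 - 1e-7).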
31 | return np.mean(-(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat))) 32 | 33 | def evaluate(self, X, Y): 34 | Y_hat = self.forward(X, train_mode=False) 35 | predicts = np.where(Y_hat > 0.5, 1, 0) 36 | score = np.mean(np.where(predicts == Y, 1, 0)) 37 | print(f'Accuracy: {score}') 38 | 39 | 40 | class LogisticRegression(BaseRegressionModel): 41 | 42 | def __init__(self, n_x): 43 | super().__init__() 44 | self.n_x = n_x 45 | self.w = np.zeros((n_x, 1)) 46 | self.b = 0 47 | 48 | def forward(self, X, train_mode=True): 49 | Z = np.dot(self.w.T, X) + self.b 50 | A = sigmoid(Z) # hat_Y = A 51 | if train_mode: 52 | self.m_cache = X.shape[1] 53 | self.X_cache = X 54 | self.A_cache = A 55 | return A 56 | 57 | def backward(self, Y): 58 | d_Z = self.A_cache - Y 59 | d_w = np.dot(self.X_cache, d_Z.T) / self.m_cache 60 | d_b = np.mean(d_Z) 61 | self.d_w_cache = d_w 62 | self.d_b_cache = d_b 63 | 64 | def gradient_descent(self, learning_rate=0.001): 65 | self.w -= learning_rate * self.d_w_cache 66 | self.b -= learning_rate * self.d_b_cache 67 | 68 | 69 | class ShallowNetwork(BaseRegressionModel): 70 | # x -> hidden layer -> output layer -> y 71 | # hidden layer (n_1 relu) 72 | # output layer (1 sigmoid) 73 | def __init__(self, n_x, n_1): 74 | super().__init__() 75 | self.n_x = n_x 76 | self.n_1 = n_1 77 | self.W1 = np.random.randn(n_1, n_x) * 0.01 78 | self.b1 = np.zeros((n_1, 1)) 79 | self.W2 = np.random.randn(1, n_1) * 0.01 80 | self.b2 = np.zeros((1, 1)) 81 | 82 | def forward(self, X, train_mode=True): 83 | Z1 = np.dot(self.W1, X) + self.b1 84 | A1 = relu(Z1) 85 | Z2 = np.dot(self.W2, A1) + self.b2 86 | A2 = sigmoid(Z2) 87 | if train_mode: 88 | self.m_cache = X.shape[1] 89 | self.X_cache = X 90 | self.Z1_cache = Z1 91 | self.A1_cache = A1 92 | self.A2_cache = A2 93 | return A2 94 | 95 | def backward(self, Y): 96 | dZ2 = self.A2_cache - Y 97 | dW2 = np.dot(dZ2, self.A1_cache.T) / self.m_cache 98 | db2 = np.sum(dZ2, axis=1, keepdims=True) / self.m_cache 99 | dA1 = np.dot(self.W2.T, dZ2) 100 | 101 | dZ1 = dA1 * relu_de(self.Z1_cache) 102 | dW1 = np.dot(dZ1, self.X_cache.T) / self.m_cache 103 | db1 = np.sum(dZ1, axis=1, keepdims=True) / self.m_cache 104 | 105 | self.dW2_cache = dW2 106 | self.dW1_cache = dW1 107 | self.db2_cache = db2 108 | self.db1_cache = db1 109 | 110 | def gradient_descent(self, learning_rate=0.001): 111 | self.W1 -= learning_rate * self.dW1_cache 112 | self.b1 -= learning_rate * self.db1_cache 113 | self.W2 -= learning_rate * self.dW2_cache 114 | self.b2 -= learning_rate * self.db2_cache 115 | 116 | 117 | def train_model(model: BaseRegressionModel, 118 | X_train, 119 | Y_train, 120 | X_test, 121 | Y_test, 122 | steps=1000, 123 | learning_rate=0.001, 124 | print_interval=100): 125 | for step in range(steps): 126 | Y_hat = model.forward(X_train) 127 | model.backward(Y_train) 128 | model.gradient_descent(learning_rate) 129 | if step % print_interval == 0: 130 | train_loss = model.loss(Y_hat, Y_train) 131 | print(f'Step {step}') 132 | print(f'Train loss: {train_loss}') 133 | model.evaluate(X_test, Y_test) 134 | -------------------------------------------------------------------------------- /dldemos/ShallowNetwork/plot_activation_func.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | 5 | def sigmoid(x): 6 | return 1 / (1 + np.exp(-x)) 7 | 8 | 9 | def tanh(x): 10 | return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x)) 11 | 12 | 13 | def relu(x): 14 | return np.maximum(x, 0) 15 | 
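# leaky_relu below keeps a small positive slope (0.1 here; 0.01 is another
# common choice) for negative inputs, so unlike relu its gradient does not
# vanish on the negative half-axis.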
16 | 17 | def leaky_relu(x): 18 | return np.maximum(x, 0.1 * x) 19 | 20 | 21 | x = np.linspace(-3, 3, 100) 22 | y1 = sigmoid(x) 23 | y2 = tanh(x) 24 | y3 = relu(x) 25 | y4 = leaky_relu(x) 26 | 27 | plt.subplot(2, 2, 1) 28 | plt.axvline(x=0, color='k') 29 | plt.axhline(y=0, color='k') 30 | plt.plot(x, y1) 31 | plt.title('sigmoid') 32 | 33 | plt.subplot(2, 2, 2) 34 | plt.axhline(y=0, color='k') 35 | plt.axvline(x=0, color='k') 36 | plt.plot(x, y2) 37 | plt.title('tanh') 38 | 39 | plt.subplot(2, 2, 3) 40 | plt.axhline(y=0, color='k') 41 | plt.axvline(x=0, color='k') 42 | plt.plot(x, y3) 43 | plt.title('relu') 44 | 45 | plt.subplot(2, 2, 4) 46 | plt.axhline(y=0, color='k') 47 | plt.axvline(x=0, color='k') 48 | plt.plot(x, y4) 49 | plt.title('leaky_relu') 50 | 51 | plt.show() 52 | -------------------------------------------------------------------------------- /dldemos/ShallowNetwork/points_classification.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from dldemos.ShallowNetwork.genereate_points import (generate_plot_set, 4 | generate_point_set, 5 | visualize) 6 | from dldemos.ShallowNetwork.model import (LogisticRegression, ShallowNetwork, 7 | train_model) 8 | 9 | 10 | def main(): 11 | x, y, label = generate_point_set() 12 | # x: [240] 13 | # y: [240] 14 | # label: [240] 15 | 16 | X = np.stack((x, y), axis=1) 17 | Y = np.expand_dims(label, axis=1) 18 | # X: [240, 2] 19 | # Y: [240, 1] 20 | 21 | indices = np.random.permutation(X.shape[0]) 22 | X_train = X[indices[0:200], :].T 23 | Y_train = Y[indices[0:200], :].T 24 | X_test = X[indices[200:], :].T 25 | Y_test = Y[indices[200:], :].T 26 | # X_train: [2, 200] 27 | # Y_train: [1, 200] 28 | # X_test: [2, 40] 29 | # Y_test: [1, 40] 30 | 31 | n_x = 2 32 | 33 | model1 = LogisticRegression(n_x) 34 | model2 = ShallowNetwork(n_x, 2) 35 | model3 = ShallowNetwork(n_x, 4) 36 | model4 = ShallowNetwork(n_x, 10) 37 | train_model(model1, X_train, Y_train, X_test, Y_test, 500, 0.0001, 50) 38 | train_model(model2, X_train, Y_train, X_test, Y_test, 2000, 0.01, 100) 39 | train_model(model3, X_train, Y_train, X_test, Y_test, 5000, 0.01, 500) 40 | train_model(model4, X_train, Y_train, X_test, Y_test, 5000, 0.01, 500) 41 | 42 | visualize_X = generate_plot_set() 43 | plot_result = model4.forward(visualize_X, train_mode=False) 44 | visualize(X, Y, plot_result) 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /dldemos/StyleTransfer/README.md: -------------------------------------------------------------------------------- 1 | Install PyTorch and run the scripts `copy_img.py`, `combine_img.py`, `style_transfer.py`. 
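For example, with the repository root as the working directory (the scripts load `picasso.jpg` and `dancing.jpg` through `dldemos/StyleTransfer/...` paths and write their results into `work_dirs/`, which they assume already exists):

```shell
mkdir -p work_dirs
python dldemos/StyleTransfer/copy_img.py       # reconstruct one image from random noise
python dldemos/StyleTransfer/combine_img.py    # naive pixel-space blend of both images
python dldemos/StyleTransfer/style_transfer.py # VGG-19 feature- and Gram-matrix-based transfer
```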
2 | -------------------------------------------------------------------------------- /dldemos/StyleTransfer/combine_img.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.optim as optim 4 | import torchvision.transforms as transforms 5 | from PIL import Image 6 | 7 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 8 | 9 | img_size = (256, 256) 10 | 11 | 12 | def read_image(image_path): 13 | pipeline = transforms.Compose( 14 | [transforms.Resize((img_size)), 15 | transforms.ToTensor()]) 16 | 17 | img = Image.open(image_path) 18 | img = pipeline(img).unsqueeze(0) 19 | return img.to(device, torch.float) 20 | 21 | 22 | def save_image(tensor, image_path): 23 | toPIL = transforms.ToPILImage() 24 | img = tensor.detach().cpu().clone() 25 | img = img.squeeze(0) 26 | img = toPIL(img) 27 | img.save(image_path) 28 | 29 | 30 | style_img = read_image('dldemos/StyleTransfer/picasso.jpg') 31 | content_img = read_image('dldemos/StyleTransfer/dancing.jpg') 32 | 33 | input_img = torch.randn(1, 3, *img_size, device=device) 34 | input_img.requires_grad_(True) 35 | optimizer = optim.LBFGS([input_img]) 36 | steps = 0 37 | while steps <= 10: 38 | 39 | def closure(): 40 | global steps 41 | optimizer.zero_grad() 42 | loss = F.mse_loss(input_img, style_img) + F.mse_loss( 43 | input_img, content_img) 44 | loss.backward() 45 | steps += 1 46 | if steps % 1 == 0: 47 | print(f'Step {steps}:') 48 | print(f'Loss: {loss}') 49 | 50 | return loss 51 | 52 | optimizer.step(closure) 53 | 54 | save_image(input_img, 'work_dirs/output.jpg') 55 | -------------------------------------------------------------------------------- /dldemos/StyleTransfer/copy_img.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.optim as optim 4 | import torchvision.transforms as transforms 5 | from PIL import Image 6 | 7 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 8 | 9 | img_size = (256, 256) 10 | 11 | 12 | def read_image(image_path): 13 | pipeline = transforms.Compose( 14 | [transforms.Resize((img_size)), 15 | transforms.ToTensor()]) 16 | 17 | img = Image.open(image_path) 18 | img = pipeline(img).unsqueeze(0) 19 | return img.to(device, torch.float) 20 | 21 | 22 | def save_image(tensor, image_path): 23 | toPIL = transforms.ToPILImage() 24 | img = tensor.detach().cpu().clone() 25 | img = img.squeeze(0) 26 | img = toPIL(img) 27 | img.save(image_path) 28 | 29 | 30 | style_img = read_image('dldemos/StyleTransfer/picasso.jpg') 31 | content_img = read_image('dldemos/StyleTransfer/dancing.jpg') 32 | 33 | input_img = torch.randn(1, 3, *img_size, device=device) 34 | input_img.requires_grad_(True) 35 | optimizer = optim.LBFGS([input_img]) 36 | steps = 0 37 | while steps <= 10: 38 | 39 | def closure(): 40 | global steps 41 | optimizer.zero_grad() 42 | loss = F.mse_loss(input_img, style_img) 43 | loss.backward() 44 | steps += 1 45 | if steps % 5 == 0: 46 | print(f'Step {steps}:') 47 | print(f'Loss: {loss}') 48 | 49 | return loss 50 | 51 | optimizer.step(closure) 52 | 53 | save_image(input_img, 'work_dirs/output.jpg') 54 | -------------------------------------------------------------------------------- /dldemos/StyleTransfer/dancing.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SingleZombie/DL-Demos/739a21ff90f411c318e098823581afb3f8a1d010/dldemos/StyleTransfer/dancing.jpg -------------------------------------------------------------------------------- /dldemos/StyleTransfer/picasso.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SingleZombie/DL-Demos/739a21ff90f411c318e098823581afb3f8a1d010/dldemos/StyleTransfer/picasso.jpg -------------------------------------------------------------------------------- /dldemos/StyleTransfer/style_transfer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.optim as optim 4 | import torchvision.models as models 5 | import torchvision.transforms as transforms 6 | from PIL import Image 7 | 8 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 9 | 10 | img_size = (256, 256) 11 | 12 | 13 | def read_image(image_path): 14 | pipeline = transforms.Compose( 15 | [transforms.Resize((img_size)), 16 | transforms.ToTensor()]) 17 | 18 | img = Image.open(image_path).convert('RGB') 19 | img = pipeline(img).unsqueeze(0) 20 | return img.to(device, torch.float) 21 | 22 | 23 | def save_image(tensor, image_path): 24 | toPIL = transforms.ToPILImage() 25 | img = tensor.detach().cpu().clone() 26 | img = img.squeeze(0) 27 | img = toPIL(img) 28 | img.save(image_path) 29 | 30 | 31 | # Hyperparameters 32 | style_img = read_image('dldemos/StyleTransfer/picasso.jpg') 33 | content_img = read_image('dldemos/StyleTransfer/dancing.jpg') 34 | 35 | default_content_layers = ['conv_4'] 36 | default_style_layers = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5'] 37 | style_weight = 1e4 38 | content_weight = 1 39 | 40 | 41 | class ContentLoss(torch.nn.Module): 42 | 43 | def __init__(self, target: torch.Tensor): 44 | super().__init__() 45 | self.target = target.detach() 46 | 47 | def forward(self, input): 48 | self.loss = F.mse_loss(input, self.target) 49 | return input 50 | 51 | 52 | def gram(x: torch.Tensor): 53 | # x is a [n, c, h, w] array 54 | n, c, h, w = x.shape 55 | 56 | features = x.reshape(n * c, h * w) 57 | features = torch.mm(features, features.T) / n / c / h / w 58 | return features 59 | 60 | 61 | class StyleLoss(torch.nn.Module): 62 | 63 | def __init__(self, target: torch.Tensor): 64 | super().__init__() 65 | self.target = gram(target.detach()).detach() 66 | 67 | def forward(self, input): 68 | G = gram(input) 69 | self.loss = F.mse_loss(G, self.target) 70 | return input 71 | 72 | 73 | class Normalization(torch.nn.Module): 74 | 75 | def __init__(self, mean, std): 76 | super().__init__() 77 | self.mean = torch.tensor(mean).to(device).reshape(-1, 1, 1) 78 | self.std = torch.tensor(std).to(device).reshape(-1, 1, 1) 79 | 80 | def forward(self, img): 81 | return (img - self.mean) / self.std 82 | 83 | 84 | def get_model_and_losses(content_img, style_img, content_layers, style_layers): 85 | num_loss = 0 86 | expected_num_loss = len(content_layers) + len(style_layers) 87 | content_losses = [] 88 | style_losses = [] 89 | 90 | model = torch.nn.Sequential( 91 | Normalization([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])) 92 | cnn = models.vgg19(pretrained=True).features.to(device).eval() 93 | i = 0 94 | for layer in cnn.children(): 95 | if isinstance(layer, torch.nn.Conv2d): 96 | i += 1 97 | name = f'conv_{i}' 98 | elif isinstance(layer, torch.nn.ReLU): 99 | name = f'relu_{i}' 100 | layer = torch.nn.ReLU(inplace=False) 101 | elif 
isinstance(layer, torch.nn.MaxPool2d): 102 | name = f'pool_{i}' 103 | elif isinstance(layer, torch.nn.BatchNorm2d): 104 | name = f'bn_{i}' 105 | else: 106 | raise RuntimeError( 107 | f'Unrecognized layer: {layer.__class__.__name__}') 108 | 109 | model.add_module(name, layer) 110 | 111 | if name in content_layers: 112 | # add content loss: 113 | target = model(content_img) 114 | content_loss = ContentLoss(target) 115 | model.add_module(f'content_loss_{i}', content_loss) 116 | content_losses.append(content_loss) 117 | num_loss += 1 118 | 119 | if name in style_layers: 120 | target_feature = model(style_img) 121 | style_loss = StyleLoss(target_feature) 122 | model.add_module(f'style_loss_{i}', style_loss) 123 | style_losses.append(style_loss) 124 | num_loss += 1 125 | 126 | if num_loss >= expected_num_loss: 127 | break 128 | 129 | return model, content_losses, style_losses 130 | 131 | 132 | input_img = torch.randn(1, 3, *img_size, device=device) 133 | model, content_losses, style_losses = get_model_and_losses( 134 | content_img, style_img, default_content_layers, default_style_layers) 135 | 136 | input_img.requires_grad_(True) 137 | model.requires_grad_(False) 138 | 139 | optimizer = optim.LBFGS([input_img]) 140 | steps = 0 141 | prev_loss = 0 142 | while steps <= 1000 and prev_loss < 100: 143 | 144 | def closure(): 145 | with torch.no_grad(): 146 | input_img.clamp_(0, 1) 147 | global steps 148 | global prev_loss 149 | optimizer.zero_grad() 150 | model(input_img) 151 | content_loss = 0 152 | style_loss = 0 153 | for ls in content_losses: 154 | content_loss += ls.loss 155 | for ls in style_losses: 156 | style_loss += ls.loss 157 | loss = content_weight * content_loss + style_weight * style_loss 158 | loss.backward() 159 | steps += 1 160 | if steps % 50 == 0: 161 | print(f'Step {steps}:') 162 | print(f'Loss: {loss}') 163 | save_image(input_img, f'work_dirs/output_{steps}.jpg') 164 | prev_loss = loss 165 | return loss 166 | 167 | optimizer.step(closure) 168 | with torch.no_grad(): 169 | input_img.clamp_(0, 1) 170 | save_image(input_img, 'work_dirs/output.jpg') 171 | -------------------------------------------------------------------------------- /dldemos/Transformer/data_load.py: -------------------------------------------------------------------------------- 1 | # Modified from 2 | # https://github.com/P3n9W31/transformer-pytorch/master/data_load.py 3 | 4 | import codecs 5 | import os 6 | import random 7 | 8 | import numpy as np 9 | import regex 10 | import requests 11 | 12 | # Words that occur less than min_cnt times are encoded as <UNK>. 13 | min_cnt = 0 14 | # Maximum number of words in a sentence.
15 | maxlen = 50 16 | 17 | source_train = 'dldemos/Transformer/data/cn.txt' 18 | target_train = 'dldemos/Transformer/data/en.txt' 19 | source_test = 'dldemos/Transformer/data/cn.test.txt' 20 | target_test = 'dldemos/Transformer/data/en.test.txt' 21 | 22 | 23 | def load_vocab(language): 24 | assert language in ['cn', 'en'] 25 | vocab = [ 26 | line.split()[0] for line in codecs.open( 27 | 'dldemos/Transformer/data/{}.txt.vocab.tsv'.format(language), 'r', 28 | 'utf-8').read().splitlines() if int(line.split()[1]) >= min_cnt 29 | ] 30 | word2idx = {word: idx for idx, word in enumerate(vocab)} 31 | idx2word = {idx: word for idx, word in enumerate(vocab)} 32 | return word2idx, idx2word 33 | 34 | 35 | def load_cn_vocab(): 36 | word2idx, idx2word = load_vocab('cn') 37 | return word2idx, idx2word 38 | 39 | 40 | def load_en_vocab(): 41 | word2idx, idx2word = load_vocab('en') 42 | return word2idx, idx2word 43 | 44 | 45 | def create_data(source_sents, target_sents): 46 | cn2idx, idx2cn = load_cn_vocab() 47 | en2idx, idx2en = load_en_vocab() 48 | 49 | # Index 50 | x_list, y_list, Sources, Targets = [], [], [], [] 51 | for source_sent, target_sent in zip(source_sents, target_sents): 52 | x = [ 53 | cn2idx.get(word, 1) 54 | for word in ('<S> ' + source_sent + ' </S>').split() 55 | ] # 1: OOV, </S>: End of Text 56 | y = [ 57 | en2idx.get(word, 1) 58 | for word in ('<S> ' + target_sent + ' </S>').split() 59 | ] 60 | if max(len(x), len(y)) <= maxlen: 61 | x_list.append(np.array(x)) 62 | y_list.append(np.array(y)) 63 | Sources.append(source_sent) 64 | Targets.append(target_sent) 65 | 66 | # Pad 67 | X = np.zeros([len(x_list), maxlen], np.int32) 68 | Y = np.zeros([len(y_list), maxlen], np.int32) 69 | for i, (x, y) in enumerate(zip(x_list, y_list)): 70 | X[i] = np.lib.pad(x, [0, maxlen - len(x)], 71 | 'constant', 72 | constant_values=(0, 0)) 73 | Y[i] = np.lib.pad(y, [0, maxlen - len(y)], 74 | 'constant', 75 | constant_values=(0, 0)) 76 | 77 | return X, Y, Sources, Targets 78 | 79 | 80 | def load_data(data_type): 81 | assert data_type in ['train', 'test'] 82 | if data_type == 'train': 83 | source, target = source_train, target_train 84 | elif data_type == 'test': 85 | source, target = source_test, target_test 86 | cn_sents = [ 87 | regex.sub("[^\s\p{L}']", '', line) # noqa W605 88 | for line in codecs.open(source, 'r', 'utf-8').read().split('\n') 89 | if line and line[0] != '<' 90 | ] 91 | en_sents = [ 92 | regex.sub("[^\s\p{L}']", '', line) # noqa W605 93 | for line in codecs.open(target, 'r', 'utf-8').read().split('\n') 94 | if line and line[0] != '<' 95 | ] 96 | 97 | X, Y, Sources, Targets = create_data(cn_sents, en_sents) 98 | return X, Y, Sources, Targets 99 | 100 | 101 | def load_train_data(): 102 | X, Y, _, _ = load_data('train') 103 | return X, Y 104 | 105 | 106 | def load_test_data(): 107 | X, Y, _, _ = load_data('test') 108 | return X, Y 109 | 110 | 111 | def get_batch_indices(total_length, batch_size): 112 | assert (batch_size <= 113 | total_length), ('Batch size is larger than total data length. '
114 | 'Check your data or change batch size.') 115 | current_index = 0 116 | indexs = [i for i in range(total_length)] 117 | random.shuffle(indexs) 118 | while 1: 119 | if current_index + batch_size >= total_length: 120 | break 121 | yield indexs[current_index:current_index + batch_size], current_index 122 | current_index += batch_size 123 | 124 | 125 | def idx_to_sentence(arr, vocab, insert_space=False): 126 | res = '' 127 | first_word = True 128 | for id in arr: 129 | word = vocab[id.item()] 130 | 131 | if insert_space and not first_word: 132 | res += ' ' 133 | first_word = False 134 | 135 | res += word 136 | 137 | return res 138 | 139 | 140 | def download(url, dir, name=None): 141 | os.makedirs(dir, exist_ok=True) 142 | if name is None: 143 | name = url.split('/')[-1] 144 | path = os.path.join(dir, name) 145 | if not os.path.exists(path): 146 | print(f'Downloading {name} ...') 147 | open(path, 'wb').write(requests.get(url).content) 148 | print('Download complete.') 149 | 150 | 151 | def download_data(): 152 | data_dir = 'dldemos/Transformer/data' 153 | urls = [('https://raw.githubusercontent.com/P3n9W31/transformer-pytorch/' 154 | 'master/corpora/cn.txt'), 155 | ('https://raw.githubusercontent.com/P3n9W31/transformer-pytorch/' 156 | 'master/corpora/en.txt'), 157 | ('https://raw.githubusercontent.com/P3n9W31/transformer-pytorch/' 158 | 'master/preprocessed/cn.txt.vocab.tsv'), 159 | ('https://raw.githubusercontent.com/P3n9W31/transformer-pytorch/' 160 | 'master/preprocessed/en.txt.vocab.tsv')] 161 | for url in urls: 162 | download(url, data_dir) 163 | 164 | 165 | if __name__ == '__main__': 166 | download_data() 167 | -------------------------------------------------------------------------------- /dldemos/Transformer/outdated/dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import Counter 3 | 4 | import numpy as np 5 | from torchtext.data import get_tokenizer 6 | 7 | SOS_ID = 0 8 | EOS_ID = 1 9 | UNK_ID = 2 10 | PAD_ID = 3 11 | MAX_SEQ_LEN = 200 12 | 13 | 14 | def read_file(json_path): 15 | import jieba 16 | english_sentences = [] 17 | chinese_sentences = [] 18 | tokenizer = get_tokenizer('basic_english') 19 | with open(json_path, 'r') as fp: 20 | for line in fp: 21 | line = json.loads(line) 22 | english, chinese = line['english'], line['chinese'] 23 | # Correct mislabeled data 24 | if not english.isascii(): 25 | english, chinese = chinese, english 26 | # Tokenize 27 | english = tokenizer(english) 28 | chinese = list(jieba.cut(chinese)) 29 | chinese = [x for x in chinese if x not in {' ', '\t'}] 30 | english_sentences.append(english) 31 | chinese_sentences.append(chinese) 32 | return english_sentences, chinese_sentences 33 | 34 | 35 | def create_vocab(sentences, max_element=None): 36 | """Note that max_element includes special characters.""" 37 | 38 | default_list = ['<sos>', '<eos>', '<unk>', '<pad>'] 39 | 40 | char_set = Counter() 41 | for sentence in sentences: 42 | c_set = Counter(sentence) 43 | char_set.update(c_set) 44 | 45 | if max_element is None: 46 | return default_list + list(char_set.keys()) 47 | else: 48 | max_element -= 4 49 | words_freq = char_set.most_common(max_element) 50 | # unzip the (word, freq) pairs into two parallel arrays 51 | words, freq = zip(*words_freq) 52 | return default_list + list(words) 53 | 54 | 55 | def sentence_to_tensor(sentences, vocab): 56 | vocab_map = {k: i for i, k in enumerate(vocab)} 57 | 58 | def process_word(word): 59 | return vocab_map.get(word, UNK_ID) 60 | 61 | res = [] 62 | for sentence in sentences: 63 |
sentence = np.array(list(map(process_word, sentence)), dtype=np.int32) 64 | res.append(sentence) 65 | 66 | return np.array(res, dtype=object) 67 | 68 | 69 | def tensor_to_sentence(tensor, mapping, insert_space=False): 70 | res = '' 71 | first_word = True 72 | for id in tensor: 73 | word = mapping[int(id.item())] 74 | 75 | if insert_space and not first_word: 76 | res += ' ' 77 | first_word = False 78 | 79 | res += word 80 | 81 | return res 82 | 83 | 84 | def main(): 85 | en_sens, zh_sens = read_file( 86 | 'data/translation2019zh/translation2019zh_valid.json') 87 | print(*en_sens[0:3]) 88 | print(*zh_sens[0:3]) 89 | en_vocab = create_vocab(en_sens, 10000) 90 | zh_vocab = create_vocab(zh_sens, 30000) 91 | print(list(en_vocab)[0:10]) 92 | print(list(zh_vocab)[0:10]) 93 | # np.save('data/translation2019zh/en_vocab.npy', en_vocab) 94 | # np.save('data/translation2019zh/zh_vocab.npy', zh_vocab) 95 | 96 | # en_vocab = np.load('data/translation2019zh/en_dict.npy') 97 | # zh_vocab = np.load('data/translation2019zh/zh_dict.npy') 98 | 99 | en_tensors = sentence_to_tensor(en_sens, en_vocab) 100 | zh_tensors = sentence_to_tensor(zh_sens, zh_vocab) 101 | 102 | print(tensor_to_sentence(en_tensors[0], en_vocab, True)) 103 | print(tensor_to_sentence(zh_tensors[0], zh_vocab)) 104 | 105 | # np.save('data/translation2019zh/en_sentences.npy', en_tensors) 106 | # np.save('data/translation2019zh/zh_sentences.npy', zh_tensors) 107 | 108 | # en_tensors = np.load('data/translation2019zh/en_sentences.npy', 109 | # allow_pickle=True) 110 | # zh_tensors = np.load('data/translation2019zh/zh_sentences.npy', 111 | # allow_pickle=True) 112 | 113 | 114 | if __name__ == '__main__': 115 | main() 116 | -------------------------------------------------------------------------------- /dldemos/Transformer/outdated/dist_train.py: -------------------------------------------------------------------------------- 1 | # import os 2 | # import time 3 | 4 | # import torch 5 | # import torch.distributed as dist 6 | # import torch.nn as nn 7 | # from torch.nn.parallel import DistributedDataParallel 8 | 9 | # from dldemos.Transformer.model import Transformer 10 | # from dldemos.Transformer.preprocess_data import (PAD_ID, get_dataloader, 11 | # load_sentences, load_vocab) 12 | 13 | # # Config 14 | # batch_size = 64 15 | # lr = 0.0001 16 | # d_model = 512 17 | # d_ff = 2048 18 | # n_layers = 6 19 | # heads = 8 20 | 21 | # n_epochs = 40 22 | 23 | # def reduce_mean(tensor, nprocs): 24 | # rt = tensor.clone() 25 | # dist.all_reduce(rt, op=dist.ReduceOp.SUM) 26 | # rt /= nprocs 27 | # return rt 28 | 29 | # def main(): 30 | # dist.init_process_group('nccl') 31 | # rank = dist.get_rank() 32 | # device_id = rank % torch.cuda.device_count() 33 | 34 | # en_vocab, zh_vocab = load_vocab() 35 | 36 | # en_train, zh_train, en_valid, zh_valid = load_sentences() 37 | # dataloader_train, sampler = get_dataloader(en_train, zh_train, 38 | # batch_size, 39 | # True) 40 | # dataloader_valid = get_dataloader(en_valid, zh_valid) 41 | 42 | # print_interval = 1000 43 | 44 | # model = Transformer(len(en_vocab), len(zh_vocab), PAD_ID, d_model, d_ff, 45 | # n_layers, heads) 46 | # model.to(device_id) 47 | 48 | # model = DistributedDataParallel(model, device_ids=[device_id]) 49 | # optimizer = torch.optim.Adam(model.parameters(), lr) 50 | 51 | # # Optional: load model 52 | # ckpt_path = 'dldemos/Transformer/model_latest.pth' 53 | # optim_path = 'dldemos/Transformer/optimizer_latest.pth' 54 | # if os.path.exists(ckpt_path) and os.path.exists(optim_path): 55 | # 
map_location = {'cuda:0': f'cuda:{device_id}'} 56 | # state_dict = torch.load(ckpt_path, map_location=map_location) 57 | # model.module.load_state_dict(state_dict) 58 | # state_dict = torch.load(optim_path, map_location=map_location) 59 | # optimizer.load_state_dict(state_dict) 60 | # begin_epoch = int( 61 | # os.path.split( 62 | # os.readlink(ckpt_path))[-1].split('.')[0].split('_')[1]) + 1 63 | # else: 64 | # begin_epoch = 0 65 | 66 | # citerion = nn.CrossEntropyLoss(ignore_index=PAD_ID) 67 | # tic = time.time() 68 | # cnter = 0 69 | # dataset_len = len(dataloader_train.dataset) 70 | # if device_id == 0: 71 | # print('Dataset size:', dataset_len) 72 | # for epoch in range(begin_epoch, n_epochs): 73 | # sampler.set_epoch(epoch) 74 | 75 | # for x, y in dataloader_train: 76 | # x, y = x.to(device_id), y.to(device_id) 77 | # x_mask = x == PAD_ID 78 | # y_mask = y == PAD_ID 79 | # y_input = y[:, :-1] 80 | # y_label = y[:, 1:] 81 | # y_mask = y_mask[:, :-1] 82 | # y_hat = model(x, y_input, x_mask, y_mask) 83 | # n, seq_len = y_label.shape 84 | # y_hat = torch.reshape(y_hat, (n * seq_len, -1)) 85 | # y_label = torch.reshape(y_label, (n * seq_len, )) 86 | # loss = citerion(y_hat, y_label) 87 | 88 | # y_label_mask = y_label != PAD_ID 89 | # preds = torch.argmax(y_hat, -1) 90 | # correct = preds == y_label 91 | # acc = torch.sum(y_label_mask * correct) / torch.sum(y_label_mask) 92 | 93 | # optimizer.zero_grad() 94 | # loss.backward() 95 | # torch.nn.utils.clip_grad_norm_(model.parameters(), 1) 96 | # optimizer.step() 97 | # loss = reduce_mean(loss, dist.get_world_size()) 98 | # if device_id == 0: 99 | # toc = time.time() 100 | # interval = toc - tic 101 | # minutes = int(interval // 60) 102 | # seconds = int(interval % 60) 103 | # if cnter % print_interval == 0: 104 | # print(f'{cnter:08d} {minutes:02d}:{seconds:02d}' 105 | # f' loss: {loss.item()} acc: {acc.item()}') 106 | # cnter += 1 107 | 108 | # if device_id == 0: 109 | # latest_model = 'dldemos/Transformer/model_latest.pth' 110 | # latest_optimizer = 'dldemos/Transformer/optimizer_latest.pth' 111 | # model_file = f'dldemos/Transformer/model_{epoch}.pth' 112 | # optim_file = f'dldemos/Transformer/optimizer_{epoch}.pth' 113 | # torch.save(model.module.state_dict(), model_file) 114 | # torch.save(optimizer.state_dict(), optim_file) 115 | 116 | # if os.path.exists(latest_model): 117 | # os.remove(latest_model) 118 | # if os.path.exists(latest_optimizer): 119 | # os.remove(latest_optimizer) 120 | 121 | # os.symlink(os.path.abspath(model_file), latest_model) 122 | # os.symlink(os.path.abspath(optim_file), latest_optimizer) 123 | 124 | # print(f'Model saved to {model_file}') 125 | 126 | # dist.barrier() 127 | 128 | # # if valid_period 129 | 130 | # print('Done.') 131 | 132 | # dist.destroy_process_group() 133 | 134 | # if __name__ == '__main__': 135 | # main() 136 | 137 | # # nohup bash dldemos/Transformer/dist_train.sh & 138 | -------------------------------------------------------------------------------- /dldemos/Transformer/outdated/dist_train.sh: -------------------------------------------------------------------------------- 1 | torchrun --nproc_per_node=2 dldemos/Transformer/dist_train.py 2 | -------------------------------------------------------------------------------- /dldemos/Transformer/outdated/preprocess_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.nn.utils.rnn import pad_sequence 4 | from torch.utils.data import DataLoader, Dataset 5 
| from torch.utils.data.distributed import DistributedSampler 6 | 7 | from dldemos.Transformer.dataset import (EOS_ID, PAD_ID, SOS_ID, create_vocab, 8 | read_file, sentence_to_tensor, 9 | tensor_to_sentence) 10 | 11 | 12 | def load_vocab(filename='data/translation2019zh/vocab_30k_80k.npy'): 13 | vocab = np.load(filename, allow_pickle=True).item() 14 | en_vocab = vocab['en'] 15 | zh_vocab = vocab['zh'] 16 | return en_vocab, zh_vocab 17 | 18 | 19 | def load_sentences(filename='data/translation2019zh/sentences.npy'): 20 | tensors = np.load(filename, allow_pickle=True).item() 21 | en_tensors_train = tensors['en_train'] 22 | zh_tensors_train = tensors['zh_train'] 23 | en_tensors_valid = tensors['en_valid'] 24 | zh_tensors_valid = tensors['zh_valid'] 25 | return (en_tensors_train, zh_tensors_train, en_tensors_valid, 26 | zh_tensors_valid) 27 | 28 | 29 | class TranslationDataset(Dataset): 30 | 31 | def __init__(self, en_tensor: np.ndarray, zh_tensor: np.ndarray): 32 | super().__init__() 33 | assert len(en_tensor) == len(zh_tensor) 34 | self.length = len(en_tensor) 35 | self.en_tensor = en_tensor 36 | self.zh_tensor = zh_tensor 37 | 38 | def __len__(self): 39 | return self.length 40 | 41 | def __getitem__(self, index): 42 | x = np.concatenate(([SOS_ID], self.en_tensor[index], [EOS_ID])) 43 | x = torch.from_numpy(x) 44 | y = np.concatenate(([SOS_ID], self.zh_tensor[index], [EOS_ID])) 45 | y = torch.from_numpy(y) 46 | return x, y 47 | 48 | 49 | def get_dataloader(en_tensor: np.ndarray, 50 | zh_tensor: np.ndarray, 51 | batch_size=16, 52 | dist_train=False): 53 | 54 | def collate_fn(batch): 55 | x, y = zip(*batch) 56 | x_pad = pad_sequence(x, batch_first=True, padding_value=PAD_ID) 57 | y_pad = pad_sequence(y, batch_first=True, padding_value=PAD_ID) 58 | 59 | return x_pad, y_pad 60 | 61 | dataset = TranslationDataset(en_tensor, zh_tensor) 62 | if dist_train: 63 | sampler = DistributedSampler(dataset) 64 | dataloader = DataLoader(dataset, 65 | batch_size=batch_size, 66 | sampler=sampler, 67 | collate_fn=collate_fn) 68 | return dataloader, sampler 69 | else: 70 | dataloader = DataLoader(dataset, 71 | batch_size=batch_size, 72 | shuffle=True, 73 | collate_fn=collate_fn) 74 | return dataloader 75 | 76 | 77 | def test1(): 78 | # en_sens_train, zh_sens_train = read_file( 79 | # 'data/translation2019zh/translation2019zh_train.json') 80 | en_sens_valid, zh_sens_valid = read_file( 81 | 'data/translation2019zh/translation2019zh_valid.json') 82 | en_vocab = create_vocab(en_sens_valid, 10000) 83 | zh_vocab = create_vocab(zh_sens_valid, 30000) 84 | 85 | en_tensors_valid = sentence_to_tensor(en_sens_valid, en_vocab) 86 | zh_tensors_valid = sentence_to_tensor(zh_sens_valid, zh_vocab) 87 | print(tensor_to_sentence(en_tensors_valid[1], en_vocab, True)) 88 | print(tensor_to_sentence(zh_tensors_valid[1], zh_vocab)) 89 | ds = TranslationDataset(en_tensors_valid, zh_tensors_valid) 90 | print(tensor_to_sentence(ds[1][0], en_vocab, True)) 91 | print(tensor_to_sentence(ds[1][1], zh_vocab)) 92 | dl = get_dataloader(en_tensors_valid, zh_tensors_valid) 93 | e, z = next(iter(dl)) 94 | print(tensor_to_sentence(e[0], en_vocab, True)) 95 | print(tensor_to_sentence(z[0], zh_vocab)) 96 | 97 | 98 | def test2(): 99 | en_vocab, zh_vocab = load_vocab() 100 | 101 | en_train, zh_train, en_valid, zh_valid = load_sentences() 102 | dataloader_train = get_dataloader(en_train, zh_train) 103 | dataloader_valid = get_dataloader(en_valid, zh_valid) 104 | 105 | en_batch, zh_batch = next(iter(dataloader_train)) 106 | 
print(tensor_to_sentence(en_batch[2], en_vocab, True)) 107 | print(tensor_to_sentence(zh_batch[2], zh_vocab, False)) 108 | 109 | en_batch, zh_batch = next(iter(dataloader_valid)) 110 | print(tensor_to_sentence(en_batch[2], en_vocab, True)) 111 | print(tensor_to_sentence(zh_batch[2], zh_vocab, False)) 112 | 113 | 114 | def main(): 115 | 116 | en_sens_train, zh_sens_train = read_file( 117 | 'data/translation2019zh/translation2019zh_train.json') 118 | en_sens_valid, zh_sens_valid = read_file( 119 | 'data/translation2019zh/translation2019zh_valid.json') 120 | en_vocab = create_vocab(en_sens_train, 30000) 121 | zh_vocab = create_vocab(zh_sens_train, 80000) 122 | vocab = {'en': en_vocab, 'zh': zh_vocab} 123 | np.save('data/translation2019zh/vocab_30k_80k.npy', vocab) 124 | 125 | en_tensors_train = sentence_to_tensor(en_sens_train, en_vocab) 126 | zh_tensors_train = sentence_to_tensor(zh_sens_train, zh_vocab) 127 | en_tensors_valid = sentence_to_tensor(en_sens_valid, en_vocab) 128 | zh_tensors_valid = sentence_to_tensor(zh_sens_valid, zh_vocab) 129 | tensors = { 130 | 'en_train': en_tensors_train, 131 | 'zh_train': zh_tensors_train, 132 | 'en_valid': en_tensors_valid, 133 | 'zh_valid': zh_tensors_valid 134 | } 135 | np.save('data/translation2019zh/sentences.npy', tensors) 136 | 137 | 138 | if __name__ == '__main__': 139 | # test1() 140 | # test2() 141 | main() 142 | -------------------------------------------------------------------------------- /dldemos/Transformer/outdated/test.py: -------------------------------------------------------------------------------- 1 | # import torch 2 | 3 | # from dldemos.Transformer.dataset import MAX_SEQ_LEN, tensor_to_sentence 4 | # from dldemos.Transformer.model import Transformer 5 | # from dldemos.Transformer.outdated.preprocess_data import (EOS_ID, PAD_ID, 6 | # SOS_ID, 7 | # get_dataloader, 8 | # load_sentences, 9 | # load_vocab) 10 | 11 | # # Config 12 | # batch_size = 64 13 | # lr = 0.0001 14 | # d_model = 512 15 | # d_ff = 2048 16 | # n_layers = 6 17 | # heads = 8 18 | 19 | # def main(): 20 | # model_path = 'dldemos/Transformer/model_latest.pth' 21 | 22 | # device = 'cuda' 23 | # en_vocab, zh_vocab = load_vocab() 24 | 25 | # en_train, zh_train, en_valid, zh_valid = load_sentences() 26 | # dataloader_valid = get_dataloader(en_train, zh_train, 1) 27 | 28 | # model = Transformer(len(en_vocab), 29 | # len(zh_vocab), 30 | # PAD_ID, 31 | # d_model, 32 | # d_ff, 33 | # n_layers, 34 | # heads, 35 | # max_seq_len=MAX_SEQ_LEN) 36 | # model.to(device) 37 | # model.load_state_dict(torch.load(model_path)) 38 | 39 | # cnt = 0 40 | # for x, y in dataloader_valid: 41 | # x, y = x.to(device), y.to(device) 42 | # x_mask = x == PAD_ID 43 | # n = x.shape[0] 44 | # sample = torch.ones(n, MAX_SEQ_LEN, 45 | # dtype=torch.long).to(device) * PAD_ID 46 | # sample[:, 0] = SOS_ID 47 | # print(tensor_to_sentence(x[0], en_vocab, True)) 48 | # print(tensor_to_sentence(y[0], zh_vocab)) 49 | # for i in range(50): 50 | # sample_mask = sample == PAD_ID 51 | # y_predict = model(x, sample, x_mask, sample_mask) 52 | # y_predict = y_predict[:, i] 53 | # prob_dist = torch.softmax(y_predict, 1) 54 | # #new_word = torch.multinomial(prob_dist, 1) 55 | # _, new_word = torch.max(prob_dist, 1) 56 | # sample[:, i + 1] = new_word 57 | # print(tensor_to_sentence(sample[0], zh_vocab)) 58 | # cnt += 1 59 | # if cnt == 5: 60 | # break 61 | 62 | # print('Done.') 63 | 64 | # if __name__ == '__main__': 65 | # main() 66 | -------------------------------------------------------------------------------- 
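Both the archived training script below and the current `train.py` derive the decoder input and the loss target by shifting the padded target batch one position, so the model predicts the next token at every position. A minimal sketch of this teacher-forcing shift (toy token ids, values hypothetical):

```python
import torch

y = torch.tensor([[1, 5, 7, 9, 2]])  # e.g. <sos> w1 w2 w3 <eos>
y_input = y[:, :-1]  # decoder input: <sos> w1 w2 w3
y_label = y[:, 1:]   # loss target:   w1 w2 w3 <eos>
assert y_input.shape == y_label.shape  # one prediction per input position
```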
/dldemos/Transformer/outdated/train.py: -------------------------------------------------------------------------------- 1 | # import time 2 | 3 | # import numpy as np 4 | # import torch 5 | # import torch.nn as nn 6 | 7 | # from dldemos.Transformer.dataset import tensor_to_sentence 8 | # from dldemos.Transformer.model import Transformer 9 | # from dldemos.Transformer.preprocess_data import (PAD_ID, get_dataloader, 10 | # load_sentences, load_vocab) 11 | 12 | # # Config 13 | # batch_size = 64 14 | # lr = 0.0001 15 | # d_model = 512 16 | # d_ff = 1024 17 | # n_layers = 6 18 | # heads = 8 19 | 20 | # def main(): 21 | # en_vocab, zh_vocab = load_vocab() 22 | 23 | # en_train, zh_train, en_valid, zh_valid = load_sentences() 24 | # dataloader_train = get_dataloader(en_train, zh_train, batch_size) 25 | 26 | # print_interval = 1000 27 | # device_id = 0 28 | 29 | # model = Transformer(len(en_vocab), len(zh_vocab), d_model, d_ff, 30 | # n_layers, 31 | # heads) 32 | # model.to(device_id) 33 | 34 | # model.init_weights() 35 | 36 | # optimizer = torch.optim.Adam(model.parameters(), lr) 37 | # citerion = nn.CrossEntropyLoss(ignore_index=PAD_ID) 38 | # tic = time.time() 39 | # cnter = 0 40 | # dataset_len = len(dataloader_train.dataset) 41 | # print('Dataset size:', dataset_len) 42 | # for epoch in range(10): 43 | # loss_sum = 0 44 | 45 | # for x, y in dataloader_train: 46 | # x, y = x.to(device_id), y.to(device_id) 47 | # x_mask = x == PAD_ID 48 | # y_mask = y == PAD_ID 49 | # y_input = y[:, :-1] 50 | # y_label = y[:, 1:] 51 | # y_mask = y_mask[:, :-1] 52 | # y_hat = model(x, y_input, x_mask, y_mask) 53 | # n, seq_len = y_label.shape 54 | # y_hat = torch.reshape(y_hat, (n * seq_len, -1)) 55 | # y_label = torch.reshape(y_label, (n * seq_len, )) 56 | # loss = citerion(y_hat, y_label) 57 | 58 | # optimizer.zero_grad() 59 | # loss.backward() 60 | # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) 61 | # optimizer.step() 62 | 63 | # loss_sum += loss.item() 64 | 65 | # toc = time.time() 66 | # interval = toc - tic 67 | # minutes = int(interval // 60) 68 | # seconds = int(interval % 60) 69 | # if cnter % print_interval == 0: 70 | # print(f'{cnter:08d} {minutes:02d}:{seconds:02d}' 71 | # f' loss: {loss.item()}') 72 | # cnter += 1 73 | 74 | # print(f'Epoch {epoch}. 
loss: {loss_sum / dataset_len}') 75 | 76 | # torch.save(model.state_dict(), 'dldemos/Transformer/model.pth') 77 | # print('Done.') 78 | -------------------------------------------------------------------------------- /dldemos/Transformer/train.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from dldemos.Transformer.data_load import (get_batch_indices, load_cn_vocab, 7 | load_en_vocab, load_train_data, 8 | maxlen) 9 | from dldemos.Transformer.model import Transformer 10 | 11 | # Config 12 | batch_size = 64 13 | lr = 0.0001 14 | d_model = 512 15 | d_ff = 2048 16 | n_layers = 6 17 | heads = 8 18 | dropout_rate = 0.2 19 | n_epochs = 60 20 | PAD_ID = 0 21 | 22 | 23 | def main(): 24 | device = 'cuda' 25 | cn2idx, idx2cn = load_cn_vocab() 26 | en2idx, idx2en = load_en_vocab() 27 | # X: en 28 | # Y: cn 29 | Y, X = load_train_data() 30 | 31 | print_interval = 100 32 | 33 | model = Transformer(len(en2idx), len(cn2idx), PAD_ID, d_model, d_ff, 34 | n_layers, heads, dropout_rate, maxlen) 35 | model.to(device) 36 | 37 | optimizer = torch.optim.Adam(model.parameters(), lr) 38 | 39 | citerion = nn.CrossEntropyLoss(ignore_index=PAD_ID) 40 | tic = time.time() 41 | cnter = 0 42 | for epoch in range(n_epochs): 43 | for index, _ in get_batch_indices(len(X), batch_size): 44 | x_batch = torch.LongTensor(X[index]).to(device) 45 | y_batch = torch.LongTensor(Y[index]).to(device) 46 | y_input = y_batch[:, :-1] 47 | y_label = y_batch[:, 1:] 48 | y_hat = model(x_batch, y_input) 49 | 50 | y_label_mask = y_label != PAD_ID 51 | preds = torch.argmax(y_hat, -1) 52 | correct = preds == y_label 53 | acc = torch.sum(y_label_mask * correct) / torch.sum(y_label_mask) 54 | 55 | n, seq_len = y_label.shape 56 | y_hat = torch.reshape(y_hat, (n * seq_len, -1)) 57 | y_label = torch.reshape(y_label, (n * seq_len, )) 58 | loss = citerion(y_hat, y_label) 59 | 60 | optimizer.zero_grad() 61 | loss.backward() 62 | torch.nn.utils.clip_grad_norm_(model.parameters(), 1) 63 | optimizer.step() 64 | 65 | if cnter % print_interval == 0: 66 | toc = time.time() 67 | interval = toc - tic 68 | minutes = int(interval // 60) 69 | seconds = int(interval % 60) 70 | print(f'{cnter:08d} {minutes:02d}:{seconds:02d}' 71 | f' loss: {loss.item()} acc: {acc.item()}') 72 | cnter += 1 73 | 74 | model_path = 'dldemos/Transformer/model.pth' 75 | torch.save(model.state_dict(), model_path) 76 | 77 | print(f'Model saved to {model_path}') 78 | 79 | 80 | if __name__ == '__main__': 81 | main() 82 | -------------------------------------------------------------------------------- /dldemos/Transformer/translate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from dldemos.Transformer.data_load import (idx_to_sentence, load_cn_vocab, 4 | load_en_vocab, maxlen) 5 | from dldemos.Transformer.model import Transformer 6 | 7 | # Config 8 | batch_size = 1 9 | lr = 0.0001 10 | d_model = 512 11 | d_ff = 2048 12 | n_layers = 6 13 | heads = 8 14 | dropout_rate = 0.2 15 | n_epochs = 60 16 | 17 | PAD_ID = 0 18 | 19 | 20 | def main(): 21 | device = 'cuda' 22 | cn2idx, idx2cn = load_cn_vocab() 23 | en2idx, idx2en = load_en_vocab() 24 | 25 | model = Transformer(len(en2idx), len(cn2idx), 0, d_model, d_ff, n_layers, 26 | heads, dropout_rate, maxlen) 27 | model.to(device) 28 | model.eval() 29 | 30 | model_path = 'dldemos/Transformer/model.pth' 31 | model.load_state_dict(torch.load(model_path)) 32 | 33 | my_input = ['we', 
'should', 'protect', 'environment']
34 | x_batch = torch.LongTensor([[en2idx[x] for x in my_input]]).to(device)
35 |
36 | en_sentence = idx_to_sentence(x_batch[0], idx2en, True)
37 | print(en_sentence)
38 |
39 | y_input = torch.ones(batch_size, maxlen,
40 | dtype=torch.long).to(device) * PAD_ID
41 | y_input[0] = en2idx['<S>']
42 | # y_input = y_batch
43 | with torch.no_grad():
44 | for i in range(1, y_input.shape[1]):
45 | y_hat = model(x_batch, y_input)
46 | for j in range(batch_size):
47 | y_input[j, i] = torch.argmax(y_hat[j, i - 1])
48 | output_sentence = idx_to_sentence(y_input[0], idx2cn, True)
49 | print(output_sentence)
50 |
51 |
52 | if __name__ == '__main__':
53 | main()
54 |
--------------------------------------------------------------------------------
/dldemos/VAE/README.md:
--------------------------------------------------------------------------------
1 | 1. Download [CelebA](https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) Align&Cropped Images.
2 | 2. Modify the dataset path used by the `get_dataloader` function in `main.py`.
3 | 3. Run `main.py`. You can choose which of the `train`, `reconstruct`, and `generate` functions to run.
4 |
5 | Acknowledgement: The code is inspired by [PyTorch-VAE](https://github.com/AntixK/PyTorch-VAE).
6 |
--------------------------------------------------------------------------------
/dldemos/VAE/load_celebA.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import torch
4 | from PIL import Image
5 | from torch.utils.data import DataLoader, Dataset
6 | from torchvision import transforms
7 |
8 |
9 | class CelebADataset(Dataset):
10 |
11 | def __init__(self, root, img_shape=(64, 64)) -> None:
12 | super().__init__()
13 | self.root = root
14 | self.img_shape = img_shape
15 | self.filenames = sorted(os.listdir(root))
16 |
17 | def __len__(self) -> int:
18 | return len(self.filenames)
19 |
20 | def __getitem__(self, index: int):
21 | path = os.path.join(self.root, self.filenames[index])
22 | img = Image.open(path).convert('RGB')
23 | pipeline = transforms.Compose([
24 | transforms.CenterCrop(168),
25 | transforms.Resize(self.img_shape),
26 | transforms.ToTensor()
27 | ])
28 | return pipeline(img)
29 |
30 |
31 | def get_dataloader(root='data/celebA/img_align_celeba', **kwargs):
32 | dataset = CelebADataset(root, **kwargs)
33 | return DataLoader(dataset, 16, shuffle=True)
34 |
35 |
36 | if __name__ == '__main__':
37 | dataloader = get_dataloader()
38 | img = next(iter(dataloader))
39 | print(img.shape)
40 | # Concat 4x4 images
41 | N, C, H, W = img.shape
42 | assert N == 16
43 | img = torch.permute(img, (1, 0, 2, 3))
44 | img = torch.reshape(img, (C, 4, 4 * H, W))
45 | img = torch.permute(img, (0, 2, 1, 3))
46 | img = torch.reshape(img, (C, 4 * H, 4 * W))
47 | img = transforms.ToPILImage()(img)
48 | img.save('work_dirs/tmp.jpg')
49 |
--------------------------------------------------------------------------------
/dldemos/VAE/main.py:
--------------------------------------------------------------------------------
1 | from time import time
2 |
3 | import torch
4 | import torch.nn.functional as F
5 | from torchvision.transforms import ToPILImage
6 |
7 | from dldemos.VAE.load_celebA import get_dataloader
8 | from dldemos.VAE.model import VAE
9 |
10 | # Hyperparameters
11 | n_epochs = 10
12 | kl_weight = 0.00025
13 | lr = 0.005
14 |
15 |
16 | def loss_fn(y, y_hat, mean, logvar):
17 | recons_loss = F.mse_loss(y_hat, y)
18 | kl_loss = torch.mean(
19 | -0.5 * torch.sum(1 + logvar - mean**2 - torch.exp(logvar), 1), 0)
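# The `kl_loss` above is the closed-form KL divergence between the
# approximate posterior N(mean, diag(exp(logvar))) and the standard normal
# prior, KL = -0.5 * sum_j(1 + logvar_j - mean_j**2 - exp(logvar_j)),
# summed over latent dimensions and averaged over the batch. `kl_weight`
# below balances it against the reconstruction term.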
20 | loss = recons_loss + kl_loss * kl_weight
21 | return loss
22 |
23 |
24 | def train(device, dataloader, model):
25 | optimizer = torch.optim.Adam(model.parameters(), lr)
26 | dataset_len = len(dataloader.dataset)
27 |
28 | begin_time = time()
29 | # train
30 | for i in range(n_epochs):
31 | loss_sum = 0
32 | for x in dataloader:
33 | x = x.to(device)
34 | y_hat, mean, logvar = model(x)
35 | loss = loss_fn(x, y_hat, mean, logvar)
36 | optimizer.zero_grad()
37 | loss.backward()
38 | optimizer.step()
39 | loss_sum += loss.item() * x.shape[0]
40 | loss_sum /= dataset_len
41 | training_time = time() - begin_time
42 | minute = int(training_time // 60)
43 | second = int(training_time % 60)
44 | print(f'epoch {i}: loss {loss_sum} {minute}:{second}')
45 | torch.save(model.state_dict(), 'dldemos/VAE/model.pth')
46 |
47 |
48 | def reconstruct(device, dataloader, model):
49 | model.eval()
50 | batch = next(iter(dataloader))
51 | x = batch[0:1, ...].to(device)
52 | output = model(x)[0]
53 | output = output[0].detach().cpu()
54 | input_img = batch[0].detach().cpu()
55 | combined = torch.cat((output, input_img), 1)
56 | img = ToPILImage()(combined)
57 | img.save('work_dirs/tmp.jpg')
58 |
59 |
60 | def generate(device, model):
61 | model.eval()
62 | output = model.sample(device)
63 | output = output[0].detach().cpu()
64 | img = ToPILImage()(output)
65 | img.save('work_dirs/tmp.jpg')
66 |
67 |
68 | def main():
69 | device = 'cuda:0'
70 | dataloader = get_dataloader()
71 |
72 | model = VAE().to(device)
73 |
74 | # If you already have a checkpoint, load it (comment out to train from scratch)
75 | model.load_state_dict(torch.load('dldemos/VAE/model.pth', 'cuda:0'))
76 |
77 | # Choose which functions to run
78 | train(device, dataloader, model)
79 | reconstruct(device, dataloader, model)
80 | generate(device, model)
81 |
82 |
83 | if __name__ == '__main__':
84 | main()
85 |
--------------------------------------------------------------------------------
/dldemos/VAE/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class VAE(nn.Module):
6 | """VAE for 64x64 face generation.
7 |
8 | The hidden dimensions can be tuned.
9 | """ 10 | 11 | def __init__(self, hiddens=[16, 32, 64, 128, 256], latent_dim=128) -> None: 12 | super().__init__() 13 | 14 | # encoder 15 | prev_channels = 3 16 | modules = [] 17 | img_length = 64 18 | for cur_channels in hiddens: 19 | modules.append( 20 | nn.Sequential( 21 | nn.Conv2d(prev_channels, 22 | cur_channels, 23 | kernel_size=3, 24 | stride=2, 25 | padding=1), nn.BatchNorm2d(cur_channels), 26 | nn.ReLU())) 27 | prev_channels = cur_channels 28 | img_length //= 2 29 | self.encoder = nn.Sequential(*modules) 30 | self.mean_linear = nn.Linear(prev_channels * img_length * img_length, 31 | latent_dim) 32 | self.var_linear = nn.Linear(prev_channels * img_length * img_length, 33 | latent_dim) 34 | self.latent_dim = latent_dim 35 | # decoder 36 | modules = [] 37 | self.decoder_projection = nn.Linear( 38 | latent_dim, prev_channels * img_length * img_length) 39 | self.decoder_input_chw = (prev_channels, img_length, img_length) 40 | for i in range(len(hiddens) - 1, 0, -1): 41 | modules.append( 42 | nn.Sequential( 43 | nn.ConvTranspose2d(hiddens[i], 44 | hiddens[i - 1], 45 | kernel_size=3, 46 | stride=2, 47 | padding=1, 48 | output_padding=1), 49 | nn.BatchNorm2d(hiddens[i - 1]), nn.ReLU())) 50 | modules.append( 51 | nn.Sequential( 52 | nn.ConvTranspose2d(hiddens[0], 53 | hiddens[0], 54 | kernel_size=3, 55 | stride=2, 56 | padding=1, 57 | output_padding=1), 58 | nn.BatchNorm2d(hiddens[0]), nn.ReLU(), 59 | nn.Conv2d(hiddens[0], 3, kernel_size=3, stride=1, padding=1), 60 | nn.ReLU())) 61 | self.decoder = nn.Sequential(*modules) 62 | 63 | def forward(self, x): 64 | encoded = self.encoder(x) 65 | encoded = torch.flatten(encoded, 1) 66 | mean = self.mean_linear(encoded) 67 | logvar = self.var_linear(encoded) 68 | eps = torch.randn_like(logvar) 69 | std = torch.exp(logvar / 2) 70 | z = eps * std + mean 71 | x = self.decoder_projection(z) 72 | x = torch.reshape(x, (-1, *self.decoder_input_chw)) 73 | decoded = self.decoder(x) 74 | 75 | return decoded, mean, logvar 76 | 77 | def sample(self, device='cuda'): 78 | z = torch.randn(1, self.latent_dim).to(device) 79 | x = self.decoder_projection(z) 80 | x = torch.reshape(x, (-1, *self.decoder_input_chw)) 81 | decoded = self.decoder(x) 82 | return decoded 83 | -------------------------------------------------------------------------------- /dldemos/VQVAE/configs.py: -------------------------------------------------------------------------------- 1 | mnist_cfg1 = dict(dataset_type='MNIST', 2 | img_shape=(1, 28, 28), 3 | dim=32, 4 | n_embedding=32, 5 | batch_size=256, 6 | n_epochs=20, 7 | l_w_embedding=1, 8 | l_w_commitment=0.25, 9 | lr=2e-4, 10 | n_epochs_2=50, 11 | batch_size_2=256, 12 | pixelcnn_n_blocks=15, 13 | pixelcnn_dim=128, 14 | pixelcnn_linear_dim=32, 15 | vqvae_path='dldemos/VQVAE/model_mnist.pth', 16 | gen_model_path='dldemos/VQVAE/gen_model_mnist.pth') 17 | 18 | celebahq_cfg1 = dict(dataset_type='CelebAHQ', 19 | img_shape=(3, 128, 128), 20 | dim=128, 21 | n_embedding=64, 22 | batch_size=64, 23 | n_epochs=30, 24 | l_w_embedding=1, 25 | l_w_commitment=0.25, 26 | lr=2e-4, 27 | n_epochs_2=200, 28 | batch_size_2=32, 29 | pixelcnn_n_blocks=15, 30 | pixelcnn_dim=384, 31 | pixelcnn_linear_dim=256, 32 | vqvae_path='dldemos/VQVAE/model_celebahq_1.pth', 33 | gen_model_path='dldemos/VQVAE/gen_model_celebahq_1.pth') 34 | 35 | celebahq_cfg2 = dict(dataset_type='CelebAHQ', 36 | img_shape=(3, 128, 128), 37 | dim=128, 38 | n_embedding=128, 39 | batch_size=64, 40 | n_epochs=30, 41 | l_w_embedding=1, 42 | l_w_commitment=0.25, 43 | lr=2e-4, 44 | n_epochs_2=200, 
45 | batch_size_2=32, 46 | pixelcnn_n_blocks=15, 47 | pixelcnn_dim=384, 48 | pixelcnn_linear_dim=256, 49 | vqvae_path='dldemos/VQVAE/model_celebahq_2.pth', 50 | gen_model_path='dldemos/VQVAE/gen_model_celebahq_2.pth') 51 | 52 | celebahq_cfg3 = dict(dataset_type='CelebAHQ', 53 | img_shape=(3, 64, 64), 54 | dim=128, 55 | n_embedding=64, 56 | batch_size=64, 57 | n_epochs=20, 58 | l_w_embedding=1, 59 | l_w_commitment=0.25, 60 | lr=2e-4, 61 | n_epochs_2=200, 62 | batch_size_2=32, 63 | pixelcnn_n_blocks=15, 64 | pixelcnn_dim=384, 65 | pixelcnn_linear_dim=256, 66 | vqvae_path='dldemos/VQVAE/model_celebahq_3.pth', 67 | gen_model_path='dldemos/VQVAE/gen_model_celebahq_3.pth') 68 | 69 | celebahq_cfg4 = dict(dataset_type='CelebAHQ', 70 | img_shape=(3, 64, 64), 71 | dim=128, 72 | n_embedding=32, 73 | batch_size=64, 74 | n_epochs=20, 75 | l_w_embedding=1, 76 | l_w_commitment=0.25, 77 | lr=2e-4, 78 | n_epochs_2=100, 79 | batch_size_2=32, 80 | pixelcnn_n_blocks=15, 81 | pixelcnn_dim=384, 82 | pixelcnn_linear_dim=256, 83 | vqvae_path='dldemos/VQVAE/model_celebahq_4.pth', 84 | gen_model_path='dldemos/VQVAE/gen_model_celebahq_4.pth') 85 | 86 | cfgs = [mnist_cfg1, celebahq_cfg1, celebahq_cfg2, celebahq_cfg3, celebahq_cfg4] 87 | 88 | 89 | def get_cfg(id: int): 90 | return cfgs[id] 91 | -------------------------------------------------------------------------------- /dldemos/VQVAE/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import einops 4 | import torchvision 5 | from PIL import Image 6 | from torch.utils.data import DataLoader, Dataset 7 | from torch.utils.data.distributed import DistributedSampler 8 | from torchvision import transforms 9 | 10 | # Set this tp `True` and run this script to convert dataset to LMDB format 11 | TO_LMDB = False 12 | 13 | CELEBA_DIR = 'data/celebA/img_align_celeba' 14 | CELEBA_LMDB_PATH = 'data/celebA/img_align_celeba.lmdb' 15 | CELEBA_HQ_DIR = 'data/celebA/celeba_hq_256' 16 | CELEBA_HQ_LMDB_PATH = 'data/celebA/celeba_hq_256.lmdb' 17 | 18 | 19 | def download_mnist(): 20 | mnist = torchvision.datasets.MNIST(root='./data/mnist', download=True) 21 | print('length of MNIST', len(mnist)) 22 | id = 4 23 | img, label = mnist[id] 24 | print(img) 25 | print(label) 26 | 27 | # On computer with monitor 28 | # img.show() 29 | 30 | img.save('work_dirs/tmp_mnist.jpg') 31 | tensor = transforms.ToTensor()(img) 32 | print(tensor.shape) 33 | print(tensor.max()) 34 | print(tensor.min()) 35 | 36 | 37 | class CelebADataset(Dataset): 38 | 39 | def __init__(self, root, img_shape=(64, 64)): 40 | super().__init__() 41 | self.root = root 42 | self.img_shape = img_shape 43 | self.filenames = sorted(os.listdir(root)) 44 | 45 | def __len__(self) -> int: 46 | return len(self.filenames) 47 | 48 | def __getitem__(self, index: int): 49 | path = os.path.join(self.root, self.filenames[index]) 50 | img = Image.open(path) 51 | pipeline = transforms.Compose([ 52 | transforms.CenterCrop(168), 53 | transforms.Resize(self.img_shape), 54 | transforms.ToTensor() 55 | ]) 56 | return pipeline(img) 57 | 58 | 59 | if TO_LMDB: 60 | from dldemos.lmdb_loader import ImageFolderLMDB 61 | 62 | class CelebALMDBDataset(ImageFolderLMDB): 63 | 64 | def __init__(self, path, img_shape=(64, 64)): 65 | pipeline = transforms.Compose([ 66 | transforms.CenterCrop(168), 67 | transforms.Resize(img_shape), 68 | transforms.ToTensor() 69 | ]) 70 | super().__init__(path, pipeline) 71 | 72 | 73 | class MNISTImageDataset(Dataset): 74 | 75 | def __init__(self, img_shape=(28, 28)): 
76 | super().__init__() 77 | self.img_shape = img_shape 78 | self.mnist = torchvision.datasets.MNIST(root='./data/mnist') 79 | 80 | def __len__(self): 81 | return len(self.mnist) 82 | 83 | def __getitem__(self, index: int): 84 | img = self.mnist[index][0] 85 | pipeline = transforms.Compose( 86 | [transforms.Resize(self.img_shape), 87 | transforms.ToTensor()]) 88 | return pipeline(img) 89 | 90 | 91 | def get_dataloader(type, 92 | batch_size, 93 | img_shape=None, 94 | dist_train=False, 95 | num_workers=4, 96 | use_lmdb=False, 97 | **kwargs): 98 | if type == 'CelebA': 99 | if img_shape is not None: 100 | kwargs['img_shape'] = img_shape 101 | if use_lmdb: 102 | dataset = CelebALMDBDataset(CELEBA_LMDB_PATH, **kwargs) 103 | else: 104 | dataset = CelebADataset(CELEBA_DIR, **kwargs) 105 | elif type == 'CelebAHQ': 106 | if img_shape is not None: 107 | kwargs['img_shape'] = img_shape 108 | if use_lmdb: 109 | dataset = CelebALMDBDataset(CELEBA_HQ_LMDB_PATH, **kwargs) 110 | else: 111 | dataset = CelebADataset(CELEBA_HQ_DIR, **kwargs) 112 | elif type == 'MNIST': 113 | if img_shape is not None: 114 | dataset = MNISTImageDataset(img_shape) 115 | else: 116 | dataset = MNISTImageDataset() 117 | if dist_train: 118 | sampler = DistributedSampler(dataset) 119 | dataloader = DataLoader(dataset, 120 | batch_size=batch_size, 121 | sampler=sampler, 122 | num_workers=num_workers) 123 | return dataloader, sampler 124 | else: 125 | dataloader = DataLoader(dataset, 126 | batch_size=batch_size, 127 | shuffle=True, 128 | num_workers=num_workers) 129 | return dataloader 130 | 131 | 132 | if __name__ == '__main__': 133 | os.makedirs('work_dirs', exist_ok=True) 134 | 135 | if os.path.exists(CELEBA_DIR): 136 | dataloader = get_dataloader('CelebA', 16) 137 | img = next(iter(dataloader)) 138 | print(img.shape) 139 | N = img.shape[0] 140 | img = einops.rearrange(img, 141 | '(n1 n2) c h w -> c (n1 h) (n2 w)', 142 | n1=int(N**0.5)) 143 | print(img.shape) 144 | print(img.max()) 145 | print(img.min()) 146 | img = transforms.ToPILImage()(img) 147 | img.save('work_dirs/tmp_celeba.jpg') 148 | if TO_LMDB: 149 | from dldemos.lmdb_loader import folder2lmdb 150 | folder2lmdb(CELEBA_DIR, CELEBA_LMDB_PATH) 151 | 152 | if os.path.exists(CELEBA_HQ_DIR): 153 | dataloader = get_dataloader('CelebAHQ', 16) 154 | img = next(iter(dataloader)) 155 | print(img.shape) 156 | N = img.shape[0] 157 | img = einops.rearrange(img, 158 | '(n1 n2) c h w -> c (n1 h) (n2 w)', 159 | n1=int(N**0.5)) 160 | print(img.shape) 161 | print(img.max()) 162 | print(img.min()) 163 | img = transforms.ToPILImage()(img) 164 | img.save('work_dirs/tmp_celebahq.jpg') 165 | if TO_LMDB: 166 | from dldemos.lmdb_loader import folder2lmdb 167 | folder2lmdb(CELEBA_HQ_DIR, CELEBA_HQ_LMDB_PATH) 168 | 169 | download_mnist() 170 | -------------------------------------------------------------------------------- /dldemos/VQVAE/dist_train_pixelcnn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | 5 | import torch 6 | import torch.distributed as dist 7 | import torch.nn as nn 8 | from torch.nn.parallel import DistributedDataParallel 9 | 10 | from dldemos.VQVAE.configs import get_cfg 11 | from dldemos.VQVAE.dataset import get_dataloader 12 | from dldemos.VQVAE.model import VQVAE 13 | from dldemos.VQVAE.pixelcnn_model import PixelCNNWithEmbedding 14 | 15 | USE_LMDB = True 16 | 17 | 18 | def reduce_sum(tensor): 19 | rt = tensor.clone() 20 | dist.all_reduce(rt, op=dist.ReduceOp.SUM) 21 | return rt 22 | 23 | 
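# `reduce_sum` adds a tensor across all ranks. In the training loop below,
# the rank-summed batch-mean loss is weighted by the local batch size and
# accumulated; dividing by len(dataset) at the end of the epoch then gives
# (approximately) the dataset-average loss, since each rank only visits
# len(dataset) / world_size samples per epoch (assuming equal per-rank
# batch sizes, which DistributedSampler provides).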
24 | def train_generative_model(vqvae: VQVAE, 25 | model, 26 | img_shape=None, 27 | device='cuda', 28 | ckpt_path='dldemos/VQVAE/gen_model.pth', 29 | dataset_type='MNIST', 30 | batch_size=64, 31 | n_epochs=50): 32 | print('batch size:', batch_size) 33 | dataloader, sampler = get_dataloader(dataset_type, 34 | batch_size, 35 | img_shape=img_shape, 36 | dist_train=True, 37 | use_lmdb=USE_LMDB) 38 | vqvae.to(device) 39 | vqvae.eval() 40 | model.to(device) 41 | model.train() 42 | optimizer = torch.optim.Adam(model.parameters(), 1e-3) 43 | loss_fn = nn.CrossEntropyLoss() 44 | tic = time.time() 45 | for e in range(n_epochs): 46 | total_loss = 0 47 | sampler.set_epoch(e) 48 | for x in dataloader: 49 | current_batch_size = x.shape[0] 50 | with torch.no_grad(): 51 | x = x.to(device) 52 | x = vqvae.encode(x) 53 | 54 | predict_x = model(x) 55 | loss = loss_fn(predict_x, x) 56 | optimizer.zero_grad() 57 | loss.backward() 58 | optimizer.step() 59 | loss = reduce_sum(loss) 60 | total_loss += loss * current_batch_size 61 | total_loss /= len(dataloader.dataset) 62 | toc = time.time() 63 | if device == 0: 64 | torch.save(model.module.state_dict(), ckpt_path) 65 | print(f'epoch {e} loss: {total_loss} elapsed {(toc - tic):.2f}s') 66 | dist.barrier() 67 | 68 | print('Done') 69 | 70 | 71 | if __name__ == '__main__': 72 | dist.init_process_group('nccl') 73 | 74 | os.makedirs('work_dirs', exist_ok=True) 75 | 76 | parser = argparse.ArgumentParser() 77 | parser.add_argument('-c', type=int, default=0) 78 | args = parser.parse_args() 79 | cfg = get_cfg(args.c) 80 | 81 | img_shape = cfg['img_shape'] 82 | rank = dist.get_rank() 83 | device = rank % torch.cuda.device_count() 84 | 85 | vqvae = VQVAE(img_shape[0], cfg['dim'], cfg['n_embedding']) 86 | gen_model = PixelCNNWithEmbedding(cfg['pixelcnn_n_blocks'], 87 | cfg['pixelcnn_dim'], 88 | cfg['pixelcnn_linear_dim'], True, 89 | cfg['n_embedding']) 90 | 91 | # 3. 
Train Generative model (Gated PixelCNN in our project) 92 | vqvae.load_state_dict(torch.load(cfg['vqvae_path'])) 93 | vqvae.to(device) 94 | gen_model.to(device) 95 | gen_model = DistributedDataParallel(gen_model, device_ids=[device]) 96 | 97 | # Optional: resume 98 | # map_location = {'cuda:0': f'cuda:{device}'} 99 | # state_dict = torch.load(cfg['gen_model_path'], map_location=map_location) 100 | # gen_model.module.load_state_dict(state_dict) 101 | 102 | train_generative_model(vqvae, 103 | gen_model, 104 | img_shape=(img_shape[1], img_shape[2]), 105 | device=device, 106 | ckpt_path=cfg['gen_model_path'], 107 | dataset_type=cfg['dataset_type'], 108 | batch_size=cfg['batch_size_2'], 109 | n_epochs=cfg['n_epochs_2']) 110 | 111 | dist.destroy_process_group() 112 | 113 | # torchrun --nproc_per_node=4 dldemos/VQVAE/dist_train_pixelcnn.py -c 1 114 | -------------------------------------------------------------------------------- /dldemos/VQVAE/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class ResidualBlock(nn.Module): 6 | 7 | def __init__(self, dim): 8 | super().__init__() 9 | self.relu = nn.ReLU() 10 | self.conv1 = nn.Conv2d(dim, dim, 3, 1, 1) 11 | self.conv2 = nn.Conv2d(dim, dim, 1) 12 | 13 | def forward(self, x): 14 | tmp = self.relu(x) 15 | tmp = self.conv1(tmp) 16 | tmp = self.relu(tmp) 17 | tmp = self.conv2(tmp) 18 | return x + tmp 19 | 20 | 21 | class VQVAE(nn.Module): 22 | 23 | def __init__(self, input_dim, dim, n_embedding): 24 | super().__init__() 25 | self.encoder = nn.Sequential(nn.Conv2d(input_dim, dim, 4, 2, 1), 26 | nn.ReLU(), nn.Conv2d(dim, dim, 4, 2, 1), 27 | nn.ReLU(), nn.Conv2d(dim, dim, 3, 1, 1), 28 | ResidualBlock(dim), ResidualBlock(dim)) 29 | self.vq_embedding = nn.Embedding(n_embedding, dim) 30 | self.vq_embedding.weight.data.uniform_(-1.0 / n_embedding, 31 | 1.0 / n_embedding) 32 | self.decoder = nn.Sequential( 33 | nn.Conv2d(dim, dim, 3, 1, 1), 34 | ResidualBlock(dim), ResidualBlock(dim), 35 | nn.ConvTranspose2d(dim, dim, 4, 2, 1), nn.ReLU(), 36 | nn.ConvTranspose2d(dim, input_dim, 4, 2, 1)) 37 | self.n_downsample = 2 38 | 39 | def forward(self, x): 40 | # encode 41 | ze = self.encoder(x) 42 | 43 | # ze: [N, C, H, W] 44 | # embedding [K, C] 45 | embedding = self.vq_embedding.weight.data 46 | N, C, H, W = ze.shape 47 | K, _ = embedding.shape 48 | embedding_broadcast = embedding.reshape(1, K, C, 1, 1) 49 | ze_broadcast = ze.reshape(N, 1, C, H, W) 50 | distance = torch.sum((embedding_broadcast - ze_broadcast)**2, 2) 51 | nearest_neighbor = torch.argmin(distance, 1) 52 | # make C to the second dim 53 | zq = self.vq_embedding(nearest_neighbor).permute(0, 3, 1, 2) 54 | # stop gradient 55 | decoder_input = ze + (zq - ze).detach() 56 | 57 | # decode 58 | x_hat = self.decoder(decoder_input) 59 | return x_hat, ze, zq 60 | 61 | @torch.no_grad() 62 | def encode(self, x): 63 | ze = self.encoder(x) 64 | embedding = self.vq_embedding.weight.data 65 | 66 | # ze: [N, C, H, W] 67 | # embedding [K, C] 68 | N, C, H, W = ze.shape 69 | K, _ = embedding.shape 70 | embedding_broadcast = embedding.reshape(1, K, C, 1, 1) 71 | ze_broadcast = ze.reshape(N, 1, C, H, W) 72 | distance = torch.sum((embedding_broadcast - ze_broadcast)**2, 2) 73 | nearest_neighbor = torch.argmin(distance, 1) 74 | return nearest_neighbor 75 | 76 | @torch.no_grad() 77 | def decode(self, discrete_latent): 78 | zq = self.vq_embedding(discrete_latent).permute(0, 3, 1, 2) 79 | x_hat = self.decoder(zq) 80 | return x_hat 81 | 
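# Note on `decoder_input = ze + (zq - ze).detach()` in forward() above:
# the expression equals zq in the forward pass, but its gradient w.r.t. ze
# is the identity, so encoder gradients bypass the non-differentiable
# argmin (the straight-through estimator). A minimal illustration:
#   ze = torch.tensor([1.0], requires_grad=True)
#   zq = torch.tensor([3.0])
#   out = ze + (zq - ze).detach()  # forward value: 3.0
#   out.backward()                 # ze.grad == tensor([1.])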
82 | # Shape: [C, H, W] 83 | def get_latent_HW(self, input_shape): 84 | C, H, W = input_shape 85 | return (H // 2**self.n_downsample, W // 2**self.n_downsample) 86 | -------------------------------------------------------------------------------- /dldemos/VQVAE/pixelcnn_model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from dldemos.pixelcnn.model import GatedBlock, GatedPixelCNN 4 | 5 | 6 | class PixelCNNWithEmbedding(GatedPixelCNN): 7 | 8 | def __init__(self, n_blocks, p, linear_dim, bn=True, color_level=256): 9 | super().__init__(n_blocks, p, linear_dim, bn, color_level) 10 | self.embedding = nn.Embedding(color_level, p) 11 | self.block1 = GatedBlock('A', p, p, bn) 12 | 13 | def forward(self, x): 14 | x = self.embedding(x) 15 | x = x.permute(0, 3, 1, 2).contiguous() 16 | return super().forward(x) 17 | -------------------------------------------------------------------------------- /dldemos/attention/README.md: -------------------------------------------------------------------------------- 1 | 1. Install `babel`, `faker`. 2 | 3 | ```shell 4 | pip install babel faker 5 | ``` 6 | 7 | 2. Run `main.py` . 8 | -------------------------------------------------------------------------------- /dldemos/attention/dataset.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from babel.dates import format_date 4 | from faker import Faker 5 | 6 | faker = Faker() 7 | format_list = [ 8 | 'short', 'medium', 'long', 'full', 'd MMM YYY', 'd MMMM YYY', 'dd/MM/YYY', 9 | 'dd-MM-YYY', 'EE d, MMM YYY', 'EEEE d, MMMM YYY' 10 | ] 11 | 12 | if __name__ == '__main__': 13 | for format in format_list: 14 | date_obj = faker.date_object() 15 | print(f'{format}:', date_obj, 16 | format_date(date_obj, format=format, locale='en')) 17 | 18 | 19 | def generate_date(): 20 | format = random.choice(format_list) 21 | date_obj = faker.date_object() 22 | formated_date = format_date(date_obj, format=format, locale='en') 23 | return formated_date, date_obj 24 | 25 | 26 | def generate_date_data(count, filename): 27 | with open(filename, 'w') as fp: 28 | for _ in range(count): 29 | formated_date, date_obj = generate_date() 30 | fp.write(f'{formated_date}\t{date_obj}\n') 31 | 32 | 33 | def load_date_data(filename): 34 | with open(filename, 'r') as fp: 35 | lines = fp.readlines() 36 | return [line.strip('\n').split('\t') for line in lines] 37 | 38 | 39 | # generate_date_data(50000, 'dldemos/attention/train.txt') 40 | # generate_date_data(10000, 'dldemos/attention/test.txt') 41 | -------------------------------------------------------------------------------- /dldemos/attention/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.utils.rnn import pad_sequence 4 | from torch.utils.data import DataLoader, Dataset 5 | 6 | from dldemos.attention.dataset import generate_date, load_date_data 7 | 8 | EMBEDDING_LENGTH = 128 9 | OUTPUT_LENGTH = 10 10 | 11 | 12 | def stoi(str): 13 | return torch.LongTensor([ord(char) for char in str]) 14 | 15 | 16 | def itos(arr): 17 | return ''.join([chr(x) for x in arr]) 18 | 19 | 20 | class DateDataset(Dataset): 21 | 22 | def __init__(self, lines): 23 | self.lines = lines 24 | 25 | def __len__(self): 26 | return len(self.lines) 27 | 28 | def __getitem__(self, index): 29 | line = self.lines[index] 30 | 31 | return stoi(line[0]), stoi(line[1]) 32 | 33 | 34 | def 
get_dataloader(filename): 35 | 36 | def collate_fn(batch): 37 | x, y = zip(*batch) 38 | x_pad = pad_sequence(x, batch_first=True) 39 | y_pad = pad_sequence(y, batch_first=True) 40 | return x_pad, y_pad 41 | 42 | lines = load_date_data(filename) 43 | dataset = DateDataset(lines) 44 | return DataLoader(dataset, 32, collate_fn=collate_fn) 45 | 46 | 47 | class AttentionModel(nn.Module): 48 | 49 | def __init__(self, 50 | embeding_dim=32, 51 | encoder_dim=32, 52 | decoder_dim=32, 53 | dropout_rate=0.5): 54 | super().__init__() 55 | self.drop = nn.Dropout(dropout_rate) 56 | self.embedding = nn.Embedding(EMBEDDING_LENGTH, embeding_dim) 57 | self.attention_linear = nn.Linear(2 * encoder_dim + decoder_dim, 1) 58 | self.softmax = nn.Softmax(-1) 59 | self.encoder = nn.LSTM(embeding_dim, 60 | encoder_dim, 61 | 1, 62 | batch_first=True, 63 | bidirectional=True) 64 | self.decoder = nn.LSTM(EMBEDDING_LENGTH + 2 * encoder_dim, 65 | decoder_dim, 66 | 1, 67 | batch_first=True) 68 | self.output_linear = nn.Linear(decoder_dim, EMBEDDING_LENGTH) 69 | self.decoder_dim = decoder_dim 70 | 71 | def forward(self, x: torch.Tensor, n_output: int = OUTPUT_LENGTH): 72 | # x: [batch, n_sequence, EMBEDDING_LENGTH] 73 | batch, n_squence = x.shape[0:2] 74 | 75 | # x: [batch, n_sequence, embeding_dim] 76 | x = self.drop(self.embedding(x)) 77 | 78 | # a: [batch, n_sequence, hidden] 79 | a, _ = self.encoder(x) 80 | 81 | # prev_s: [batch, n_squence=1, hidden] 82 | # prev_y: [batch, n_squence=1, EMBEDDING_LENGTH] 83 | # y: [batch, n_output, EMBEDDING_LENGTH] 84 | prev_s = x.new_zeros(batch, 1, self.decoder_dim) 85 | prev_y = x.new_zeros(batch, 1, EMBEDDING_LENGTH) 86 | y = x.new_empty(batch, n_output, EMBEDDING_LENGTH) 87 | tmp_states = None 88 | for i_output in range(n_output): 89 | # repeat_s: [batch, n_squence, hidden] 90 | repeat_s = prev_s.repeat(1, n_squence, 1) 91 | # attention_input: [batch * n_sequence, hidden_s + hidden_a] 92 | attention_input = torch.cat((repeat_s, a), 93 | 2).reshape(batch * n_squence, -1) 94 | # x: [batch * n_sequence, 1] 95 | x = self.attention_linear(attention_input) 96 | # x: [batch, n_sequence] 97 | x = x.reshape(batch, n_squence) 98 | alpha = self.softmax(x) 99 | c = torch.sum(a * alpha.reshape(batch, n_squence, 1), 1) 100 | c = c.unsqueeze(1) 101 | decoder_input = torch.cat((prev_y, c), 2) 102 | 103 | if tmp_states is None: 104 | prev_s, tmp_states = self.decoder(decoder_input) 105 | else: 106 | prev_s, tmp_states = self.decoder(decoder_input, tmp_states) 107 | 108 | prev_y = self.output_linear(prev_s) 109 | y[:, i_output] = prev_y.squeeze(1) 110 | return y 111 | 112 | 113 | def main(): 114 | device = 'cuda:0' 115 | train_dataloader = get_dataloader('dldemos/attention/train.txt') 116 | test_dataloader = get_dataloader('dldemos/attention/test.txt') 117 | 118 | model = AttentionModel().to(device) 119 | 120 | # Please close or open the codes with # 121 | # train 122 | 123 | optimizer = torch.optim.Adam(model.parameters(), lr=0.001) 124 | citerion = torch.nn.CrossEntropyLoss() 125 | for epoch in range(30): 126 | loss_sum = 0 127 | dataset_len = len(train_dataloader.dataset) 128 | 129 | for x, y in train_dataloader: 130 | x = x.to(device) 131 | y = y.to(device) 132 | hat_y = model(x) 133 | n, Tx, _ = hat_y.shape 134 | hat_y = torch.reshape(hat_y, (n * Tx, -1)) 135 | label_y = torch.reshape(y, (n * Tx, )) 136 | loss = citerion(hat_y, label_y) 137 | 138 | optimizer.zero_grad() 139 | loss.backward() 140 | torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) 141 | optimizer.step() 142 | 143 | 
loss_sum += loss * n 144 | 145 | print(f'Epoch {epoch}. loss: {loss_sum / dataset_len}') 146 | 147 | torch.save(model.state_dict(), 'dldemos/attention/model.pth') 148 | 149 | # test 150 | model.load_state_dict(torch.load('dldemos/attention/model.pth')) 151 | 152 | accuracy = 0 153 | dataset_len = len(test_dataloader.dataset) 154 | 155 | for x, y in test_dataloader: 156 | x = x.to(device) 157 | y = y.to(device) 158 | hat_y = model(x) 159 | prediction = torch.argmax(hat_y, 2) 160 | score = torch.where(torch.sum(prediction - y, -1) == 0, 1, 0) 161 | accuracy += torch.sum(score) 162 | 163 | print(f'Accuracy: {accuracy / dataset_len}') 164 | 165 | # inference 166 | for _ in range(5): 167 | x, y = generate_date() 168 | origin_x = x 169 | x = stoi(x).unsqueeze(0).to(device) 170 | hat_y = model(x) 171 | hat_y = hat_y.squeeze(0).argmax(1) 172 | hat_y = itos(hat_y) 173 | print(f'input: {origin_x}, prediction: {hat_y}, gt: {y}') 174 | 175 | 176 | if __name__ == '__main__': 177 | main() 178 | -------------------------------------------------------------------------------- /dldemos/ddim/configs.py: -------------------------------------------------------------------------------- 1 | mnist_cfg = { 2 | 'dataset_type': 'MNIST', 3 | 'img_shape': [1, 28, 28], 4 | 'model_path': 'dldemos/ddim/mnist.pth', 5 | 'batch_size': 512, 6 | 'n_epochs': 50, 7 | 'channels': [10, 20, 40, 80], 8 | 'pe_dim': 128 9 | } 10 | 11 | # Deprecated config. It's for model in `network_my.py` 12 | celebahq_cfg1 = { 13 | 'dataset_type': 'CelebAHQ', 14 | 'img_shape': [3, 128, 128], 15 | 'model_path': 'dldemos/ddim/celebahq1.pth', 16 | 'batch_size': 64, 17 | 'n_epochs': 1000, 18 | 'channels': [64, 128, 256, 512, 512], 19 | 'pe_dim': 128, 20 | 'with_attn': [False, False, False, True, False] 21 | } 22 | celebahq_cfg2 = { 23 | 'dataset_type': 'CelebAHQ', 24 | 'img_shape': [3, 64, 64], 25 | 'model_path': 'dldemos/ddim/celebahq2.pth', 26 | 'batch_size': 128, 27 | 'n_epochs': 2500, 28 | 'scheduler_cfg': { 29 | 'lr': 5e-4, 30 | 'milestones': [1500, 2100], 31 | 'gamma': 0.1, 32 | }, 33 | 'channels': [128, 256, 512, 512], 34 | 'pe_dim': 128, 35 | 'with_attn': [False, False, True, True], 36 | 'norm_type': 'gn' 37 | } 38 | celebahq_cfg3 = { 39 | 'dataset_type': 'CelebAHQ', 40 | 'img_shape': [3, 128, 128], 41 | 'model_path': 'dldemos/ddim/celebahq3.pth', 42 | 'batch_size': 32, 43 | 'n_epochs': 1500, 44 | 'scheduler_cfg': { 45 | 'lr': 2e-4, 46 | 'milestones': [800, 1300], 47 | 'gamma': 0.1, 48 | }, 49 | 'channels': [128, 256, 256, 512, 512], 50 | 'pe_dim': 128, 51 | 'with_attn': [False, False, False, True, True], 52 | 'norm_type': 'gn' 53 | } 54 | celebahq_cfg4 = { 55 | 'dataset_type': 'CelebAHQ', 56 | 'img_shape': [3, 256, 256], 57 | 'model_path': 'dldemos/ddim/celebahq4.pth', 58 | 'batch_size': 8, 59 | 'n_epochs': 1000, 60 | 'scheduler_cfg': { 61 | 'lr': 2e-5, 62 | 'milestones': [800], 63 | 'gamma': 0.1, 64 | }, 65 | 'channels': [128, 128, 256, 256, 512, 512], 66 | 'pe_dim': 128, 67 | 'with_attn': [False, False, False, False, True, True], 68 | 'norm_type': 'gn' 69 | } 70 | 71 | configs = [ 72 | mnist_cfg, celebahq_cfg1, celebahq_cfg2, celebahq_cfg3, celebahq_cfg4 73 | ] 74 | -------------------------------------------------------------------------------- /dldemos/ddim/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torchvision 4 | from PIL import Image 5 | from torch.utils.data import DataLoader, Dataset 6 | from torch.utils.data.distributed import DistributedSampler 7 | from 
torchvision import transforms 8 | 9 | CELEBA_HQ_DIR = 'data/celebA/celeba_hq_256' 10 | 11 | 12 | def download_dataset(): 13 | mnist = torchvision.datasets.MNIST(root='./data/mnist', download=True) 14 | print('length of MNIST', len(mnist)) 15 | id = 4 16 | img, label = mnist[id] 17 | print(img) 18 | print(label) 19 | 20 | # On computer with monitor 21 | # img.show() 22 | 23 | img.save('work_dirs/tmp.jpg') 24 | tensor = transforms.ToTensor()(img) 25 | print(tensor.shape) 26 | print(tensor.max()) 27 | print(tensor.min()) 28 | 29 | 30 | class MNISTImageDataset(Dataset): 31 | 32 | def __init__(self): 33 | super().__init__() 34 | self.mnist = torchvision.datasets.MNIST(root='./data/mnist') 35 | 36 | def __len__(self): 37 | return len(self.mnist) 38 | 39 | def __getitem__(self, index: int): 40 | img = self.mnist[index][0] 41 | pipeline = transforms.Compose([ 42 | transforms.ToTensor(), 43 | transforms.Lambda(lambda x: (x - 0.5) * 2) 44 | ]) 45 | return pipeline(img) 46 | 47 | 48 | class CelebADataset(Dataset): 49 | 50 | def __init__(self, root, resolution=(64, 64)): 51 | super().__init__() 52 | self.root = root 53 | self.filenames = sorted(os.listdir(root)) 54 | self.resolution = resolution 55 | 56 | def __len__(self) -> int: 57 | return len(self.filenames) 58 | 59 | def __getitem__(self, index: int): 60 | path = os.path.join(self.root, self.filenames[index]) 61 | img = Image.open(path) 62 | pipeline = transforms.Compose([ 63 | transforms.Resize(self.resolution), 64 | transforms.ToTensor(), 65 | transforms.Lambda(lambda x: (x - 0.5) * 2) 66 | ]) 67 | return pipeline(img) 68 | 69 | 70 | def get_dataloader(type, 71 | batch_size, 72 | dist_train=False, 73 | num_workers=4, 74 | resolution=None): 75 | if type == 'CelebAHQ': 76 | if resolution is not None: 77 | dataset = CelebADataset(CELEBA_HQ_DIR, resolution) 78 | else: 79 | dataset = CelebADataset(CELEBA_HQ_DIR) 80 | elif type == 'MNIST': 81 | dataset = MNISTImageDataset() 82 | if dist_train: 83 | sampler = DistributedSampler(dataset) 84 | dataloader = DataLoader(dataset, 85 | batch_size=batch_size, 86 | sampler=sampler, 87 | num_workers=num_workers) 88 | return dataloader, sampler 89 | else: 90 | dataloader = DataLoader(dataset, 91 | batch_size=batch_size, 92 | shuffle=True, 93 | num_workers=num_workers) 94 | return dataloader 95 | 96 | 97 | if __name__ == '__main__': 98 | os.makedirs('work_dirs', exist_ok=True) 99 | download_dataset() 100 | -------------------------------------------------------------------------------- /dldemos/ddim/ddim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from tqdm import tqdm 3 | 4 | from dldemos.ddim.ddpm import DDPM 5 | 6 | 7 | class DDIM(DDPM): 8 | 9 | def __init__(self, 10 | device, 11 | n_steps: int, 12 | min_beta: float = 0.0001, 13 | max_beta: float = 0.02): 14 | super().__init__(device, n_steps, min_beta, max_beta) 15 | 16 | def sample_backward(self, 17 | img_or_shape, 18 | net, 19 | device, 20 | simple_var=True, 21 | ddim_step=20, 22 | eta=1): 23 | if simple_var: 24 | eta = 1 25 | ts = torch.linspace(self.n_steps, 0, 26 | (ddim_step + 1)).to(device).to(torch.long) 27 | if isinstance(img_or_shape, torch.Tensor): 28 | x = img_or_shape 29 | else: 30 | x = torch.randn(img_or_shape).to(device) 31 | batch_size = x.shape[0] 32 | net = net.to(device) 33 | for i in tqdm(range(1, ddim_step + 1), 34 | f'DDIM sampling with eta {eta} simple_var {simple_var}'): 35 | cur_t = ts[i - 1] - 1 36 | prev_t = ts[i] - 1 37 | 38 | ab_cur = self.alpha_bars[cur_t] 39 | 
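# The update below implements the DDIM sampling equation from Song et al.,
# "Denoising Diffusion Implicit Models". Writing a_t for alpha_bar_t and
# sigma^2 for `var`:
#   x_prev = sqrt(a_prev / a_cur) * x
#            + (sqrt(1 - a_prev - sigma^2)
#               - sqrt(a_prev * (1 - a_cur) / a_cur)) * eps
#            + sigma * noise
# eta = 0 yields the deterministic DDIM sampler; eta = 1 recovers the
# DDPM-like stochastic sampler.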
ab_prev = self.alpha_bars[prev_t] if prev_t >= 0 else 1 40 | 41 | t_tensor = torch.tensor([cur_t] * batch_size, 42 | dtype=torch.long).to(device).unsqueeze(1) 43 | eps = net(x, t_tensor) 44 | var = eta * (1 - ab_prev) / (1 - ab_cur) * (1 - ab_cur / ab_prev) 45 | noise = torch.randn_like(x) 46 | 47 | first_term = (ab_prev / ab_cur)**0.5 * x 48 | second_term = ((1 - ab_prev - var)**0.5 - 49 | (ab_prev * (1 - ab_cur) / ab_cur)**0.5) * eps 50 | if simple_var: 51 | third_term = (1 - ab_cur / ab_prev)**0.5 * noise 52 | else: 53 | third_term = var**0.5 * noise 54 | x = first_term + second_term + third_term 55 | 56 | return x 57 | -------------------------------------------------------------------------------- /dldemos/ddim/ddpm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from tqdm import tqdm 3 | 4 | 5 | class DDPM(): 6 | 7 | def __init__(self, 8 | device, 9 | n_steps: int, 10 | min_beta: float = 0.0001, 11 | max_beta: float = 0.02): 12 | betas = torch.linspace(min_beta, max_beta, n_steps).to(device) 13 | alphas = 1 - betas 14 | alpha_bars = torch.empty_like(alphas) 15 | product = 1 16 | for i, alpha in enumerate(alphas): 17 | product *= alpha 18 | alpha_bars[i] = product 19 | self.betas = betas 20 | self.n_steps = n_steps 21 | self.alphas = alphas 22 | self.alpha_bars = alpha_bars 23 | 24 | def sample_forward(self, x, t, eps=None): 25 | alpha_bar = self.alpha_bars[t].reshape(-1, 1, 1, 1) 26 | if eps is None: 27 | eps = torch.randn_like(x) 28 | res = eps * torch.sqrt(1 - alpha_bar) + torch.sqrt(alpha_bar) * x 29 | return res 30 | 31 | def sample_backward(self, img_or_shape, net, device, simple_var=True): 32 | if isinstance(img_or_shape, torch.Tensor): 33 | x = img_or_shape 34 | else: 35 | x = torch.randn(img_or_shape).to(device) 36 | net = net.to(device) 37 | for t in tqdm(range(self.n_steps - 1, -1, -1), 'DDPM sampling'): 38 | x = self.sample_backward_step(x, t, net, simple_var) 39 | 40 | return x 41 | 42 | def sample_backward_step(self, x_t, t, net, simple_var=True): 43 | 44 | n = x_t.shape[0] 45 | t_tensor = torch.tensor([t] * n, 46 | dtype=torch.long).to(x_t.device).unsqueeze(1) 47 | eps = net(x_t, t_tensor) 48 | 49 | if t == 0: 50 | noise = 0 51 | else: 52 | if simple_var: 53 | var = self.betas[t] 54 | else: 55 | var = (1 - self.alpha_bars[t - 1]) / ( 56 | 1 - self.alpha_bars[t]) * self.betas[t] 57 | noise = torch.randn_like(x_t) 58 | noise *= torch.sqrt(var) 59 | 60 | mean = (x_t - 61 | (1 - self.alphas[t]) / torch.sqrt(1 - self.alpha_bars[t]) * 62 | eps) / torch.sqrt(self.alphas[t]) 63 | x_t = mean + noise 64 | 65 | return x_t 66 | -------------------------------------------------------------------------------- /dldemos/ddim/dist_sample.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import cv2 5 | import einops 6 | import torch 7 | import torch.distributed as dist 8 | from torch.nn.parallel import DistributedDataParallel 9 | 10 | from dldemos.ddim.configs import configs 11 | from dldemos.ddim.ddim import DDIM 12 | from dldemos.ddim.ddpm import DDPM 13 | from dldemos.ddim.network import UNet 14 | 15 | 16 | def sample_imgs(ddpm, 17 | net, 18 | output_dir, 19 | img_shape, 20 | n_sample=30000, 21 | device=0, 22 | simple_var=True, 23 | to_bgr=False, 24 | **kwargs): 25 | if img_shape[1] >= 256: 26 | max_batch_size = 16 27 | elif img_shape[1] >= 128: 28 | max_batch_size = 64 29 | else: 30 | max_batch_size = 256 31 | n_devices = dist.get_world_size() 32 | 33 
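# Sharding: each outer iteration advances the global cursor by
# max_batch_size * n_devices, and rank r renders the index range
# [index + r * max_batch_size, index + (r + 1) * max_batch_size), so the
# ranks write disjoint chunks that together cover all n_sample images.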
| net = net.to(device) 34 | net = net.eval() 35 | 36 | os.makedirs(output_dir, exist_ok=True) 37 | 38 | index = 0 39 | with torch.no_grad(): 40 | while index < n_sample: 41 | start_index = index + device * max_batch_size 42 | end_index = min(n_sample, index + (device + 1) * max_batch_size) 43 | 44 | local_batch_size = end_index - start_index 45 | if local_batch_size > 0: 46 | shape = (local_batch_size, *img_shape) 47 | imgs = ddpm.sample_backward(shape, 48 | net, 49 | device=device, 50 | simple_var=simple_var, 51 | **kwargs).detach().cpu() 52 | imgs = (imgs + 1) / 2 * 255 53 | imgs = imgs.clamp(0, 255).to(torch.uint8) 54 | 55 | img_list = einops.rearrange(imgs, 'n c h w -> n h w c').numpy() 56 | for i, img in enumerate(img_list): 57 | if to_bgr: 58 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 59 | cv2.imwrite(f'{output_dir}/{i+start_index}.jpg', img) 60 | 61 | index += max_batch_size * n_devices 62 | 63 | 64 | if __name__ == '__main__': 65 | dist.init_process_group('nccl') 66 | 67 | os.makedirs('work_dirs', exist_ok=True) 68 | 69 | parser = argparse.ArgumentParser() 70 | parser.add_argument('-c', type=int, default=0) 71 | args = parser.parse_args() 72 | cfg = configs[args.c] 73 | 74 | n_steps = 1000 75 | rank = dist.get_rank() 76 | device = rank % torch.cuda.device_count() 77 | model_path = cfg['model_path'] 78 | img_shape = cfg['img_shape'] 79 | to_bgr = cfg['dataset_type'] != 'MNIST' 80 | 81 | net = UNet(n_steps, img_shape, cfg['channels'], cfg['pe_dim'], 82 | cfg.get('with_attn', False), cfg.get('norm_type', 'ln')) 83 | net.to(device) 84 | net = DistributedDataParallel(net, device_ids=[device]) 85 | ddpm = DDPM(device, n_steps) 86 | 87 | # Load the trained checkpoint (required before sampling) 88 | map_location = {'cuda:0': f'cuda:{device}'} 89 | resume_path = model_path 90 | state_dict = torch.load(resume_path, map_location=map_location) 91 | net.module.load_state_dict(state_dict) 92 | 93 | ddim = DDIM(device, n_steps) 94 | sample_imgs(ddpm, 95 | net, 96 | 'work_dirs/diffusion_ddpm_sigma_hat', 97 | img_shape, 98 | device=device, 99 | to_bgr=to_bgr) 100 | dist.barrier() 101 | sample_imgs(ddim, 102 | net, 103 | 'work_dirs/diffusion_ddpm_eta_0', 104 | img_shape, 105 | device=device, 106 | to_bgr=to_bgr, 107 | ddim_step=1000, 108 | simple_var=False, 109 | eta=0) 110 | dist.barrier() 111 | 112 | dist.destroy_process_group() 113 | 114 | # torchrun --nproc_per_node=8 dldemos/ddim/dist_sample.py -c 2 \ 115 | # > work_dirs/tmp.txt 116 | -------------------------------------------------------------------------------- /dldemos/ddim/dist_train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | 5 | import torch 6 | import torch.distributed as dist 7 | import torch.nn as nn 8 | from torch.nn.parallel import DistributedDataParallel 9 | 10 | from dldemos.ddim.configs import configs 11 | from dldemos.ddim.dataset import get_dataloader 12 | from dldemos.ddim.ddpm import DDPM 13 | from dldemos.ddim.network import UNet 14 | 15 | 16 | def reduce_sum(tensor): 17 | rt = tensor.clone() 18 | dist.all_reduce(rt, op=dist.ReduceOp.SUM) 19 | return rt 20 | 21 | 22 | def train(ddpm: DDPM, 23 | net, 24 | dataset_type, 25 | resolution=None, 26 | batch_size=512, 27 | n_epochs=50, 28 | scheduler_cfg=None, 29 | device='cuda', 30 | ckpt_path='dldemos/ddpm/model.pth'): 31 | 32 | n_steps = ddpm.n_steps 33 | dataloader, sampler = get_dataloader(dataset_type, 34 | batch_size, 35 | True, 36 | resolution=resolution) 37 | if device == 0: 38 | print('batch 
size: ', batch_size * dist.get_world_size()) 39 | print('batch size per device: ', batch_size) 40 | 41 | net = net.to(device) 42 | loss_fn = nn.MSELoss() 43 | 44 | if scheduler_cfg is not None: 45 | optimizer = torch.optim.Adam(net.parameters(), scheduler_cfg['lr']) 46 | scheduler = torch.optim.lr_scheduler.MultiStepLR( 47 | optimizer, scheduler_cfg['milestones'], scheduler_cfg['gamma']) 48 | else: 49 | optimizer = torch.optim.Adam(net.parameters(), 2e-4) 50 | scheduler = None 51 | 52 | tic = time.time() 53 | for e in range(n_epochs): 54 | total_loss = 0 55 | sampler.set_epoch(e) 56 | for x in dataloader: 57 | current_batch_size = x.shape[0] 58 | x = x.to(device) 59 | t = torch.randint(0, n_steps, (current_batch_size, )).to(device) 60 | eps = torch.randn_like(x).to(device) 61 | x_t = ddpm.sample_forward(x, t, eps) 62 | eps_theta = net(x_t, t.reshape(current_batch_size, 1)) 63 | loss = loss_fn(eps_theta, eps) 64 | optimizer.zero_grad() 65 | loss.backward() 66 | optimizer.step() 67 | loss = reduce_sum(loss) 68 | total_loss += loss.item() * current_batch_size 69 | if scheduler is not None: 70 | scheduler.step() 71 | total_loss /= len(dataloader.dataset) 72 | toc = time.time() 73 | if device == 0: 74 | torch.save(net.module.state_dict(), ckpt_path) 75 | print(f'epoch {e} loss: {total_loss} elapsed {(toc - tic):.2f}s') 76 | dist.barrier() 77 | 78 | if device == 0: 79 | print('Done') 80 | 81 | 82 | if __name__ == '__main__': 83 | dist.init_process_group('nccl') 84 | 85 | os.makedirs('work_dirs', exist_ok=True) 86 | 87 | parser = argparse.ArgumentParser() 88 | parser.add_argument('-c', type=int, default=0) 89 | args = parser.parse_args() 90 | cfg = configs[args.c] 91 | 92 | n_steps = 1000 93 | rank = dist.get_rank() 94 | device = rank % torch.cuda.device_count() 95 | model_path = cfg['model_path'] 96 | img_shape = cfg['img_shape'] 97 | to_bgr = cfg['dataset_type'] != 'MNIST' 98 | 99 | net = UNet(n_steps, img_shape, cfg['channels'], cfg['pe_dim'], 100 | cfg.get('with_attn', False), cfg.get('norm_type', 'ln')) 101 | net.to(device) 102 | net = DistributedDataParallel(net, device_ids=[device]) 103 | ddpm = DDPM(device, n_steps) 104 | 105 | # Optional: resume 106 | # map_location = {'cuda:0': f'cuda:{device}'} 107 | # resume_path = model_path 108 | # state_dict = torch.load(resume_path, map_location=map_location) 109 | # net.module.load_state_dict(state_dict) 110 | 111 | train(ddpm, 112 | net, 113 | cfg['dataset_type'], 114 | resolution=(img_shape[1], img_shape[2]), 115 | batch_size=cfg['batch_size'], 116 | n_epochs=cfg['n_epochs'], 117 | scheduler_cfg=cfg.get('scheduler_cfg', None), 118 | device=device, 119 | ckpt_path=model_path) 120 | 121 | dist.destroy_process_group() 122 | 123 | # torchrun --nproc_per_node=8 dldemos/ddim/dist_train.py -c 1 124 | -------------------------------------------------------------------------------- /dldemos/ddpm/dataset.py: -------------------------------------------------------------------------------- 1 | import torchvision 2 | from torch.utils.data import DataLoader 3 | from torchvision.transforms import Compose, Lambda, ToTensor 4 | 5 | 6 | def download_dataset(): 7 | mnist = torchvision.datasets.MNIST(root='./data/mnist', download=True) 8 | print('length of MNIST', len(mnist)) 9 | id = 4 10 | img, label = mnist[id] 11 | print(img) 12 | print(label) 13 | 14 | # On a computer with a monitor 15 | # img.show() 16 | 17 | img.save('work_dirs/tmp.jpg') 18 | tensor = ToTensor()(img) 19 | print(tensor.shape) 20 | print(tensor.max()) 21 | print(tensor.min()) 
22 | 23 | 24 | def get_dataloader(batch_size: int): 25 | transform = Compose([ToTensor(), Lambda(lambda x: (x - 0.5) * 2)]) 26 | dataset = torchvision.datasets.MNIST(root='./data/mnist', 27 | transform=transform) 28 | return DataLoader(dataset, batch_size=batch_size, shuffle=True) 29 | 30 | 31 | def get_img_shape(): 32 | return (1, 28, 28) 33 | 34 | 35 | if __name__ == '__main__': 36 | import os 37 | os.makedirs('work_dirs', exist_ok=True) 38 | download_dataset() 39 | -------------------------------------------------------------------------------- /dldemos/ddpm/ddpm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class DDPM(): 5 | 6 | def __init__(self, 7 | device, 8 | n_steps: int, 9 | min_beta: float = 0.0001, 10 | max_beta: float = 0.02): 11 | betas = torch.linspace(min_beta, max_beta, n_steps).to(device) 12 | alphas = 1 - betas 13 | alpha_bars = torch.empty_like(alphas) 14 | product = 1 15 | for i, alpha in enumerate(alphas): 16 | product *= alpha 17 | alpha_bars[i] = product 18 | self.betas = betas 19 | self.n_steps = n_steps 20 | self.alphas = alphas 21 | self.alpha_bars = alpha_bars 22 | alpha_prev = torch.empty_like(alpha_bars) 23 | alpha_prev[1:] = alpha_bars[0:n_steps - 1] 24 | alpha_prev[0] = 1 25 | self.coef1 = torch.sqrt(alphas) * (1 - alpha_prev) / (1 - alpha_bars) 26 | self.coef2 = torch.sqrt(alpha_prev) * self.betas / (1 - alpha_bars) 27 | 28 | def sample_forward(self, x, t, eps=None): 29 | alpha_bar = self.alpha_bars[t].reshape(-1, 1, 1, 1) 30 | if eps is None: 31 | eps = torch.randn_like(x) 32 | res = eps * torch.sqrt(1 - alpha_bar) + torch.sqrt(alpha_bar) * x 33 | return res 34 | 35 | def sample_backward(self, 36 | img_shape, 37 | net, 38 | device, 39 | simple_var=True, 40 | clip_x0=True): 41 | x = torch.randn(img_shape).to(device) 42 | net = net.to(device) 43 | for t in range(self.n_steps - 1, -1, -1): 44 | x = self.sample_backward_step(x, t, net, simple_var, clip_x0) 45 | return x 46 | 47 | def sample_backward_step(self, x_t, t, net, simple_var=True, clip_x0=True): 48 | 49 | n = x_t.shape[0] 50 | t_tensor = torch.tensor([t] * n, 51 | dtype=torch.long).to(x_t.device).unsqueeze(1) 52 | eps = net(x_t, t_tensor) 53 | 54 | if t == 0: 55 | noise = 0 56 | else: 57 | if simple_var: 58 | var = self.betas[t] 59 | else: 60 | var = (1 - self.alpha_bars[t - 1]) / ( 61 | 1 - self.alpha_bars[t]) * self.betas[t] 62 | noise = torch.randn_like(x_t) 63 | noise *= torch.sqrt(var) 64 | 65 | if clip_x0: 66 | x_0 = (x_t - torch.sqrt(1 - self.alpha_bars[t]) * 67 | eps) / torch.sqrt(self.alpha_bars[t]) 68 | x_0 = torch.clip(x_0, -1, 1) 69 | mean = self.coef1[t] * x_t + self.coef2[t] * x_0 70 | else: 71 | mean = (x_t - 72 | (1 - self.alphas[t]) / torch.sqrt(1 - self.alpha_bars[t]) * 73 | eps) / torch.sqrt(self.alphas[t]) 74 | x_t = mean + noise 75 | 76 | return x_t 77 | 78 | 79 | def visualize_forward(): 80 | import cv2 81 | import einops 82 | import numpy as np 83 | 84 | from dldemos.ddpm.dataset import get_dataloader 85 | 86 | n_steps = 100 87 | device = 'cuda' 88 | dataloader = get_dataloader(5) 89 | x, _ = next(iter(dataloader)) 90 | x = x.to(device) 91 | 92 | ddpm = DDPM(device, n_steps) 93 | xts = [] 94 | percents = torch.linspace(0, 0.99, 10) 95 | for percent in percents: 96 | t = torch.tensor([int(n_steps * percent)]) 97 | t = t.unsqueeze(1) 98 | x_t = ddpm.sample_forward(x, t) 99 | xts.append(x_t) 100 | res = torch.stack(xts, 0) 101 | res = einops.rearrange(res, 'n1 n2 c h w -> (n2 h) (n1 w) c') 102 | res = (res.clip(-1, 
1) + 1) / 2 * 255 103 | res = res.cpu().numpy().astype(np.uint8) 104 | 105 | cv2.imwrite('work_dirs/diffusion_forward.jpg', res) 106 | 107 | 108 | def main(): 109 | visualize_forward() 110 | 111 | 112 | if __name__ == '__main__': 113 | main() 114 | -------------------------------------------------------------------------------- /dldemos/ddpm/ddpm_simple.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class DDPM(): 5 | 6 | def __init__(self, 7 | device, 8 | n_steps: int, 9 | min_beta: float = 0.0001, 10 | max_beta: float = 0.02): 11 | betas = torch.linspace(min_beta, max_beta, n_steps).to(device) 12 | alphas = 1 - betas 13 | alpha_bars = torch.empty_like(alphas) 14 | product = 1 15 | for i, alpha in enumerate(alphas): 16 | product *= alpha 17 | alpha_bars[i] = product 18 | self.betas = betas 19 | self.n_steps = n_steps 20 | self.alphas = alphas 21 | self.alpha_bars = alpha_bars 22 | 23 | def sample_forward(self, x, t, eps=None): 24 | alpha_bar = self.alpha_bars[t].reshape(-1, 1, 1, 1) 25 | if eps is None: 26 | eps = torch.randn_like(x) 27 | res = eps * torch.sqrt(1 - alpha_bar) + torch.sqrt(alpha_bar) * x 28 | return res 29 | 30 | def sample_backward(self, img_shape, net, device, simple_var=True): 31 | x = torch.randn(img_shape).to(device) 32 | net = net.to(device) 33 | for t in range(self.n_steps - 1, -1, -1): 34 | x = self.sample_backward_step(x, t, net, simple_var) 35 | return x 36 | 37 | def sample_backward_step(self, x_t, t, net, simple_var=True): 38 | 39 | n = x_t.shape[0] 40 | t_tensor = torch.tensor([t] * n, 41 | dtype=torch.long).to(x_t.device).unsqueeze(1) 42 | eps = net(x_t, t_tensor) 43 | 44 | if t == 0: 45 | noise = 0 46 | else: 47 | if simple_var: 48 | var = self.betas[t] 49 | else: 50 | var = (1 - self.alpha_bars[t - 1]) / ( 51 | 1 - self.alpha_bars[t]) * self.betas[t] 52 | noise = torch.randn_like(x_t) 53 | noise *= torch.sqrt(var) 54 | 55 | mean = (x_t - 56 | (1 - self.alphas[t]) / torch.sqrt(1 - self.alpha_bars[t]) * 57 | eps) / torch.sqrt(self.alphas[t]) 58 | x_t = mean + noise 59 | 60 | return x_t 61 | -------------------------------------------------------------------------------- /dldemos/ddpm/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import cv2 5 | import einops 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | 10 | from dldemos.ddpm.dataset import get_dataloader, get_img_shape 11 | from dldemos.ddpm.ddpm_simple import DDPM 12 | from dldemos.ddpm.network import (build_network, convnet_big_cfg, 13 | convnet_medium_cfg, convnet_small_cfg, 14 | unet_1_cfg, unet_res_cfg) 15 | 16 | batch_size = 512 17 | n_epochs = 100 18 | 19 | 20 | def train(ddpm: DDPM, net, device='cuda', ckpt_path='dldemos/ddpm/model.pth'): 21 | print('batch size:', batch_size) 22 | n_steps = ddpm.n_steps 23 | dataloader = get_dataloader(batch_size) 24 | net = net.to(device) 25 | loss_fn = nn.MSELoss() 26 | optimizer = torch.optim.Adam(net.parameters(), 1e-3) 27 | 28 | tic = time.time() 29 | for e in range(n_epochs): 30 | total_loss = 0 31 | 32 | for x, _ in dataloader: 33 | current_batch_size = x.shape[0] 34 | x = x.to(device) 35 | t = torch.randint(0, n_steps, (current_batch_size, )).to(device) 36 | eps = torch.randn_like(x).to(device) 37 | x_t = ddpm.sample_forward(x, t, eps) 38 | eps_theta = net(x_t, t.reshape(current_batch_size, 1)) 39 | loss = loss_fn(eps_theta, eps) 40 | optimizer.zero_grad() 41 | loss.backward() 42 | 
optimizer.step() 43 | total_loss += loss.item() * current_batch_size 44 | total_loss /= len(dataloader.dataset) 45 | toc = time.time() 46 | torch.save(net.state_dict(), ckpt_path) 47 | print(f'epoch {e} loss: {total_loss} elapsed {(toc - tic):.2f}s') 48 | print('Done') 49 | 50 | 51 | def sample_imgs(ddpm, 52 | net, 53 | output_path, 54 | n_sample=81, 55 | device='cuda', 56 | simple_var=True): 57 | net = net.to(device) 58 | net = net.eval() 59 | with torch.no_grad(): 60 | shape = (n_sample, *get_img_shape()) # (n_sample, 1, 28, 28) 61 | imgs = ddpm.sample_backward(shape, 62 | net, 63 | device=device, 64 | simple_var=simple_var).detach().cpu() 65 | imgs = (imgs + 1) / 2 * 255 66 | imgs = imgs.clamp(0, 255) 67 | imgs = einops.rearrange(imgs, 68 | '(b1 b2) c h w -> (b1 h) (b2 w) c', 69 | b1=int(n_sample**0.5)) 70 | 71 | imgs = imgs.numpy().astype(np.uint8) 72 | 73 | cv2.imwrite(output_path, imgs) 74 | 75 | 76 | configs = [ 77 | convnet_small_cfg, convnet_medium_cfg, convnet_big_cfg, unet_1_cfg, 78 | unet_res_cfg 79 | ] 80 | 81 | if __name__ == '__main__': 82 | os.makedirs('work_dirs', exist_ok=True) 83 | 84 | n_steps = 1000 85 | config_id = 4 86 | device = 'cuda' 87 | model_path = 'dldemos/ddpm/model_unet_res.pth' 88 | 89 | config = configs[config_id] 90 | net = build_network(config, n_steps) 91 | ddpm = DDPM(device, n_steps) 92 | 93 | train(ddpm, net, device=device, ckpt_path=model_path) 94 | 95 | net.load_state_dict(torch.load(model_path)) 96 | sample_imgs(ddpm, net, 'work_dirs/diffusion.jpg', device=device) 97 | -------------------------------------------------------------------------------- /dldemos/lmdb_loader.py: -------------------------------------------------------------------------------- 1 | # Modified from 2 | # https://github.com/xunge/pytorch_lmdb_imagenet/blob/master/folder2lmdb.py 3 | 4 | import os 5 | import os.path as osp 6 | import pickle 7 | 8 | import lmdb 9 | import six 10 | from PIL import Image 11 | from torch.utils.data import DataLoader, Dataset 12 | 13 | 14 | def raw_reader(path): 15 | with open(path, 'rb') as f: 16 | bin_data = f.read() 17 | return bin_data 18 | 19 | 20 | def dumps_data(obj): 21 | """Serialize an object. 
22 | 23 | Returns: 24 | Implementation-dependent bytes-like object 25 | """ 26 | return pickle.dumps(obj) 27 | 28 | 29 | class MyImageFolder(Dataset): 30 | 31 | def __init__(self, root): 32 | super().__init__() 33 | self.root = root 34 | self.filenames = sorted(os.listdir(root)) 35 | 36 | def __len__(self) -> int: 37 | return len(self.filenames) 38 | 39 | def __getitem__(self, index: int): 40 | path = os.path.join(self.root, self.filenames[index]) 41 | return raw_reader(path) 42 | 43 | 44 | def folder2lmdb(img_dir, output_path, write_frequency=5000): 45 | directory = img_dir 46 | print('Loading dataset from %s' % directory) 47 | dataset = MyImageFolder(directory) 48 | data_loader = DataLoader(dataset, num_workers=16, collate_fn=lambda x: x) 49 | 50 | lmdb_path = output_path 51 | isdir = os.path.isdir(lmdb_path) 52 | 53 | print('Generating LMDB at %s' % lmdb_path) 54 | db = lmdb.open(lmdb_path, 55 | subdir=isdir, 56 | map_size=1099511627776 * 2, 57 | readonly=False, 58 | meminit=False, 59 | map_async=True) 60 | 61 | txn = db.begin(write=True) 62 | for idx, data in enumerate(data_loader): 63 | image = data[0] 64 | 65 | txn.put(u'{}'.format(idx).encode('ascii'), dumps_data(image)) 66 | if idx % write_frequency == 0: 67 | print('[%d/%d]' % (idx, len(data_loader))) 68 | txn.commit() 69 | txn = db.begin(write=True) 70 | 71 | # finish iterating through dataset 72 | txn.commit() 73 | keys = [u'{}'.format(k).encode('ascii') for k in range(idx + 1)] 74 | with db.begin(write=True) as txn: 75 | txn.put(b'__keys__', dumps_data(keys)) 76 | txn.put(b'__len__', dumps_data(len(keys))) 77 | 78 | print('Flushing database ...') 79 | db.sync() 80 | db.close() 81 | 82 | 83 | def loads_data(buf): 84 | """ 85 | Args: 86 | buf: the output of `dumps_data`. 87 | """ 88 | return pickle.loads(buf) 89 | 90 | 91 | class ImageFolderLMDB(Dataset): 92 | 93 | def __init__(self, db_path, transform=None): 94 | self.db_path = db_path 95 | self.env = lmdb.open(db_path, 96 | subdir=osp.isdir(db_path), 97 | readonly=True, 98 | lock=False, 99 | readahead=False, 100 | meminit=False) 101 | with self.env.begin(write=False) as txn: 102 | self.length = loads_data(txn.get(b'__len__')) 103 | self.keys = loads_data(txn.get(b'__keys__')) 104 | 105 | self.transform = transform 106 | 107 | def __getitem__(self, index): 108 | env = self.env 109 | with env.begin(write=False) as txn: 110 | byteflow = txn.get(self.keys[index]) 111 | 112 | unpacked = loads_data(byteflow) 113 | 114 | # load img 115 | imgbuf = unpacked 116 | buf = six.BytesIO() 117 | buf.write(imgbuf) 118 | buf.seek(0) 119 | img = Image.open(buf).convert('RGB') 120 | 121 | if self.transform is not None: 122 | img = self.transform(img) 123 | 124 | return img 125 | 126 | def __len__(self): 127 | return self.length 128 | 129 | def __repr__(self): 130 | return self.__class__.__name__ + ' (' + self.db_path + ')' 131 | -------------------------------------------------------------------------------- /dldemos/nms/bboxes.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SingleZombie/DL-Demos/739a21ff90f411c318e098823581afb3f8a1d010/dldemos/nms/bboxes.pt -------------------------------------------------------------------------------- /dldemos/nms/iou.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | from PIL import Image, ImageDraw, ImageFont 4 | 5 | 6 | def box_intersection( 7 | b1: Tuple[int, int, int, int], 8 | b2: Tuple[int, int, int, int]) -> Tuple[int, 
int, int, int]: 9 | x11, y11, x12, y12 = b1 10 | x21, y21, x22, y22 = b2 11 | 12 | xl = max(x11, x21) 13 | xr = min(x12, x22) 14 | yt = max(y11, y21) 15 | yb = min(y12, y22) 16 | return (xl, yt, xr, yb) 17 | 18 | 19 | def area(box: Tuple[int, int, int, int]) -> float: 20 | x1, y1, x2, y2 = box 21 | width = max(x2 - x1, 0) 22 | height = max(y2 - y1, 0) 23 | return width * height 24 | 25 | 26 | def iou(b1: Tuple[int, int, int, int], b2: Tuple[int, int, int, int]) -> float: 27 | intersection = box_intersection(b1, b2) 28 | inter_area = area(intersection) 29 | union_area = area(b1) + area(b2) - inter_area 30 | return inter_area / union_area 31 | 32 | 33 | def main(): 34 | img0 = Image.new('RGB', (400, 200), 'white') 35 | imgs = [] 36 | durations = [200] 37 | img = img0.copy() 38 | image_draw = ImageDraw.Draw(img) 39 | bbox1 = (70, 70, 160, 150) 40 | bbox2 = (40, 60, 140, 130) 41 | text_x = 170 42 | text_y = 30 43 | font = ImageFont.truetype( 44 | 'times.ttf', 45 | 16, 46 | ) 47 | 48 | def draw_line_of_text(text: str): 49 | nonlocal text_y, image_draw 50 | tw, th = font.getsize(text) 51 | image_draw.text((text_x, text_y), text, 'black') 52 | text_y += th 53 | 54 | image_draw.rectangle(bbox1, outline='orange', width=2) 55 | imgs.append(img.copy()) 56 | durations.append(500) 57 | image_draw.rectangle(bbox2, outline='purple', width=2) 58 | imgs.append(img.copy()) 59 | durations.append(500) 60 | 61 | image_draw.rectangle(bbox1, outline='orange', fill='orange', width=2) 62 | draw_line_of_text(f'a1 = {area(bbox1)}') 63 | imgs.append(img.copy()) 64 | durations.append(800) 65 | 66 | image_draw.rectangle(bbox2, outline='purple', fill='purple', width=2) 67 | draw_line_of_text(f'a2 = {area(bbox2)}') 68 | imgs.append(img.copy()) 69 | durations.append(800) 70 | 71 | ibox = box_intersection(bbox1, bbox2) 72 | image_draw.rectangle(ibox, outline='red', fill='red', width=2) 73 | draw_line_of_text(f'i = {area(ibox)}') 74 | imgs.append(img.copy()) 75 | durations.append(1000) 76 | 77 | image_draw.rectangle(bbox1, outline='green', fill='green', width=2) 78 | image_draw.rectangle(bbox2, outline='green', fill='green', width=2) 79 | draw_line_of_text( 80 | f'u = a1 + a2 - i = {area(bbox1) + area(bbox2) - area(ibox)}') 81 | imgs.append(img.copy()) 82 | durations.append(1500) 83 | 84 | image_draw.rectangle(ibox, outline='red', fill='red', width=2) 85 | draw_line_of_text(f'iou = i / u = {iou(bbox1, bbox2)}') 86 | imgs.append(img.copy()) 87 | durations.append(2000) 88 | 89 | img0.save('work_dirs/NMS/1.gif', 90 | save_all=True, 91 | append_images=imgs, 92 | duration=durations, 93 | loop=0) 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | -------------------------------------------------------------------------------- /dldemos/nms/show_bbox.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple 2 | 3 | from PIL import Image, ImageDraw, ImageFont 4 | 5 | 6 | def draw_bbox(img: Image.Image, 7 | bbox: Tuple[float, float, float, float], 8 | prob: float, 9 | rect_color: Tuple[int, int, int] = (255, 0, 0), 10 | text: Optional[str] = None, 11 | better_font: Optional[str] = None): 12 | img_draw = ImageDraw.Draw(img, 'RGBA') 13 | x1, y1, x2, y2 = bbox 14 | if better_font is not None: 15 | font = ImageFont.truetype( 16 | better_font, 17 | 12, 18 | ) 19 | else: 20 | font = ImageFont.load_default() 21 | 22 | img_draw.rectangle((x1 - 2, y1 - 2, x2 + 2, y2 + 2), 23 | outline=rect_color, 24 | width=2) 25 | 26 | # Show the class label in the top-right corner 27 | if 
text is not None: 28 | tw, th = font.getsize(text) 29 | img_draw.rectangle((x2 - tw, y1, x2, y1 + th), fill='black') 30 | img_draw.text((x2 - tw, y1), text, font=font, anchor='rt') 31 | 32 | # Show the probability in the top-left corner 33 | tw, th = font.getsize(f'{prob:.2f}') 34 | img_draw.rectangle((x1, y1, x1 + tw, y1 + th), fill='black') 35 | img_draw.text((x1, y1), f'{prob:.2f}', font=font) 36 | 37 | 38 | def main(): 39 | img = Image.open('work_dirs/detection_demo.jpg') 40 | draw_bbox(img, (191, 105, 294, 157), 0.95) 41 | draw_bbox(img, (168, 111, 280, 150), 0.8) 42 | draw_bbox(img, (218, 113, 284, 159), 0.7) 43 | draw_bbox(img, (193, 140, 231, 153), 0.3) 44 | 45 | draw_bbox(img, (323, 112, 380, 145), 0.7) 46 | draw_bbox(img, (305, 107, 364, 134), 0.8) 47 | draw_bbox(img, (294, 114, 376, 151), 0.9) 48 | draw_bbox(img, (319, 138, 358, 155), 0.3) 49 | img.save('work_dirs/NMS/1.jpg') 50 | 51 | 52 | if __name__ == '__main__': 53 | main() 54 | -------------------------------------------------------------------------------- /dldemos/pixelcnn/dataset.py: -------------------------------------------------------------------------------- 1 | import torchvision 2 | from torch.utils.data import DataLoader 3 | from torchvision.transforms import ToTensor 4 | 5 | 6 | def download_dataset(): 7 | mnist = torchvision.datasets.MNIST(root='./data/mnist', download=True) 8 | print('length of MNIST', len(mnist)) 9 | id = 4 10 | img, label = mnist[id] 11 | print(img) 12 | print(label) 13 | 14 | # On a computer with a monitor 15 | # img.show() 16 | 17 | img.save('work_dirs/tmp.jpg') 18 | tensor = ToTensor()(img) 19 | print(tensor.shape) 20 | print(tensor.max()) 21 | print(tensor.min()) 22 | 23 | 24 | def get_dataloader(batch_size: int): 25 | dataset = torchvision.datasets.MNIST(root='./data/mnist', 26 | transform=ToTensor()) 27 | return DataLoader(dataset, batch_size=batch_size, shuffle=True) 28 | 29 | 30 | def get_img_shape(): 31 | return (1, 28, 28) 32 | 33 | 34 | if __name__ == '__main__': 35 | import os 36 | os.makedirs('work_dirs', exist_ok=True) 37 | download_dataset() 38 | -------------------------------------------------------------------------------- /dldemos/pixelcnn/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import cv2 5 | import einops 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | from dldemos.pixelcnn.dataset import get_dataloader, get_img_shape 12 | from dldemos.pixelcnn.model import GatedPixelCNN, PixelCNN 13 | 14 | batch_size = 128 15 | # You can set color_level to any value between 2 and 256 16 | color_level = 8 17 | 18 | 19 | def train(model, device, model_path): 20 | dataloader = get_dataloader(batch_size) 21 | model = model.to(device) 22 | optimizer = torch.optim.Adam(model.parameters(), 1e-3) 23 | loss_fn = nn.CrossEntropyLoss() 24 | n_epochs = 40 25 | tic = time.time() 26 | for e in range(n_epochs): 27 | total_loss = 0 28 | for x, _ in dataloader: 29 | current_batch_size = x.shape[0] 30 | x = x.to(device) 31 | y = torch.ceil(x * (color_level - 1)).long() 32 | y = y.squeeze(1) 33 | predict_y = model(x) 34 | loss = loss_fn(predict_y, y) 35 | optimizer.zero_grad() 36 | loss.backward() 37 | optimizer.step() 38 | total_loss += loss.item() * current_batch_size 39 | total_loss /= len(dataloader.dataset) 40 | toc = time.time() 41 | torch.save(model.state_dict(), model_path) 42 | print(f'epoch {e} loss: {total_loss} elapsed {(toc - tic):.2f}s') 43 | 
print('Done') 44 | 45 | 46 | def sample(model, device, model_path, output_path, n_sample=81): 47 | 48 | model.eval() 49 | model.load_state_dict(torch.load(model_path)) 50 | model = model.to(device) 51 | C, H, W = get_img_shape() # (1, 28, 28) 52 | x = torch.zeros((n_sample, C, H, W)).to(device) 53 | with torch.no_grad(): 54 | for i in range(H): 55 | for j in range(W): 56 | output = model(x) 57 | prob_dist = F.softmax(output[:, :, i, j], -1) 58 | pixel = torch.multinomial(prob_dist, 59 | 1).float() / (color_level - 1) 60 | x[:, :, i, j] = pixel 61 | 62 | imgs = x * 255 63 | imgs = imgs.clamp(0, 255) 64 | imgs = einops.rearrange(imgs, 65 | '(b1 b2) c h w -> (b1 h) (b2 w) c', 66 | b1=int(n_sample**0.5)) 67 | 68 | imgs = imgs.detach().cpu().numpy().astype(np.uint8) 69 | 70 | cv2.imwrite(output_path, imgs) 71 | 72 | 73 | models = [ 74 | PixelCNN(15, 128, 32, True, color_level), 75 | GatedPixelCNN(15, 128, 32, True, color_level) 76 | ] 77 | 78 | if __name__ == '__main__': 79 | os.makedirs('work_dirs', exist_ok=True) 80 | model_id = 1 81 | model = models[model_id] 82 | device = 'cuda' 83 | model_path = f'dldemos/pixelcnn/model_{model_id}_{color_level}.pth' 84 | train(model, device, model_path) 85 | sample(model, device, model_path, 86 | f'work_dirs/pixelcnn_{model_id}_{color_level}.jpg') 87 | -------------------------------------------------------------------------------- /dldemos/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .function import (get_activation_de_func, get_activation_func, relu, 2 | relu_de, sigmoid) 3 | 4 | __all__ = [ 5 | 'sigmoid', 'relu', 'relu_de', 'get_activation_de_func', 6 | 'get_activation_func' 7 | ] 8 | -------------------------------------------------------------------------------- /dldemos/utils/function.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def sigmoid(x): 5 | return 1 / (1 + np.exp(-x)) 6 | 7 | 8 | def sigmoid_de(x): 9 | tmp = sigmoid(x) 10 | return tmp * (1 - tmp) 11 | 12 | 13 | def relu(x): 14 | return np.maximum(x, 0) 15 | 16 | 17 | def relu_de(x): 18 | return np.where(x > 0, 1, 0) 19 | 20 | 21 | def get_activation_func(name): 22 | if name == 'sigmoid': 23 | return sigmoid 24 | elif name == 'relu': 25 | return relu 26 | else: 27 | raise KeyError(f'No such activation function {name}') 28 | 29 | 30 | def get_activation_de_func(name): 31 | if name == 'sigmoid': 32 | return sigmoid_de 33 | elif name == 'relu': 34 | return relu_de 35 | else: 36 | raise KeyError(f'No such activation function {name}') 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | lmdb 3 | six 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | if __name__ == '__main__': 4 | setup( 5 | name='dldemos', 6 | version='0.1', 7 | packages=find_packages(), 8 | ) 9 | --------------------------------------------------------------------------------
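Two of the files above are library-style modules whose behavior is easy to check by hand. First, the IoU helpers in dldemos/nms/iou.py: the sketch below uses hypothetical boxes (not taken from the repo) and assumes it is run from the repository root, the same way the other demo scripts import `dldemos.*`. Two 10x10 boxes that overlap in a 5x5 region have intersection area 25 and union 100 + 100 - 25 = 175, so the IoU should be 25/175 ≈ 0.143.

```python
# Hypothetical sanity check for dldemos/nms/iou.py; run from the repo root.
from dldemos.nms.iou import area, box_intersection, iou

b1 = (0, 0, 10, 10)  # a 10 x 10 box, area 100
b2 = (5, 5, 15, 15)  # overlaps b1 in a 5 x 5 region

assert area(b1) == 100 and area(b2) == 100
assert box_intersection(b1, b2) == (5, 5, 10, 10)  # intersection area 25
assert abs(iou(b1, b2) - 25 / 175) < 1e-9  # union = 100 + 100 - 25 = 175
print(f'IoU = {iou(b1, b2):.3f}')  # IoU = 0.143
```

Second, dldemos/lmdb_loader.py only defines `folder2lmdb` and `ImageFolderLMDB` without a driver. A minimal usage sketch would look like the following, where both paths are placeholders for a real image folder and the LMDB database to create:

```python
# Hypothetical usage of dldemos/lmdb_loader.py; the paths are placeholders.
from torchvision.transforms import ToTensor

from dldemos.lmdb_loader import ImageFolderLMDB, folder2lmdb

folder2lmdb('data/my_images', 'data/my_images_lmdb')  # pack a folder into LMDB
dataset = ImageFolderLMDB('data/my_images_lmdb', transform=ToTensor())
print(len(dataset), dataset[0].shape)  # e.g. N torch.Size([3, H, W])
```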