├── .gitignore
├── CycleGANColab
│   └── CycleGANColab.ipynb
├── CycleGANSolution
│   ├── README.md
│   ├── a4-code-v2-updated
│   │   ├── README.md
│   │   ├── cycle_gan.py
│   │   ├── data_loader.py
│   │   ├── model_checker.py
│   │   ├── models.py
│   │   ├── utils.py
│   │   └── vanilla_gan.py
│   └── a4-handout.pdf
├── GAN
│   ├── README.md
│   ├── Wiley's
│   │   ├── models.py
│   │   └── vanilla_gan.py
│   └── vanilla_gan.py
├── LICENSE
├── LoRA
│   ├── lora_hello_world.ipynb
│   ├── lora_hello_world2.ipynb
│   └── lora_hello_world3.ipynb
├── MachineTranslation
│   ├── README.md
│   ├── ReferenceExample.ipynb
│   ├── seq2seq_translation_tutorial.ipynb
│   ├── torchtext_translation_tutorial.ipynb
│   └── torchtext_translation_tutorial_with_transformers.ipynb
├── NeuralArchitectureSearch
│   ├── Autokeras.ipynb
│   └── NeuralArchitectureSearch.ipynb
├── ProphetCode
│   └── main.py
├── Quantization
│   └── Quantization.ipynb
├── README.md
├── RL
│   └── RL.ipynb
├── RL_from_human_feedback
│   └── RL_from_human_feedback.ipynb
├── ReinforcmentLearning
│   └── simple_example.py
├── SiameseNetwork
│   ├── siamese_network.ipynb
│   ├── siamese_original_network.ipynb
│   └── twin_network.ipynb
├── SinGAN
│   ├── CatGAN.ipynb
│   ├── DoubleGAN.ipynb
│   ├── SinGAN.ipynb
│   ├── SinGANOfficialImplementation.ipynb
│   └── SinGAN_on_custom_image.ipynb
├── TabularXGBoost
│   └── TabularDataXGBoostTutorial.ipynb
├── Transformers
│   ├── README.md
│   ├── Transformers.ipynb
│   ├── requirements.txt
│   └── translation
│       └── train.py
├── TwinNetwork
│   └── twin_network.ipynb
├── VisionTransformers
│   ├── VisionTransformers.ipynb
│   ├── VisionTransformers_cleaned_up_code_2021-08-24.ipynb
│   ├── VisionTransformers_with_PyTorch_Transformers.ipynb
│   └── VisionTransformers_with_PyTorch_Transformers_with_BatchNorm.ipynb
├── handwriting-synthesis
│   ├── .gitignore
│   ├── .travis.yml
│   ├── checkpoints
│   │   ├── checkpoint
│   │   ├── model-17900.data-00000-of-00001
│   │   ├── model-17900.index
│   │   └── model-17900.meta
│   ├── data
│   │   ├── blacklist.npy
│   │   ├── processed
│   │   │   ├── .gitattributes
│   │   │   ├── c.npy
│   │   │   ├── c_len.npy
│   │   │   ├── w_id.npy
│   │   │   ├── x.npy
│   │   │   └── x_len.npy
│   │   └── raw
│   │       └── readme.md
│   ├── data_frame.py
│   ├── demo.py
│   ├── drawing.py
│   ├── img
│   │   ├── all_star.svg
│   │   ├── banner.svg
│   │   ├── downtown.svg
│   │   ├── give_up.svg
│   │   └── usage_demo.svg
│   ├── lyrics.py
│   ├── prepare_data.py
│   ├── readme.md
│   ├── requirements.txt
│   ├── rnn.py
│   ├── rnn_cell.py
│   ├── rnn_ops.py
│   ├── styles
│   │   ├── style-0-chars.npy
│   │   ├── style-0-strokes.npy
│   │   ├── style-1-chars.npy
│   │   ├── style-1-strokes.npy
│   │   ├── style-1.npy
│   │   ├── style-10-chars.npy
│   │   ├── style-10-strokes.npy
│   │   ├── style-11-chars.npy
│   │   ├── style-11-strokes.npy
│   │   ├── style-12-chars.npy
│   │   ├── style-12-strokes.npy
│   │   ├── style-2-chars.npy
│   │   ├── style-2-strokes.npy
│   │   ├── style-2.npy
│   │   ├── style-3-chars.npy
│   │   ├── style-3-strokes.npy
│   │   ├── style-4-chars.npy
│   │   ├── style-4-strokes.npy
│   │   ├── style-5-chars.npy
│   │   ├── style-5-strokes.npy
│   │   ├── style-6-chars.npy
│   │   ├── style-6-strokes.npy
│   │   ├── style-7-chars.npy
│   │   ├── style-7-strokes.npy
│   │   ├── style-8-chars.npy
│   │   ├── style-8-strokes.npy
│   │   ├── style-9-chars.npy
│   │   └── style-9-strokes.npy
│   ├── test_example.py
│   ├── tf_base_model.py
│   ├── tf_utils.py
│   └── upgrade_tf2.sh
├── handwriting_generator
│   ├── IBM.csv
│   ├── IBM_Transformer+TimeEmbedding.ipynb
│   ├── handwriting_generator.ipynb
│   └── saved.tgz
├── minGPT
│   ├── .gitignore
│   ├── LICENSE
│   ├── README.md
│   ├── mingpt.jpg
│   ├── mingpt
│   │   ├── __init__.py
│   │   ├── model.py
│   │   ├── trainer.py
│   │   └── utils.py
│   ├── play_char.ipynb
│   ├── play_image.ipynb
│   └── play_math.ipynb
└── sound
    ├── preprocess
    │   ├── README.md
    │   ├── mp3_to_wav.py
    │   ├── to_16000_wav.py
    │   └── trim.py
    ├── project-keyword-spotter
    │   ├── .DS_Store
    │   ├── CONTRIBUTING.md
    │   ├── Icon
    │   ├── LICENSE
    │   ├── README.md
    │   ├── audio_recorder.py
    │   ├── config
    │   │   ├── Icon
    │   │   ├── commands_v2.txt
    │   │   ├── commands_v2_snake.txt
    │   │   ├── labels_gc2.raw.txt
    │   │   └── labels_simple_audio.txt
    │   ├── features.py
    │   ├── hearing_snake_metadata.json
    │   ├── install_requirements.sh
    │   ├── media
    │   │   ├── Icon
    │   │   └── startscreen.png
    │   ├── mel_features.py
    │   ├── model.py
    │   ├── model_yamnet.py
    │   ├── models
    │   │   ├── Icon
    │   │   ├── model-backup1.tflite
    │   │   ├── model.tflite
    │   │   ├── model_quantized_edgetpu.tflite
    │   │   ├── voice_commands_v0.7_edgetpu.tflite
    │   │   └── voice_commands_v0.8_edgetpu.tflite
    │   ├── params.py
    │   ├── pygame_images
    │   │   ├── Icon
    │   │   ├── apple.png
    │   │   ├── bg.jpg
    │   │   ├── snake_head_with_ears.png
    │   │   └── snake_tail.png
    │   ├── run_hearing_snake.py
    │   ├── run_model.py
    │   ├── run_model_yamnet.py
    │   ├── run_snake.sh
    │   ├── run_yt_voice_control.py
    │   ├── run_yt_voice_control.sh
    │   ├── yamnet.py
    │   └── yamnet_class_map.csv
    ├── simple_audio.ipynb
    ├── simple_audio_custom_cough_dataset_compiled.ipynb
    ├── simple_audio_load_vggish.ipynb
    ├── simple_audio_load_vggish_with_layer.ipynb
    ├── simple_audio_load_yamnet.ipynb
    ├── simple_audio_new_spectrogram.ipynb
    ├── simple_audio_new_spectrogram_custom_cough_dataset.ipynb
    ├── simple_audio_new_spectrogram_custom_cough_dataset_quantize.ipynb
    ├── simple_audio_new_spectrogram_custom_dataset.ipynb
    ├── simple_audio_new_spectrogram_numpy.ipynb
    ├── simple_audio_new_spectrogram_numpy_and_normalize.ipynb
    ├── simple_audio_new_spectrogram_numpy_and_normalize_only_left_right_working.ipynb
    ├── simple_audio_working_vggish.ipynb
    ├── simple_audio_working_vggish_clean.ipynb
    ├── simple_audio_working_vggish_clean_freeze_vggish_weights.ipynb
    ├── simple_audio_working_vggish_dataset.ipynb
    └── sound.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
--------------------------------------------------------------------------------
/CycleGANColab/CycleGANColab.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "CycleGAN.ipynb",
7 | "provenance": [],
8 | "include_colab_link": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "accelerator": "GPU"
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {
20 | "id": "view-in-github",
21 | "colab_type": "text"
22 | },
23 | "source": [
24 |         ""
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "fTGhQfaYH_QV",
31 | "colab_type": "text"
32 | },
33 | "source": [
34 | "# CycleGAN\n",
35 | "\n",
36 |         "This notebook makes the CycleGAN homework assignment runnable on Google Colab (free GPU), so you don't need your own physical GPU.\n",
37 | "\n",
38 | "Code available on https://github.com/wileyw/DeepLearningDemos.git\n",
39 | "\n",
40 | "Homework Assignment: https://www.cs.toronto.edu/~rgrosse/courses/csc321_2018/assignments/a4-handout.pdf"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "metadata": {
46 | "id": "g-vpRE5yJebK",
47 | "colab_type": "code",
48 | "colab": {}
49 | },
50 | "source": [
51 | "!git clone https://github.com/wileyw/DeepLearningDemos.git"
52 | ],
53 | "execution_count": 0,
54 | "outputs": []
55 | },
56 | {
57 | "cell_type": "code",
58 | "metadata": {
59 | "id": "haTvhcMrH8ke",
60 | "colab_type": "code",
61 | "colab": {}
62 | },
63 | "source": [
64 | "!wget http://www.cs.toronto.edu/~rgrosse/courses/csc321_2018/assignments/a4-code.zip \n",
65 | "!unzip -q a4-code.zip"
66 | ],
67 | "execution_count": 0,
68 | "outputs": []
69 | },
70 | {
71 | "cell_type": "code",
72 | "metadata": {
73 | "id": "ks4AwPQYN-bo",
74 | "colab_type": "code",
75 | "colab": {}
76 | },
77 | "source": [
78 | "!ls\n",
79 | "!mv a4-code-v2-updated/emojis .\n",
80 | "!mv a4-code-v2-updated/checker_files ."
81 | ],
82 | "execution_count": 0,
83 | "outputs": []
84 | },
85 | {
86 | "cell_type": "code",
87 | "metadata": {
88 | "id": "5NyF3QrVKNqr",
89 | "colab_type": "code",
90 | "colab": {}
91 | },
92 | "source": [
93 | "!python3 DeepLearningDemos/CycleGANSolution/a4-code-v2-updated/model_checker.py"
94 | ],
95 | "execution_count": 0,
96 | "outputs": []
97 | },
98 | {
99 | "cell_type": "code",
100 | "metadata": {
101 | "id": "9EHeytGb46j0",
102 | "colab_type": "code",
103 | "colab": {}
104 | },
105 | "source": [
106 | "import sys\n",
107 | "sys.path.append('DeepLearningDemos/CycleGANSolution/a4-code-v2-updated')\n",
108 | "import cycle_gan\n",
109 | "from cycle_gan import *\n",
110 | "\n",
111 | "sys.argv[:] = ['cycle_gan.py']\n",
112 | "parser = create_parser()\n",
113 | "opts = parser.parse_args()\n",
114 | "\n",
115 | "opts.use_cycle_consistency_loss = True\n",
116 | "\n",
117 | "batch_size = opts.batch_size\n",
118 | "cycle_gan.batch_size = batch_size\n",
119 | "\n",
120 | "print(opts)\n",
121 | "main(opts)"
122 | ],
123 | "execution_count": 0,
124 | "outputs": []
125 | },
126 | {
127 | "cell_type": "code",
128 | "metadata": {
129 | "id": "ac_qDfPs5S_g",
130 | "colab_type": "code",
131 | "colab": {}
132 | },
133 | "source": [
134 | "from IPython.display import Image\n",
135 | "import matplotlib.pyplot as plt\n",
136 | "import glob\n",
137 | "images = sorted(glob.glob('./samples_cyclegan/*X-Y.png'))\n",
138 | "Image(images[-1])"
139 | ],
140 | "execution_count": 0,
141 | "outputs": []
142 | },
143 | {
144 | "cell_type": "code",
145 | "metadata": {
146 | "id": "uVnVGQo66bYF",
147 | "colab_type": "code",
148 | "colab": {}
149 | },
150 | "source": [
151 | "from IPython.display import Image\n",
152 | "import matplotlib.pyplot as plt\n",
153 | "import glob\n",
154 | "images = sorted(glob.glob('./samples_cyclegan/*Y-X.png'))\n",
155 | "Image(images[-1])"
156 | ],
157 | "execution_count": 0,
158 | "outputs": []
159 | },
160 | {
161 | "cell_type": "code",
162 | "metadata": {
163 | "id": "ZxN74FK-wdYV",
164 | "colab_type": "code",
165 | "colab": {}
166 | },
167 | "source": [
168 | "import sys\n",
169 | "sys.path.append('DeepLearningDemos/CycleGANSolution/a4-code-v2-updated')\n",
170 | "import vanilla_gan\n",
171 | "from vanilla_gan import *\n",
172 | "\n",
173 | "# Run Vanilla GAN\n",
174 | "sys.argv[:] = ['vanilla_gan.py']\n",
175 | "parser = create_parser()\n",
176 | "opts = parser.parse_args()\n",
177 | "\n",
178 | "batch_size = opts.batch_size\n",
179 | "vanilla_gan.batch_size = batch_size\n",
180 | "\n",
181 | "print(opts)\n",
182 | "main(opts)"
183 | ],
184 | "execution_count": 0,
185 | "outputs": []
186 | },
187 | {
188 | "cell_type": "code",
189 | "metadata": {
190 | "id": "NTCwanSr1S_D",
191 | "colab_type": "code",
192 | "colab": {}
193 | },
194 | "source": [
195 | "# View images\n",
196 | "from IPython.display import Image\n",
197 | "import matplotlib.pyplot as plt\n",
198 | "import glob\n",
199 | "images = sorted(glob.glob('./samples_vanilla/*.png'))\n",
200 | "Image(images[-1])\n"
201 | ],
202 | "execution_count": 0,
203 | "outputs": []
204 | }
205 | ]
206 | }
--------------------------------------------------------------------------------
/CycleGANSolution/README.md:
--------------------------------------------------------------------------------
1 | # Cycle GAN and Vanilla GAN
2 | Homework Assignment:
3 | https://www.cs.toronto.edu/~rgrosse/courses/csc321_2018/assignments/a4-handout.pdf
4 |
5 | ## Download Dataset:
6 | The .zip file also contains the starter code. When unzipping it, do not overwrite the existing .py files in this folder.
7 | ```
8 | wget http://www.cs.toronto.edu/~rgrosse/courses/csc321_2018/assignments/a4-code.zip
9 |
10 | # When prompted to overwrite, select [N]one
11 | unzip a4-code.zip
12 | ```
13 |
14 | ## Original Course Website:
15 | http://www.cs.toronto.edu/~rgrosse/courses/csc421_2019/
16 |
--------------------------------------------------------------------------------
/CycleGANSolution/a4-code-v2-updated/README.md:
--------------------------------------------------------------------------------
1 | Vanilla GAN
2 | ## 1. Run the model checker
3 | ```
4 | python3 model_checker.py
5 | ```
6 |
7 | ## 2. Run vanilla GAN
8 | ```
9 | python3 vanilla_gan.py
10 | ```
11 |
12 | ## 3. Run Cycle GAN
13 | ```
14 | python3 cycle_gan.py --use_cycle_consistency_loss
15 | ```
16 |
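17 | ## 4. Run from Python (e.g. inside a notebook)
18 | A minimal sketch (not an official part of the assignment) of driving the same entry point from Python, the way the Colab notebook in this repo does; it assumes the `create_parser()` and `main()` functions defined in `cycle_gan.py`:
19 | ```
20 | import sys
21 | from cycle_gan import create_parser, main
22 | 
23 | sys.argv[:] = ['cycle_gan.py']          # reset argv so argparse sees only defaults
24 | opts = create_parser().parse_args()
25 | opts.use_cycle_consistency_loss = True  # same effect as --use_cycle_consistency_loss
26 | main(opts)
27 | ```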
--------------------------------------------------------------------------------
/CycleGANSolution/a4-code-v2-updated/data_loader.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import torch
4 | from torch.utils.data import DataLoader
5 |
6 | from torchvision import datasets
7 | from torchvision import transforms
8 |
9 |
10 | def get_emoji_loader(emoji_type, opts):
11 | """Creates training and test data loaders.
12 | """
13 | transform = transforms.Compose([
14 |         transforms.Resize(opts.image_size),  # torchvision renamed Scale to Resize
15 | transforms.ToTensor(),
16 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
17 | ])
18 |
19 | train_path = os.path.join('./emojis', emoji_type)
20 | test_path = os.path.join('./emojis', 'Test_{}'.format(emoji_type))
21 |
22 | train_dataset = datasets.ImageFolder(train_path, transform)
23 | test_dataset = datasets.ImageFolder(test_path, transform)
24 |
25 | train_dloader = DataLoader(dataset=train_dataset, batch_size=opts.batch_size, shuffle=True, num_workers=opts.num_workers)
26 | test_dloader = DataLoader(dataset=test_dataset, batch_size=opts.batch_size, shuffle=False, num_workers=opts.num_workers)
27 |
28 | return train_dloader, test_dloader
29 |
--------------------------------------------------------------------------------
/CycleGANSolution/a4-code-v2-updated/model_checker.py:
--------------------------------------------------------------------------------
1 | # CSC 321, Assignment 4
2 | #
3 | # This is a script to check whether the outputs of your DCGenerator, DCDiscriminator, and
4 | # CycleGenerator models produce the expected outputs.
5 | #
6 | # NOTE THAT THIS MODEL CHECKER IS PROVIDED FOR CONVENIENCE ONLY, AND MAY PRODUCE FALSE NEGATIVES.
7 | # DO NOT USE THIS AS THE ONLY WAY TO CHECK THAT YOUR MODEL IS CORRECT.
8 | #
9 | # Usage:
10 | # ======
11 | #
12 | # python model_checker.py
13 | #
14 |
15 | import warnings
16 | warnings.filterwarnings("ignore")
17 |
18 | # Torch imports
19 | import torch
20 | from torch.autograd import Variable
21 |
22 | # Numpy
23 | import numpy as np
24 |
25 | # Local imports
26 | from models import DCGenerator, DCDiscriminator, CycleGenerator
27 |
28 |
29 | def count_parameters(model):
30 | """Finds the total number of trainable parameters in a model.
31 | """
32 | return sum(p.numel() for p in model.parameters() if p.requires_grad)
33 |
34 |
35 | def sample_noise(dim):
36 | """
37 | Generate a PyTorch Tensor of uniform random noise.
38 |
39 | Input:
40 |     - dim: Integer giving the dimension of noise to generate.
41 |       (The batch size is fixed at 1 here.)
42 |
43 | Output:
44 | - A PyTorch Tensor of shape (1, dim, 1, 1) containing uniform
45 | random noise in the range (-1, 1).
46 | """
47 | return Variable(torch.rand(1, dim) * 2 - 1).unsqueeze(2).unsqueeze(3)
48 |
49 |
50 | def check_dc_generator():
51 | """Checks the output and number of parameters of the DCGenerator class.
52 | """
53 | state = torch.load('checker_files/dc_generator.pt')
54 |
55 | G = DCGenerator(noise_size=100, conv_dim=32)
56 | G.load_state_dict(state['state_dict'])
57 | noise = state['input']
58 | dc_generator_expected = state['output']
59 |
60 | output = G(noise)
61 | output_np = output.data.cpu().numpy()
62 |
63 | if np.allclose(output_np, dc_generator_expected, atol=1e-06):
64 | print('DCGenerator output: EQUAL')
65 | else:
66 | print('DCGenerator output: NOT EQUAL')
67 |
68 | num_params = count_parameters(G)
69 | expected_params = 370624
70 |
71 | print('DCGenerator #params = {}, expected #params = {}, {}'.format(
72 | num_params, expected_params, 'EQUAL' if num_params == expected_params else 'NOT EQUAL'))
73 |
74 | print('-' * 80)
75 |
76 |
77 | def check_dc_discriminator():
78 | """Checks the output and number of parameters of the DCDiscriminator class.
79 | """
80 | state = torch.load('checker_files/dc_discriminator.pt')
81 |
82 | D = DCDiscriminator(conv_dim=32)
83 | D.load_state_dict(state['state_dict'])
84 | images = state['input']
85 | dc_discriminator_expected = state['output']
86 |
87 | output = D(images)
88 | output_np = output.data.cpu().numpy()
89 |
90 | if np.allclose(output_np, dc_discriminator_expected, atol=1e-06):
91 | print('DCDiscriminator output: EQUAL')
92 | else:
93 | print('DCDiscriminator output: NOT EQUAL')
94 |
95 | num_params = count_parameters(D)
96 | expected_params = 167872
97 |
98 | print('DCDiscriminator #params = {}, expected #params = {}, {}'.format(
99 | num_params, expected_params, 'EQUAL' if num_params == expected_params else 'NOT EQUAL'))
100 |
101 | print('-' * 80)
102 |
103 |
104 | def check_cycle_generator():
105 | """Checks the output and number of parameters of the CycleGenerator class.
106 | """
107 | state = torch.load('checker_files/cycle_generator.pt')
108 |
109 | G_XtoY = CycleGenerator(conv_dim=32, init_zero_weights=False)
110 | G_XtoY.load_state_dict(state['state_dict'])
111 | images = state['input']
112 | cycle_generator_expected = state['output']
113 |
114 | output = G_XtoY(images)
115 | output_np = output.data.cpu().numpy()
116 |
117 | if np.allclose(output_np, cycle_generator_expected, atol=1e-06):
118 | print('CycleGenerator output: EQUAL')
119 | else:
120 | print('CycleGenerator output: NOT EQUAL')
121 |
122 | num_params = count_parameters(G_XtoY)
123 | expected_params = 105856
124 |
125 | print('CycleGenerator #params = {}, expected #params = {}, {}'.format(
126 | num_params, expected_params, 'EQUAL' if num_params == expected_params else 'NOT EQUAL'))
127 |
128 | print('-' * 80)
129 |
130 |
131 | if __name__ == '__main__':
132 |
133 | try:
134 | check_dc_generator()
135 | except:
136 | print('Crashed while checking DCGenerator. Maybe not implemented yet?')
137 |
138 | try:
139 | check_dc_discriminator()
140 | except:
141 | print('Crashed while checking DCDiscriminator. Maybe not implemented yet?')
142 |
143 |     try:
144 |         check_cycle_generator()
145 |     except:
146 |         print('Crashed while checking CycleGenerator. Maybe not implemented yet?')
147 |
--------------------------------------------------------------------------------
/CycleGANSolution/a4-code-v2-updated/models.py:
--------------------------------------------------------------------------------
1 | # CSC 321, Assignment 4
2 | #
3 | # This file contains the models used for both parts of the assignment:
4 | #
5 | # - DCGenerator --> Used in the vanilla GAN in Part 1
6 | # - CycleGenerator --> Used in the CycleGAN in Part 2
7 | # - DCDiscriminator --> Used in both the vanilla GAN and CycleGAN (Parts 1 and 2)
8 | #
9 | # For the assignment, you are asked to create the architectures of these three networks by
10 | # filling in the __init__ methods in the DCGenerator, CycleGenerator, and DCDiscriminator classes.
11 | # Note that the forward passes of these models are provided for you, so the only part you need to
12 | # fill in is __init__.
13 |
14 | import pdb
15 | import torch
16 | import torch.nn as nn
17 | import torch.nn.functional as F
18 |
19 |
20 | def deconv(in_channels, out_channels, kernel_size, stride=2, padding=1, batch_norm=True):
21 | """Creates a transposed-convolutional layer, with optional batch normalization.
22 | """
23 | layers = []
24 | layers.append(nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride, padding, bias=False))
25 | if batch_norm:
26 | layers.append(nn.BatchNorm2d(out_channels))
27 | return nn.Sequential(*layers)
28 |
29 |
30 | def conv(in_channels, out_channels, kernel_size, stride=2, padding=1, batch_norm=True, init_zero_weights=False):
31 | """Creates a convolutional layer, with optional batch normalization.
32 | """
33 | layers = []
34 | conv_layer = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
35 | if init_zero_weights:
36 | conv_layer.weight.data = torch.randn(out_channels, in_channels, kernel_size, kernel_size) * 0.001
37 | layers.append(conv_layer)
38 |
39 | if batch_norm:
40 | layers.append(nn.BatchNorm2d(out_channels))
41 | return nn.Sequential(*layers)
42 |
43 |
44 | class DCGenerator(nn.Module):
45 | def __init__(self, noise_size, conv_dim):
46 | super(DCGenerator, self).__init__()
47 |
48 | ###########################################
49 | ## FILL THIS IN: CREATE ARCHITECTURE ##
50 | ###########################################
51 |
52 | self.deconv1 = deconv(noise_size, 128, 4, stride=1, padding=0, batch_norm=True)
53 | self.deconv2 = deconv(128, 64, 4, stride=2, padding=1, batch_norm=True)
54 | self.deconv3 = deconv(64, 32, 4, stride=2, padding=1, batch_norm=True)
55 | self.deconv4 = deconv(32, 3, 4, stride=2, padding=1, batch_norm=False)
56 |
57 | def forward(self, z):
58 | """Generates an image given a sample of random noise.
59 |
60 | Input
61 | -----
62 | z: BS x noise_size x 1 x 1 --> 16x100x1x1
63 |
64 | Output
65 | ------
66 | out: BS x channels x image_width x image_height --> 16x3x32x32
67 | """
68 |
69 | out = F.relu(self.deconv1(z))
70 | out = F.relu(self.deconv2(out))
71 | out = F.relu(self.deconv3(out))
72 | out = F.tanh(self.deconv4(out))
73 | return out
74 |
75 |
76 | class ResnetBlock(nn.Module):
77 | def __init__(self, conv_dim):
78 | super(ResnetBlock, self).__init__()
79 | self.conv_layer = conv(in_channels=conv_dim, out_channels=conv_dim, kernel_size=3, stride=1, padding=1)
80 |
81 | def forward(self, x):
82 | out = x + self.conv_layer(x)
83 | return out
84 |
85 |
86 | class CycleGenerator(nn.Module):
87 | """Defines the architecture of the generator network.
88 | Note: Both generators G_XtoY and G_YtoX have the same architecture in this assignment.
89 | """
90 | def __init__(self, conv_dim=64, init_zero_weights=False):
91 | super(CycleGenerator, self).__init__()
92 |
93 | ###########################################
94 | ## FILL THIS IN: CREATE ARCHITECTURE ##
95 | ###########################################
96 |
97 | # 1. Define the encoder part of the generator (that extracts features from the input image)
98 | self.conv1 = conv(3, 32, 4, stride=2, padding=1, batch_norm=True, init_zero_weights=init_zero_weights)
99 | self.conv2 = conv(32, 64, 4, stride=2, padding=1, batch_norm=True, init_zero_weights=init_zero_weights)
100 |
101 | # 2. Define the transformation part of the generator
102 | self.resnet_block = ResnetBlock(64)
103 |
104 | # 3. Define the decoder part of the generator (that builds up the output image from features)
105 | self.deconv1 = deconv(64, 32, 4, stride=2, padding=1, batch_norm=True)
106 | self.deconv2 = deconv(32, 3, 4, stride=2, padding=1, batch_norm=False)
107 |
108 | def forward(self, x):
109 | """Generates an image conditioned on an input image.
110 |
111 | Input
112 | -----
113 | x: BS x 3 x 32 x 32
114 |
115 | Output
116 | ------
117 | out: BS x 3 x 32 x 32
118 | """
119 |
120 | out = F.relu(self.conv1(x))
121 | out = F.relu(self.conv2(out))
122 |
123 | out = F.relu(self.resnet_block(out))
124 |
125 | out = F.relu(self.deconv1(out))
126 | out = F.tanh(self.deconv2(out))
127 |
128 | return out
129 |
130 |
131 | class DCDiscriminator(nn.Module):
132 | """Defines the architecture of the discriminator network.
133 | Note: Both discriminators D_X and D_Y have the same architecture in this assignment.
134 | """
135 | def __init__(self, conv_dim=64):
136 | super(DCDiscriminator, self).__init__()
137 |
138 | ###########################################
139 | ## FILL THIS IN: CREATE ARCHITECTURE ##
140 | ###########################################
141 |
142 | self.conv1 = conv(3, 32, 4, stride=2, padding=1, batch_norm=True, init_zero_weights=False)
143 | self.conv2 = conv(32, 64, 4, stride=2, padding=1, batch_norm=True, init_zero_weights=False)
144 | self.conv3 = conv(64, 128, 4, stride=2, padding=1, batch_norm=True, init_zero_weights=False)
145 | self.conv4 = conv(128, 1, 4, stride=1, padding=0, batch_norm=False, init_zero_weights=False)
146 |
147 | def forward(self, x):
148 |
149 | out = F.relu(self.conv1(x))
150 | out = F.relu(self.conv2(out))
151 | out = F.relu(self.conv3(out))
152 |
153 | out = self.conv4(out).squeeze()
154 | out = F.sigmoid(out)
155 | return out
156 |
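157 | 
158 | if __name__ == '__main__':
159 |     # Hedged smoke test (not part of the assignment): verifies that the input/output
160 |     # shapes documented in the docstrings above hold for a small batch.
161 |     z = torch.rand(16, 100, 1, 1) * 2 - 1   # uniform noise in (-1, 1)
162 |     x = torch.rand(16, 3, 32, 32) * 2 - 1   # image-sized tensors in (-1, 1)
163 |     print(DCGenerator(noise_size=100, conv_dim=32)(z).shape)  # expect torch.Size([16, 3, 32, 32])
164 |     print(CycleGenerator(conv_dim=32)(x).shape)               # expect torch.Size([16, 3, 32, 32])
165 |     print(DCDiscriminator(conv_dim=32)(x).shape)              # expect torch.Size([16]) after squeeze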
--------------------------------------------------------------------------------
/CycleGANSolution/a4-code-v2-updated/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import torch
4 | from torch.autograd import Variable
5 |
6 |
7 | def to_var(x):
8 | """Converts numpy to variable."""
9 | if torch.cuda.is_available():
10 | x = x.cuda()
11 | return Variable(x)
12 |
13 |
14 | def to_data(x):
15 | """Converts variable to numpy."""
16 | if torch.cuda.is_available():
17 | x = x.cpu()
18 | return x.data.numpy()
19 |
20 |
21 | def create_dir(directory):
22 | """Creates a directory if it does not already exist.
23 | """
24 | if not os.path.exists(directory):
25 | os.makedirs(directory)
26 |
--------------------------------------------------------------------------------
/CycleGANSolution/a4-handout.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/CycleGANSolution/a4-handout.pdf
--------------------------------------------------------------------------------
/GAN/README.md:
--------------------------------------------------------------------------------
1 | # Vanilla GAN
2 | Homework Assignment:
3 | https://www.cs.toronto.edu/~rgrosse/courses/csc321_2018/assignments/a4-handout.pdf
4 |
5 | Download Code:
6 | ```
7 | wget http://www.cs.toronto.edu/~rgrosse/courses/csc321_2018/assignments/a4-code.zip
8 | unzip a4-code.zip
9 | ```
10 |
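11 | ## Quick sanity check
12 | A minimal sketch (assuming a completed `models.py`, e.g. the one under `Wiley's/`, is importable) that samples uniform noise in (-1, 1) the way `model_checker.py` does and checks the generator's output shape:
13 | ```
14 | import torch
15 | from models import DCGenerator
16 | 
17 | G = DCGenerator(noise_size=100, conv_dim=32)
18 | z = torch.rand(16, 100, 1, 1) * 2 - 1  # uniform noise in (-1, 1)
19 | print(G(z).shape)                      # expect torch.Size([16, 3, 32, 32])
20 | ```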
--------------------------------------------------------------------------------
/GAN/Wiley's/models.py:
--------------------------------------------------------------------------------
1 | # CSC 321, Assignment 4
2 | #
3 | # This file contains the models used for both parts of the assignment:
4 | #
5 | # - DCGenerator --> Used in the vanilla GAN in Part 1
6 | # - CycleGenerator --> Used in the CycleGAN in Part 2
7 | # - DCDiscriminator --> Used in both the vanilla GAN and CycleGAN (Parts 1 and 2)
8 | #
9 | # For the assignment, you are asked to create the architectures of these three networks by
10 | # filling in the __init__ methods in the DCGenerator, CycleGenerator, and DCDiscriminator classes.
11 | # Note that the forward passes of these models are provided for you, so the only part you need to
12 | # fill in is __init__.
13 |
14 | import pdb
15 | import torch
16 | import torch.nn as nn
17 | import torch.nn.functional as F
18 |
19 |
20 | def deconv(
21 | in_channels, out_channels, kernel_size, stride=2, padding=1, batch_norm=True
22 | ):
23 | """Creates a transposed-convolutional layer, with optional batch normalization.
24 | """
25 | layers = []
26 | layers.append(
27 | nn.ConvTranspose2d(
28 | in_channels, out_channels, kernel_size, stride, padding, bias=False
29 | )
30 | )
31 | if batch_norm:
32 | layers.append(nn.BatchNorm2d(out_channels))
33 | return nn.Sequential(*layers)
34 |
35 |
36 | def conv(
37 | in_channels,
38 | out_channels,
39 | kernel_size,
40 | stride=2,
41 | padding=1,
42 | batch_norm=True,
43 | init_zero_weights=False,
44 | ):
45 | """Creates a convolutional layer, with optional batch normalization.
46 | """
47 | layers = []
48 | conv_layer = nn.Conv2d(
49 | in_channels=in_channels,
50 | out_channels=out_channels,
51 | kernel_size=kernel_size,
52 | stride=stride,
53 | padding=padding,
54 | bias=False,
55 | )
56 | if init_zero_weights:
57 | conv_layer.weight.data = (
58 | torch.randn(out_channels, in_channels, kernel_size, kernel_size) * 0.001
59 | )
60 | layers.append(conv_layer)
61 |
62 | if batch_norm:
63 | layers.append(nn.BatchNorm2d(out_channels))
64 | return nn.Sequential(*layers)
65 |
66 |
67 | class DCGenerator(nn.Module):
68 | def __init__(self, noise_size, conv_dim):
69 | super(DCGenerator, self).__init__()
70 |
71 | ###########################################
72 | ## FILL THIS IN: CREATE ARCHITECTURE ##
73 | ###########################################
74 | kernel_size = 4
75 |
76 |         self.deconv1 = deconv(noise_size, conv_dim * 4, kernel_size, padding=0)
77 | self.deconv2 = deconv(conv_dim * 4, conv_dim * 2, kernel_size)
78 | self.deconv3 = deconv(conv_dim * 2, conv_dim, kernel_size)
79 | self.deconv4 = deconv(conv_dim, 3, kernel_size, 2, batch_norm=False)
80 |
81 | def forward(self, z):
82 | """Generates an image given a sample of random noise.
83 |
84 | Input
85 | -----
86 | z: BS x noise_size x 1 x 1 --> 16x100x1x1
87 |
88 | Output
89 | ------
90 | out: BS x channels x image_width x image_height --> 16x3x32x32
91 | """
92 |
93 | out = F.relu(self.deconv1(z))
94 | out = F.relu(self.deconv2(out))
95 | out = F.relu(self.deconv3(out))
96 | out = F.tanh(self.deconv4(out))
97 | return out
98 |
99 |
100 | class ResnetBlock(nn.Module):
101 | def __init__(self, conv_dim):
102 | super(ResnetBlock, self).__init__()
103 | self.conv_layer = conv(
104 | in_channels=conv_dim,
105 | out_channels=conv_dim,
106 | kernel_size=3,
107 | stride=1,
108 | padding=1,
109 | )
110 |
111 | def forward(self, x):
112 | out = x + self.conv_layer(x)
113 | return out
114 |
115 |
116 | class CycleGenerator(nn.Module):
117 | """Defines the architecture of the generator network.
118 | Note: Both generators G_XtoY and G_YtoX have the same architecture in this assignment.
119 | """
120 |
121 | def __init__(self, conv_dim=64, init_zero_weights=False):
122 | super(CycleGenerator, self).__init__()
123 |
124 | ###########################################
125 | ## FILL THIS IN: CREATE ARCHITECTURE ##
126 | ###########################################
127 |
128 | kernel_size = 4
129 | self.conv1 = conv(3, conv_dim, kernel_size)
130 | self.conv2 = conv(conv_dim, conv_dim * 2, kernel_size)
131 |
132 | self.resnet_block = ResnetBlock(conv_dim * 2)
133 |
134 | self.deconv1 = deconv(conv_dim * 2, conv_dim, kernel_size)
135 | self.deconv2 = deconv(conv_dim, 3, kernel_size, 2, batch_norm=False)
136 |
137 |
138 | # 1. Define the encoder part of the generator (that extracts features from the input image)
139 | # self.conv1 = conv(...)
140 | # self.conv2 = conv(...)
141 |
142 | # 2. Define the transformation part of the generator
143 | # self.resnet_block = ...
144 |
145 | # 3. Define the decoder part of the generator (that builds up the output image from features)
146 | # self.deconv1 = deconv(...)
147 | # self.deconv2 = deconv(...)
148 |
149 | def forward(self, x):
150 | """Generates an image conditioned on an input image.
151 |
152 | Input
153 | -----
154 | x: BS x 3 x 32 x 32
155 |
156 | Output
157 | ------
158 | out: BS x 3 x 32 x 32
159 | """
160 |
161 | out = F.relu(self.conv1(x))
162 | out = F.relu(self.conv2(out))
163 |
164 | out = F.relu(self.resnet_block(out))
165 |
166 | out = F.relu(self.deconv1(out))
167 | out = F.tanh(self.deconv2(out))
168 |
169 | return out
170 |
171 |
172 | class DCDiscriminator(nn.Module):
173 | """Defines the architecture of the discriminator network.
174 | Note: Both discriminators D_X and D_Y have the same architecture in this
175 | assignment.
176 | """
177 |
178 | def __init__(self, conv_dim=64):
179 | super(DCDiscriminator, self).__init__()
180 |
181 | ###########################################
182 | ## FILL THIS IN: CREATE ARCHITECTURE ##
183 | ###########################################
184 |
185 | kernel_size = 4
186 | self.conv1 = conv(3, conv_dim, kernel_size)
187 | self.conv2 = conv(conv_dim, conv_dim * 2, kernel_size)
188 | self.conv3 = conv(conv_dim * 2, conv_dim * 4, kernel_size)
189 | self.conv4 = conv(conv_dim * 4, 1, kernel_size, 2, padding=0, batch_norm=False)
190 |
191 | def forward(self, x):
192 |
193 | out = F.relu(self.conv1(x))
194 | out = F.relu(self.conv2(out))
195 | out = F.relu(self.conv3(out))
196 |
197 | out = self.conv4(out).squeeze()
198 | out = F.sigmoid(out)
199 | return out
200 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Wiley
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/LoRA/lora_hello_world.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "private_outputs": true,
7 | "provenance": [],
8 | "gpuType": "T4",
9 | "authorship_tag": "ABX9TyPWmZoHOxQbf2DbGURay9eI",
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "name": "python3",
14 | "display_name": "Python 3"
15 | },
16 | "language_info": {
17 | "name": "python"
18 | },
19 | "accelerator": "GPU",
20 | "gpuClass": "standard"
21 | },
22 | "cells": [
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {
26 | "id": "view-in-github",
27 | "colab_type": "text"
28 | },
29 | "source": [
30 |         ""
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "source": [
36 | "!pip3 uninstall torch -y"
37 | ],
38 | "metadata": {
39 | "id": "EzfLQmy-c9fY"
40 | },
41 | "execution_count": null,
42 | "outputs": []
43 | },
44 | {
45 | "cell_type": "code",
46 | "source": [
47 | "!pip install torch==1.11.0"
48 | ],
49 | "metadata": {
50 | "id": "4-IPSPXGcXsP"
51 | },
52 | "execution_count": null,
53 | "outputs": []
54 | },
55 | {
56 | "cell_type": "code",
57 | "source": [
58 | "import torch\n",
59 | "print(torch.__version__)"
60 | ],
61 | "metadata": {
62 | "id": "Et1rcuQMeDjf"
63 | },
64 | "execution_count": null,
65 | "outputs": []
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {
71 | "id": "FK-QGuS3gsMZ"
72 | },
73 | "outputs": [],
74 | "source": [
75 | "print('test')"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "source": [
81 | "!sudo apt-get update"
82 | ],
83 | "metadata": {
84 | "id": "61kVAU1MirsS"
85 | },
86 | "execution_count": null,
87 | "outputs": []
88 | },
89 | {
90 | "cell_type": "code",
91 | "source": [
92 | "!sudo apt-get -y install git jq virtualenv"
93 | ],
94 | "metadata": {
95 | "id": "DOUW4eOIizbG"
96 | },
97 | "execution_count": null,
98 | "outputs": []
99 | },
100 | {
101 | "cell_type": "code",
102 | "source": [
103 | "!git clone https://github.com/microsoft/LoRA.git; cd LoRA"
104 | ],
105 | "metadata": {
106 | "id": "DEUOudE_i4Yv"
107 | },
108 | "execution_count": null,
109 | "outputs": []
110 | },
111 | {
112 | "cell_type": "code",
113 | "source": [
114 | "!ls"
115 | ],
116 | "metadata": {
117 | "id": "bokzXL8ei8UP"
118 | },
119 | "execution_count": null,
120 | "outputs": []
121 | },
122 | {
123 | "cell_type": "code",
124 | "source": [
125 | "!ls LoRA/examples/NLG"
126 | ],
127 | "metadata": {
128 | "id": "WPU5_vxWjCvp"
129 | },
130 | "execution_count": null,
131 | "outputs": []
132 | },
133 | {
134 | "cell_type": "code",
135 | "source": [
136 | "!python3 -m pip install --upgrade pip"
137 | ],
138 | "metadata": {
139 | "id": "7C80M_rujyM5"
140 | },
141 | "execution_count": null,
142 | "outputs": []
143 | },
144 | {
145 | "cell_type": "code",
146 | "source": [
147 | "!python3 -m pip install -r LoRA/examples/NLG/requirement.txt"
148 | ],
149 | "metadata": {
150 | "id": "x8oqUQjHjmj4"
151 | },
152 | "execution_count": null,
153 | "outputs": []
154 | },
155 | {
156 | "cell_type": "code",
157 | "source": [
158 | "!python3 -m pip install transformers"
159 | ],
160 | "metadata": {
161 | "id": "lS2ZmMO3klGw"
162 | },
163 | "execution_count": null,
164 | "outputs": []
165 | },
166 | {
167 | "cell_type": "code",
168 | "source": [
169 | "!python3 -m pip install spacy tqdm tensorboard progress"
170 | ],
171 | "metadata": {
172 | "id": "l2uD_wpFk4gP"
173 | },
174 | "execution_count": null,
175 | "outputs": []
176 | },
177 | {
178 | "cell_type": "code",
179 | "source": [
180 | "import torch\n",
181 | "print(torch.__version__)"
182 | ],
183 | "metadata": {
184 | "id": "Q7dAMI4lkR2u"
185 | },
186 | "execution_count": null,
187 | "outputs": []
188 | },
189 | {
190 | "cell_type": "code",
191 | "source": [
192 | "%cd LoRA/examples/NLG"
193 | ],
194 | "metadata": {
195 | "id": "YbDu2w3FlC9l"
196 | },
197 | "execution_count": null,
198 | "outputs": []
199 | },
200 | {
201 | "cell_type": "code",
202 | "source": [
203 | "!ls"
204 | ],
205 | "metadata": {
206 | "id": "QVEol-7IlI1A"
207 | },
208 | "execution_count": null,
209 | "outputs": []
210 | },
211 | {
212 | "cell_type": "code",
213 | "source": [
214 | "!bash download_pretrained_checkpoints.sh"
215 | ],
216 | "metadata": {
217 | "id": "hTaJ7ZYzlMsf"
218 | },
219 | "execution_count": null,
220 | "outputs": []
221 | },
222 | {
223 | "cell_type": "code",
224 | "source": [
225 | "!bash create_datasets.sh"
226 | ],
227 | "metadata": {
228 | "id": "e2HL_HV-lQlb"
229 | },
230 | "execution_count": null,
231 | "outputs": []
232 | },
233 | {
234 | "cell_type": "code",
235 | "source": [
236 | "%cd ./eval"
237 | ],
238 | "metadata": {
239 | "id": "JDyJhqaUlSsf"
240 | },
241 | "execution_count": null,
242 | "outputs": []
243 | },
244 | {
245 | "cell_type": "code",
246 | "source": [
247 | "!bash download_evalscript.sh"
248 | ],
249 | "metadata": {
250 | "id": "K5Cw5xVRl6wM"
251 | },
252 | "execution_count": null,
253 | "outputs": []
254 | },
255 | {
256 | "cell_type": "code",
257 | "source": [
258 | "%cd .."
259 | ],
260 | "metadata": {
261 | "id": "Smyk-DKyl8rP"
262 | },
263 | "execution_count": null,
264 | "outputs": []
265 | },
266 | {
267 | "cell_type": "code",
268 | "source": [
269 | "!ls"
270 | ],
271 | "metadata": {
272 | "id": "n8rIjmXNmWj3"
273 | },
274 | "execution_count": null,
275 | "outputs": []
276 | },
277 | {
278 | "cell_type": "code",
279 | "source": [
280 | "!python3 -m pip install loralib"
281 | ],
282 | "metadata": {
283 | "id": "gQX04dU0oGW1"
284 | },
285 | "execution_count": null,
286 | "outputs": []
287 | },
288 | {
289 | "cell_type": "code",
290 | "source": [
291 | "!python3 -m torch.distributed.launch --nproc_per_node=1 src/gpt2_ft.py \\\n",
292 | " --train_data ./data/e2e/train.jsonl \\\n",
293 | " --valid_data ./data/e2e/valid.jsonl \\\n",
294 | " --train_batch_size 4 \\\n",
295 | " --grad_acc 1 \\\n",
296 | " --valid_batch_size 2 \\\n",
297 | " --seq_len 512 \\\n",
298 | " --model_card gpt2.md \\\n",
299 | " --init_checkpoint ./pretrained_checkpoints/gpt2-medium-pytorch_model.bin \\\n",
300 | " --platform local \\\n",
301 | " --clip 0.0 \\\n",
302 | " --lr 0.0002 \\\n",
303 | " --weight_decay 0.01 \\\n",
304 | " --correct_bias \\\n",
305 | " --adam_beta2 0.999 \\\n",
306 | " --scheduler linear \\\n",
307 | " --warmup_step 500 \\\n",
308 | " --max_epoch 5 \\\n",
309 | " --save_interval 1000 \\\n",
310 | " --lora_dim 4 \\\n",
311 | " --lora_alpha 32 \\\n",
312 | " --lora_dropout 0.1 \\\n",
313 | " --label_smooth 0.1 \\\n",
314 | " --work_dir ./trained_models/GPT2_M/e2e \\\n",
315 | " --random_seed 110"
316 | ],
317 | "metadata": {
318 | "id": "wWn2H2somOZ5"
319 | },
320 | "execution_count": null,
321 | "outputs": []
322 | }
323 | ]
324 | }
--------------------------------------------------------------------------------
/LoRA/lora_hello_world2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "private_outputs": true,
7 | "provenance": [],
8 | "gpuType": "T4",
9 | "authorship_tag": "ABX9TyPfd2szk9I+NCou6SCoJGZw",
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "name": "python3",
14 | "display_name": "Python 3"
15 | },
16 | "language_info": {
17 | "name": "python"
18 | },
19 | "accelerator": "GPU",
20 | "gpuClass": "standard"
21 | },
22 | "cells": [
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {
26 | "id": "view-in-github",
27 | "colab_type": "text"
28 | },
29 | "source": [
30 |         ""
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "source": [
36 | "!pip3 uninstall torch -y"
37 | ],
38 | "metadata": {
39 | "id": "EzfLQmy-c9fY"
40 | },
41 | "execution_count": null,
42 | "outputs": []
43 | },
44 | {
45 | "cell_type": "code",
46 | "source": [
47 | "!pip install torch==1.11.0"
48 | ],
49 | "metadata": {
50 | "id": "4-IPSPXGcXsP"
51 | },
52 | "execution_count": null,
53 | "outputs": []
54 | },
55 | {
56 | "cell_type": "code",
57 | "source": [
58 | "import torch\n",
59 | "print(torch.__version__)"
60 | ],
61 | "metadata": {
62 | "id": "Et1rcuQMeDjf"
63 | },
64 | "execution_count": null,
65 | "outputs": []
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {
71 | "id": "FK-QGuS3gsMZ"
72 | },
73 | "outputs": [],
74 | "source": [
75 | "print('test')"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "source": [
81 | "!sudo apt-get update"
82 | ],
83 | "metadata": {
84 | "id": "61kVAU1MirsS"
85 | },
86 | "execution_count": null,
87 | "outputs": []
88 | },
89 | {
90 | "cell_type": "code",
91 | "source": [
92 | "!sudo apt-get -y install git jq virtualenv"
93 | ],
94 | "metadata": {
95 | "id": "DOUW4eOIizbG"
96 | },
97 | "execution_count": null,
98 | "outputs": []
99 | },
100 | {
101 | "cell_type": "code",
102 | "source": [
103 | "!git clone https://github.com/microsoft/LoRA.git; cd LoRA"
104 | ],
105 | "metadata": {
106 | "id": "DEUOudE_i4Yv"
107 | },
108 | "execution_count": null,
109 | "outputs": []
110 | },
111 | {
112 | "cell_type": "code",
113 | "source": [
114 | "!ls"
115 | ],
116 | "metadata": {
117 | "id": "bokzXL8ei8UP"
118 | },
119 | "execution_count": null,
120 | "outputs": []
121 | },
122 | {
123 | "cell_type": "code",
124 | "source": [
125 | "!ls LoRA/examples/NLG"
126 | ],
127 | "metadata": {
128 | "id": "WPU5_vxWjCvp"
129 | },
130 | "execution_count": null,
131 | "outputs": []
132 | },
133 | {
134 | "cell_type": "code",
135 | "source": [
136 | "!python3 -m pip install --upgrade pip"
137 | ],
138 | "metadata": {
139 | "id": "7C80M_rujyM5"
140 | },
141 | "execution_count": null,
142 | "outputs": []
143 | },
144 | {
145 | "cell_type": "code",
146 | "source": [
147 | "!python3 -m pip install -r LoRA/examples/NLG/requirement.txt"
148 | ],
149 | "metadata": {
150 | "id": "x8oqUQjHjmj4"
151 | },
152 | "execution_count": null,
153 | "outputs": []
154 | },
155 | {
156 | "cell_type": "code",
157 | "source": [
158 | "!python3 -m pip install transformers"
159 | ],
160 | "metadata": {
161 | "id": "lS2ZmMO3klGw"
162 | },
163 | "execution_count": null,
164 | "outputs": []
165 | },
166 | {
167 | "cell_type": "code",
168 | "source": [
169 | "!python3 -m pip install spacy tqdm tensorboard progress"
170 | ],
171 | "metadata": {
172 | "id": "l2uD_wpFk4gP"
173 | },
174 | "execution_count": null,
175 | "outputs": []
176 | },
177 | {
178 | "cell_type": "code",
179 | "source": [
180 | "import torch\n",
181 | "print(torch.__version__)"
182 | ],
183 | "metadata": {
184 | "id": "Q7dAMI4lkR2u"
185 | },
186 | "execution_count": null,
187 | "outputs": []
188 | },
189 | {
190 | "cell_type": "code",
191 | "source": [
192 | "%cd LoRA/examples/NLG"
193 | ],
194 | "metadata": {
195 | "id": "YbDu2w3FlC9l"
196 | },
197 | "execution_count": null,
198 | "outputs": []
199 | },
200 | {
201 | "cell_type": "code",
202 | "source": [
203 | "!ls"
204 | ],
205 | "metadata": {
206 | "id": "QVEol-7IlI1A"
207 | },
208 | "execution_count": null,
209 | "outputs": []
210 | },
211 | {
212 | "cell_type": "code",
213 | "source": [
214 | "!bash download_pretrained_checkpoints.sh"
215 | ],
216 | "metadata": {
217 | "id": "hTaJ7ZYzlMsf"
218 | },
219 | "execution_count": null,
220 | "outputs": []
221 | },
222 | {
223 | "cell_type": "code",
224 | "source": [
225 | "!bash create_datasets2.sh"
226 | ],
227 | "metadata": {
228 | "id": "e2HL_HV-lQlb"
229 | },
230 | "execution_count": null,
231 | "outputs": []
232 | },
233 | {
234 | "cell_type": "code",
235 | "source": [
236 | "%cd ./eval"
237 | ],
238 | "metadata": {
239 | "id": "JDyJhqaUlSsf"
240 | },
241 | "execution_count": null,
242 | "outputs": []
243 | },
244 | {
245 | "cell_type": "code",
246 | "source": [
247 | "!bash download_evalscript.sh"
248 | ],
249 | "metadata": {
250 | "id": "K5Cw5xVRl6wM"
251 | },
252 | "execution_count": null,
253 | "outputs": []
254 | },
255 | {
256 | "cell_type": "code",
257 | "source": [
258 | "%cd .."
259 | ],
260 | "metadata": {
261 | "id": "Smyk-DKyl8rP"
262 | },
263 | "execution_count": null,
264 | "outputs": []
265 | },
266 | {
267 | "cell_type": "code",
268 | "source": [
269 | "!ls"
270 | ],
271 | "metadata": {
272 | "id": "n8rIjmXNmWj3"
273 | },
274 | "execution_count": null,
275 | "outputs": []
276 | },
277 | {
278 | "cell_type": "code",
279 | "source": [
280 | "!python3 -m pip install loralib"
281 | ],
282 | "metadata": {
283 | "id": "gQX04dU0oGW1"
284 | },
285 | "execution_count": null,
286 | "outputs": []
287 | },
288 | {
289 | "cell_type": "code",
290 | "source": [
291 | "!ls -l data/e2e"
292 | ],
293 | "metadata": {
294 | "id": "5BvLJ03yZx_M"
295 | },
296 | "execution_count": null,
297 | "outputs": []
298 | },
299 | {
300 | "cell_type": "code",
301 | "source": [
302 | "!python3 -m torch.distributed.launch --nproc_per_node=1 src/gpt2_ft.py \\\n",
303 | " --train_data ./data/e2e/train.jsonl \\\n",
304 | " --valid_data ./data/e2e/valid.jsonl \\\n",
305 | " --train_batch_size 4 \\\n",
306 | " --grad_acc 1 \\\n",
307 | " --valid_batch_size 2 \\\n",
308 | " --seq_len 512 \\\n",
309 | " --model_card gpt2.md \\\n",
310 | " --init_checkpoint ./pretrained_checkpoints/gpt2-medium-pytorch_model.bin \\\n",
311 | " --platform local \\\n",
312 | " --clip 0.0 \\\n",
313 | " --lr 0.0002 \\\n",
314 | " --weight_decay 0.01 \\\n",
315 | " --correct_bias \\\n",
316 | " --adam_beta2 0.999 \\\n",
317 | " --scheduler linear \\\n",
318 | " --warmup_step 500 \\\n",
319 | " --max_epoch 5 \\\n",
320 | " --save_interval 1000 \\\n",
321 | " --lora_dim 4 \\\n",
322 | " --lora_alpha 32 \\\n",
323 | " --lora_dropout 0.1 \\\n",
324 | " --label_smooth 0.1 \\\n",
325 | " --work_dir ./trained_models/GPT2_M/e2e \\\n",
326 | " --random_seed 110"
327 | ],
328 | "metadata": {
329 | "id": "wWn2H2somOZ5"
330 | },
331 | "execution_count": null,
332 | "outputs": []
333 | }
334 | ]
335 | }
--------------------------------------------------------------------------------
/MachineTranslation/README.md:
--------------------------------------------------------------------------------
1 | # Machine Translation
2 |
3 | ```
4 | # Download the dataset
5 | wget https://www.manythings.org/anki/fra-eng.zip
6 |
7 | unzip fra-eng.zip
8 | ```
9 |
10 | ## Tutorials
11 | - https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html#
12 | - [Link to Colab Notebook](https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/a60617788061539b5449701ae76aee56/seq2seq_translation_tutorial.ipynb)
13 | - [TorchText and nn.Transformer](https://pytorch.org/tutorials/beginner/transformer_tutorial.html)
14 | - https://towardsdatascience.com/neural-machine-translation-15ecf6b0b
15 | - https://medium.com/analytics-vidhya/a-must-read-nlp-tutorial-on-neural-machine-translation-the-technique-powering-google-translate-c5c8d97d7587
16 |
17 | ## Datasets
18 | - [Downloads](https://tatoeba.org/eng/downloads)
19 | - [Splitting language pairs into individual text files](https://www.manythings.org/anki/)
20 |
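21 | ## Peeking at the data
22 | A minimal sketch (assuming `fra-eng.zip` unpacks to a tab-separated `fra.txt` whose first two columns are English and French) for loading the sentence pairs:
23 | ```
24 | pairs = []
25 | with open('fra.txt', encoding='utf-8') as f:
26 |     for line in f:
27 |         cols = line.rstrip('\n').split('\t')
28 |         pairs.append((cols[0], cols[1]))  # (english, french)
29 | print(len(pairs), pairs[0])
30 | ```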
--------------------------------------------------------------------------------
/MachineTranslation/ReferenceExample.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "ReferenceExample.ipynb",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyNfR2AxE7LGpIZL/UWgBjMn",
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | },
15 | "accelerator": "GPU"
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "view-in-github",
22 | "colab_type": "text"
23 | },
24 | "source": [
25 |         ""
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "metadata": {
31 | "id": "CsFfALRqWl-r"
32 | },
33 | "source": [
34 | "# https://github.com/andrewpeng02/transformer-translation\n",
35 | "!git clone https://github.com/andrewpeng02/transformer-translation.git"
36 | ],
37 | "execution_count": null,
38 | "outputs": []
39 | },
40 | {
41 | "cell_type": "code",
42 | "metadata": {
43 | "id": "15vsyh8nXIyo"
44 | },
45 | "source": [
46 | "!python3 -m pip install --upgrade pip\n",
47 | "!python3 -m pip install click==7.0\n",
48 | "!python3 -m pip install dill==0.3.1.1 --use-feature=2020-resolver\n",
49 | "!python3 -m pip install einops==0.1.0\n",
50 | "!python3 -m pip install en-core-web-sm==2.1.0\n",
51 | "!python3 -m pip install fr-core-news-sm==2.1.0\n",
52 | "!python3 -m pip install joblib==0.13.2\n",
53 | "!python3 -m pip install torchtext==0.4.0"
54 | ],
55 | "execution_count": null,
56 | "outputs": []
57 | },
58 | {
59 | "cell_type": "code",
60 | "metadata": {
61 | "id": "R4pZuNrQX_Gq"
62 | },
63 | "source": [
64 | "!ls transformer-translation/data\n",
65 | "%cd transformer-translation/data\n",
66 | "!wget http://www.manythings.org/anki/fra-eng.zip\n",
67 | "!unzip fra-eng.zip\n",
68 | "%cd ../../\n",
69 | "!ls\n"
70 | ],
71 | "execution_count": null,
72 | "outputs": []
73 | },
74 | {
75 | "cell_type": "code",
76 | "metadata": {
77 | "id": "0kL2dvTwZxW1"
78 | },
79 | "source": [
80 | "cd transformer-translation"
81 | ],
82 | "execution_count": null,
83 | "outputs": []
84 | },
85 | {
86 | "cell_type": "code",
87 | "metadata": {
88 | "id": "bS3Kkzrlbmzr"
89 | },
90 | "source": [
91 | "!python3 -m spacy download en\n",
92 | "!python3 -m spacy download fr"
93 | ],
94 | "execution_count": null,
95 | "outputs": []
96 | },
97 | {
98 | "cell_type": "code",
99 | "metadata": {
100 | "id": "rAyZTlUKYuun"
101 | },
102 | "source": [
103 | "!python3 process-tatoeba-data.py\n",
104 | "!python3 preprocess-data.py"
105 | ],
106 | "execution_count": null,
107 | "outputs": []
108 | },
109 | {
110 | "cell_type": "code",
111 | "metadata": {
112 | "id": "DH3jMH4pZM1d"
113 | },
114 | "source": [
115 | "!ls\n",
116 | "!echo -----------------\n",
117 | "!ls data/processed\n",
118 | "!echo -----------------\n",
119 | "!ls -l data/processed/fr"
120 | ],
121 | "execution_count": null,
122 | "outputs": []
123 | },
124 | {
125 | "cell_type": "code",
126 | "metadata": {
127 | "id": "wMtzQQUQb6Bw"
128 | },
129 | "source": [
130 | "!python3 train.py"
131 | ],
132 | "execution_count": null,
133 | "outputs": []
134 | },
135 | {
136 | "cell_type": "code",
137 | "metadata": {
138 | "id": "xse_C7t_prc3"
139 | },
140 | "source": [
141 | "!python3 translate-sentence.py"
142 | ],
143 | "execution_count": null,
144 | "outputs": []
145 | }
146 | ]
147 | }
--------------------------------------------------------------------------------
/NeuralArchitectureSearch/Autokeras.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Autokeras.ipynb",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyMXe9x8u7a1Wy8NJkVoxkF6",
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | },
15 | "accelerator": "GPU"
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "view-in-github",
22 | "colab_type": "text"
23 | },
24 | "source": [
25 |         ""
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {
31 | "id": "tKCAuX00SO_k",
32 | "colab_type": "text"
33 | },
34 | "source": [
35 | "# AutoKeras\n",
36 | "\n",
37 | "[Paper](https://arxiv.org/pdf/1806.10282.pdf)"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {
43 | "id": "0fQ17-24Vjbb",
44 | "colab_type": "text"
45 | },
46 | "source": [
47 |         "In this example, we get an accuracy of 91.13% with only 3 trials and 3 epochs. The minimal AutoKeras code below shows how simple and easy AutoKeras is to use.\n"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {
53 | "id": "JeGbfKc8SAjv",
54 | "colab_type": "text"
55 | },
56 | "source": [
57 | "# Questions\n",
58 | "## 1. What is Edit-Distance in a Neural Network?\n",
59 | "Edit-distance is the number of operations needed to morph one architecture into another architecture."
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "metadata": {
65 | "id": "b4q_-v76Lht7",
66 | "colab_type": "code",
67 | "colab": {}
68 | },
69 | "source": [
70 | "!python3 -m pip install autokeras\n",
71 | "!pip install tensorflow-gpu==2.1.0"
72 | ],
73 | "execution_count": 0,
74 | "outputs": []
75 | },
76 | {
77 | "cell_type": "code",
78 | "metadata": {
79 | "id": "s9icubivMy5Y",
80 | "colab_type": "code",
81 | "colab": {}
82 | },
83 | "source": [
84 | "%tensorflow_version 2.x"
85 | ],
86 | "execution_count": 0,
87 | "outputs": []
88 | },
89 | {
90 | "cell_type": "code",
91 | "metadata": {
92 | "id": "iDaqLMGGMj_y",
93 | "colab_type": "code",
94 | "colab": {}
95 | },
96 | "source": [
97 | "import tensorflow as tf\n",
98 | "print(tf.__version__)"
99 | ],
100 | "execution_count": 0,
101 | "outputs": []
102 | },
103 | {
104 | "cell_type": "code",
105 | "metadata": {
106 | "id": "K1bwRM-ELrD1",
107 | "colab_type": "code",
108 | "colab": {}
109 | },
110 | "source": [
111 | "#from tensorflow.keras.datasets import mnist\n",
112 | "from tensorflow.keras.datasets import fashion_mnist as mnist\n",
113 | "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
114 | "print(x_train.shape) # (60000, 28, 28)\n",
115 | "print(y_train.shape) # (60000,)\n",
116 | "print(y_train[:3]) # array([7, 2, 1], dtype=uint8)"
117 | ],
118 | "execution_count": 0,
119 | "outputs": []
120 | },
121 | {
122 | "cell_type": "code",
123 | "metadata": {
124 | "id": "qheYaZkjL8kv",
125 | "colab_type": "code",
126 | "colab": {}
127 | },
128 | "source": [
129 | "import autokeras as ak\n",
130 | "\n",
131 | "# Initialize the image classifier.\n",
132 | "clf = ak.ImageClassifier(max_trials=3) # It tries 3 different models.\n",
133 | "# Feed the image classifier with training data.\n",
134 | "clf.fit(x_train, y_train,epochs=3)"
135 | ],
136 | "execution_count": 0,
137 | "outputs": []
138 | },
139 | {
140 | "cell_type": "code",
141 | "metadata": {
142 | "id": "h919aZ_7L-yX",
143 | "colab_type": "code",
144 | "colab": {}
145 | },
146 | "source": [
147 | "# Predict with the best model.\n",
148 | "predicted_y = clf.predict(x_test)\n",
149 | "print(predicted_y)"
150 | ],
151 | "execution_count": 0,
152 | "outputs": []
153 | },
154 | {
155 | "cell_type": "code",
156 | "metadata": {
157 | "id": "djrTIS6kMAk-",
158 | "colab_type": "code",
159 | "colab": {}
160 | },
161 | "source": [
162 | "# Evaluate the best model with testing data.\n",
163 | "print(clf.evaluate(x_test, y_test))\n",
164 | "model = clf.export_model()\n",
165 | "print(model.summary())"
166 | ],
167 | "execution_count": 0,
168 | "outputs": []
169 | },
170 | {
171 | "cell_type": "markdown",
172 | "metadata": {
173 | "id": "R7fcbu9WT_7Z",
174 | "colab_type": "text"
175 | },
176 | "source": [
177 | "# Resources\n",
178 | "- [Autokeras paper](https://arxiv.org/pdf/1806.10282.pdf)\n",
179 | "- [Autokeras website](https://autokeras.com/)\n",
180 | "- [Custom Autokeras Model](https://autokeras.com/tutorial/customized/)"
181 | ]
182 | }
183 | ]
184 | }
--------------------------------------------------------------------------------
/NeuralArchitectureSearch/NeuralArchitectureSearch.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "NeuralArchitectureSearch",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyP1Xhoo+Gdh4xH/x0QaPkqg",
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {
20 | "id": "view-in-github",
21 | "colab_type": "text"
22 | },
23 | "source": [
24 |         ""
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "8k2XRoS1zFOk",
31 | "colab_type": "text"
32 | },
33 | "source": [
34 | "# Neural Architecture Search\n",
35 | "\n",
36 | "## EfficientNet\n",
37 | "\n",
38 | "### What's the difference between MBConv1 versus MBConv6?\n",
39 | "- MBConv1 expands the number of channels by a factor of 1.\n",
40 | "- MBConv6 expands the number of channels by a factor of 6.\n",
41 | "\n",
42 | "[MBConv6](https://machinethink.net/blog/mobile-architectures/)\n",
43 | "\n",
44 | "## Do you need to do your own Neural Architecture Search?\n",
45 | "- No, you can use transfer learning to apply an existing neural net architecture and retrain the network on your own dataset\n",
46 | "- [AutoML and Neural Architecture Search](https://towardsdatascience.com/everything-you-need-to-know-about-automl-and-neural-architecture-search-8db1863682bf)\n",
47 | "\n",
48 | "\n",
49 | "## Goals\n",
50 | "1. Understand EfficientNet\n",
51 |         "2. Understand why EfficientNet outperforms MobileNetV3 (could be a blog post)\n",
52 |         "3. PyTorch EfficientDet\n",
53 |         "4. Run EfficientNet inference\n",
54 |         "5. Does EfficientNet produce a different network depending on the dataset?\n",
55 |         "6. How does MnasNet work?"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {
61 | "id": "ZCTfU5hmxFiW",
62 | "colab_type": "text"
63 | },
64 | "source": [
65 | "# Talking Points\n",
66 | "## 1. What are Inverted Residual Blocks?\n",
67 |         "- 1x1 convolution expands the number of channels --> 3x3 depthwise convolution --> 1x1 pointwise (projection) convolution reduces the number of channels back down (see the PyTorch sketch in the code cell below)\n",
68 |         "- NOTE: the final 1x1 projection feeding the skip connection is linear, i.e. it is not followed by a non-linearity (activation function)\n",
69 | "\n",
70 | "# How to build EfficientNet\n",
71 | "## Step 1.\n",
72 | "Find the baseline EfficientNet network using Neural Architecture Search\n",
73 | "\n",
74 | "## Step 2.\n",
75 |         "Scale up the baseline network using compound scaling: a small grid search over the depth, width, and resolution coefficients"
76 | ]
77 | },
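    {
      "cell_type": "markdown",
      "metadata": {
        "id": "mbconv-sketch-text",
        "colab_type": "text"
      },
      "source": [
        "The cell below is a minimal PyTorch sketch of an inverted residual (MBConv-style) block to make the structure above concrete: a 1x1 convolution expands the channels by an expansion factor (1 for MBConv1, 6 for MBConv6), a 3x3 depthwise convolution filters them, and a linear 1x1 projection reduces them back before the skip connection. It is an illustration only, not the exact EfficientNet block, which also adds squeeze-and-excitation and stochastic depth."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "mbconv-sketch-code",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import torch\n",
        "import torch.nn as nn\n",
        "\n",
        "class InvertedResidual(nn.Module):\n",
        "    def __init__(self, channels, expansion):\n",
        "        super().__init__()\n",
        "        hidden = channels * expansion  # expansion=1 -> MBConv1, expansion=6 -> MBConv6\n",
        "        self.block = nn.Sequential(\n",
        "            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),  # 1x1 expand\n",
        "            nn.BatchNorm2d(hidden),\n",
        "            nn.ReLU6(inplace=True),\n",
        "            nn.Conv2d(hidden, hidden, kernel_size=3, padding=1, groups=hidden, bias=False),  # 3x3 depthwise\n",
        "            nn.BatchNorm2d(hidden),\n",
        "            nn.ReLU6(inplace=True),\n",
        "            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),  # 1x1 linear projection\n",
        "            nn.BatchNorm2d(channels),  # note: no activation after the projection\n",
        "        )\n",
        "\n",
        "    def forward(self, x):\n",
        "        return x + self.block(x)  # skip connection (stride 1, same channel count)\n",
        "\n",
        "x = torch.randn(1, 32, 56, 56)\n",
        "print(InvertedResidual(32, 6)(x).shape)  # torch.Size([1, 32, 56, 56])"
      ],
      "execution_count": 0,
      "outputs": []
    },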
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {
81 | "id": "PMA8zOk-5kPD",
82 | "colab_type": "text"
83 | },
84 | "source": [
85 | "# Neural Architecture Search\n",
86 | "## Tunable Parameters\n",
87 | "- Depth, Width (Channels), Resolutions"
88 | ]
89 | },
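    {
      "cell_type": "markdown",
      "metadata": {
        "id": "compound-scaling-sketch-text",
        "colab_type": "text"
      },
      "source": [
        "As a rough worked example of how these knobs are tied together: the EfficientNet paper scales depth, width, and resolution jointly with fixed coefficients (alpha = 1.2, beta = 1.1, gamma = 1.15, chosen so that alpha * beta^2 * gamma^2 is roughly 2) raised to a compound exponent phi. The released B1-B7 models round and tweak the resulting values, so the numbers printed below are only indicative."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "compound-scaling-sketch-code",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Compound-scaling sketch using the coefficients reported in the EfficientNet paper.\n",
        "alpha, beta, gamma = 1.2, 1.1, 1.15  # depth, width, resolution multipliers\n",
        "\n",
        "for phi in range(5):  # phi = 0 is the B0 baseline\n",
        "    depth_mult = alpha ** phi\n",
        "    width_mult = beta ** phi\n",
        "    res_mult = gamma ** phi\n",
        "    print('phi=%d: depth x%.2f, width x%.2f, resolution x%.2f (~%d px)' % (phi, depth_mult, width_mult, res_mult, round(224 * res_mult)))"
      ],
      "execution_count": 0,
      "outputs": []
    },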
90 | {
91 | "cell_type": "code",
92 | "metadata": {
93 | "id": "ewrzyHGT3i2x",
94 | "colab_type": "code",
95 | "colab": {}
96 | },
97 | "source": [
98 | "!python3 -m pip install efficientnet_pytorch\n",
99 | "!python3 -m pip install torchsummary"
100 | ],
101 | "execution_count": 0,
102 | "outputs": []
103 | },
104 | {
105 | "cell_type": "code",
106 | "metadata": {
107 | "id": "9FIc6zqIEQzY",
108 | "colab_type": "code",
109 | "colab": {}
110 | },
111 | "source": [
112 | "!git clone https://github.com/lukemelas/EfficientNet-PyTorch"
113 | ],
114 | "execution_count": 0,
115 | "outputs": []
116 | },
117 | {
118 | "cell_type": "code",
119 | "metadata": {
120 | "id": "BicJzIuGESzR",
121 | "colab_type": "code",
122 | "colab": {}
123 | },
124 | "source": [
125 | "!ls EfficientNet-PyTorch/examples\n",
126 | "!ls EfficientNet-PyTorch/examples/simple/\n",
127 | "!cp EfficientNet-PyTorch/examples/simple/img.jpg .\n",
128 | "!cp EfficientNet-PyTorch/examples/simple/labels_map.txt ."
129 | ],
130 | "execution_count": 0,
131 | "outputs": []
132 | },
133 | {
134 | "cell_type": "code",
135 | "metadata": {
136 | "id": "g_0rOBICy6zP",
137 | "colab_type": "code",
138 | "colab": {}
139 | },
140 | "source": [
141 | "import torch\n",
142 | "import torchsummary\n",
143 | "from efficientnet_pytorch import EfficientNet\n",
144 | "\n",
145 | "model = EfficientNet.from_name('efficientnet-b0')\n",
146 | "\n",
147 | "print(model)\n",
148 | "torchsummary.summary(model, input_size=(3, 224, 224))"
149 | ],
150 | "execution_count": 0,
151 | "outputs": []
152 | },
153 | {
154 | "cell_type": "code",
155 | "metadata": {
156 | "id": "pbSWEHqzEFrn",
157 | "colab_type": "code",
158 | "colab": {}
159 | },
160 | "source": [
161 | "import json\n",
162 | "from PIL import Image\n",
163 | "import torch\n",
164 | "from torchvision import transforms\n",
165 | "\n",
166 | "from efficientnet_pytorch import EfficientNet\n",
167 | "model = EfficientNet.from_pretrained('efficientnet-b0')\n",
168 | "\n",
169 | "# Preprocess image\n",
170 | "tfms = transforms.Compose([transforms.Resize(224), transforms.ToTensor(),\n",
171 | " transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),])\n",
172 | "img = tfms(Image.open('img.jpg')).unsqueeze(0)\n",
173 | "print(img.shape) # torch.Size([1, 3, 224, 224])\n",
174 | "\n",
175 | "# Load ImageNet class names\n",
176 | "labels_map = json.load(open('labels_map.txt'))\n",
177 | "labels_map = [labels_map[str(i)] for i in range(1000)]\n",
178 | "\n",
179 | "# Classify\n",
180 | "model.eval()\n",
181 | "with torch.no_grad():\n",
182 | " outputs = model(img)\n",
183 | "\n",
184 | "# Print predictions\n",
185 | "print('-----')\n",
186 | "for idx in torch.topk(outputs, k=5).indices.squeeze(0).tolist():\n",
187 | " prob = torch.softmax(outputs, dim=1)[0, idx].item()\n",
188 | " print('{label:<75} ({p:.2f}%)'.format(label=labels_map[idx], p=prob*100))"
189 | ],
190 | "execution_count": 0,
191 | "outputs": []
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {
196 | "id": "7WzcGARdzZjW",
197 | "colab_type": "text"
198 | },
199 | "source": [
200 | "# Resources\n",
201 | "\n",
202 | "* [EfficientNet Paper](https://arxiv.org/pdf/1905.11946.pdf)\n",
203 | "* [EfficientDet Paper](https://arxiv.org/pdf/1911.09070.pdf)\n",
204 | "* [Learning OpenCV EfficientNet](https://www.learnopencv.com/efficientnet-theory-code/)\n",
205 | "* [Tensorflow EfficientNet Implementation](https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py)\n",
206 | "* [PyTorch Implementation](https://github.com/lukemelas/EfficientNet-PyTorch)\n",
207 | "* [MnasFPN : Learning Latency-aware Pyramid Architecture\n",
208 | "for Object Detection on Mobile Devices](https://arxiv.org/pdf/1912.01106.pdf)\n",
209 | "* [Yolo V4](https://arxiv.org/pdf/2004.10934.pdf)\n",
210 | "* [EffResNetComparison](https://colab.research.google.com/github/rwightman/pytorch-image-models/blob/master/notebooks/EffResNetComparison.ipynb#scrollTo=SKA-MF-yShDW)\n"
211 | ]
212 | }
213 | ]
214 | }
--------------------------------------------------------------------------------
/ProphetCode/main.py:
--------------------------------------------------------------------------------
1 | """
2 | Quick Start
3 | https://facebook.github.io/prophet/docs/quick_start.html#python-api
4 | """
5 |
6 | import pandas as pd
7 | from prophet import Prophet
8 |
9 | def main():
10 | print('main')
11 |     # Prophet quick-start example data (log of daily Wikipedia page views for Peyton Manning).
12 |     df = pd.read_csv('https://raw.githubusercontent.com/facebook/prophet/main/examples/example_wp_log_peyton_manning.csv')
13 |     print(df.columns)
14 |     if True:  # Switch to the local GlobalLandTemperaturesByMajorCity dataset instead.
15 | df = pd.read_csv('archive/GlobalLandTemperaturesByMajorCity.csv')
16 | df = df.rename(columns={"dt": "ds", "AverageTemperature": "y"})
17 | df = df[df['City'] == "New York"]
18 | #df = df[df['City'] == "Sydney"]
19 | #df = df[df['City'] == "Cape Town"]
20 | #df = df[df['City'] == "New Delhi"]
21 | df = df[df.y.notnull()]
22 | print(df.columns)
23 | print(df.head())
24 |
25 |     # Fit the Prophet model and forecast one year ahead.
26 | m = Prophet()
27 | m.fit(df)
28 |
29 | future = m.make_future_dataframe(periods=365)
30 | print(future.tail())
31 |
32 | forecast = m.predict(future)
33 | print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
34 |
35 | fig1 = m.plot(forecast)
36 | fig1.savefig('test.png')
37 |
38 | fig2 = m.plot_components(forecast)
39 | fig2.savefig('test2.png')
40 |
41 |
42 | if __name__ == '__main__':
43 | main()
44 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeepLearningDemos
2 | We're a team of Machine Learning Engineers who blog about advances in Deep Learning.
3 |
4 | # 1. Blog Posts
5 | 1. [Vision Transformers for Computer Vision](https://deepganteam.medium.com/vision-transformers-for-computer-vision-9f70418fe41a) (2021-09-07)
6 | 1. [Few Shot Learning from Scratch](https://deepganteam.medium.com/few-shot-learning-from-scratch-a3422b111e05) (2021-07-06)
7 | 1. [Basic Policy Gradients with the Reparameterization Trick](https://deepganteam.medium.com/basic-policy-gradients-with-the-reparameterization-trick-24312c7dbcd) (2021-04-13)
8 | 1. [A Little Rusty? ML Refresher on Linear Regression](https://deepganteam.medium.com/a-little-rusty-ml-refresher-on-linear-regression-76ef4afc6474) (2021-02-19)
9 | 1. [Language Translation with Transformers in PyTorch](https://chatbotslife.com/language-translation-with-transformers-in-pytorch-ff8b32cf848?gi=df7018b86372) (2021-01-22)
10 | 1. [What are Transformers?](https://medium.com/@deepganteam/what-are-transformers-b687f2bcdf49) (2020-09-02)
11 | 1. [Searching for Better Neural Architecture Search](https://medium.com/@deepganteam/searching-for-better-neural-architecture-search-ea91338caa11) (2020-06-17)
12 | 1. [Making SinGAN Double](https://medium.com/@deepganteam/making-singan-double-8568490b572e) (2020-04-15)
13 |
14 | # 2. Notebooks
15 | 1. [AutoKeras Notebook](https://github.com/wileyw/DeepLearningDemos/blob/master/NeuralArchitectureSearch/Autokeras.ipynb) (2020-06-11)
16 |
17 | # 3. Extra
18 | ## Project Ideas
19 | 1. https://www.cs.toronto.edu/~graves/handwriting.html
20 |
21 | ## Specific Interesting Architectures
22 | Note:
23 | 1. Hourglass
24 | 1. U-Net
25 | 1. DenseNet/ResNet
26 | 1. SqueezeNet
27 |
28 |
--------------------------------------------------------------------------------
/RL_from_human_feedback/RL_from_human_feedback.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "authorship_tag": "ABX9TyNv29WPr2hSjlJeI/CBi3nZ",
8 | "include_colab_link": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "language_info": {
15 | "name": "python"
16 | },
17 | "accelerator": "GPU",
18 | "gpuClass": "standard"
19 | },
20 | "cells": [
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {
24 | "id": "view-in-github",
25 | "colab_type": "text"
26 | },
27 | "source": [
28 |         ""
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "source": [
34 | "!git clone https://github.com/tatsu-lab/stanford_alpaca.git"
35 | ],
36 | "metadata": {
37 | "id": "Mcfmw95BVnkk"
38 | },
39 | "execution_count": null,
40 | "outputs": []
41 | },
42 | {
43 | "cell_type": "code",
44 | "source": [
45 |         "!python3 -m pip install -r stanford_alpaca/requirements.txt"
46 | ],
47 | "metadata": {
48 | "id": "6VQWwYyBWGQP"
49 | },
50 | "execution_count": null,
51 | "outputs": []
52 | },
53 | {
54 | "cell_type": "code",
55 | "source": [
56 | "!git clone https://github.com/huggingface/transformers.git"
57 | ],
58 | "metadata": {
59 | "id": "ghX-6yKfWoGe"
60 | },
61 | "execution_count": null,
62 | "outputs": []
63 | },
64 | {
65 | "cell_type": "code",
66 | "source": [
67 | "%cd transformers"
68 | ],
69 | "metadata": {
70 | "id": "E8iEJocrWtyz"
71 | },
72 | "execution_count": null,
73 | "outputs": []
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "metadata": {
79 | "id": "I9OAmFjMQNYF"
80 | },
81 | "outputs": [],
82 | "source": [
83 | "import torch\n",
84 | "\n",
85 | "import sys\n",
86 | "\n",
87 | "sys.version"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "source": [
93 | "%cd .."
94 | ],
95 | "metadata": {
96 | "id": "0G3yWBv_XgrN"
97 | },
98 | "execution_count": null,
99 | "outputs": []
100 | },
101 | {
102 | "cell_type": "code",
103 | "source": [
104 | "%ls stanford_alpaca/\n"
105 | ],
106 | "metadata": {
107 | "id": "41QrI6gNQQLS"
108 | },
109 | "execution_count": null,
110 | "outputs": []
111 | },
112 | {
113 | "cell_type": "code",
114 | "source": [
115 | "%cd transformers"
116 | ],
117 | "metadata": {
118 | "id": "DIjJ0b8WYy5J"
119 | },
120 | "execution_count": null,
121 | "outputs": []
122 | },
123 | {
124 | "cell_type": "code",
125 | "source": [
126 | "!python src/transformers/models/llama/convert_llama_weights_to_hf.py \\\n",
127 | " --input_dir /path/to/downloaded/llama/weights \\\n",
128 | " --model_size 7B \\\n",
129 | " --output_dir /output/path"
130 | ],
131 | "metadata": {
132 | "id": "Y4hVfUwoZDJF"
133 | },
134 | "execution_count": null,
135 | "outputs": []
136 | },
137 | {
138 | "cell_type": "code",
139 | "source": [
140 | "%cd stanford_alpaca"
141 | ],
142 | "metadata": {
143 | "id": "HXpP37VVXmd-"
144 | },
145 | "execution_count": null,
146 | "outputs": []
147 | },
148 | {
149 | "cell_type": "code",
150 | "source": [
151 | "mkdir output"
152 | ],
153 | "metadata": {
154 | "id": "Rph7DmASX4wU"
155 | },
156 | "execution_count": null,
157 | "outputs": []
158 | },
159 | {
160 | "cell_type": "code",
161 | "source": [
162 | "!torchrun --nproc_per_node=1 --master_port= train.py \\\n",
163 | " --model_name_or_path \\\n",
164 | " --data_path ./alpaca_data.json \\\n",
165 | " --bf16 True \\\n",
166 | " --output_dir /content/stanford_alpaca \\\n",
167 | " --num_train_epochs 3 \\\n",
168 | " --per_device_train_batch_size 4 \\\n",
169 | " --per_device_eval_batch_size 4 \\\n",
170 | " --gradient_accumulation_steps 8 \\\n",
171 | " --evaluation_strategy \"no\" \\\n",
172 | " --save_strategy \"steps\" \\\n",
173 | " --save_steps 2000 \\\n",
174 | " --save_total_limit 1 \\\n",
175 | " --learning_rate 2e-5 \\\n",
176 | " --weight_decay 0. \\\n",
177 | " --warmup_ratio 0.03 \\\n",
178 | " --lr_scheduler_type \"cosine\" \\\n",
179 | " --logging_steps 1 \\\n",
180 | " --fsdp \"full_shard auto_wrap\" \\\n",
181 | " --fsdp_transformer_layer_cls_to_wrap 'LLaMADecoderLayer' \\\n",
182 | " --tf32 True"
183 | ],
184 | "metadata": {
185 | "id": "U5NIKbFvXpPW"
186 | },
187 | "execution_count": null,
188 | "outputs": []
189 | }
190 | ]
191 | }
--------------------------------------------------------------------------------
/ReinforcmentLearning/simple_example.py:
--------------------------------------------------------------------------------
1 | import gym
2 | env = gym.make("MsPacman-ram-v0")
3 | observation = env.reset()
4 | import time
5 | for _ in range(1000):
6 | env.render()
7 | action = env.action_space.sample() # your agent here (this takes random actions)
8 | observation, reward, done, info = env.step(action)
9 |
10 | if done:
11 | observation = env.reset()
12 | env.close()
13 |
--------------------------------------------------------------------------------
/SinGAN/SinGANOfficialImplementation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "SinGANOfficialImplementation.ipynb",
7 | "provenance": [],
8 | "private_outputs": true,
9 | "authorship_tag": "ABX9TyPERozuok0RMoHj/JkJzqSS",
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "name": "python3",
14 | "display_name": "Python 3"
15 | },
16 | "accelerator": "GPU"
17 | },
18 | "cells": [
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {
22 | "id": "view-in-github",
23 | "colab_type": "text"
24 | },
25 | "source": [
26 |         ""
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "id": "_teh4A4skGRT",
33 | "colab_type": "text"
34 | },
35 | "source": [
36 | "# SinGAN\n",
37 | "\n",
38 | "[Official SinGAN Repository](https://github.com/tamarott/SinGAN)\n",
39 | "\n",
40 |         "In this notebook, we will implement SinGAN and create a SinGAN homework assignment for others to learn how to implement SinGAN as well."
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "metadata": {
46 | "id": "TF-QL9B0z7sl",
47 | "colab_type": "code",
48 | "colab": {}
49 | },
50 | "source": [
51 | "!git clone https://github.com/tamarott/SinGAN.git"
52 | ],
53 | "execution_count": 0,
54 | "outputs": []
55 | },
56 | {
57 | "cell_type": "code",
58 | "metadata": {
59 | "id": "ul_FoSC40EjR",
60 | "colab_type": "code",
61 | "colab": {}
62 | },
63 | "source": [
64 | "%cd /content/SinGAN\n",
65 | "!ls\n",
66 | "!pwd\n",
67 | "!python3 main_train.py --input_name birds.png"
68 | ],
69 | "execution_count": 0,
70 | "outputs": []
71 | },
72 | {
73 | "cell_type": "code",
74 | "metadata": {
75 | "id": "ktEI7QEk1Wmf",
76 | "colab_type": "code",
77 | "colab": {}
78 | },
79 | "source": [
80 | "!python3 random_samples.py --input_name birds.png --mode random_samples_arbitrary_sizes --scale_h 1 --scale_v 1"
81 | ],
82 | "execution_count": 0,
83 | "outputs": []
84 | },
85 | {
86 | "cell_type": "code",
87 | "metadata": {
88 | "id": "VBErCpu1LKd8",
89 | "colab_type": "code",
90 | "colab": {}
91 | },
92 | "source": [
93 | "!ls\n",
94 | "!ls -l Output/RandomSamples/birds\n",
95 | "!ls -l Output/RandomSamples/birds/gen_start_scale=0"
96 | ],
97 | "execution_count": 0,
98 | "outputs": []
99 | },
100 | {
101 | "cell_type": "code",
102 | "metadata": {
103 | "id": "qTHAHNbnL5W6",
104 | "colab_type": "code",
105 | "colab": {}
106 | },
107 | "source": [
108 | "import cv2\n",
109 | "import glob\n",
110 | "from google.colab.patches import cv2_imshow\n",
111 | "\n",
112 | "print('original image')\n",
113 | "original_img_path = 'Input/Images/birds.png'\n",
114 | "img = cv2.imread(original_img_path)\n",
115 | "cv2_imshow(img)\n",
116 | "\n",
117 | "print('random sample')\n",
118 | "img_paths = glob.glob('Output/RandomSamples/birds/gen_start_scale=0/*.png')\n",
119 | "img = cv2.imread(img_paths[0])\n",
120 | "cv2_imshow(img)"
121 | ],
122 | "execution_count": 0,
123 | "outputs": []
124 | },
125 | {
126 | "cell_type": "code",
127 | "metadata": {
128 | "id": "6jpfpY2_kFeX",
129 | "colab_type": "code",
130 | "colab": {}
131 | },
132 | "source": [
133 | "import torch\n",
134 | "\n",
135 | "print('Implement SinGAN here')\n",
136 | "print(torch)"
137 | ],
138 | "execution_count": 0,
139 | "outputs": []
140 | }
141 | ]
142 | }
143 |
--------------------------------------------------------------------------------
/SinGAN/SinGAN_on_custom_image.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "SinGANOfficialImplementation.ipynb",
7 | "provenance": [],
8 | "private_outputs": true,
9 | "authorship_tag": "ABX9TyOqSL8ngNwZVEvzOBFhFwfA",
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "name": "python3",
14 | "display_name": "Python 3"
15 | },
16 | "accelerator": "GPU"
17 | },
18 | "cells": [
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {
22 | "id": "view-in-github",
23 | "colab_type": "text"
24 | },
25 | "source": [
26 |         ""
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "id": "_teh4A4skGRT",
33 | "colab_type": "text"
34 | },
35 | "source": [
36 | "# SinGAN\n",
37 | "\n",
38 | "[Official SinGAN Repository](https://github.com/tamarott/SinGAN)\n",
39 | "\n",
40 |         "In this notebook, we will implement SinGAN and create a SinGAN homework assignment for others to learn how to implement SinGAN as well."
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "metadata": {
46 | "id": "UemrsEXyvxqQ",
47 | "colab_type": "code",
48 | "colab": {}
49 | },
50 | "source": [
51 | "def upload_files():\n",
52 | " from google.colab import files\n",
53 | " uploaded = files.upload()\n",
54 | " for k, v in uploaded.items():\n",
55 | " open(k, 'wb').write(v)\n",
56 | " return list(uploaded.keys())\n",
57 | "upload_files()"
58 | ],
59 | "execution_count": 0,
60 | "outputs": []
61 | },
62 | {
63 | "cell_type": "code",
64 | "metadata": {
65 | "id": "TF-QL9B0z7sl",
66 | "colab_type": "code",
67 | "colab": {}
68 | },
69 | "source": [
70 | "!git clone https://github.com/tamarott/SinGAN.git"
71 | ],
72 | "execution_count": 0,
73 | "outputs": []
74 | },
75 | {
76 | "cell_type": "code",
77 | "metadata": {
78 | "id": "vZLLI4c7sJjv",
79 | "colab_type": "code",
80 | "colab": {}
81 | },
82 | "source": [
83 | "!ls /content/SinGAN/Input/Images/\n",
84 | "!cp carrots_whole.4BVRRZ6FNXYQN.png /content/SinGAN/Input/Images/custom.png"
85 | ],
86 | "execution_count": 0,
87 | "outputs": []
88 | },
89 | {
90 | "cell_type": "code",
91 | "metadata": {
92 | "id": "uKVDKOFetVEW",
93 | "colab_type": "code",
94 | "colab": {}
95 | },
96 | "source": [
97 | "%cd /content/SinGAN/Input/Images/\n",
98 | "!ls\n",
99 | "#import cv2\n",
100 | "#custom = cv2.imread('custom.jpg')\n",
101 | "#cv2.imwrite('custom.png', custom)"
102 | ],
103 | "execution_count": 0,
104 | "outputs": []
105 | },
106 | {
107 | "cell_type": "code",
108 | "metadata": {
109 | "id": "ul_FoSC40EjR",
110 | "colab_type": "code",
111 | "colab": {}
112 | },
113 | "source": [
114 | "%cd /content/SinGAN\n",
115 | "!ls\n",
116 | "!pwd\n",
117 | "!python3 main_train.py --input_name custom.png"
118 | ],
119 | "execution_count": 0,
120 | "outputs": []
121 | },
122 | {
123 | "cell_type": "code",
124 | "metadata": {
125 | "id": "ktEI7QEk1Wmf",
126 | "colab_type": "code",
127 | "colab": {}
128 | },
129 | "source": [
130 | "!python3 random_samples.py --input_name custom.png --mode random_samples_arbitrary_sizes --scale_h 1 --scale_v 1"
131 | ],
132 | "execution_count": 0,
133 | "outputs": []
134 | },
135 | {
136 | "cell_type": "code",
137 | "metadata": {
138 | "id": "VBErCpu1LKd8",
139 | "colab_type": "code",
140 | "colab": {}
141 | },
142 | "source": [
143 | "!ls\n",
144 | "!ls -l Output/RandomSamples/custom\n",
145 | "!ls -l Output/RandomSamples/custom/gen_start_scale=0"
146 | ],
147 | "execution_count": 0,
148 | "outputs": []
149 | },
150 | {
151 | "cell_type": "code",
152 | "metadata": {
153 | "id": "qTHAHNbnL5W6",
154 | "colab_type": "code",
155 | "colab": {}
156 | },
157 | "source": [
158 | "import cv2\n",
159 | "import glob\n",
160 | "from google.colab.patches import cv2_imshow\n",
161 | "\n",
162 | "print('original image')\n",
163 | "original_img_path = 'Input/Images/custom.png'\n",
164 | "img = cv2.imread(original_img_path)\n",
165 | "cv2_imshow(img)\n",
166 | "\n",
167 | "print('random sample')\n",
168 | "img_paths = glob.glob('Output/RandomSamples/custom/gen_start_scale=0/*.png')\n",
169 | "img = cv2.imread(img_paths[0])\n",
170 | "cv2_imshow(img)"
171 | ],
172 | "execution_count": 0,
173 | "outputs": []
174 | },
175 | {
176 | "cell_type": "code",
177 | "metadata": {
178 | "id": "6jpfpY2_kFeX",
179 | "colab_type": "code",
180 | "colab": {}
181 | },
182 | "source": [
183 | "import torch\n",
184 | "\n",
185 | "print('Implement SinGAN here')\n",
186 | "print(torch)"
187 | ],
188 | "execution_count": 0,
189 | "outputs": []
190 | }
191 | ]
192 | }
--------------------------------------------------------------------------------
/Transformers/README.md:
--------------------------------------------------------------------------------
1 | - [Huggingface Transformers](https://github.com/huggingface/transformers)
2 |
--------------------------------------------------------------------------------
/Transformers/Transformers.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Transformers.ipynb",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyMq7zDv55D6nmR0jiBGxxXn",
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {
20 | "id": "view-in-github",
21 | "colab_type": "text"
22 | },
23 | "source": [
24 | "
"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "3oRmLL1owcS6",
31 | "colab_type": "text"
32 | },
33 | "source": [
34 | "# Transformers\n",
35 | "\n",
36 | "Initial commit of colab notebook for Transformers."
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "metadata": {
42 | "id": "LllIDkqLwYuS",
43 | "colab_type": "code",
44 | "colab": {}
45 | },
46 | "source": [
47 | "import torch\n",
48 | "import tensorflow as tf"
49 | ],
50 | "execution_count": 2,
51 | "outputs": []
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {
56 | "id": "wnZMPSeHyG4J",
57 | "colab_type": "text"
58 | },
59 | "source": [
60 | "# Resources\n",
61 | "- [Gelu Activation Function](https://mlfromscratch.com/activation-functions-explained/)"
62 | ]
63 | }
64 | ]
65 | }
--------------------------------------------------------------------------------
/Transformers/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers
2 |
--------------------------------------------------------------------------------
/Transformers/translation/train.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | """
5 | Parameters
6 |
7 | d_model – the number of expected features in the encoder/decoder inputs (default=512).
8 |
9 | nhead – the number of heads in the multiheadattention models (default=8).
10 |
11 | num_encoder_layers – the number of sub-encoder-layers in the encoder (default=6).
12 |
13 | num_decoder_layers – the number of sub-decoder-layers in the decoder (default=6).
14 |
15 | dim_feedforward – the dimension of the feedforward network model (default=2048).
16 |
17 | dropout – the dropout value (default=0.1).
18 |
19 | activation – the activation function of encoder/decoder intermediate layer, relu or gelu (default=relu).
20 |
21 | custom_encoder – custom encoder (default=None).
22 |
23 | custom_decoder – custom decoder (default=None).
24 | """
25 | trfmr_config = {
26 | 'd_model': 256, # number of features in embedding
27 | 'nhead': 8, # number of attention heads
28 | 'num_encoder_layers': 8,
29 | 'num_decoder_layers': 8,
30 | 'dim_feedforward': 2048,
31 | 'activation': 'relu',
32 | }
33 |
34 | opt_config = {
35 | 'lr': 3e-4,
36 | 'beta1': 0.5,
37 | 'beta2': 0.999,
38 | 'num_epochs': 300
39 | }
40 |
41 |
42 | def main():
43 | # Initialize model.
44 |     trfm_model = torch.nn.Transformer(**trfmr_config)
45 |
46 | # Initialize optimizer.
47 |     opt = torch.optim.AdamW(trfm_model.parameters(), lr=opt_config['lr'],
48 |                             betas=(opt_config['beta1'], opt_config['beta2']))
49 |
50 |     # Set loss function. (For token-level translation, CrossEntropyLoss over vocabulary logits is the usual choice.)
51 |     loss_fn = torch.nn.BCELoss()
52 |
53 | # Load Data.
54 | # TODO: implement data loading.
55 | data = []
56 |
57 | # Training loop
58 |     for epoch in range(opt_config['num_epochs']):
59 |         for src, tgt, expected_out in data:
60 |             opt.zero_grad()
61 |             actual_out = trfm_model(src, tgt)  # nn.Transformer.forward expects both source and target sequences.
62 |             loss = loss_fn(actual_out, expected_out)
63 | loss.backward()
64 | opt.step()
65 |
66 |
67 | if __name__ == '__main__':
68 | main()
--------------------------------------------------------------------------------
/handwriting-synthesis/.gitignore:
--------------------------------------------------------------------------------
1 | data/raw/ascii
2 | data/raw/lineStrokes
3 | data/raw/original
4 | data/processed
5 |
6 | logs
7 | predictions
8 |
--------------------------------------------------------------------------------
/handwriting-synthesis/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | cache: pip
3 | python:
4 | - 2.7
5 | - 3.6
6 | #- nightly
7 | #- pypy
8 | #- pypy3
9 | matrix:
10 | allow_failures:
11 | - python: nightly
12 | - python: pypy
13 | - python: pypy3
14 | install:
15 | #- pip install -r requirements.txt
16 | - pip install flake8 # pytest # add another testing frameworks later
17 | before_script:
18 | # stop the build if there are Python syntax errors or undefined names
19 | - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics
20 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
21 | - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
22 | script:
23 | - true # pytest --capture=sys # add other tests here
24 | notifications:
25 | on_success: change
26 | on_failure: change # `always` will be the setting once code changes slow down
27 |
--------------------------------------------------------------------------------
/handwriting-synthesis/checkpoints/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "model-17900"
2 | all_model_checkpoint_paths: "model-17900"
3 |
--------------------------------------------------------------------------------
/handwriting-synthesis/checkpoints/model-17900.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/checkpoints/model-17900.data-00000-of-00001
--------------------------------------------------------------------------------
/handwriting-synthesis/checkpoints/model-17900.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/checkpoints/model-17900.index
--------------------------------------------------------------------------------
/handwriting-synthesis/checkpoints/model-17900.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/checkpoints/model-17900.meta
--------------------------------------------------------------------------------
/handwriting-synthesis/data/blacklist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/data/blacklist.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/data/processed/.gitattributes:
--------------------------------------------------------------------------------
1 | *.npy filter=lfs diff=lfs merge=lfs -text
2 |
--------------------------------------------------------------------------------
/handwriting-synthesis/data/processed/c.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:c292920514ca4f12673a81b96ccdadbf99ee28ef4aa0f7b60a85706691c87abe
3 | size 871253
4 |
--------------------------------------------------------------------------------
/handwriting-synthesis/data/processed/c_len.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:afe6b3a07822d7f90b3974a08722b55017319fe1dcee24fc9424ee81a9683195
3 | size 11743
4 |
--------------------------------------------------------------------------------
/handwriting-synthesis/data/processed/w_id.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:02939b9bbc2347eca0faeb23e12a33453f69b868ef7ee410287fe0af4121d8d9
3 | size 23358
4 |
--------------------------------------------------------------------------------
/handwriting-synthesis/data/processed/x.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:c6b95c4b53f6ac656884e0bee483b7d3eb0a2e1352de4a102fae546cac3ae3e8
3 | size 167256128
4 |
--------------------------------------------------------------------------------
/handwriting-synthesis/data/processed/x_len.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:dde6dd72f5017c5608c40114d68657adf72be571e7c90493832f151da88f6ab1
3 | size 23358
4 |
--------------------------------------------------------------------------------
/handwriting-synthesis/data/raw/readme.md:
--------------------------------------------------------------------------------
1 | ## Model Training Instructions
2 |
3 | In order to train a model, data must be downloaded and placed in this directory.
4 |
5 | Follow the download instructions here http://www.fki.inf.unibe.ch/databases/iam-on-line-handwriting-database.
6 |
7 | Only a subset of the downloaded data is required. Move the relevant downloaded data so the directory structure is as follows:
8 |
9 | ```
10 | data/
11 | ├── raw/
12 | │ ├── ascii/
13 | │ ├── lineStrokes/
14 | │   └── original/
15 | └── blacklist.npy
16 | ```
17 |
18 | Once this is completed, run `prepare_data.py` to extract the data and dump it to numpy files.
19 |
20 | To train the model, run `rnn.py`. This takes a couple of days on a single Tesla K80.
21 |
22 |
--------------------------------------------------------------------------------
/handwriting-synthesis/data_frame.py:
--------------------------------------------------------------------------------
1 | import copy
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from sklearn.model_selection import train_test_split
6 |
7 |
8 | class DataFrame(object):
9 |
10 | """Minimal pd.DataFrame analog for handling n-dimensional numpy matrices with additional
11 | support for shuffling, batching, and train/test splitting.
12 |
13 | Args:
14 | columns: List of names corresponding to the matrices in data.
15 | data: List of n-dimensional data matrices ordered in correspondence with columns.
16 | All matrices must have the same leading dimension. Data can also be fed a list of
17 | instances of np.memmap, in which case RAM usage can be limited to the size of a
18 | single batch.
19 | """
20 |
21 | def __init__(self, columns, data):
22 | assert len(columns) == len(data), 'columns length does not match data length'
23 |
24 | lengths = [mat.shape[0] for mat in data]
25 | assert len(set(lengths)) == 1, 'all matrices in data must have same first dimension'
26 |
27 | self.length = lengths[0]
28 | self.columns = columns
29 | self.data = data
30 | self.dict = dict(zip(self.columns, self.data))
31 | self.idx = np.arange(self.length)
32 |
33 | def shapes(self):
34 | return pd.Series(dict(zip(self.columns, [mat.shape for mat in self.data])))
35 |
36 | def dtypes(self):
37 | return pd.Series(dict(zip(self.columns, [mat.dtype for mat in self.data])))
38 |
39 | def shuffle(self):
40 | np.random.shuffle(self.idx)
41 |
42 | def train_test_split(self, train_size, random_state=np.random.randint(1000), stratify=None):
43 | train_idx, test_idx = train_test_split(
44 | self.idx,
45 | train_size=train_size,
46 | random_state=random_state,
47 | stratify=stratify
48 | )
49 | train_df = DataFrame(copy.copy(self.columns), [mat[train_idx] for mat in self.data])
50 | test_df = DataFrame(copy.copy(self.columns), [mat[test_idx] for mat in self.data])
51 | return train_df, test_df
52 |
53 | def batch_generator(self, batch_size, shuffle=True, num_epochs=10000, allow_smaller_final_batch=False):
54 | epoch_num = 0
55 | while epoch_num < num_epochs:
56 | if shuffle:
57 | self.shuffle()
58 |
59 | for i in range(0, self.length + 1, batch_size):
60 | batch_idx = self.idx[i: i + batch_size]
61 | if not allow_smaller_final_batch and len(batch_idx) != batch_size:
62 | break
63 | yield DataFrame(
64 | columns=copy.copy(self.columns),
65 | data=[mat[batch_idx].copy() for mat in self.data]
66 | )
67 |
68 | epoch_num += 1
69 |
70 | def iterrows(self):
71 | for i in self.idx:
72 | yield self[i]
73 |
74 | def mask(self, mask):
75 | return DataFrame(copy.copy(self.columns), [mat[mask] for mat in self.data])
76 |
77 | def concat(self, other_df):
78 | mats = []
79 | for column in self.columns:
80 | mats.append(np.concatenate([self[column], other_df[column]], axis=0))
81 | return DataFrame(copy.copy(self.columns), mats)
82 |
83 | def items(self):
84 | return self.dict.items()
85 |
86 | def __iter__(self):
87 | return self.dict.items().__iter__()
88 |
89 | def __len__(self):
90 | return self.length
91 |
92 | def __getitem__(self, key):
93 | if isinstance(key, str):
94 | return self.dict[key]
95 |
96 | elif isinstance(key, int):
97 | return pd.Series(dict(zip(self.columns, [mat[self.idx[key]] for mat in self.data])))
98 |
99 | def __setitem__(self, key, value):
100 | assert value.shape[0] == len(self), 'matrix first dimension does not match'
101 | if key not in self.columns:
102 | self.columns.append(key)
103 | self.data.append(value)
104 | self.dict[key] = value
105 |
--------------------------------------------------------------------------------
/handwriting-synthesis/demo.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 |
4 | import numpy as np
5 | import svgwrite
6 |
7 | import drawing
8 | import lyrics
9 | from rnn import rnn
10 |
11 |
12 | class Hand(object):
13 |
14 | def __init__(self):
15 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
16 | self.nn = rnn(
17 | log_dir='logs',
18 | checkpoint_dir='checkpoints',
19 | prediction_dir='predictions',
20 | learning_rates=[.0001, .00005, .00002],
21 | batch_sizes=[32, 64, 64],
22 | patiences=[1500, 1000, 500],
23 | beta1_decays=[.9, .9, .9],
24 | validation_batch_size=32,
25 | optimizer='rms',
26 | num_training_steps=100000,
27 | warm_start_init_step=17900,
28 | regularization_constant=0.0,
29 | keep_prob=1.0,
30 | enable_parameter_averaging=False,
31 | min_steps_to_checkpoint=2000,
32 | log_interval=20,
33 | logging_level=logging.CRITICAL,
34 | grad_clip=10,
35 | lstm_size=400,
36 | output_mixture_components=20,
37 | attention_mixture_components=10
38 | )
39 | self.nn.restore()
40 |
41 | def write(self, filename, lines, biases=None, styles=None, stroke_colors=None, stroke_widths=None):
42 | valid_char_set = set(drawing.alphabet)
43 | for line_num, line in enumerate(lines):
44 | if len(line) > 75:
45 | raise ValueError(
46 | (
47 | "Each line must be at most 75 characters. "
48 | "Line {} contains {}"
49 | ).format(line_num, len(line))
50 | )
51 |
52 | for char in line:
53 | if char not in valid_char_set:
54 | raise ValueError(
55 | (
56 | "Invalid character {} detected in line {}. "
57 | "Valid character set is {}"
58 | ).format(char, line_num, valid_char_set)
59 | )
60 |
61 | strokes = self._sample(lines, biases=biases, styles=styles)
62 | self._draw(strokes, lines, filename, stroke_colors=stroke_colors, stroke_widths=stroke_widths)
63 |
64 | def _sample(self, lines, biases=None, styles=None):
65 | num_samples = len(lines)
66 | max_tsteps = 40*max([len(i) for i in lines])
67 | biases = biases if biases is not None else [0.5]*num_samples
68 |
69 | x_prime = np.zeros([num_samples, 1200, 3])
70 | x_prime_len = np.zeros([num_samples])
71 | chars = np.zeros([num_samples, 120])
72 | chars_len = np.zeros([num_samples])
73 |
74 | if styles is not None:
75 | for i, (cs, style) in enumerate(zip(lines, styles)):
76 | x_p = np.load('styles/style-{}-strokes.npy'.format(style))
77 | c_p = np.load('styles/style-{}-chars.npy'.format(style)).tostring().decode('utf-8')
78 |
79 | c_p = str(c_p) + " " + cs
80 | c_p = drawing.encode_ascii(c_p)
81 | c_p = np.array(c_p)
82 |
83 | x_prime[i, :len(x_p), :] = x_p
84 | x_prime_len[i] = len(x_p)
85 | chars[i, :len(c_p)] = c_p
86 | chars_len[i] = len(c_p)
87 |
88 | else:
89 | for i in range(num_samples):
90 | encoded = drawing.encode_ascii(lines[i])
91 | chars[i, :len(encoded)] = encoded
92 | chars_len[i] = len(encoded)
93 |
94 | [samples] = self.nn.session.run(
95 | [self.nn.sampled_sequence],
96 | feed_dict={
97 | self.nn.prime: styles is not None,
98 | self.nn.x_prime: x_prime,
99 | self.nn.x_prime_len: x_prime_len,
100 | self.nn.num_samples: num_samples,
101 | self.nn.sample_tsteps: max_tsteps,
102 | self.nn.c: chars,
103 | self.nn.c_len: chars_len,
104 | self.nn.bias: biases
105 | }
106 | )
107 | samples = [sample[~np.all(sample == 0.0, axis=1)] for sample in samples]
108 | return samples
109 |
110 | def _draw(self, strokes, lines, filename, stroke_colors=None, stroke_widths=None):
111 | stroke_colors = stroke_colors or ['black']*len(lines)
112 | stroke_widths = stroke_widths or [2]*len(lines)
113 |
114 | line_height = 60
115 | view_width = 1000
116 | view_height = line_height*(len(strokes) + 1)
117 |
118 | dwg = svgwrite.Drawing(filename=filename)
119 | dwg.viewbox(width=view_width, height=view_height)
120 | dwg.add(dwg.rect(insert=(0, 0), size=(view_width, view_height), fill='white'))
121 |
122 | initial_coord = np.array([0, -(3*line_height / 4)])
123 | for offsets, line, color, width in zip(strokes, lines, stroke_colors, stroke_widths):
124 |
125 | if not line:
126 | initial_coord[1] -= line_height
127 | continue
128 |
129 | offsets[:, :2] *= 1.5
130 | strokes = drawing.offsets_to_coords(offsets)
131 | strokes = drawing.denoise(strokes)
132 | strokes[:, :2] = drawing.align(strokes[:, :2])
133 |
134 | strokes[:, 1] *= -1
135 | strokes[:, :2] -= strokes[:, :2].min() + initial_coord
136 | strokes[:, 0] += (view_width - strokes[:, 0].max()) / 2
137 |
138 | prev_eos = 1.0
139 | p = "M{},{} ".format(0, 0)
140 | for x, y, eos in zip(*strokes.T):
141 | p += '{}{},{} '.format('M' if prev_eos == 1.0 else 'L', x, y)
142 | prev_eos = eos
143 | path = svgwrite.path.Path(p)
144 | path = path.stroke(color=color, width=width, linecap='round').fill("none")
145 | dwg.add(path)
146 |
147 | initial_coord[1] -= line_height
148 |
149 | dwg.save()
150 |
151 |
152 | if __name__ == '__main__':
153 | hand = Hand()
154 |
155 | # usage demo
156 | lines = [
157 | "Now this is a story all about how",
158 | "My life got flipped turned upside down",
159 | "And I'd like to take a minute, just sit right there",
160 | "I'll tell you how I became the prince of a town called Bel-Air",
161 | ]
162 | biases = [.75 for i in lines]
163 | styles = [9 for i in lines]
164 | stroke_colors = ['red', 'green', 'black', 'blue']
165 | stroke_widths = [1, 2, 1, 2]
166 |
167 | hand.write(
168 | filename='img/usage_demo.svg',
169 | lines=lines,
170 | biases=biases,
171 | styles=styles,
172 | stroke_colors=stroke_colors,
173 | stroke_widths=stroke_widths
174 | )
175 |
176 | # demo number 1 - fixed bias, fixed style
177 | lines = lyrics.all_star.split("\n")
178 | biases = [.75 for i in lines]
179 | styles = [12 for i in lines]
180 |
181 | hand.write(
182 | filename='img/all_star.svg',
183 | lines=lines,
184 | biases=biases,
185 | styles=styles,
186 | )
187 |
188 | # demo number 2 - fixed bias, varying style
189 | lines = lyrics.downtown.split("\n")
190 | biases = [.75 for i in lines]
191 | styles = np.cumsum(np.array([len(i) for i in lines]) == 0).astype(int)
192 |
193 | hand.write(
194 | filename='img/downtown.svg',
195 | lines=lines,
196 | biases=biases,
197 | styles=styles,
198 | )
199 |
200 | # demo number 3 - varying bias, fixed style
201 | lines = lyrics.give_up.split("\n")
202 | biases = .2*np.flip(np.cumsum([len(i) == 0 for i in lines]), 0)
203 | styles = [7 for i in lines]
204 |
205 | hand.write(
206 | filename='img/give_up.svg',
207 | lines=lines,
208 | biases=biases,
209 | styles=styles,
210 | )
211 |
--------------------------------------------------------------------------------
/handwriting-synthesis/drawing.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from collections import defaultdict
3 |
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 | from scipy.signal import savgol_filter
7 | from scipy.interpolate import interp1d
8 |
9 |
10 | alphabet = [
11 | '\x00', ' ', '!', '"', '#', "'", '(', ')', ',', '-', '.',
12 | '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
13 | '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
14 | 'L', 'M', 'N', 'O', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'Y',
15 | 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
16 | 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
17 | 'y', 'z'
18 | ]
19 | alphabet_ord = list(map(ord, alphabet))
20 | alpha_to_num = defaultdict(int, list(map(reversed, enumerate(alphabet))))
21 | num_to_alpha = dict(enumerate(alphabet_ord))
22 |
23 | MAX_STROKE_LEN = 1200
24 | MAX_CHAR_LEN = 75
25 |
26 |
27 | def align(coords):
28 | """
29 | corrects for global slant/offset in handwriting strokes
30 | """
31 | coords = np.copy(coords)
32 | X, Y = coords[:, 0].reshape(-1, 1), coords[:, 1].reshape(-1, 1)
33 | X = np.concatenate([np.ones([X.shape[0], 1]), X], axis=1)
34 | offset, slope = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(Y).squeeze()
35 | theta = np.arctan(slope)
36 | rotation_matrix = np.array(
37 | [[np.cos(theta), -np.sin(theta)],
38 | [np.sin(theta), np.cos(theta)]]
39 | )
40 | coords[:, :2] = np.dot(coords[:, :2], rotation_matrix) - offset
41 | return coords
42 |
43 |
44 | def skew(coords, degrees):
45 | """
46 | skews strokes by given degrees
47 | """
48 | coords = np.copy(coords)
49 | theta = degrees * np.pi/180
50 | A = np.array([[np.cos(-theta), 0], [np.sin(-theta), 1]])
51 | coords[:, :2] = np.dot(coords[:, :2], A)
52 | return coords
53 |
54 |
55 | def stretch(coords, x_factor, y_factor):
56 | """
57 | stretches strokes along x and y axis
58 | """
59 | coords = np.copy(coords)
60 | coords[:, :2] *= np.array([x_factor, y_factor])
61 | return coords
62 |
63 |
64 | def add_noise(coords, scale):
65 | """
66 | adds gaussian noise to strokes
67 | """
68 | coords = np.copy(coords)
69 | coords[1:, :2] += np.random.normal(loc=0.0, scale=scale, size=coords[1:, :2].shape)
70 | return coords
71 |
72 |
73 | def encode_ascii(ascii_string):
74 | """
75 | encodes ascii string to array of ints
76 | """
77 | return np.array(list(map(lambda x: alpha_to_num[x], ascii_string)) + [0])
78 |
79 |
80 | def denoise(coords):
81 | """
82 | smoothing filter to mitigate some artifacts of the data collection
83 | """
84 | coords = np.split(coords, np.where(coords[:, 2] == 1)[0] + 1, axis=0)
85 | new_coords = []
86 | for stroke in coords:
87 | if len(stroke) != 0:
88 | x_new = savgol_filter(stroke[:, 0], 7, 3, mode='nearest')
89 | y_new = savgol_filter(stroke[:, 1], 7, 3, mode='nearest')
90 | xy_coords = np.hstack([x_new.reshape(-1, 1), y_new.reshape(-1, 1)])
91 | stroke = np.concatenate([xy_coords, stroke[:, 2].reshape(-1, 1)], axis=1)
92 | new_coords.append(stroke)
93 |
94 | coords = np.vstack(new_coords)
95 | return coords
96 |
97 |
98 | def interpolate(coords, factor=2):
99 | """
100 | interpolates strokes using cubic spline
101 | """
102 | coords = np.split(coords, np.where(coords[:, 2] == 1)[0] + 1, axis=0)
103 | new_coords = []
104 | for stroke in coords:
105 |
106 | if len(stroke) == 0:
107 | continue
108 |
109 | xy_coords = stroke[:, :2]
110 |
111 | if len(stroke) > 3:
112 | f_x = interp1d(np.arange(len(stroke)), stroke[:, 0], kind='cubic')
113 | f_y = interp1d(np.arange(len(stroke)), stroke[:, 1], kind='cubic')
114 |
115 | xx = np.linspace(0, len(stroke) - 1, factor*(len(stroke)))
116 | yy = np.linspace(0, len(stroke) - 1, factor*(len(stroke)))
117 |
118 | x_new = f_x(xx)
119 | y_new = f_y(yy)
120 |
121 | xy_coords = np.hstack([x_new.reshape(-1, 1), y_new.reshape(-1, 1)])
122 |
123 | stroke_eos = np.zeros([len(xy_coords), 1])
124 | stroke_eos[-1] = 1.0
125 | stroke = np.concatenate([xy_coords, stroke_eos], axis=1)
126 | new_coords.append(stroke)
127 |
128 | coords = np.vstack(new_coords)
129 | return coords
130 |
131 |
132 | def normalize(offsets):
133 | """
134 | normalizes strokes to median unit norm
135 | """
136 | offsets = np.copy(offsets)
137 | offsets[:, :2] /= np.median(np.linalg.norm(offsets[:, :2], axis=1))
138 | return offsets
139 |
140 |
141 | def coords_to_offsets(coords):
142 | """
143 | convert from coordinates to offsets
144 | """
145 | offsets = np.concatenate([coords[1:, :2] - coords[:-1, :2], coords[1:, 2:3]], axis=1)
146 | offsets = np.concatenate([np.array([[0, 0, 1]]), offsets], axis=0)
147 | return offsets
148 |
149 |
150 | def offsets_to_coords(offsets):
151 | """
152 | convert from offsets to coordinates
153 | """
154 | return np.concatenate([np.cumsum(offsets[:, :2], axis=0), offsets[:, 2:3]], axis=1)
155 |
156 |
157 | def draw(
158 | offsets,
159 | ascii_seq=None,
160 | align_strokes=True,
161 | denoise_strokes=True,
162 | interpolation_factor=None,
163 | save_file=None
164 | ):
165 | strokes = offsets_to_coords(offsets)
166 |
167 | if denoise_strokes:
168 | strokes = denoise(strokes)
169 |
170 | if interpolation_factor is not None:
171 | strokes = interpolate(strokes, factor=interpolation_factor)
172 |
173 | if align_strokes:
174 | strokes[:, :2] = align(strokes[:, :2])
175 |
176 | fig, ax = plt.subplots(figsize=(12, 3))
177 |
178 | stroke = []
179 | for x, y, eos in strokes:
180 | stroke.append((x, y))
181 | if eos == 1:
182 |             coords = list(zip(*stroke))  # list() so the result can be indexed under Python 3
183 | ax.plot(coords[0], coords[1], 'k')
184 | stroke = []
185 | if stroke:
186 |         coords = list(zip(*stroke))
187 | ax.plot(coords[0], coords[1], 'k')
188 | stroke = []
189 |
190 | ax.set_xlim(-50, 600)
191 | ax.set_ylim(-40, 40)
192 |
193 | ax.set_aspect('equal')
194 | plt.tick_params(
195 | axis='both',
196 | left='off',
197 | top='off',
198 | right='off',
199 | bottom='off',
200 | labelleft='off',
201 | labeltop='off',
202 | labelright='off',
203 | labelbottom='off'
204 | )
205 |
206 | if ascii_seq is not None:
207 | if not isinstance(ascii_seq, str):
208 | ascii_seq = ''.join(list(map(chr, ascii_seq)))
209 | plt.title(ascii_seq)
210 |
211 | if save_file is not None:
212 | plt.savefig(save_file)
213 | print('saved to {}'.format(save_file))
214 | else:
215 | plt.show()
216 | plt.close('all')
217 |
--------------------------------------------------------------------------------
/handwriting-synthesis/lyrics.py:
--------------------------------------------------------------------------------
1 | """lyrics taken from https://www.azlyrics.com/"""
2 |
3 | all_star = """Somebody once told me the world is gonna roll me
4 | I ain't the sharpest tool in the shed
5 | She was looking kind of dumb with her finger and her thumb
6 | In the shape of an "L" on her forehead
7 |
8 | Well, the years start coming and they don't stop coming
9 | Fed to the rules and I hit the ground running
10 | Didn't make sense not to live for fun
11 | Your brain gets smart but your head gets dumb
12 |
13 | So much to do, so much to see
14 | So what's wrong with taking the back streets?
15 | You'll never know if you don't go
16 | You'll never shine if you don't glow
17 |
18 | Hey, now, you're an All Star, get your game on, go play
19 | Hey, now, you're a Rock Star, get the show on, get paid
20 | And all that glitters is gold
21 | Only shooting stars break the mold
22 |
23 | It's a cool place and they say it gets colder
24 | You're bundled up now wait 'til you get older
25 | But the meteor men beg to differ
26 | Judging by the hole in the satellite picture
27 |
28 | The ice we skate is getting pretty thin
29 | The water's getting warm so you might as well swim
30 | My world's on fire. How about yours?
31 | That's the way I like it and I'll never get bored.
32 |
33 | Somebody once asked could I spare some change for gas
34 | I need to get myself away from this place
35 | I said yep, what a concept
36 | I could use a little fuel myself
37 | And we could all use a little change
38 |
39 | Well, the years start coming and they don't stop coming
40 | Fed to the rules and I hit the ground running
41 | Didn't make sense not to live for fun
42 | Your brain gets smart but your head gets dumb
43 |
44 | So much to do, so much to see
45 | So what's wrong with taking the back streets?
46 | You'll never know if you don't go
47 | You'll never shine if you don't glow.
48 |
49 | And all that glitters is gold
50 | Only shooting stars break the mold"""
51 |
52 | downtown = """Making my way downtown
53 | Walking fast
54 | Faces pass
55 | And I'm home-bound
56 |
57 | Staring blankly ahead
58 | Just making my way
59 | Making a way
60 | Through the crowd
61 |
62 | And I need you
63 | And I miss you
64 | And now I wonder
65 |
66 | If I could fall into the sky
67 | Do you think time would pass me by?
68 | 'Cause you know I'd walk a thousand miles
69 | If I could just see you tonight
70 |
71 | It's always times like these
72 | When I think of you
73 | And I wonder if you ever think of me
74 | 'Cause everything's so wrong
75 | And I don't belong
76 | Living in your precious memory
77 |
78 | 'Cause I need you
79 | And I miss you
80 | And now I wonder
81 |
82 | If I could fall into the sky
83 | Do you think time would pass me by?
84 | 'Cause you know I'd walk a thousand miles
85 | If I could just see you tonight
86 |
87 | And I, I don't wanna let you know
88 | I, I drown in your memory
89 | I, I don't wanna let this go
90 | I, I don't
91 |
92 | Making my way downtown
93 | Walking fast
94 | Faces pass
95 | And I'm home-bound
96 |
97 | Staring blankly ahead
98 | Just making my way
99 | Making a way
100 | Through the crowd
101 |
102 | And I still need you
103 | And I still miss you
104 | And now I wonder
105 |
106 | If I could fall into the sky
107 | Do you think time would pass us by?
108 | 'Cause you know I'd walk a thousand miles
109 | If I could just see you
110 |
111 | If I could fall into the sky
112 | Do you think time would pass me by?
113 | 'Cause you know I'd walk a thousand miles
114 | If I could just see you
115 | If I could just hold you tonight"""
116 |
117 | give_up = """We're no strangers to love
118 | You know the rules and so do I
119 | A full commitment's what I'm thinking of
120 | You wouldn't get this from any other guy
121 |
122 | I just wanna tell you how I'm feeling
123 | Gotta make you understand
124 |
125 | Never gonna give you up
126 | Never gonna let you down
127 | Never gonna run around and desert you
128 | Never gonna make you cry
129 | Never gonna say goodbye
130 | Never gonna tell a lie and hurt you
131 |
132 | We've known each other for so long
133 | Your heart's been aching, but
134 | You're too shy to say it
135 | Inside, we both know what's been going on
136 | We know the game and we're gonna play it
137 |
138 | And if you ask me how I'm feeling
139 | Don't tell me you're too blind to see
140 |
141 | Never gonna give you up
142 | Never gonna let you down
143 | Never gonna run around and desert you
144 | Never gonna make you cry
145 | Never gonna say goodbye
146 | Never gonna tell a lie and hurt you
147 |
148 | Never gonna give you up
149 | Never gonna let you down
150 | Never gonna run around and desert you
151 | Never gonna make you cry
152 | Never gonna say goodbye
153 | Never gonna tell a lie and hurt you
154 |
155 | (Ooh, give you up)
156 | (Ooh, give you up)
157 | Never gonna give, never gonna give
158 | (Give you up)
159 | Never gonna give, never gonna give
160 | (Give you up)
161 |
162 | We've known each other for so long
163 | Your heart's been aching, but
164 | You're too shy to say it
165 | Inside, we both know what's been going on
166 | We know the game and we're gonna play it
167 |
168 | I just wanna tell you how I'm feeling
169 | Gotta make you understand
170 |
171 | Never gonna give you up
172 | Never gonna let you down
173 | Never gonna run around and desert you
174 | Never gonna make you cry
175 | Never gonna say goodbye
176 | Never gonna tell a lie and hurt you
177 |
178 | Never gonna give you up
179 | Never gonna let you down
180 | Never gonna run around and desert you
181 | Never gonna make you cry
182 | Never gonna say goodbye
183 | Never gonna tell a lie and hurt you
184 |
185 | Never gonna give you up
186 | Never gonna let you down
187 | Never gonna run around and desert you
188 | Never gonna make you cry
189 | Never gonna say goodbye
190 | Never gonna tell a lie and hurt you"""
191 |
--------------------------------------------------------------------------------
/handwriting-synthesis/prepare_data.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | from xml.etree import ElementTree
4 |
5 | import numpy as np
6 |
7 | import drawing
8 |
9 |
10 | def get_stroke_sequence(filename):
11 | tree = ElementTree.parse(filename).getroot()
12 | strokes = [i for i in tree if i.tag == 'StrokeSet'][0]
13 |
14 | coords = []
15 | for stroke in strokes:
16 | for i, point in enumerate(stroke):
17 | coords.append([
18 | int(point.attrib['x']),
19 | -1*int(point.attrib['y']),
20 | int(i == len(stroke) - 1)
21 | ])
22 | coords = np.array(coords)
23 |
24 | coords = drawing.align(coords)
25 | coords = drawing.denoise(coords)
26 | offsets = drawing.coords_to_offsets(coords)
27 | offsets = offsets[:drawing.MAX_STROKE_LEN]
28 | offsets = drawing.normalize(offsets)
29 | return offsets
30 |
31 |
32 | def get_ascii_sequences(filename):
33 | sequences = open(filename, 'r').read()
34 | sequences = sequences.replace(r'%%%%%%%%%%%', '\n')
35 | sequences = [i.strip() for i in sequences.split('\n')]
36 | lines = sequences[sequences.index('CSR:') + 2:]
37 | lines = [line.strip() for line in lines if line.strip()]
38 | lines = [drawing.encode_ascii(line)[:drawing.MAX_CHAR_LEN] for line in lines]
39 | return lines
40 |
41 |
42 | def collect_data():
43 | fnames = []
44 | for dirpath, dirnames, filenames in os.walk('data/raw/ascii/'):
45 | if dirnames:
46 | continue
47 | for filename in filenames:
48 | if filename.startswith('.'):
49 | continue
50 | fnames.append(os.path.join(dirpath, filename))
51 |
52 | # low quality samples (selected by collecting samples to
53 | # which the trained model assigned very low likelihood)
54 | blacklist = set(np.load('data/blacklist.npy'))
55 |
56 | stroke_fnames, transcriptions, writer_ids = [], [], []
57 | for i, fname in enumerate(fnames):
58 | print(i, fname)
59 | if fname == 'data/raw/ascii/z01/z01-000/z01-000z.txt':
60 | continue
61 |
62 | head, tail = os.path.split(fname)
63 | last_letter = os.path.splitext(fname)[0][-1]
64 | last_letter = last_letter if last_letter.isalpha() else ''
65 |
66 | line_stroke_dir = head.replace('ascii', 'lineStrokes')
67 | line_stroke_fname_prefix = os.path.split(head)[-1] + last_letter + '-'
68 |
69 | if not os.path.isdir(line_stroke_dir):
70 | continue
71 | line_stroke_fnames = sorted([f for f in os.listdir(line_stroke_dir)
72 | if f.startswith(line_stroke_fname_prefix)])
73 | if not line_stroke_fnames:
74 | continue
75 |
76 | original_dir = head.replace('ascii', 'original')
77 | original_xml = os.path.join(original_dir, 'strokes' + last_letter + '.xml')
78 | tree = ElementTree.parse(original_xml)
79 | root = tree.getroot()
80 |
81 | general = root.find('General')
82 | if general is not None:
83 | writer_id = int(general[0].attrib.get('writerID', '0'))
84 | else:
85 | writer_id = int('0')
86 |
87 | ascii_sequences = get_ascii_sequences(fname)
88 | assert len(ascii_sequences) == len(line_stroke_fnames)
89 |
90 | for ascii_seq, line_stroke_fname in zip(ascii_sequences, line_stroke_fnames):
91 | if line_stroke_fname in blacklist:
92 | continue
93 |
94 | stroke_fnames.append(os.path.join(line_stroke_dir, line_stroke_fname))
95 | transcriptions.append(ascii_seq)
96 | writer_ids.append(writer_id)
97 |
98 | return stroke_fnames, transcriptions, writer_ids
99 |
100 |
101 | if __name__ == '__main__':
102 | print('traversing data directory...')
103 | stroke_fnames, transcriptions, writer_ids = collect_data()
104 |
105 | print('dumping to numpy arrays...')
106 | x = np.zeros([len(stroke_fnames), drawing.MAX_STROKE_LEN, 3], dtype=np.float32)
107 | x_len = np.zeros([len(stroke_fnames)], dtype=np.int16)
108 | c = np.zeros([len(stroke_fnames), drawing.MAX_CHAR_LEN], dtype=np.int8)
109 | c_len = np.zeros([len(stroke_fnames)], dtype=np.int8)
110 | w_id = np.zeros([len(stroke_fnames)], dtype=np.int16)
111 |     valid_mask = np.zeros([len(stroke_fnames)], dtype=bool)
112 |
113 | for i, (stroke_fname, c_i, w_id_i) in enumerate(zip(stroke_fnames, transcriptions, writer_ids)):
114 | if i % 200 == 0:
115 | print(i, '\t', '/', len(stroke_fnames))
116 | x_i = get_stroke_sequence(stroke_fname)
117 | valid_mask[i] = ~np.any(np.linalg.norm(x_i[:, :2], axis=1) > 60)
118 |
119 | x[i, :len(x_i), :] = x_i
120 | x_len[i] = len(x_i)
121 |
122 | c[i, :len(c_i)] = c_i
123 | c_len[i] = len(c_i)
124 |
125 | w_id[i] = w_id_i
126 |
127 | if not os.path.isdir('data/processed'):
128 | os.makedirs('data/processed')
129 |
130 | np.save('data/processed/x.npy', x[valid_mask])
131 | np.save('data/processed/x_len.npy', x_len[valid_mask])
132 | np.save('data/processed/c.npy', c[valid_mask])
133 | np.save('data/processed/c_len.npy', c_len[valid_mask])
134 | np.save('data/processed/w_id.npy', w_id[valid_mask])
135 |
--------------------------------------------------------------------------------
/handwriting-synthesis/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | # Handwriting Synthesis
3 | Implementation of the handwriting synthesis experiments in the paper Generating Sequences with Recurrent Neural Networks by Alex Graves. The implementation closely follows the original paper, with a few slight deviations, and the generated samples are of similar quality to those presented in the paper.
4 |
5 | Web demo is available here.
6 |
7 | ## Usage
8 | ```python
9 | lines = [
10 | "Now this is a story all about how",
11 | "My life got flipped turned upside down",
12 | "And I'd like to take a minute, just sit right there",
13 | "I'll tell you how I became the prince of a town called Bel-Air",
14 | ]
15 | biases = [.75 for i in lines]
16 | styles = [9 for i in lines]
17 | stroke_colors = ['red', 'green', 'black', 'blue']
18 | stroke_widths = [1, 2, 1, 2]
19 |
20 | hand = Hand()
21 | hand.write(
22 | filename='img/usage_demo.svg',
23 | lines=lines,
24 | biases=biases,
25 | styles=styles,
26 | stroke_colors=stroke_colors,
27 | stroke_widths=stroke_widths
28 | )
29 | ```
30 | 
31 |
32 | Currently, the `Hand` class must be imported from `demo.py`. If someone would like to package this project to make it more usable, please [contribute](#contribute).
33 |
34 | A pretrained model is included, but if you'd like to train your own, read these instructions.
35 |
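Roughly, training reduces to two commands (a sketch, assuming the IAM On-Line Handwriting Database has been extracted so that `data/raw/ascii/`, `data/raw/lineStrokes/`, and `data/raw/original/` exist, which are the directories `prepare_data.py` walks, and assuming `rnn.py` is the training entry point):

```
python prepare_data.py   # writes data/processed/{x,x_len,c,c_len,w_id}.npy
python rnn.py            # trains the model from the processed arrays
```
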
36 | ## Demonstrations
37 | Below are a few hundred samples from the model, including some samples demonstrating the effect of priming and biasing the model. Loosely speaking, biasing controls the neatness of the samples and priming controls the style of the samples. The code for these demonstrations can be found in `demo.py`.
38 |
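Concretely, these two knobs are just the `biases` and `styles` arguments of `Hand.write` shown in the Usage section. A minimal sketch of sweeping the bias while holding the style fixed (the text, style index, and output filename are arbitrary examples):

```python
from demo import Hand

lines = ["the quick brown fox jumps over the lazy dog"] * 4
biases = [1.0, 0.6, 0.3, 0.0]   # higher bias -> neater writing; 0.0 is unbiased
styles = [9] * len(lines)       # fixed priming style

hand = Hand()
hand.write(filename='img/bias_sweep.svg', lines=lines, biases=biases, styles=styles)
```
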
39 | ### Demo #1:
40 | The following samples were generated with a fixed style and fixed bias.
41 |
42 | **Smash Mouth – All Star (lyrics)**
43 | 
44 |
45 | ### Demo #2
46 | The following samples were generated with varying style and fixed bias. Each verse is generated in a different style.
47 |
48 | **Vanessa Carlton – A Thousand Miles (lyrics)**
49 | 
50 |
51 | ### Demo #3
52 | The following samples were generated with a fixed style and varying bias. Each verse has a lower bias than the previous, with the last verse being unbiased.
53 |
54 | **Leonard Cohen – Hallelujah (lyrics)**
55 | 
56 |
57 | ## Contribute
58 | This project was intended to serve as a reference implementation for a research paper, but since the results are of decent quality, it may be worthwhile to make the project more broadly usable. I plan to continue focusing on the machine learning side of things. That said, I'd welcome contributors who can:
59 |
60 | - Package this, and otherwise make it look more like a usable software project and less like research code.
61 | - Add support for more sophisticated drawing, animations, or anything else in this direction. Currently, the project only creates some simple svg files.
62 |
--------------------------------------------------------------------------------
/handwriting-synthesis/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib>=2.1.0
2 | pandas>=0.22.0
3 | scikit-learn>=0.19.1
4 | scipy>=1.0.0
5 | svgwrite>=1.1.12
6 | tensorflow==2.11.1
7 |
--------------------------------------------------------------------------------
/handwriting-synthesis/rnn_cell.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
3 | import tensorflow as tf
4 | import tensorflow.contrib.distributions as tfd
5 | import numpy as np
6 |
7 | from tf_utils import dense_layer, shape
8 |
9 |
10 | LSTMAttentionCellState = namedtuple(
11 | 'LSTMAttentionCellState',
12 | ['h1', 'c1', 'h2', 'c2', 'h3', 'c3', 'alpha', 'beta', 'kappa', 'w', 'phi']
13 | )
14 |
15 |
16 | class LSTMAttentionCell(tf.nn.rnn_cell.RNNCell):
17 |
18 | def __init__(
19 | self,
20 | lstm_size,
21 | num_attn_mixture_components,
22 | attention_values,
23 | attention_values_lengths,
24 | num_output_mixture_components,
25 | bias,
26 | reuse=None,
27 | ):
28 | self.reuse = reuse
29 | self.lstm_size = lstm_size
30 | self.num_attn_mixture_components = num_attn_mixture_components
31 | self.attention_values = attention_values
32 | self.attention_values_lengths = attention_values_lengths
33 | self.window_size = shape(self.attention_values, 2)
34 | self.char_len = tf.shape(attention_values)[1]
35 | self.batch_size = tf.shape(attention_values)[0]
36 | self.num_output_mixture_components = num_output_mixture_components
37 | self.output_units = 6*self.num_output_mixture_components + 1
38 | self.bias = bias
39 |
40 | @property
41 | def state_size(self):
42 | return LSTMAttentionCellState(
43 | self.lstm_size,
44 | self.lstm_size,
45 | self.lstm_size,
46 | self.lstm_size,
47 | self.lstm_size,
48 | self.lstm_size,
49 | self.num_attn_mixture_components,
50 | self.num_attn_mixture_components,
51 | self.num_attn_mixture_components,
52 | self.window_size,
53 | self.char_len,
54 | )
55 |
56 | @property
57 | def output_size(self):
58 | return self.lstm_size
59 |
60 | def zero_state(self, batch_size, dtype):
61 | return LSTMAttentionCellState(
62 | tf.zeros([batch_size, self.lstm_size]),
63 | tf.zeros([batch_size, self.lstm_size]),
64 | tf.zeros([batch_size, self.lstm_size]),
65 | tf.zeros([batch_size, self.lstm_size]),
66 | tf.zeros([batch_size, self.lstm_size]),
67 | tf.zeros([batch_size, self.lstm_size]),
68 | tf.zeros([batch_size, self.num_attn_mixture_components]),
69 | tf.zeros([batch_size, self.num_attn_mixture_components]),
70 | tf.zeros([batch_size, self.num_attn_mixture_components]),
71 | tf.zeros([batch_size, self.window_size]),
72 | tf.zeros([batch_size, self.char_len]),
73 | )
74 |
75 | def __call__(self, inputs, state, scope=None):
76 | with tf.variable_scope(scope or type(self).__name__, reuse=tf.AUTO_REUSE):
77 |
78 | # lstm 1
79 | s1_in = tf.concat([state.w, inputs], axis=1)
80 | cell1 = tf.contrib.rnn.LSTMCell(self.lstm_size)
81 | s1_out, s1_state = cell1(s1_in, state=(state.c1, state.h1))
82 |
83 | # attention
84 | attention_inputs = tf.concat([state.w, inputs, s1_out], axis=1)
85 | attention_params = dense_layer(attention_inputs, 3*self.num_attn_mixture_components, scope='attention')
86 | alpha, beta, kappa = tf.split(tf.nn.softplus(attention_params), 3, axis=1)
87 | kappa = state.kappa + kappa / 25.0
88 | beta = tf.clip_by_value(beta, .01, np.inf)
89 |
90 | kappa_flat, alpha_flat, beta_flat = kappa, alpha, beta
91 | kappa, alpha, beta = tf.expand_dims(kappa, 2), tf.expand_dims(alpha, 2), tf.expand_dims(beta, 2)
92 |
93 | enum = tf.reshape(tf.range(self.char_len), (1, 1, self.char_len))
94 | u = tf.cast(tf.tile(enum, (self.batch_size, self.num_attn_mixture_components, 1)), tf.float32)
95 | phi_flat = tf.reduce_sum(alpha*tf.exp(-tf.square(kappa - u) / beta), axis=1)
96 |
97 | phi = tf.expand_dims(phi_flat, 2)
98 | sequence_mask = tf.cast(tf.sequence_mask(self.attention_values_lengths, maxlen=self.char_len), tf.float32)
99 | sequence_mask = tf.expand_dims(sequence_mask, 2)
100 | w = tf.reduce_sum(phi*self.attention_values*sequence_mask, axis=1)
101 |
102 | # lstm 2
103 | s2_in = tf.concat([inputs, s1_out, w], axis=1)
104 | cell2 = tf.contrib.rnn.LSTMCell(self.lstm_size)
105 | s2_out, s2_state = cell2(s2_in, state=(state.c2, state.h2))
106 |
107 | # lstm 3
108 | s3_in = tf.concat([inputs, s2_out, w], axis=1)
109 | cell3 = tf.contrib.rnn.LSTMCell(self.lstm_size)
110 | s3_out, s3_state = cell3(s3_in, state=(state.c3, state.h3))
111 |
112 | new_state = LSTMAttentionCellState(
113 | s1_state.h,
114 | s1_state.c,
115 | s2_state.h,
116 | s2_state.c,
117 | s3_state.h,
118 | s3_state.c,
119 | alpha_flat,
120 | beta_flat,
121 | kappa_flat,
122 | w,
123 | phi_flat,
124 | )
125 |
126 | return s3_out, new_state
127 |
128 | def output_function(self, state):
129 | params = dense_layer(state.h3, self.output_units, scope='gmm', reuse=tf.AUTO_REUSE)
130 | pis, mus, sigmas, rhos, es = self._parse_parameters(params)
131 | mu1, mu2 = tf.split(mus, 2, axis=1)
132 | mus = tf.stack([mu1, mu2], axis=2)
133 | sigma1, sigma2 = tf.split(sigmas, 2, axis=1)
134 |
135 | covar_matrix = [tf.square(sigma1), rhos*sigma1*sigma2,
136 | rhos*sigma1*sigma2, tf.square(sigma2)]
137 | covar_matrix = tf.stack(covar_matrix, axis=2)
138 | covar_matrix = tf.reshape(covar_matrix, (self.batch_size, self.num_output_mixture_components, 2, 2))
139 |
140 | mvn = tfd.MultivariateNormalFullCovariance(loc=mus, covariance_matrix=covar_matrix)
141 | b = tfd.Bernoulli(probs=es)
142 | c = tfd.Categorical(probs=pis)
143 |
144 | sampled_e = b.sample()
145 | sampled_coords = mvn.sample()
146 | sampled_idx = c.sample()
147 |
148 | idx = tf.stack([tf.range(self.batch_size), sampled_idx], axis=1)
149 | coords = tf.gather_nd(sampled_coords, idx)
150 | return tf.concat([coords, tf.cast(sampled_e, tf.float32)], axis=1)
151 |
152 | def termination_condition(self, state):
153 | char_idx = tf.cast(tf.argmax(state.phi, axis=1), tf.int32)
154 | final_char = char_idx >= self.attention_values_lengths - 1
155 | past_final_char = char_idx >= self.attention_values_lengths
156 | output = self.output_function(state)
157 | es = tf.cast(output[:, 2], tf.int32)
158 | is_eos = tf.equal(es, np.ones_like(es))
159 | return tf.logical_or(tf.logical_and(final_char, is_eos), past_final_char)
160 |
161 | def _parse_parameters(self, gmm_params, eps=1e-8, sigma_eps=1e-4):
162 | pis, sigmas, rhos, mus, es = tf.split(
163 | gmm_params,
164 | [
165 | 1*self.num_output_mixture_components,
166 | 2*self.num_output_mixture_components,
167 | 1*self.num_output_mixture_components,
168 | 2*self.num_output_mixture_components,
169 | 1
170 | ],
171 | axis=-1
172 | )
173 | pis = pis*(1 + tf.expand_dims(self.bias, 1))
174 | sigmas = sigmas - tf.expand_dims(self.bias, 1)
175 |
176 | pis = tf.nn.softmax(pis, axis=-1)
177 | pis = tf.where(pis < .01, tf.zeros_like(pis), pis)
178 | sigmas = tf.clip_by_value(tf.exp(sigmas), sigma_eps, np.inf)
179 | rhos = tf.clip_by_value(tf.tanh(rhos), eps - 1.0, 1.0 - eps)
180 | es = tf.clip_by_value(tf.nn.sigmoid(es), eps, 1.0 - eps)
181 | es = tf.where(es < .01, tf.zeros_like(es), es)
182 |
183 | return pis, mus, sigmas, rhos, es
184 |
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-0-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-0-chars.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-0-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-0-strokes.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-1-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-1-chars.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-1-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-1-strokes.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-1.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-10-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-10-chars.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-10-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-10-strokes.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-11-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-11-chars.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-11-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-11-strokes.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-12-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-12-chars.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-12-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-12-strokes.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-2-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-2-chars.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-2-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-2-strokes.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-2.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-3-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-3-chars.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-3-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-3-strokes.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-4-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-4-chars.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-4-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-4-strokes.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-5-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-5-chars.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-5-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-5-strokes.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-6-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-6-chars.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-6-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-6-strokes.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-7-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-7-chars.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-7-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-7-strokes.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-8-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-8-chars.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-8-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-8-strokes.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-9-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-9-chars.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-9-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-9-strokes.npy
--------------------------------------------------------------------------------
/handwriting-synthesis/test_example.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 |
4 | import numpy as np
5 |
6 | import drawing
7 | from data_frame import DataFrame
8 | from drawing import alphabet
9 |
10 | import svgwrite
11 |
12 |
13 | class DataReader(object):
14 |
15 | def __init__(self, data_dir):
16 | data_cols = ['x', 'x_len', 'c', 'c_len']
17 | data = [np.load(os.path.join(data_dir, '{}.npy'.format(i))) for i in data_cols]
18 |
19 | self.test_df = DataFrame(columns=data_cols, data=data)
20 | self.train_df, self.val_df = self.test_df.train_test_split(train_size=0.95, random_state=2018)
21 |
22 | print('train size', len(self.train_df))
23 | print('val size', len(self.val_df))
24 | print('test size', len(self.test_df))
25 |
26 | def train_batch_generator(self, batch_size):
27 | return self.batch_generator(
28 | batch_size=batch_size,
29 | df=self.train_df,
30 | shuffle=True,
31 | num_epochs=10000,
32 | mode='train'
33 | )
34 |
35 | def val_batch_generator(self, batch_size):
36 | return self.batch_generator(
37 | batch_size=batch_size,
38 | df=self.val_df,
39 | shuffle=True,
40 | num_epochs=10000,
41 | mode='val'
42 | )
43 |
44 | def test_batch_generator(self, batch_size):
45 | return self.batch_generator(
46 | batch_size=batch_size,
47 | df=self.test_df,
48 | shuffle=False,
49 | num_epochs=1,
50 | mode='test'
51 | )
52 |
53 | def batch_generator(self, batch_size, df, shuffle=True, num_epochs=10000, mode='train'):
54 | gen = df.batch_generator(
55 | batch_size=batch_size,
56 | shuffle=shuffle,
57 | num_epochs=num_epochs,
58 | allow_smaller_final_batch=(mode == 'test')
59 | )
60 | for batch in gen:
61 | batch['x_len'] = batch['x_len'] - 1
62 | max_x_len = np.max(batch['x_len'])
63 | max_c_len = np.max(batch['c_len'])
64 | batch['y'] = batch['x'][:, 1:max_x_len + 1, :]
65 | batch['x'] = batch['x'][:, :max_x_len, :]
66 | batch['c'] = batch['c'][:, :max_c_len]
67 | yield batch
68 |
69 |
70 | def _draw(strokes, lines, filename, stroke_colors=None, stroke_widths=None):
71 | stroke_colors = stroke_colors or ['black']*len(lines)
72 | stroke_widths = stroke_widths or [2]*len(lines)
73 |
74 | line_height = 60
75 | view_width = 1000
76 | view_height = line_height*(len(strokes) + 1)
77 |
78 | dwg = svgwrite.Drawing(filename=filename)
79 | dwg.viewbox(width=view_width, height=view_height)
80 | dwg.add(dwg.rect(insert=(0, 0), size=(view_width, view_height), fill='white'))
81 |
82 | initial_coord = np.array([0, -(3*line_height / 4)])
83 | for offsets, line, color, width in zip(strokes, lines, stroke_colors, stroke_widths):
84 |
85 | if not line:
86 | initial_coord[1] -= line_height
87 | continue
88 |
89 | offsets[:, :2] *= 1.5
90 | strokes = drawing.offsets_to_coords(offsets)
91 | strokes = drawing.denoise(strokes)
92 | strokes[:, :2] = drawing.align(strokes[:, :2])
93 |
94 | strokes[:, 1] *= -1
95 | strokes[:, :2] -= strokes[:, :2].min() + initial_coord
96 | strokes[:, 0] += (view_width - strokes[:, 0].max()) / 2
97 |
98 | prev_eos = 1.0
99 | p = "M{},{} ".format(0, 0)
100 | for x, y, eos in zip(*strokes.T):
101 | p += '{}{},{} '.format('M' if prev_eos == 1.0 else 'L', x, y)
102 | prev_eos = eos
103 | path = svgwrite.path.Path(p)
104 | path = path.stroke(color=color, width=width, linecap='round').fill("none")
105 | dwg.add(path)
106 |
107 | initial_coord[1] -= line_height
108 |
109 | dwg.save()
110 |
111 |
112 | def num_to_string(c, c_len):
113 | indices = c[:c_len - 1]
114 | str_out = ''.join([alphabet[x] for x in indices])
115 | return str_out
116 |
117 |
118 | if __name__ == '__main__':
119 | dr = DataReader(data_dir='data/processed/')
120 | # import ipdb; ipdb.set_trace()
121 |
122 | stroke_colors = ['red', 'green', 'black', 'blue']
123 | stroke_widths = [1, 2, 1, 2]
124 |
125 | lines = [
126 | num_to_string(dr.test_df['c'][0], dr.test_df['c_len'][0]),
127 | num_to_string(dr.test_df['c'][1], dr.test_df['c_len'][1]),
128 | ]
129 | strokes = [
130 | dr.test_df['x'][0][:dr.test_df['x_len'][0]],
131 | dr.test_df['x'][1][:dr.test_df['x_len'][1]],
132 | ]
133 |
134 | import ipdb; ipdb.set_trace()
135 |
136 | _draw(strokes, lines, "test.svg", stroke_colors=stroke_colors, stroke_widths=stroke_widths)
137 |
--------------------------------------------------------------------------------
/handwriting-synthesis/tf_utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | def dense_layer(inputs, output_units, bias=True, activation=None, batch_norm=None,
5 | dropout=None, scope='dense-layer', reuse=False):
6 | """
7 | Applies a dense layer to a 2D tensor of shape [batch_size, input_units]
8 | to produce a tensor of shape [batch_size, output_units].
9 | Args:
10 | inputs: Tensor of shape [batch size, input_units].
11 | output_units: Number of output units.
12 | activation: activation function.
13 | dropout: dropout keep prob.
14 | Returns:
15 | Tensor of shape [batch size, output_units].
16 | """
17 | with tf.variable_scope(scope, reuse=reuse):
18 | W = tf.get_variable(
19 | name='weights',
20 | initializer=tf.contrib.layers.variance_scaling_initializer(),
21 | shape=[shape(inputs, -1), output_units]
22 | )
23 | z = tf.matmul(inputs, W)
24 | if bias:
25 | b = tf.get_variable(
26 | name='biases',
27 | initializer=tf.constant_initializer(),
28 | shape=[output_units]
29 | )
30 | z = z + b
31 |
32 | if batch_norm is not None:
33 | z = tf.layers.batch_normalization(z, training=batch_norm, reuse=reuse)
34 |
35 | z = activation(z) if activation else z
36 | z = tf.nn.dropout(z, dropout) if dropout is not None else z
37 | return z
38 |
39 |
40 | def time_distributed_dense_layer(
41 | inputs, output_units, bias=True, activation=None, batch_norm=None,
42 | dropout=None, scope='time-distributed-dense-layer', reuse=False):
43 | """
44 | Applies a shared dense layer to each timestep of a tensor of shape
45 | [batch_size, max_seq_len, input_units] to produce a tensor of shape
46 | [batch_size, max_seq_len, output_units].
47 |
48 | Args:
49 | inputs: Tensor of shape [batch size, max sequence length, ...].
50 | output_units: Number of output units.
51 | activation: activation function.
52 | dropout: dropout keep prob.
53 |
54 | Returns:
55 | Tensor of shape [batch size, max sequence length, output_units].
56 | """
57 | with tf.variable_scope(scope, reuse=reuse):
58 | W = tf.get_variable(
59 | name='weights',
60 | initializer=tf.contrib.layers.variance_scaling_initializer(),
61 | shape=[shape(inputs, -1), output_units]
62 | )
63 | z = tf.einsum('ijk,kl->ijl', inputs, W)
64 | if bias:
65 | b = tf.get_variable(
66 | name='biases',
67 | initializer=tf.constant_initializer(),
68 | shape=[output_units]
69 | )
70 | z = z + b
71 |
72 | if batch_norm is not None:
73 | z = tf.layers.batch_normalization(z, training=batch_norm, reuse=reuse)
74 |
75 | z = activation(z) if activation else z
76 | z = tf.nn.dropout(z, dropout) if dropout is not None else z
77 | return z
78 |
79 |
80 | def shape(tensor, dim=None):
81 | """Get tensor shape/dimension as list/int"""
82 | if dim is None:
83 | return tensor.shape.as_list()
84 | else:
85 | return tensor.shape.as_list()[dim]
86 |
87 |
88 | def rank(tensor):
89 |     """Get tensor rank as a python int"""
90 | return len(tensor.shape.as_list())
91 |
--------------------------------------------------------------------------------
/handwriting-synthesis/upgrade_tf2.sh:
--------------------------------------------------------------------------------
1 | # NOTE: tf_upgrade_v2 does not successfully convert all these files to tf2
2 | tf_upgrade_v2 --infile rnn_cell.py --outfile rnn_cell.py
3 | tf_upgrade_v2 --infile rnn.py --outfile rnn.py
4 | tf_upgrade_v2 --infile rnn_ops.py --outfile rnn_ops.py
5 | tf_upgrade_v2 --infile tf_utils.py --outfile tf_utils.py
6 |
--------------------------------------------------------------------------------
/handwriting_generator/saved.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting_generator/saved.tgz
--------------------------------------------------------------------------------
/minGPT/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints/
2 | __pycache__/
3 |
--------------------------------------------------------------------------------
/minGPT/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT) Copyright (c) 2020 Andrej Karpathy
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 |
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 |
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 |
--------------------------------------------------------------------------------
/minGPT/README.md:
--------------------------------------------------------------------------------
1 |
2 | # minGPT
3 |
4 | 
5 |
6 | A PyTorch re-implementation of [GPT](https://github.com/openai/gpt-3) training. minGPT tries to be small, clean, interpretable and educational, as most of the currently available ones are a bit sprawling. GPT is not a complicated model and this implementation is appropriately about 300 lines of code, including boilerplate and a totally unnecessary custom causal self-attention module. Anyway, all that's going on is that a sequence of indices goes into a sequence of transformer blocks, and a probability distribution of the next index comes out. The rest of the complexity is just being clever with batching (both across examples and over sequence length) so that training is efficient.
7 |
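In code terms, the idea is roughly the following (a toy sketch, not the actual `mingpt/model.py`):

```python
import torch
import torch.nn as nn

class SketchGPT(nn.Module):
    """Toy decoder-only sketch: token indices in, next-index logits out."""
    def __init__(self, vocab_size, block_size, n_layer=2, n_head=2, n_embd=64):
        super().__init__()
        self.tok_emb = nn.Embedding(vocab_size, n_embd)
        self.pos_emb = nn.Parameter(torch.zeros(1, block_size, n_embd))
        layer = nn.TransformerEncoderLayer(n_embd, n_head, 4 * n_embd, batch_first=True)
        self.blocks = nn.TransformerEncoder(layer, num_layers=n_layer)
        self.head = nn.Linear(n_embd, vocab_size)

    def forward(self, idx):  # idx: (batch, time) integer tokens
        b, t = idx.shape
        x = self.tok_emb(idx) + self.pos_emb[:, :t, :]
        causal = torch.triu(torch.full((t, t), float('-inf')), diagonal=1)
        x = self.blocks(x, mask=causal)  # masked so position i only attends to positions <= i
        return self.head(x)  # (batch, time, vocab); softmax over the last dim is the next-index distribution
```
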
8 | The core minGPT "library" (hah) is two files: `mingpt/model.py` contains the actual Transformer model definition and `mingpt/trainer.py` is (GPT-independent) PyTorch boilerplate that trains the model. The attached Jupyter notebooks then show how the "library" (hah) can be used to train sequence models:
9 |
10 | - `play_math.ipynb` trains a GPT focused on addition (inspired by the addition section in the GPT-3 paper)
11 | - `play_char.ipynb` trains a GPT to be a character-level language model on arbitrary text, similar to my older char-rnn but with a transformer instead of an RNN
12 | - `play_image.ipynb` trains a GPT on (small) images (CIFAR-10), showing that we can model images just as text, as both can be reduced to just a sequence of integers
13 | - `play_words.ipynb` a BPE version that does not yet exist
14 |
15 | With a bpe encoder, distributed training and maybe fp16 this implementation may be able to reproduce GPT-1/GPT-2 results, though I haven't tried $$$. GPT-3 is likely out of reach as my understanding is that it does not fit into GPU memory and requires a more careful model-parallel treatment.
16 |
17 | ### Example usage
18 |
19 | This code is simple enough to just hack inline, not "used", but current API looks something like:
20 |
21 | ```python
22 |
23 | # you're on your own to define a class that returns individual examples as PyTorch LongTensors
24 | from torch.utils.data import Dataset
25 | train_dataset = MyDataset(...)
26 | test_dataset = MyDataset(...)
27 |
28 | # construct a GPT model
29 | from mingpt.model import GPT, GPTConfig
30 | mconf = GPTConfig(vocab_size, block_size, n_layer=12, n_head=12, n_embd=768) # a GPT-1
31 | model = GPT(mconf)
32 |
33 | # construct a trainer
34 | from mingpt.trainer import Trainer, TrainerConfig
35 | tconf = TrainerConfig(max_epochs=10, batch_size=256)
36 | trainer = Trainer(model, train_dataset, test_dataset, tconf)
37 | trainer.train()
38 | # (... enjoy the show for a while... )
39 |
40 | # sample from the model (the [None, ...] and [0] are to push/pop a needed dummy batch dimension)
41 | from mingpt.utils import sample
42 | x = torch.tensor([1, 2, 3], dtype=torch.long)[None, ...] # context conditioning
43 | y = sample(model, x, steps=30, temperature=1.0, sample=True, top_k=5)[0]
44 | print(y) # our model filled in the integer sequence with 30 additional likely integers
45 | ```
46 |
47 | ### References
48 |
49 | Code:
50 |
51 | - [openai/gpt-2](https://github.com/openai/gpt-2) has the model but not the training code, and in TensorFlow
52 | - [openai/image-gpt](https://github.com/openai/image-gpt) has some more modern gpt-3 like modification in its code, good reference as well
53 | - huggingface/transformers has a [language-modeling example](https://github.com/huggingface/transformers/tree/master/examples/language-modeling). It is full-featured but as a result also somewhat challenging to trace. E.g. some large functions have as much as 90% of their code behind various branching statements that goes unused in the default setting of simple language modeling.
54 |
55 | Papers + some implementation notes:
56 |
57 | #### Improving Language Understanding by Generative Pre-Training (GPT-1)
58 |
59 | - Our model largely follows the original transformer work
60 | - We trained a 12-layer decoder-only transformer with masked self-attention heads (768 dimensional states and 12 attention heads). For the position-wise feed-forward networks, we used 3072 dimensional inner states.
61 | - Adam max learning rate of 2.5e-4. (later GPT-3 for this model size uses 6e-4)
62 | - LR decay: increased linearly from zero over the first 2000 updates and annealed to 0 using a cosine schedule
63 | - We train for 100 epochs on minibatches of 64 randomly sampled, contiguous sequences of 512 tokens.
64 | - Since layernorm is used extensively throughout the model, a simple weight initialization of N(0, 0.02) was sufficient
65 | - bytepair encoding (BPE) vocabulary with 40,000 merges
66 | - residual, embedding, and attention dropouts with a rate of 0.1 for regularization.
67 | - modified version of L2 regularization proposed in (37), with w = 0.01 on all non bias or gain weights
68 | - For the activation function, we used the Gaussian Error Linear Unit (GELU).
69 | - We used learned position embeddings instead of the sinusoidal version proposed in the original work
70 | - For finetuning: We add dropout to the classifier with a rate of 0.1. learning rate of 6.25e-5 and a batchsize of 32. 3 epochs. We use a linear learning rate decay schedule with warmup over 0.2% of training. λ was set to 0.5.
72 | - GPT-1 model is 12 layers and d_model 768, ~117M params (a rough minGPT config for these numbers is sketched after this list)
72 |
73 | #### Language Models are Unsupervised Multitask Learners (GPT-2)
74 |
75 | - LayerNorm was moved to the input of each sub-block, similar to a pre-activation residual network
76 | - an additional layer normalization was added after the final self-attention block.
77 | - modified initialization which accounts for the accumulation on the residual path with model depth is used. We scale the weights of residual layers at initialization by a factor of 1/√N where N is the number of residual layers. (weird because in their released code i can only find a simple use of the old 0.02... in their release of image-gpt I found it used for c_proj, and even then only for attn, not for mlp. huh. https://github.com/openai/image-gpt/blob/master/src/model.py). A small sketch of this scaling appears after this list.
78 | - the vocabulary is expanded to 50,257
79 | - increase the context size from 512 to 1024 tokens
80 | - larger batchsize of 512 is used
81 | - GPT-2 used 48 layers and d_model 1600 (vs. original 12 layers and d_model 768). ~1.542B params
82 |
83 | #### Language Models are Few-Shot Learners (GPT-3)
84 |
85 | - GPT-3: 96 layers, 96 heads, with d_model of 12,288 (175B parameters).
86 | - GPT-1-like: 12 layers, 12 heads, d_model 768 (125M)
87 | - We use the same model and architecture as GPT-2, including the modified initialization, pre-normalization, and reversible tokenization described therein
88 | - we use alternating dense and locally banded sparse attention patterns in the layers of the transformer, similar to the Sparse Transformer
89 | - we always have the feedforward layer four times the size of the bottleneck layer, dff = 4 ∗ dmodel
90 | - all models use a context window of nctx = 2048 tokens.
91 | - Adam with β1 = 0.9, β2 = 0.95, and eps = 1e-8
92 | - All models use weight decay of 0.1 to provide a small amount of regularization. (NOTE: GPT-1 used 0.01 I believe, see above)
93 | - clip the global norm of the gradient at 1.0
94 | - Linear LR warmup over the first 375 million tokens. Then use cosine decay for learning rate down to 10% of its value, over 260 billion tokens. These are exactly the `warmup_tokens`/`final_tokens` defaults in `mingpt/trainer.py`; see the sketch after this list.
95 | - gradually increase the batch size linearly from a small value (32k tokens) to the full value over the first 4-12 billion tokens of training, depending on the model size.
96 | - full 2048-sized time context window is always used, with a special END OF DOCUMENT token delimiter
97 |
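The warmup and decay numbers above are exactly the `warmup_tokens = 375e6` and `final_tokens = 260e9` defaults in `mingpt/trainer.py`; a minimal sketch of switching the schedule on (model and datasets defined as in the example usage, and the token counts almost certainly need rescaling for small runs):

```python
from mingpt.trainer import Trainer, TrainerConfig

tconf = TrainerConfig(
    learning_rate=6e-4, betas=(0.9, 0.95),       # GPT-3 values for a GPT-1-sized model (see notes above)
    weight_decay=0.1, grad_norm_clip=1.0,
    lr_decay=True, warmup_tokens=375e6, final_tokens=260e9,
)
trainer = Trainer(model, train_dataset, test_dataset, tconf)
trainer.train()
```
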
98 | #### Generative Pretraining from Pixels (Image GPT)
99 |
100 | - When working with images, we pick the identity permutation πi = i for 1 ≤ i ≤ n, also known as raster order.
101 | - we create our own 9-bit color palette by clustering (R, G, B) pixel values using k-means with k = 512.
102 | - Our largest model, iGPT-XL, contains L = 60 layers and uses an embedding size of d = 3072 for a total of 6.8B parameters.
103 | - Our next largest model, iGPT-L, is essentially identical to GPT-2 with L = 48 layers, but contains a slightly smaller embedding size of d = 1536 (vs 1600) for a total of 1.4B parameters.
104 | - We use the same model code as GPT-2, except that we initialize weights in the layerdependent fashion as in Sparse Transformer (Child et al., 2019) and zero-initialize all projections producing logits.
105 | - We also train iGPT-M, a 455M parameter model with L = 36 and d = 1024
106 | - iGPT-S, a 76M parameter model with L = 24 and d = 512 (okay, and how many heads? looks like the Github code claims 8)
107 | - When pre-training iGPT-XL, we use a batch size of 64 and train for 2M iterations, and for all other models we use a batch size of 128 and train for 1M iterations.
108 | - Adam with β1 = 0.9 and β2 = 0.95
109 | - The learning rate is warmed up for one epoch, and then decays to 0
110 | - We did not use weight decay because applying a small weight decay of 0.01 did not change representation quality.
111 | - iGPT-S lr 0.003
112 | - No dropout is used.
113 |
114 | ### License
115 |
116 | MIT
117 |
--------------------------------------------------------------------------------
/minGPT/mingpt.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/minGPT/mingpt.jpg
--------------------------------------------------------------------------------
/minGPT/mingpt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/minGPT/mingpt/__init__.py
--------------------------------------------------------------------------------
/minGPT/mingpt/trainer.py:
--------------------------------------------------------------------------------
1 | """
2 | Simple training loop; Boilerplate that could apply to any arbitrary neural network,
3 | so nothing in this file really has anything to do with GPT specifically.
4 | """
5 |
6 | import math
7 | import logging
8 |
9 | from tqdm import tqdm
10 | import numpy as np
11 |
12 | import torch
13 | import torch.optim as optim
14 | from torch.optim.lr_scheduler import LambdaLR
15 | from torch.utils.data.dataloader import DataLoader
16 |
17 | logger = logging.getLogger(__name__)
18 |
19 | class TrainerConfig:
20 | # optimization parameters
21 | max_epochs = 10
22 | batch_size = 64
23 | learning_rate = 3e-4
24 | betas = (0.9, 0.95)
25 | grad_norm_clip = 1.0
26 | weight_decay = 0.1 # only applied on matmul weights
27 | # learning rate decay params: linear warmup followed by cosine decay to 10% of original
28 | lr_decay = False
29 | warmup_tokens = 375e6 # these two numbers come from the GPT-3 paper, but may not be good defaults elsewhere
30 | final_tokens = 260e9 # (at what point we reach 10% of original LR)
31 | # checkpoint settings
32 | ckpt_path = None
33 | num_workers = 0 # for DataLoader
34 |
35 | def __init__(self, **kwargs):
36 | for k,v in kwargs.items():
37 | setattr(self, k, v)
38 |
39 | class Trainer:
40 |
41 | def __init__(self, model, train_dataset, test_dataset, config):
42 | self.model = model
43 | self.train_dataset = train_dataset
44 | self.test_dataset = test_dataset
45 | self.config = config
46 |
47 | # take over whatever gpus are on the system
48 | self.device = 'cpu'
49 | if torch.cuda.is_available():
50 | self.device = torch.cuda.current_device()
51 | self.model = torch.nn.DataParallel(self.model).to(self.device)
52 |
53 | def save_checkpoint(self):
54 | # DataParallel wrappers keep raw model object in .module attribute
55 | raw_model = self.model.module if hasattr(self.model, "module") else self.model
56 | logger.info("saving %s", self.config.ckpt_path)
57 | torch.save(raw_model.state_dict(), self.config.ckpt_path)
58 |
59 | def train(self):
60 | model, config = self.model, self.config
61 | raw_model = model.module if hasattr(self.model, "module") else model
62 | optimizer = raw_model.configure_optimizers(config)
63 |
64 | def run_epoch(split):
65 | is_train = split == 'train'
66 | model.train(is_train)
67 | data = self.train_dataset if is_train else self.test_dataset
68 | loader = DataLoader(data, shuffle=True, pin_memory=True,
69 | batch_size=config.batch_size,
70 | num_workers=config.num_workers)
71 |
72 | losses = []
73 | pbar = tqdm(enumerate(loader), total=len(loader)) if is_train else enumerate(loader)
74 | for it, (x, y) in pbar:
75 |
76 | # place data on the correct device
77 | x = x.to(self.device)
78 | y = y.to(self.device)
79 |
80 | # forward the model
81 | with torch.set_grad_enabled(is_train):
82 | logits, loss = model(x, y)
83 | loss = loss.mean() # collapse all losses if they are scattered on multiple gpus
84 | losses.append(loss.item())
85 |
86 | if is_train:
87 |
88 | # backprop and update the parameters
89 | model.zero_grad()
90 | loss.backward()
91 | torch.nn.utils.clip_grad_norm_(model.parameters(), config.grad_norm_clip)
92 | optimizer.step()
93 |
94 | # decay the learning rate based on our progress
95 | if config.lr_decay:
96 | self.tokens += (y >= 0).sum() # number of tokens processed this step (i.e. label is not -100)
97 | if self.tokens < config.warmup_tokens:
98 | # linear warmup
99 | lr_mult = float(self.tokens) / float(max(1, config.warmup_tokens))
100 | else:
101 | # cosine learning rate decay
102 | progress = float(self.tokens - config.warmup_tokens) / float(max(1, config.final_tokens - config.warmup_tokens))
103 | lr_mult = max(0.1, 0.5 * (1.0 + math.cos(math.pi * progress)))
104 | lr = config.learning_rate * lr_mult
105 | for param_group in optimizer.param_groups:
106 | param_group['lr'] = lr
107 | else:
108 | lr = config.learning_rate
109 |
110 | # report progress
111 | pbar.set_description(f"epoch {epoch+1} iter {it}: train loss {loss.item():.5f}. lr {lr:e}")
112 |
113 | if not is_train:
114 | test_loss = float(np.mean(losses))
115 | logger.info("test loss: %f", test_loss)
116 | return test_loss
117 |
118 | best_loss = float('inf')
119 | self.tokens = 0 # counter used for learning rate decay
120 | for epoch in range(config.max_epochs):
121 |
122 | run_epoch('train')
123 | if self.test_dataset is not None:
124 | test_loss = run_epoch('test')
125 |
126 | # supports early stopping based on the test loss, or just save always if no test set is provided
127 | good_model = self.test_dataset is None or test_loss < best_loss
128 | if self.config.ckpt_path is not None and good_model:
129 | best_loss = test_loss
130 | self.save_checkpoint()
131 |
--------------------------------------------------------------------------------
/minGPT/mingpt/utils.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 | from torch.nn import functional as F
6 |
7 | def set_seed(seed):
8 | random.seed(seed)
9 | np.random.seed(seed)
10 | torch.manual_seed(seed)
11 | torch.cuda.manual_seed_all(seed)
12 |
13 | def top_k_logits(logits, k):
14 | v, ix = torch.topk(logits, k)
15 | out = logits.clone()
16 | out[out < v[:, [-1]]] = -float('Inf')
17 | return out
18 |
19 | @torch.no_grad()
20 | def sample(model, x, steps, temperature=1.0, sample=False, top_k=None):
21 | """
22 | take a conditioning sequence of indices in x (of shape (b,t)) and predict the next token in
23 | the sequence, feeding the predictions back into the model each time. Clearly the sampling
24 | has quadratic complexity unlike an RNN that is only linear, and has a finite context window
25 | of block_size, unlike an RNN that has an infinite context window.
26 | """
27 | block_size = model.get_block_size()
28 | model.eval()
29 | for k in range(steps):
30 | x_cond = x if x.size(1) <= block_size else x[:, -block_size:] # crop context if needed
31 | logits, _ = model(x_cond)
32 | # pluck the logits at the final step and scale by temperature
33 | logits = logits[:, -1, :] / temperature
34 | # optionally crop probabilities to only the top k options
35 | if top_k is not None:
36 | logits = top_k_logits(logits, top_k)
37 | # apply softmax to convert to probabilities
38 | probs = F.softmax(logits, dim=-1)
39 | # sample from the distribution or take the most likely
40 | if sample:
41 | ix = torch.multinomial(probs, num_samples=1)
42 | else:
43 | _, ix = torch.topk(probs, k=1, dim=-1)
44 | # append to the sequence and continue
45 | x = torch.cat((x, ix), dim=1)
46 |
47 | return x
48 |
--------------------------------------------------------------------------------
/sound/preprocess/README.md:
--------------------------------------------------------------------------------
1 | # Preprocess
2 | ```
3 | python3 to_16000_wav.py INPUT_DIR OUTPUT_DIR
4 | python3 trim.py INPUT_DIR OUTPUT_DIR
5 | ```
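
If the source audio is mp3 rather than wav, `mp3_to_wav.py` in this directory follows the same calling convention and resamples to 16 kHz as it converts:
```
python3 mp3_to_wav.py INPUT_DIR OUTPUT_DIR
```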
6 |
--------------------------------------------------------------------------------
/sound/preprocess/mp3_to_wav.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | import os
5 | import glob
6 | import sys
7 |
8 | def main():
9 | if len(sys.argv) < 3:
10 | print('Usage: python3 mp3_to_wav.py INPUT_DIR OUTPUT_DIR')
11 | return
12 |
13 | INPUT_DIR = sys.argv[1]
14 | OUTPUT_DIR = sys.argv[2]
15 | if not os.path.exists(OUTPUT_DIR):
16 | os.makedirs(OUTPUT_DIR)
17 |
18 | for mp3_path in glob.glob(os.path.join(INPUT_DIR, '*.mp3')):
19 | name = os.path.split(mp3_path)[1][:-len('.mp3')]
20 | output_path = os.path.join(OUTPUT_DIR, name + '.wav')
21 | os.system('ffmpeg -i "{}" -ar 16000 "{}"'.format(mp3_path, output_path))
22 | print(mp3_path)
23 | print(output_path)
24 |
25 | if __name__ == '__main__':
26 | main()
27 |
--------------------------------------------------------------------------------
/sound/preprocess/to_16000_wav.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | import os
5 | import glob
6 | import sys
7 |
8 | def main():
9 | if len(sys.argv) < 3:
10 |         print('Usage: python3 to_16000_wav.py INPUT_DIR OUTPUT_DIR')
11 | return
12 |
13 | INPUT_DIR = sys.argv[1]
14 | OUTPUT_DIR = sys.argv[2]
15 | if not os.path.exists(OUTPUT_DIR):
16 | os.makedirs(OUTPUT_DIR)
17 |
18 | for wav_path in glob.glob(os.path.join(INPUT_DIR, '*.wav')):
19 | name = os.path.split(wav_path)[1][:-len('.wav')]
20 | output_path = os.path.join(OUTPUT_DIR, name + '.wav')
21 | os.system('ffmpeg -i "{}" -ar 16000 "{}"'.format(wav_path, output_path))
22 | print(wav_path)
23 | print(output_path)
24 |
25 | if __name__ == '__main__':
26 | main()
27 |
--------------------------------------------------------------------------------
/sound/preprocess/trim.py:
--------------------------------------------------------------------------------
1 | """
2 | Convert to .wav
3 | ffmpeg -i input.m4a output.wav
4 |
5 | Split .wav files with ffmpeg:
6 | NAME=name.wav
7 | ffmpeg -i $NAME.wav -f segment -segment_time 2 -c copy one_second/$NAME%03d.wav
8 |
9 | python3 to_16000_wav.py INPUT_DIR OUTPUT_DIR
10 | python3 trim.py INPUT_DIR OUTPUT_DIR
11 |
12 | https://petewarden.com/2017/07/17/a-quick-hack-to-align-single-word-audio-recordings/
13 |
14 | NOTE: Run make from the extract_loudest_section repo before running this script
15 | """
16 | import glob
17 | import os
18 | import sys
19 |
20 | def main():
21 | if len(sys.argv) < 3:
22 | print('Usage: python3 trim.py INPUT_DIR OUTPUT_DIR')
23 | return
24 |
25 | if not os.path.exists(sys.argv[2]):
26 | os.makedirs(sys.argv[2])
27 |
28 | file_names = glob.glob(os.path.join(sys.argv[1], '*.wav'))
29 | for filename in file_names:
30 | print(filename)
31 | os.system('/tmp/extract_loudest_section/gen/bin/extract_loudest_section "{}" "{}"'.format(filename, sys.argv[2]))
32 |
33 | if __name__ == '__main__':
34 | main()
35 |
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/.DS_Store
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
25 | ## Community Guidelines
26 |
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google.com/conduct/).
29 |
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/Icon:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/Icon
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/audio_recorder.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Interface to asynchronously capture continuous audio from PyAudio.
16 |
17 |
18 | This module requires pyaudio. See here for installation instructions:
19 | http://people.csail.mit.edu/hubert/pyaudio/
20 |
21 | This module provides one class, AudioRecorder, which buffers chunks of audio
22 | from PyAudio.
23 | """
24 |
25 | from __future__ import absolute_import
26 | from __future__ import division
27 | from __future__ import print_function
28 |
29 | import logging
30 |
31 | import math
32 | import time
33 |
34 | import numpy as np
35 | import pyaudio
36 | import queue
37 |
38 | logger = logging.getLogger(__name__)
39 |
40 |
41 | class TimeoutError(Exception):
42 | """A timeout while waiting for pyaudio to buffer samples."""
43 | pass
44 |
45 |
46 | class AudioRecorder(object):
47 | """Asynchronously record and buffer audio using pyaudio.
48 |
49 | This class wraps the pyaudio interface. It contains a queue.Queue object to
50 | hold chunks of raw audio, and a callback function _enqueue_audio() which
51 | places raw audio into this queue. This allows the pyaudio.Stream object to
52 | record asynchronously at low latency.
53 |
54 | The class acts as a context manager. When entering the context it creates a
55 | pyaudio.Stream object and starts recording; it stops recording on exit. The
56 | Stream saves all of its audio to the Queue as two-tuples of
57 | (timestamp, raw_audio). The raw_audio is available from the queue as a numpy
58 | array using the get_audio() function.
59 |
60 | This class uses the term "frame" in the same sense that PortAudio does, so
61 | "frame" means something different here than elsewhere in the daredevil stack.
62 | A frame in PortAudio is one audio sample across all channels, so one frame of
63 | 16-bit stereo audio is four bytes of data as two 16-bit integers.
64 | """
65 | pyaudio_format = pyaudio.paInt16
66 | numpy_format = np.int16
67 | num_channels = 1
68 |
69 | # How many frames of audio PyAudio will fetch at once.
70 | # Higher numbers will increase the latency.
71 | frames_per_chunk = 2**9
72 |
73 | # Limit queue to this number of audio chunks.
74 | max_queue_chunks = 1200
75 |
76 | # Timeout if we can't get a chunk from the queue for timeout_factor times the
77 | # chunk duration.
78 | timeout_factor = 8
79 |
80 | def __init__(self, raw_audio_sample_rate_hz=48000,
81 | downsample_factor=3,
82 | device_index=None):
83 | self._downsample_factor = downsample_factor
84 | self._raw_audio_sample_rate_hz = raw_audio_sample_rate_hz
85 | self.audio_sample_rate_hz = self._raw_audio_sample_rate_hz // self._downsample_factor
86 | self._raw_audio_queue = queue.Queue(self.max_queue_chunks)
87 | self._audio = pyaudio.PyAudio()
88 | self._print_input_devices()
89 | self._device_index = device_index
90 |
91 | def __enter__(self):
92 | if self._device_index is None:
93 | self._device_index = self._audio.get_default_input_device_info()["index"]
94 | kwargs = {
95 | "input_device_index": self._device_index
96 | }
97 | device_info = self._audio.get_device_info_by_host_api_device_index(
98 | 0, self._device_index)
99 | if device_info.get("maxInputChannels") <= 0:
100 | raise ValueError("Audio device has insufficient input channels.")
101 | print("Using audio device '%s' for index %d" % (
102 | device_info["name"], device_info["index"]))
103 | self._stream = self._audio.open(
104 | format=self.pyaudio_format,
105 | channels=self.num_channels,
106 | rate=self._raw_audio_sample_rate_hz,
107 | input=True,
108 | output=False,
109 | frames_per_buffer=self.frames_per_chunk,
110 | start=True,
111 | stream_callback=self._enqueue_raw_audio,
112 | **kwargs)
113 | logger.info("Started audio stream.")
114 | return self
115 |
116 | def __exit__(self, exception_type, exception_value, traceback):
117 | self._stream.stop_stream()
118 | self._stream.close()
119 | logger.info("Stopped and closed audio stream.")
120 |
121 | def __del__(self):
122 | self._audio.terminate()
123 | logger.info("Terminated PyAudio/PortAudio.")
124 |
125 | @property
126 | def is_active(self):
127 | return self._stream.is_active()
128 |
129 | @property
130 | def bytes_per_sample(self):
131 | return pyaudio.get_sample_size(self.pyaudio_format)
132 |
133 | @property
134 | def _chunk_duration_seconds(self):
135 | return self.frames_per_chunk / self._raw_audio_sample_rate_hz
136 |
137 | def _print_input_devices(self):
138 | info = self._audio.get_host_api_info_by_index(0)
139 | print("\nInput microphone devices:")
140 | for i in range(0, info.get("deviceCount")):
141 | device_info = self._audio.get_device_info_by_host_api_device_index(0, i)
142 | if device_info.get("maxInputChannels") <= 0: continue
143 | print(" ID: ", i, " - ", device_info.get("name"))
144 |
145 | def _enqueue_raw_audio(self, in_data, *_): # unused args to match the PyAudio callback signature
146 | try:
147 | self._raw_audio_queue.put((in_data, time.time()), block=False)
148 | return None, pyaudio.paContinue
149 | except queue.Full:
150 | error_message = "Raw audio buffer full."
151 | logger.critical(error_message)
152 | raise TimeoutError(error_message)
153 |
154 | def _get_chunk(self, timeout=None):
155 | raw_data, timestamp = self._raw_audio_queue.get(timeout=timeout)
156 | array_data = np.frombuffer(raw_data, dtype=self.numpy_format).reshape(
157 | -1, self.num_channels)
158 | return array_data, timestamp
159 |
160 | def get_audio_device_info(self):
161 | if self._device_index is None:
162 | return self._audio.get_default_input_device_info()
163 | else:
164 | return self._audio.get_device_info_by_index(self._device_index)
165 |
166 | def sample_duration_seconds(self, num_samples):
167 | return num_samples / self.audio_sample_rate_hz / self.num_channels
168 |
169 | def clear_queue(self):
170 | logger.debug("Purging %d chunks from queue.", self._raw_audio_queue.qsize())
171 | while not self._raw_audio_queue.empty():
172 | self._raw_audio_queue.get()
173 |
174 | def get_audio(self, num_audio_frames):
175 | """Grab at least num_audio_frames frames of audio.
176 |
177 | Record at least num_audio_frames of audio and transform it into a
178 | numpy array. The term "frame" is in the sense used by PortAudio; see the
179 | note in the class docstring for details.
180 |
181 | Audio returned will be the earliest audio in the queue; it could be from
182 | before this function was called.
183 |
184 | Args:
185 | num_audio_frames: minimum number of samples of audio to grab.
186 |
187 | Returns:
188 | A tuple of (audio, first_timestamp, last_timestamp).
189 | """
190 | num_audio_chunks = int(math.ceil(num_audio_frames *
191 | self._downsample_factor / self.frames_per_chunk))
192 | logger.debug("Capturing %d chunks to get at least %d frames.",
193 | num_audio_chunks, num_audio_frames)
194 | if num_audio_chunks < 1:
195 | num_audio_chunks = 1
196 | try:
197 | timeout = self.timeout_factor * self._chunk_duration_seconds
198 | chunks, timestamps = zip(
199 | *[self._get_chunk(timeout=timeout) for _ in range(num_audio_chunks)])
200 | except queue.Empty:
201 | error_message = "Audio capture timed out after %.1f seconds." % timeout
202 | logger.critical(error_message)
203 | raise TimeoutError(error_message)
204 |
205 | assert len(chunks) == num_audio_chunks
206 | logger.debug("Got %d chunks. Chunk 0 has shape %s and dtype %s.",
207 | len(chunks), chunks[0].shape, chunks[0].dtype)
208 | if self._raw_audio_queue.qsize() > (0.8 * self.max_queue_chunks):
209 | logger.warning("%d chunks remain in the queue.",
210 | self._raw_audio_queue.qsize())
211 | else:
212 | logger.debug("%d chunks remain in the queue.",
213 | self._raw_audio_queue.qsize())
214 |
215 | audio = np.concatenate(chunks)
216 | if self._downsample_factor != 1:
217 | audio = audio[::self._downsample_factor]
218 | logging.debug("Audio array has shape %s and dtype %s.", audio.shape,
219 | audio.dtype)
220 | return audio * 0.5, timestamps[0], timestamps[-1]
221 |
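The class docstring above describes AudioRecorder as a context manager. A minimal usage sketch follows; it assumes the module is importable as audio_recorder, that PyAudio and a working microphone are available, and the frame count of 16000 (roughly one second at the 16 kHz downsampled rate) is an arbitrary choice.

    # Minimal sketch: grab about one second of downsampled mono audio.
    from audio_recorder import AudioRecorder

    with AudioRecorder(raw_audio_sample_rate_hz=48000, downsample_factor=3) as recorder:
        # 16000 frames at 16 kHz is roughly one second of audio.
        audio, first_ts, last_ts = recorder.get_audio(16000)
        print(audio.shape, audio.dtype, last_ts - first_ts)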
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/config/Icon:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/config/Icon
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/config/commands_v2.txt:
--------------------------------------------------------------------------------
1 | volume_up,up,
2 | volume_down,down,
3 | next_song,shift+n,
4 | next_video,shift+n,
5 | next_game,shift+n,
6 | last_song,shift+p,
7 | last_video,shift+p,
8 | last_game,shift+p,
9 | random_song,r,
10 | random_video,r,
11 | pause_song, ,
12 | pause_video, ,
13 | pause_game, ,
14 | stop_song, ,
15 | stop_video, ,
16 | start_song, ,
17 | start_video, ,
18 | previous_song,shift+p,
19 | previous_video,shift+p,
20 | move_backwards,left,
21 | move_forwards,right,
22 | go_backwards,left,
23 | go_forwards,right,0.8
24 | position_zero,0,
25 | position_one,1,
26 | position_two,2,
27 | position_three,3,
28 | position_four,4,
29 | position_five,5,
30 | position_six,6,
31 | position_seven,7,
32 | position_eight,8,
33 | position_nine,9,
34 | mute,m,
35 |
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/config/commands_v2_snake.txt:
--------------------------------------------------------------------------------
1 | start_application,go,
2 | start_game,go,
3 | start_program,go,
4 | start_task,go,
5 | begin_window,go,
6 | begin_application,go,
7 | begin_game,go,
8 | begin_program,go,
9 | begin_task,go,
10 | launch_window,go,
11 | launch_application,go,
12 | launch_game,go,
13 | launch_program,go,
14 | launch_task,go,
15 | close_window,stop,
16 | close_application,stop,
17 | close_game,stop,
18 | close_program,stop,
19 | close_task,stop,
20 | stop_window,stop,
21 | stop_application,stop,
22 | stop_game,stop,
23 | stop_program,stop,
24 | stop_task,stop,
25 | exit_window,stop,
26 | exit_application,stop,
27 | exit_game,stop,
28 | exit_program,stop,
29 | exit_task,stop,
30 | kill_window,stop,
31 | kill_application,stop,
32 | kill_game,stop,
33 | kill_program,stop,
34 | kill_task,stop,
35 | kill_tab,stop,
36 | engage,go,
37 | switch_on,go,
38 | switch_off,stop,
39 | move_up,up,
40 | move_down,down,
41 | move_left,left,
42 | move_right,right,
43 | turn_up,up,
44 | turn_down,down,
45 | turn_left,left,
46 | turn_right,right,
47 | go_up,up,
48 | go_down,down,
49 | go_left,left,
50 | go_right,right,
51 |
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/config/labels_gc2.raw.txt:
--------------------------------------------------------------------------------
1 | what_can_i_say
2 | what_can_you_do
3 | yes
4 | no
5 | start_window
6 | start_application
7 | start_game
8 | start_program
9 | start_task
10 | start_tab
11 | begin_window
12 | begin_application
13 | begin_game
14 | begin_program
15 | begin_task
16 | begin_tab
17 | launch_window
18 | launch_application
19 | launch_game
20 | launch_program
21 | launch_task
22 | launch_tab
23 | open_window
24 | open_application
25 | open_game
26 | open_program
27 | open_task
28 | open_tab
29 | close_window
30 | close_application
31 | close_game
32 | close_program
33 | close_task
34 | close_tab
35 | stop_window
36 | stop_application
37 | stop_game
38 | stop_program
39 | stop_task
40 | stop_tab
41 | terminate_window
42 | terminate_application
43 | terminate_game
44 | terminate_program
45 | terminate_task
46 | terminate_tab
47 | exit_window
48 | exit_application
49 | exit_game
50 | exit_program
51 | exit_task
52 | exit_tab
53 | kill_window
54 | kill_application
55 | kill_game
56 | kill_program
57 | kill_task
58 | kill_tab
59 | engage
60 | target
61 | switch_on
62 | switch_off
63 | pick_up
64 | volume_up
65 | volume_down
66 | remove
67 | delete
68 | mute
69 | unmute
70 | silence
71 | reverse
72 | next_song
73 | next_video
74 | next_game
75 | last_song
76 | last_video
77 | last_game
78 | random_song
79 | random_video
80 | random_game
81 | pause_song
82 | pause_video
83 | pause_game
84 | stop_song
85 | stop_video
86 | start_song
87 | start_video
88 | previous_song
89 | previous_video
90 | insert
91 | select
92 | unselect
93 | move_up
94 | move_down
95 | move_left
96 | move_right
97 | move_backwards
98 | move_forwards
99 | turn_up
100 | turn_down
101 | turn_left
102 | turn_right
103 | turn_backwards
104 | turn_forwards
105 | go_up
106 | go_down
107 | go_left
108 | go_right
109 | go_backwards
110 | go_forwards
111 | channel_zero
112 | position_zero
113 | one_o_clock
114 | channel_one
115 | position_one
116 | two_o_clock
117 | channel_two
118 | position_two
119 | three_o_clock
120 | channel_three
121 | position_three
122 | four_o_clock
123 | channel_four
124 | position_four
125 | five_o_clock
126 | channel_five
127 | position_five
128 | six_o_clock
129 | channel_six
130 | position_six
131 | seven_o_clock
132 | channel_seven
133 | position_seven
134 | eight_o_clock
135 | channel_eight
136 | position_eight
137 | nine_o_clock
138 | channel_nine
139 | position_nine
140 | ten_o_clock
141 | channel_ten
142 | position_ten
143 | eleven_o_clock
144 | channel_eleven
145 | position_eleven
146 | twelve_o_clock
147 | channel_twelve
148 | position_twelve
149 |
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/config/labels_simple_audio.txt:
--------------------------------------------------------------------------------
1 | cough
2 | unknown
3 |
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/features.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The TensorFlow Authors All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Feature computation for YAMNet."""
17 |
18 | import numpy as np
19 | import tensorflow as tf
20 |
21 |
22 | def waveform_to_log_mel_spectrogram_patches(waveform, params):
23 | """Compute log mel spectrogram patches of a 1-D waveform."""
24 | with tf.name_scope('log_mel_features'):
25 | # waveform has shape [<# samples>]
26 |
27 | # Convert waveform into spectrogram using a Short-Time Fourier Transform.
28 | # Note that tf.signal.stft() uses a periodic Hann window by default.
29 | window_length_samples = int(
30 | round(params.sample_rate * params.stft_window_seconds))
31 | hop_length_samples = int(
32 | round(params.sample_rate * params.stft_hop_seconds))
33 | fft_length = 2 ** int(np.ceil(np.log(window_length_samples) / np.log(2.0)))
34 | num_spectrogram_bins = fft_length // 2 + 1
35 | if params.tflite_compatible:
36 | magnitude_spectrogram = _tflite_stft_magnitude(
37 | signal=waveform,
38 | frame_length=window_length_samples,
39 | frame_step=hop_length_samples,
40 | fft_length=fft_length)
41 | else:
42 | magnitude_spectrogram = tf.abs(tf.signal.stft(
43 | signals=waveform,
44 | frame_length=window_length_samples,
45 | frame_step=hop_length_samples,
46 | fft_length=fft_length))
47 | # magnitude_spectrogram has shape [<# STFT frames>, num_spectrogram_bins]
48 |
49 | # Convert spectrogram into log mel spectrogram.
50 | linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
51 | num_mel_bins=params.mel_bands,
52 | num_spectrogram_bins=num_spectrogram_bins,
53 | sample_rate=params.sample_rate,
54 | lower_edge_hertz=params.mel_min_hz,
55 | upper_edge_hertz=params.mel_max_hz)
56 | mel_spectrogram = tf.matmul(
57 | magnitude_spectrogram, linear_to_mel_weight_matrix)
58 | log_mel_spectrogram = tf.math.log(mel_spectrogram + params.log_offset)
59 | # log_mel_spectrogram has shape [<# STFT frames>, params.mel_bands]
60 |
61 | # Frame spectrogram (shape [<# STFT frames>, params.mel_bands]) into patches
62 | # (the input examples). Only complete frames are emitted, so if there is
63 | # less than params.patch_window_seconds of waveform then nothing is emitted
64 | # (to avoid this, zero-pad before processing).
65 | spectrogram_hop_length_samples = int(
66 | round(params.sample_rate * params.stft_hop_seconds))
67 | spectrogram_sample_rate = params.sample_rate / spectrogram_hop_length_samples
68 | patch_window_length_samples = int(
69 | round(spectrogram_sample_rate * params.patch_window_seconds))
70 | patch_hop_length_samples = int(
71 | round(spectrogram_sample_rate * params.patch_hop_seconds))
72 | features = tf.signal.frame(
73 | signal=log_mel_spectrogram,
74 | frame_length=patch_window_length_samples,
75 | frame_step=patch_hop_length_samples,
76 | axis=0)
77 | # features has shape [<# patches>, <# STFT frames in a patch>, params.mel_bands]
78 |
79 | return log_mel_spectrogram, features
80 |
81 |
82 | def pad_waveform(waveform, params):
83 | """Pads waveform with silence if needed to get an integral number of patches."""
84 | # In order to produce one patch of log mel spectrogram input to YAMNet, we
85 | # need at least one patch window length of waveform plus enough extra samples
86 | # to complete the final STFT analysis window.
87 | min_waveform_seconds = (
88 | params.patch_window_seconds +
89 | params.stft_window_seconds - params.stft_hop_seconds)
90 | min_num_samples = tf.cast(min_waveform_seconds * params.sample_rate, tf.int32)
91 | num_samples = tf.shape(waveform)[0]
92 | num_padding_samples = tf.maximum(0, min_num_samples - num_samples)
93 |
94 | # In addition, there might be enough waveform for one or more additional
95 | # patches formed by hopping forward. If there are more samples than one patch,
96 | # round up to an integral number of hops.
97 | num_samples = tf.maximum(num_samples, min_num_samples)
98 | num_samples_after_first_patch = num_samples - min_num_samples
99 | hop_samples = tf.cast(params.patch_hop_seconds * params.sample_rate, tf.int32)
100 | num_hops_after_first_patch = tf.cast(tf.math.ceil(
101 | tf.cast(num_samples_after_first_patch, tf.float32) /
102 | tf.cast(hop_samples, tf.float32)), tf.int32)
103 | num_padding_samples += (
104 | hop_samples * num_hops_after_first_patch - num_samples_after_first_patch)
105 |
106 | padded_waveform = tf.pad(waveform, [[0, num_padding_samples]],
107 | mode='CONSTANT', constant_values=0.0)
108 | return padded_waveform
109 |
110 |
111 | def _tflite_stft_magnitude(signal, frame_length, frame_step, fft_length):
112 | """TF-Lite-compatible version of tf.abs(tf.signal.stft())."""
113 | def _hann_window():
114 | return tf.reshape(
115 | tf.constant(
116 | (0.5 - 0.5 * np.cos(2 * np.pi * np.arange(0, 1.0, 1.0 / frame_length))
117 | ).astype(np.float32),
118 | name='hann_window'), [1, frame_length])
119 |
120 | def _dft_matrix(dft_length):
121 | """Calculate the full DFT matrix in NumPy."""
122 | # See https://en.wikipedia.org/wiki/DFT_matrix
123 | omega = (0 + 1j) * 2.0 * np.pi / float(dft_length)
124 | # Don't include 1/sqrt(N) scaling, tf.signal.rfft doesn't apply it.
125 | return np.exp(omega * np.outer(np.arange(dft_length), np.arange(dft_length)))
126 |
127 | def _rdft(framed_signal, fft_length):
128 | """Implement real-input Discrete Fourier Transform by matmul."""
129 | # We are right-multiplying by the DFT matrix, and we are keeping only the
130 | # first half ("positive frequencies"). So discard the second half of rows,
131 | # but transpose the array for right-multiplication. The DFT matrix is
132 | # symmetric, so we could have done it more directly, but this reflects our
133 | # intention better.
134 | complex_dft_matrix_kept_values = _dft_matrix(fft_length)[:(
135 | fft_length // 2 + 1), :].transpose()
136 | real_dft_matrix = tf.constant(
137 | np.real(complex_dft_matrix_kept_values).astype(np.float32),
138 | name='real_dft_matrix')
139 | imag_dft_matrix = tf.constant(
140 | np.imag(complex_dft_matrix_kept_values).astype(np.float32),
141 | name='imaginary_dft_matrix')
142 | signal_frame_length = tf.shape(framed_signal)[-1]
143 | half_pad = (fft_length - signal_frame_length) // 2
144 | padded_frames = tf.pad(
145 | framed_signal,
146 | [
147 | # Don't add any padding in the frame dimension.
148 | [0, 0],
149 | # Pad before and after the signal within each frame.
150 | [half_pad, fft_length - signal_frame_length - half_pad]
151 | ],
152 | mode='CONSTANT',
153 | constant_values=0.0)
154 | real_stft = tf.matmul(padded_frames, real_dft_matrix)
155 | imag_stft = tf.matmul(padded_frames, imag_dft_matrix)
156 | return real_stft, imag_stft
157 |
158 | def _complex_abs(real, imag):
159 | return tf.sqrt(tf.add(real * real, imag * imag))
160 |
161 | framed_signal = tf.signal.frame(signal, frame_length, frame_step)
162 | windowed_signal = framed_signal * _hann_window()
163 | real_stft, imag_stft = _rdft(windowed_signal, fft_length)
164 | stft_magnitude = _complex_abs(real_stft, imag_stft)
165 | return stft_magnitude
166 |
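waveform_to_log_mel_spectrogram_patches operates on a 1-D float32 waveform. A hedged shape-check sketch is below; it assumes TensorFlow 2 eager execution and uses the default Params from params.py (listed further down), with a two-second silent waveform chosen purely for illustration.

    # Shape check for the feature pipeline above; values are only illustrative.
    import tensorflow as tf
    import features as features_lib
    from params import Params

    params = Params()
    # Two seconds of silence at 16 kHz.
    waveform = tf.zeros([2 * int(params.sample_rate)], dtype=tf.float32)
    padded = features_lib.pad_waveform(waveform, params)
    log_mel, patches = features_lib.waveform_to_log_mel_spectrogram_patches(padded, params)
    # With the default hyperparameters, patches should have shape [num_patches, 96, 64].
    print(log_mel.shape, patches.shape)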
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/hearing_snake_metadata.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": 1.0,
3 | "best_scores": [
4 | 169,
5 | 34,
6 | 21,
7 | 0,
8 | 0,
9 | 0,
10 | 0,
11 | 0,
12 | 0,
13 | 0
14 | ]
15 | }
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/install_requirements.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2019 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # https://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | sudo apt-get install -y python3 python3-pyaudio python3-numpy python3-scipy
16 |
17 | sudo apt-get install -y python3-dev libsdl-image1.2-dev libsdl-mixer1.2-dev libsdl-ttf2.0-dev libsdl1.2-dev libsmpeg-dev python-numpy subversion libportmidi-dev ffmpeg libswscale-dev libavformat-dev libavcodec-dev libfreetype6-dev
18 |
19 | sudo apt-get install -y python3-pyaudio
20 |
21 | pip3 install pygame
22 |
23 | pip3 install PyUserInput
24 |
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/media/Icon:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/media/Icon
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/media/startscreen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/media/startscreen.png
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/models/Icon:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/models/Icon
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/models/model-backup1.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/models/model-backup1.tflite
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/models/model.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/models/model.tflite
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/models/model_quantized_edgetpu.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/models/model_quantized_edgetpu.tflite
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/models/voice_commands_v0.7_edgetpu.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/models/voice_commands_v0.7_edgetpu.tflite
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/models/voice_commands_v0.8_edgetpu.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/models/voice_commands_v0.8_edgetpu.tflite
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/params.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The TensorFlow Authors All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Hyperparameters for YAMNet."""
17 |
18 | from dataclasses import dataclass
19 |
20 | # The following hyperparameters (except patch_hop_seconds) were used to train YAMNet,
21 | # so expect some variability in performance if you change these. The patch hop can
22 | # be changed arbitrarily: a smaller hop should give you more patches from the same
23 | # clip and possibly better performance at a larger computational cost.
24 | @dataclass(frozen=True) # Instances of this class are immutable.
25 | class Params:
26 | sample_rate: float = 16000.0
27 | stft_window_seconds: float = 0.025
28 | stft_hop_seconds: float = 0.010
29 | mel_bands: int = 64
30 | mel_min_hz: float = 125.0
31 | mel_max_hz: float = 7500.0
32 | log_offset: float = 0.001
33 | patch_window_seconds: float = 0.96
34 | patch_hop_seconds: float = 0.48
35 |
36 | @property
37 | def patch_frames(self):
38 | return int(round(self.patch_window_seconds / self.stft_hop_seconds))
39 |
40 | @property
41 | def patch_bands(self):
42 | return self.mel_bands
43 |
44 | num_classes: int = 521
45 | conv_padding: str = 'same'
46 | batchnorm_center: bool = True
47 | batchnorm_scale: bool = False
48 | batchnorm_epsilon: float = 1e-4
49 | classifier_activation: str = 'sigmoid'
50 |
51 | tflite_compatible: bool = False
52 |
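The patch geometry of the model input follows directly from these defaults; a quick worked example (nothing here beyond arithmetic on the values above):

    # With the defaults above, each input patch to YAMNet is 96 frames x 64 mel bands.
    from params import Params

    p = Params()
    assert p.patch_frames == round(0.96 / 0.010) == 96   # patch_window / stft_hop
    assert p.patch_bands == p.mel_bands == 64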
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/pygame_images/Icon:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/pygame_images/Icon
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/pygame_images/apple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/pygame_images/apple.png
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/pygame_images/bg.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/pygame_images/bg.jpg
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/pygame_images/snake_head_with_ears.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/pygame_images/snake_head_with_ears.png
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/pygame_images/snake_tail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/pygame_images/snake_tail.png
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/run_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Runs a model on the edgetpu.
16 |
17 | Usage:
18 | python3 run_model.py --model_file model_edgetpu.tflite
19 | """
20 | from __future__ import absolute_import
21 | from __future__ import division
22 | from __future__ import print_function
23 |
24 | import argparse
25 | import sys
26 | import model
27 | import numpy as np
28 |
29 |
30 | def print_results(result, commands, labels, top=1):
31 | """Example callback function that prints the passed detections."""
32 | top_results = np.argsort(-result)[:top]
33 | for p in range(top):
34 | l = labels[top_results[p]]
35 | if l in commands.keys():
36 | threshold = commands[labels[top_results[p]]]["conf"]
37 | else:
38 | threshold = 0.5
39 | if top_results[p] and result[top_results[p]] > threshold:
40 | sys.stdout.write("\033[1m\033[93m*%15s*\033[0m (%.3f)" %
41 | (l, result[top_results[p]]))
42 | elif result[top_results[p]] > 0.005:
43 | sys.stdout.write(" %15s (%.3f)" % (l, result[top_results[p]]))
44 | sys.stdout.write("\n")
45 |
46 |
47 | def main():
48 | parser = argparse.ArgumentParser()
49 | model.add_model_flags(parser)
50 | args = parser.parse_args()
51 | interpreter = model.make_interpreter(args.model_file)
52 | interpreter.allocate_tensors()
53 | mic = args.mic if args.mic is None else int(args.mic)
54 | model.classify_audio(mic, interpreter,
55 | labels_file="config/labels_simple_audio.txt",
56 | result_callback=print_results,
57 | sample_rate_hz=int(args.sample_rate_hz),
58 | num_frames_hop=int(args.num_frames_hop))
59 |
60 | if __name__ == "__main__":
61 | main()
62 |
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/run_model_yamnet.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Runs a model on the edgetpu.
16 |
17 | Usage:
18 | python3 run_model_yamnet.py --model_file model_edgetpu.tflite
19 | """
20 | from __future__ import absolute_import
21 | from __future__ import division
22 | from __future__ import print_function
23 |
24 | import argparse
25 | import sys
26 | import model_yamnet
27 | import numpy as np
28 |
29 |
30 | def print_results(result, commands, labels, top=1):
31 | """Example callback function that prints the passed detections."""
32 | top_results = np.argsort(-result)[:top]
33 | for p in range(top):
34 | l = labels[top_results[p]]
35 | if l in commands.keys():
36 | threshold = commands[labels[top_results[p]]]["conf"]
37 | else:
38 | threshold = 0.5
39 | if top_results[p] and result[top_results[p]] > threshold:
40 | sys.stdout.write("\033[1m\033[93m*%15s*\033[0m (%.3f)" %
41 | (l, result[top_results[p]]))
42 | elif result[top_results[p]] > 0.005:
43 | sys.stdout.write(" %15s (%.3f)" % (l, result[top_results[p]]))
44 | sys.stdout.write("\n")
45 |
46 |
47 | def main():
48 | parser = argparse.ArgumentParser()
49 | model_yamnet.add_model_flags(parser)
50 | args = parser.parse_args()
51 | interpreter = model_yamnet.make_interpreter(args.model_file)
52 | interpreter.allocate_tensors()
53 | mic = args.mic if args.mic is None else int(args.mic)
54 | model_yamnet.classify_audio(mic, interpreter,
55 | labels_file="config/labels_simple_audio.txt",
56 | result_callback=print_results,
57 | sample_rate_hz=int(args.sample_rate_hz),
58 | num_frames_hop=int(args.num_frames_hop))
59 |
60 | if __name__ == "__main__":
61 | main()
62 |
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/run_snake.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2019 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # https://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | export DISPLAY="${DISPLAY:-:0}"
16 | python3 run_hearing_snake.py
17 |
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/run_yt_voice_control.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Controls a YouTube using voice commands.
16 |
17 |
18 | Usage:
19 | Requires YouTube to be running in a browser tab and focus to be on the
20 | YouTube player.
21 |
22 | python3 run_yt_voice_control.py
23 | """
24 | from __future__ import absolute_import
25 | from __future__ import division
26 | from __future__ import print_function
27 |
28 | import argparse
29 | import sys
30 | import model
31 | from pykeyboard import PyKeyboard
32 |
33 |
34 | class YoutubeControl(object):
35 | """Maps voice command detections to youtube controls."""
36 |
37 | def __init__(self):
38 | """Creates an instance of `YoutubeControl`."""
39 | self._keyboard = PyKeyboard()
40 | self._command_lookup = {
41 | "left": self._keyboard.left_key,
42 | "right": self._keyboard.right_key,
43 | "up": self._keyboard.up_key,
44 | "down": self._keyboard.down_key,
45 | "shift": self._keyboard.shift_key
46 | }
47 |
48 | def run_command(self, command):
49 | """Parses and excecuted a command."""
50 | if len(command) == 1:
51 | self._keyboard.tap_key(command)
52 | elif command in self._command_lookup.keys():
53 | self._keyboard.tap_key(self._command_lookup[command])
54 | elif "+" in command:
55 | keys = command.split("+")
56 | press_list = []
57 | for key in keys:
58 | if len(key) == 1:
59 | press_list.append(key)
60 | elif key in self._command_lookup.keys():
61 | press_list.append(self._command_lookup[key])
62 | else:
63 | print("Can't parse: ", command)
64 | return
65 | self._keyboard.press_keys(press_list)
66 |
67 |
68 | def main():
69 | parser = argparse.ArgumentParser()
70 | model.add_model_flags(parser)
71 | args = parser.parse_args()
72 | interpreter = model.make_interpreter(args.model_file)
73 | interpreter.allocate_tensors()
74 | mic = args.mic if args.mic is None else int(args.mic)
75 | yt_control = YoutubeControl()
76 | sys.stdout.write("--------------------\n")
77 | sys.stdout.write("This script will control Youtube.\n")
78 | sys.stdout.write("Just ensure that focus is on the YouTube player.\n")
79 | sys.stdout.write("--------------------\n")
80 |
81 | model.classify_audio(mic, interpreter,
82 | labels_file="config/labels_gc2.raw.txt",
83 | commands_file="config/commands_v2.txt",
84 | dectection_callback=yt_control.run_command,
85 | sample_rate_hz=int(args.sample_rate_hz),
86 | num_frames_hop=int(args.num_frames_hop))
87 |
88 |
89 | if __name__ == "__main__":
90 | main()
91 |
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/run_yt_voice_control.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2019 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # https://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | export DISPLAY="${DISPLAY:-:0}"
16 | python3 run_yt_voice_control.py
17 |
--------------------------------------------------------------------------------
/sound/project-keyword-spotter/yamnet.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The TensorFlow Authors All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Core model definition of YAMNet."""
17 |
18 | import csv
19 |
20 | import numpy as np
21 | import tensorflow as tf
22 | from tensorflow.keras import Model, layers
23 |
24 | import features as features_lib
25 |
26 |
27 | def _batch_norm(name, params):
28 | def _bn_layer(layer_input):
29 | return layers.BatchNormalization(
30 | name=name,
31 | center=params.batchnorm_center,
32 | scale=params.batchnorm_scale,
33 | epsilon=params.batchnorm_epsilon)(layer_input)
34 | return _bn_layer
35 |
36 |
37 | def _conv(name, kernel, stride, filters, params):
38 | def _conv_layer(layer_input):
39 | output = layers.Conv2D(name='{}/conv'.format(name),
40 | filters=filters,
41 | kernel_size=kernel,
42 | strides=stride,
43 | padding=params.conv_padding,
44 | use_bias=False,
45 | activation=None)(layer_input)
46 | output = _batch_norm('{}/conv/bn'.format(name), params)(output)
47 | output = layers.ReLU(name='{}/relu'.format(name))(output)
48 | return output
49 | return _conv_layer
50 |
51 |
52 | def _separable_conv(name, kernel, stride, filters, params):
53 | def _separable_conv_layer(layer_input):
54 | output = layers.DepthwiseConv2D(name='{}/depthwise_conv'.format(name),
55 | kernel_size=kernel,
56 | strides=stride,
57 | depth_multiplier=1,
58 | padding=params.conv_padding,
59 | use_bias=False,
60 | activation=None)(layer_input)
61 | output = _batch_norm('{}/depthwise_conv/bn'.format(name), params)(output)
62 | output = layers.ReLU(name='{}/depthwise_conv/relu'.format(name))(output)
63 | output = layers.Conv2D(name='{}/pointwise_conv'.format(name),
64 | filters=filters,
65 | kernel_size=(1, 1),
66 | strides=1,
67 | padding=params.conv_padding,
68 | use_bias=False,
69 | activation=None)(output)
70 | output = _batch_norm('{}/pointwise_conv/bn'.format(name), params)(output)
71 | output = layers.ReLU(name='{}/pointwise_conv/relu'.format(name))(output)
72 | return output
73 | return _separable_conv_layer
74 |
75 |
76 | _YAMNET_LAYER_DEFS = [
77 | # (layer_function, kernel, stride, num_filters)
78 | (_conv, [3, 3], 2, 32),
79 | (_separable_conv, [3, 3], 1, 64),
80 | (_separable_conv, [3, 3], 2, 128),
81 | (_separable_conv, [3, 3], 1, 128),
82 | (_separable_conv, [3, 3], 2, 256),
83 | (_separable_conv, [3, 3], 1, 256),
84 | (_separable_conv, [3, 3], 2, 512),
85 | (_separable_conv, [3, 3], 1, 512),
86 | (_separable_conv, [3, 3], 1, 512),
87 | (_separable_conv, [3, 3], 1, 512),
88 | (_separable_conv, [3, 3], 1, 512),
89 | (_separable_conv, [3, 3], 1, 512),
90 | (_separable_conv, [3, 3], 2, 1024),
91 | (_separable_conv, [3, 3], 1, 1024)
92 | ]
93 |
94 |
95 | def yamnet(features, params):
96 | """Define the core YAMNet mode in Keras."""
97 | net = layers.Reshape(
98 | (params.patch_frames, params.patch_bands, 1),
99 | input_shape=(params.patch_frames, params.patch_bands))(features)
100 | for (i, (layer_fun, kernel, stride, filters)) in enumerate(_YAMNET_LAYER_DEFS):
101 | net = layer_fun('layer{}'.format(i + 1), kernel, stride, filters, params)(net)
102 | embeddings = layers.GlobalAveragePooling2D()(net)
103 | logits = layers.Dense(units=params.num_classes, use_bias=True)(embeddings)
104 | predictions = layers.Activation(activation=params.classifier_activation)(logits)
105 | return predictions, embeddings
106 |
107 |
108 | def yamnet_frames_model(params):
109 | """Defines the YAMNet waveform-to-class-scores model.
110 |
111 | Args:
112 | params: An instance of Params containing hyperparameters.
113 |
114 | Returns:
115 | A model accepting (num_samples,) waveform input and emitting:
116 | - predictions: (num_patches, num_classes) matrix of class scores per time frame
117 | - embeddings: (num_patches, embedding size) matrix of embeddings per time frame
118 | - log_mel_spectrogram: (num_spectrogram_frames, num_mel_bins) spectrogram feature matrix
119 | """
120 | waveform = layers.Input(batch_shape=(None,), dtype=tf.float32)
121 | waveform_padded = features_lib.pad_waveform(waveform, params)
122 | log_mel_spectrogram, features = features_lib.waveform_to_log_mel_spectrogram_patches(
123 | waveform_padded, params)
124 | predictions, embeddings = yamnet(features, params)
125 | frames_model = Model(
126 | name='yamnet_frames', inputs=waveform,
127 | outputs=[predictions, embeddings, log_mel_spectrogram])
128 | return frames_model
129 |
130 |
131 | def class_names(class_map_csv):
132 | """Read the class name definition file and return a list of strings."""
133 | if tf.is_tensor(class_map_csv):
134 | class_map_csv = class_map_csv.numpy()
135 | with open(class_map_csv) as csv_file:
136 | reader = csv.reader(csv_file)
137 | next(reader) # Skip header
138 | return np.array([display_name for (_, _, display_name) in reader])
139 |
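A hedged sketch of instantiating yamnet_frames_model and running it on a raw waveform is shown below. It assumes features.py and params.py sit alongside yamnet.py on the Python path; since no pretrained weights are loaded, the class scores are meaningless and the call only demonstrates the input/output shapes described in the docstring above.

    # Build the frames model and run it on one second of random audio.
    import numpy as np
    import yamnet as yamnet_lib
    from params import Params

    params = Params()
    model = yamnet_lib.yamnet_frames_model(params)
    waveform = np.random.uniform(-1.0, 1.0, int(params.sample_rate)).astype(np.float32)
    predictions, embeddings, log_mel = model(waveform)
    # Expect roughly (num_patches, 521), (num_patches, 1024), (num_frames, 64).
    print(predictions.shape, embeddings.shape, log_mel.shape)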
--------------------------------------------------------------------------------
/sound/sound.ipynb:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------