├── .gitignore
├── CycleGANColab
    └── CycleGANColab.ipynb
├── CycleGANSolution
    ├── README.md
    ├── a4-code-v2-updated
    │   ├── README.md
    │   ├── cycle_gan.py
    │   ├── data_loader.py
    │   ├── model_checker.py
    │   ├── models.py
    │   ├── utils.py
    │   └── vanilla_gan.py
    └── a4-handout.pdf
├── GAN
    ├── README.md
    ├── Wiley's
    │   ├── models.py
    │   └── vanilla_gan.py
    └── vanilla_gan.py
├── LICENSE
├── LoRA
    ├── lora_hello_world.ipynb
    ├── lora_hello_world2.ipynb
    └── lora_hello_world3.ipynb
├── MachineTranslation
    ├── README.md
    ├── ReferenceExample.ipynb
    ├── seq2seq_translation_tutorial.ipynb
    ├── torchtext_translation_tutorial.ipynb
    └── torchtext_translation_tutorial_with_transformers.ipynb
├── NeuralArchitectureSearch
    ├── Autokeras.ipynb
    └── NeuralArchitectureSearch.ipynb
├── ProphetCode
    └── main.py
├── Quantization
    └── Quantization.ipynb
├── README.md
├── RL
    └── RL.ipynb
├── RL_from_human_feedback
    └── RL_from_human_feedback.ipynb
├── ReinforcmentLearning
    └── simple_example.py
├── SiameseNetwork
    ├── siamese_network.ipynb
    ├── siamese_original_network.ipynb
    └── twin_network.ipynb
├── SinGAN
    ├── CatGAN.ipynb
    ├── DoubleGAN.ipynb
    ├── SinGAN.ipynb
    ├── SinGANOfficialImplementation.ipynb
    └── SinGAN_on_custom_image.ipynb
├── TabularXGBoost
    └── TabularDataXGBoostTutorial.ipynb
├── Transformers
    ├── README.md
    ├── Transformers.ipynb
    ├── requirements.txt
    └── translation
    │   └── train.py
├── TwinNetwork
    └── twin_network.ipynb
├── VisionTransformers
    ├── VisionTransformers.ipynb
    ├── VisionTransformers_cleaned_up_code_2021-08-24.ipynb
    ├── VisionTransformers_with_PyTorch_Transformers.ipynb
    └── VisionTransformers_with_PyTorch_Transformers_with_BatchNorm.ipynb
├── handwriting-synthesis
    ├── .gitignore
    ├── .travis.yml
    ├── checkpoints
    │   ├── checkpoint
    │   ├── model-17900.data-00000-of-00001
    │   ├── model-17900.index
    │   └── model-17900.meta
    ├── data
    │   ├── blacklist.npy
    │   ├── processed
    │   │   ├── .gitattributes
    │   │   ├── c.npy
    │   │   ├── c_len.npy
    │   │   ├── w_id.npy
    │   │   ├── x.npy
    │   │   └── x_len.npy
    │   └── raw
    │   │   └── readme.md
    ├── data_frame.py
    ├── demo.py
    ├── drawing.py
    ├── img
    │   ├── all_star.svg
    │   ├── banner.svg
    │   ├── downtown.svg
    │   ├── give_up.svg
    │   └── usage_demo.svg
    ├── lyrics.py
    ├── prepare_data.py
    ├── readme.md
    ├── requirements.txt
    ├── rnn.py
    ├── rnn_cell.py
    ├── rnn_ops.py
    ├── styles
    │   ├── style-0-chars.npy
    │   ├── style-0-strokes.npy
    │   ├── style-1-chars.npy
    │   ├── style-1-strokes.npy
    │   ├── style-1.npy
    │   ├── style-10-chars.npy
    │   ├── style-10-strokes.npy
    │   ├── style-11-chars.npy
    │   ├── style-11-strokes.npy
    │   ├── style-12-chars.npy
    │   ├── style-12-strokes.npy
    │   ├── style-2-chars.npy
    │   ├── style-2-strokes.npy
    │   ├── style-2.npy
    │   ├── style-3-chars.npy
    │   ├── style-3-strokes.npy
    │   ├── style-4-chars.npy
    │   ├── style-4-strokes.npy
    │   ├── style-5-chars.npy
    │   ├── style-5-strokes.npy
    │   ├── style-6-chars.npy
    │   ├── style-6-strokes.npy
    │   ├── style-7-chars.npy
    │   ├── style-7-strokes.npy
    │   ├── style-8-chars.npy
    │   ├── style-8-strokes.npy
    │   ├── style-9-chars.npy
    │   └── style-9-strokes.npy
    ├── test_example.py
    ├── tf_base_model.py
    ├── tf_utils.py
    └── upgrade_tf2.sh
├── handwriting_generator
    ├── IBM.csv
    ├── IBM_Transformer+TimeEmbedding.ipynb
    ├── handwriting_generator.ipynb
    └── saved.tgz
├── minGPT
    ├── .gitignore
    ├── LICENSE
    ├── README.md
    ├── mingpt.jpg
    ├── mingpt
    │   ├── __init__.py
    │   ├── model.py
    │   ├── trainer.py
    │   └── utils.py
    ├── play_char.ipynb
    ├── play_image.ipynb
    └── play_math.ipynb
└── sound
    ├── preprocess
        ├── README.md
        ├── mp3_to_wav.py
        ├── to_16000_wav.py
        └── trim.py
    ├── project-keyword-spotter
        ├── .DS_Store
        ├── CONTRIBUTING.md
        ├── Icon
        ├── LICENSE
        ├── README.md
        ├── audio_recorder.py
        ├── config
        │   ├── Icon
        │   ├── commands_v2.txt
        │   ├── commands_v2_snake.txt
        │   ├── labels_gc2.raw.txt
        │   └── labels_simple_audio.txt
        ├── features.py
        ├── hearing_snake_metadata.json
        ├── install_requirements.sh
        ├── media
        │   ├── Icon
        │   └── startscreen.png
        ├── mel_features.py
        ├── model.py
        ├── model_yamnet.py
        ├── models
        │   ├── Icon
        │   ├── model-backup1.tflite
        │   ├── model.tflite
        │   ├── model_quantized_edgetpu.tflite
        │   ├── voice_commands_v0.7_edgetpu.tflite
        │   └── voice_commands_v0.8_edgetpu.tflite
        ├── params.py
        ├── pygame_images
        │   ├── Icon
        │   ├── apple.png
        │   ├── bg.jpg
        │   ├── snake_head_with_ears.png
        │   └── snake_tail.png
        ├── run_hearing_snake.py
        ├── run_model.py
        ├── run_model_yamnet.py
        ├── run_snake.sh
        ├── run_yt_voice_control.py
        ├── run_yt_voice_control.sh
        ├── yamnet.py
        └── yamnet_class_map.csv
    ├── simple_audio.ipynb
    ├── simple_audio_custom_cough_dataset_compiled.ipynb
    ├── simple_audio_load_vggish.ipynb
    ├── simple_audio_load_vggish_with_layer.ipynb
    ├── simple_audio_load_yamnet.ipynb
    ├── simple_audio_new_spectrogram.ipynb
    ├── simple_audio_new_spectrogram_custom_cough_dataset.ipynb
    ├── simple_audio_new_spectrogram_custom_cough_dataset_quantize.ipynb
    ├── simple_audio_new_spectrogram_custom_dataset.ipynb
    ├── simple_audio_new_spectrogram_numpy.ipynb
    ├── simple_audio_new_spectrogram_numpy_and_normalize.ipynb
    ├── simple_audio_new_spectrogram_numpy_and_normalize_only_left_right_working.ipynb
    ├── simple_audio_working_vggish.ipynb
    ├── simple_audio_working_vggish_clean.ipynb
    ├── simple_audio_working_vggish_clean_freeze_vggish_weights.ipynb
    ├── simple_audio_working_vggish_dataset.ipynb
    └── sound.ipynb


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 


--------------------------------------------------------------------------------
/CycleGANColab/CycleGANColab.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "CycleGAN.ipynb",
  7 |       "provenance": [],
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "accelerator": "GPU"
 15 |   },
 16 |   "cells": [
 17 |     {
 18 |       "cell_type": "markdown",
 19 |       "metadata": {
 20 |         "id": "view-in-github",
 21 |         "colab_type": "text"
 22 |       },
 23 |       "source": [
 24 |         "<a href=\"https://colab.research.google.com/github/wileyw/DeepLearningDemos/blob/master/CycleGANColab/CycleGANColab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 25 |       ]
 26 |     },
 27 |     {
 28 |       "cell_type": "markdown",
 29 |       "metadata": {
 30 |         "id": "fTGhQfaYH_QV",
 31 |         "colab_type": "text"
 32 |       },
 33 |       "source": [
 34 |         "# CycleGAN\n",
 35 |         "\n",
 36 |         "This notebook makes the CycleGAN homework assignment runnable on Google Colab (free GPU), so you don't need a physical GPU to run this assignment.\n",
 37 |         "\n",
 38 |         "Code available on https://github.com/wileyw/DeepLearningDemos.git\n",
 39 |         "\n",
 40 |         "Homework Assignment: https://www.cs.toronto.edu/~rgrosse/courses/csc321_2018/assignments/a4-handout.pdf"
 41 |       ]
 42 |     },
 43 |     {
 44 |       "cell_type": "code",
 45 |       "metadata": {
 46 |         "id": "g-vpRE5yJebK",
 47 |         "colab_type": "code",
 48 |         "colab": {}
 49 |       },
 50 |       "source": [
 51 |         "!git clone https://github.com/wileyw/DeepLearningDemos.git"
 52 |       ],
 53 |       "execution_count": 0,
 54 |       "outputs": []
 55 |     },
 56 |     {
 57 |       "cell_type": "code",
 58 |       "metadata": {
 59 |         "id": "haTvhcMrH8ke",
 60 |         "colab_type": "code",
 61 |         "colab": {}
 62 |       },
 63 |       "source": [
 64 |         "!wget http://www.cs.toronto.edu/~rgrosse/courses/csc321_2018/assignments/a4-code.zip \n",
 65 |         "!unzip -q a4-code.zip"
 66 |       ],
 67 |       "execution_count": 0,
 68 |       "outputs": []
 69 |     },
 70 |     {
 71 |       "cell_type": "code",
 72 |       "metadata": {
 73 |         "id": "ks4AwPQYN-bo",
 74 |         "colab_type": "code",
 75 |         "colab": {}
 76 |       },
 77 |       "source": [
 78 |         "!ls\n",
 79 |         "!mv a4-code-v2-updated/emojis .\n",
 80 |         "!mv a4-code-v2-updated/checker_files ."
 81 |       ],
 82 |       "execution_count": 0,
 83 |       "outputs": []
 84 |     },
 85 |     {
 86 |       "cell_type": "code",
 87 |       "metadata": {
 88 |         "id": "5NyF3QrVKNqr",
 89 |         "colab_type": "code",
 90 |         "colab": {}
 91 |       },
 92 |       "source": [
 93 |         "!python3 DeepLearningDemos/CycleGANSolution/a4-code-v2-updated/model_checker.py"
 94 |       ],
 95 |       "execution_count": 0,
 96 |       "outputs": []
 97 |     },
 98 |     {
 99 |       "cell_type": "code",
100 |       "metadata": {
101 |         "id": "9EHeytGb46j0",
102 |         "colab_type": "code",
103 |         "colab": {}
104 |       },
105 |       "source": [
106 |         "import sys\n",
107 |         "sys.path.append('DeepLearningDemos/CycleGANSolution/a4-code-v2-updated')\n",
108 |         "import cycle_gan\n",
109 |         "from cycle_gan import *\n",
110 |         "\n",
111 |         "sys.argv[:] = ['cycle_gan.py']\n",
112 |         "parser = create_parser()\n",
113 |         "opts = parser.parse_args()\n",
114 |         "\n",
115 |         "opts.use_cycle_consistency_loss = True\n",
116 |         "\n",
117 |         "batch_size = opts.batch_size\n",
118 |         "cycle_gan.batch_size = batch_size\n",
119 |         "\n",
120 |         "print(opts)\n",
121 |         "main(opts)"
122 |       ],
123 |       "execution_count": 0,
124 |       "outputs": []
125 |     },
126 |     {
127 |       "cell_type": "code",
128 |       "metadata": {
129 |         "id": "ac_qDfPs5S_g",
130 |         "colab_type": "code",
131 |         "colab": {}
132 |       },
133 |       "source": [
134 |         "from IPython.display import Image\n",
135 |         "import matplotlib.pyplot as plt\n",
136 |         "import glob\n",
137 |         "images = sorted(glob.glob('./samples_cyclegan/*X-Y.png'))\n",
138 |         "Image(images[-1])"
139 |       ],
140 |       "execution_count": 0,
141 |       "outputs": []
142 |     },
143 |     {
144 |       "cell_type": "code",
145 |       "metadata": {
146 |         "id": "uVnVGQo66bYF",
147 |         "colab_type": "code",
148 |         "colab": {}
149 |       },
150 |       "source": [
151 |         "from IPython.display import Image\n",
152 |         "import matplotlib.pyplot as plt\n",
153 |         "import glob\n",
154 |         "images = sorted(glob.glob('./samples_cyclegan/*Y-X.png'))\n",
155 |         "Image(images[-1])"
156 |       ],
157 |       "execution_count": 0,
158 |       "outputs": []
159 |     },
160 |     {
161 |       "cell_type": "code",
162 |       "metadata": {
163 |         "id": "ZxN74FK-wdYV",
164 |         "colab_type": "code",
165 |         "colab": {}
166 |       },
167 |       "source": [
168 |         "import sys\n",
169 |         "sys.path.append('DeepLearningDemos/CycleGANSolution/a4-code-v2-updated')\n",
170 |         "import vanilla_gan\n",
171 |         "from vanilla_gan import *\n",
172 |         "\n",
173 |         "# Run Vanilla GAN\n",
174 |         "sys.argv[:] = ['vanilla_gan.py']\n",
175 |         "parser = create_parser()\n",
176 |         "opts = parser.parse_args()\n",
177 |         "\n",
178 |         "batch_size = opts.batch_size\n",
179 |         "vanilla_gan.batch_size = batch_size\n",
180 |         "\n",
181 |         "print(opts)\n",
182 |         "main(opts)"
183 |       ],
184 |       "execution_count": 0,
185 |       "outputs": []
186 |     },
187 |     {
188 |       "cell_type": "code",
189 |       "metadata": {
190 |         "id": "NTCwanSr1S_D",
191 |         "colab_type": "code",
192 |         "colab": {}
193 |       },
194 |       "source": [
195 |         "# View images\n",
196 |         "from IPython.display import Image\n",
197 |         "import matplotlib.pyplot as plt\n",
198 |         "import glob\n",
199 |         "images = sorted(glob.glob('./samples_vanilla/*.png'))\n",
200 |         "Image(images[-1])\n"
201 |       ],
202 |       "execution_count": 0,
203 |       "outputs": []
204 |     }
205 |   ]
206 | }


--------------------------------------------------------------------------------
/CycleGANSolution/README.md:
--------------------------------------------------------------------------------
 1 | # Cycle GAN and Vanilla GAN
 2 | Homework Assignment:
 3 | https://www.cs.toronto.edu/~rgrosse/courses/csc321_2018/assignments/a4-handout.pdf
 4 | 
 5 | ## Download Dataset:
 6 | This .zip file contains the starting code as well. When we unzip this file, we should not overwrite the existing .py files.
 7 | ```
 8 | wget http://www.cs.toronto.edu/~rgrosse/courses/csc321_2018/assignments/a4-code.zip
 9 | 
10 | # When prompted to overwrite, select [N]one
11 | unzip a4-code.zip
12 | ```
13 | 
14 | ## Original Course Website:
15 | http://www.cs.toronto.edu/~rgrosse/courses/csc421_2019/
16 | 


--------------------------------------------------------------------------------
/CycleGANSolution/a4-code-v2-updated/README.md:
--------------------------------------------------------------------------------
 1 | Vanilla GAN
 2 | ## 1. Run the model checker
 3 | ```
 4 | python3 model_checker.py
 5 | ```
 6 | 
 7 | ## 2. Run vanilla GAN
 8 | ```
 9 | python3 vanilla_gan.py
10 | ```
11 | 
12 | ## 3. Run Cycle GAN
13 | ```
14 | python3 cycle_gan.py --use_cycle_consistency_loss
15 | ```
16 | 


--------------------------------------------------------------------------------
/CycleGANSolution/a4-code-v2-updated/data_loader.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import torch
 4 | from torch.utils.data import DataLoader
 5 | 
 6 | from torchvision import datasets
 7 | from torchvision import transforms
 8 | 
 9 | 
10 | def get_emoji_loader(emoji_type, opts):
11 |     """Creates training and test data loaders.
12 |     """
13 |     transform = transforms.Compose([
14 |                     transforms.Scale(opts.image_size),
15 |                     transforms.ToTensor(),
16 |                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
17 |                 ])
18 | 
19 |     train_path = os.path.join('./emojis', emoji_type)
20 |     test_path = os.path.join('./emojis', 'Test_{}'.format(emoji_type))
21 | 
22 |     train_dataset = datasets.ImageFolder(train_path, transform)
23 |     test_dataset = datasets.ImageFolder(test_path, transform)
24 | 
25 |     train_dloader = DataLoader(dataset=train_dataset, batch_size=opts.batch_size, shuffle=True, num_workers=opts.num_workers)
26 |     test_dloader = DataLoader(dataset=test_dataset, batch_size=opts.batch_size, shuffle=False, num_workers=opts.num_workers)
27 | 
28 |     return train_dloader, test_dloader
29 | 


--------------------------------------------------------------------------------
/CycleGANSolution/a4-code-v2-updated/model_checker.py:
--------------------------------------------------------------------------------
  1 | # CSC 321, Assignment 4
  2 | #
  3 | # This is a script to check whether the outputs of your CycleGenerator, DCDiscriminator, and
  4 | # CycleGenerator models produce the expected outputs.
  5 | #
  6 | # NOTE THAT THIS MODEL CHECKER IS PROVIDED FOR CONVENIENCE ONLY, AND MAY PRODUCE FALSE NEGATIVES.
  7 | # DO NOT USE THIS AS THE ONLY WAY TO CHECK THAT YOUR MODEL IS CORRECT.
  8 | #
  9 | # Usage:
 10 | # ======
 11 | #
 12 | #    python model_checker.py
 13 | #
 14 | 
 15 | import warnings
 16 | warnings.filterwarnings("ignore")
 17 | 
 18 | # Torch imports
 19 | import torch
 20 | from torch.autograd import Variable
 21 | 
 22 | # Numpy
 23 | import numpy as np
 24 | 
 25 | # Local imports
 26 | from models import DCGenerator, DCDiscriminator, CycleGenerator
 27 | 
 28 | 
 29 | def count_parameters(model):
 30 |     """Finds the total number of trainable parameters in a model.
 31 |     """
 32 |     return sum(p.numel() for p in model.parameters() if p.requires_grad)
 33 | 
 34 | 
 35 | def sample_noise(dim):
 36 |     """
 37 |     Generate a PyTorch Tensor of uniform random noise.
 38 | 
 39 |     Input:
 40 |     - batch_size: Integer giving the batch size of noise to generate.
 41 |     - dim: Integer giving the dimension of noise to generate.
 42 | 
 43 |     Output:
 44 |     - A PyTorch Tensor of shape (1, dim, 1, 1) containing uniform
 45 |       random noise in the range (-1, 1).
 46 |     """
 47 |     return Variable(torch.rand(1, dim) * 2 - 1).unsqueeze(2).unsqueeze(3)
 48 | 
 49 | 
 50 | def check_dc_generator():
 51 |     """Checks the output and number of parameters of the DCGenerator class.
 52 |     """
 53 |     state = torch.load('checker_files/dc_generator.pt')
 54 | 
 55 |     G = DCGenerator(noise_size=100, conv_dim=32)
 56 |     G.load_state_dict(state['state_dict'])
 57 |     noise = state['input']
 58 |     dc_generator_expected = state['output']
 59 | 
 60 |     output = G(noise)
 61 |     output_np = output.data.cpu().numpy()
 62 | 
 63 |     if np.allclose(output_np, dc_generator_expected, atol=1e-06):
 64 |         print('DCGenerator output: EQUAL')
 65 |     else:
 66 |         print('DCGenerator output: NOT EQUAL')
 67 | 
 68 |     num_params = count_parameters(G)
 69 |     expected_params = 370624
 70 | 
 71 |     print('DCGenerator #params = {}, expected #params = {}, {}'.format(
 72 |           num_params, expected_params, 'EQUAL' if num_params == expected_params else 'NOT EQUAL'))
 73 | 
 74 |     print('-' * 80)
 75 | 
 76 | 
 77 | def check_dc_discriminator():
 78 |     """Checks the output and number of parameters of the DCDiscriminator class.
 79 |     """
 80 |     state = torch.load('checker_files/dc_discriminator.pt')
 81 | 
 82 |     D = DCDiscriminator(conv_dim=32)
 83 |     D.load_state_dict(state['state_dict'])
 84 |     images = state['input']
 85 |     dc_discriminator_expected = state['output']
 86 | 
 87 |     output = D(images)
 88 |     output_np = output.data.cpu().numpy()
 89 | 
 90 |     if np.allclose(output_np, dc_discriminator_expected, atol=1e-06):
 91 |         print('DCDiscriminator output: EQUAL')
 92 |     else:
 93 |         print('DCDiscriminator output: NOT EQUAL')
 94 | 
 95 |     num_params = count_parameters(D)
 96 |     expected_params = 167872
 97 | 
 98 |     print('DCDiscriminator #params = {}, expected #params = {}, {}'.format(
 99 |           num_params, expected_params, 'EQUAL' if num_params == expected_params else 'NOT EQUAL'))
100 | 
101 |     print('-' * 80)
102 | 
103 | 
104 | def check_cycle_generator():
105 |     """Checks the output and number of parameters of the CycleGenerator class.
106 |     """
107 |     state = torch.load('checker_files/cycle_generator.pt')
108 | 
109 |     G_XtoY = CycleGenerator(conv_dim=32, init_zero_weights=False)
110 |     G_XtoY.load_state_dict(state['state_dict'])
111 |     images = state['input']
112 |     cycle_generator_expected = state['output']
113 | 
114 |     output = G_XtoY(images)
115 |     output_np = output.data.cpu().numpy()
116 | 
117 |     if np.allclose(output_np, cycle_generator_expected, atol=1e-06):
118 |         print('CycleGenerator output: EQUAL')
119 |     else:
120 |         print('CycleGenerator output: NOT EQUAL')
121 | 
122 |     num_params = count_parameters(G_XtoY)
123 |     expected_params = 105856
124 | 
125 |     print('CycleGenerator #params = {}, expected #params = {}, {}'.format(
126 |           num_params, expected_params, 'EQUAL' if num_params == expected_params else 'NOT EQUAL'))
127 | 
128 |     print('-' * 80)
129 | 
130 | 
131 | if __name__ == '__main__':
132 | 
133 |     try:
134 |         check_dc_generator()
135 |     except:
136 |         print('Crashed while checking DCGenerator. Maybe not implemented yet?')
137 | 
138 |     try:
139 |         check_dc_discriminator()
140 |     except:
141 |         print('Crashed while checking DCDiscriminator. Maybe not implemented yet?')
142 | 
143 |     #try:
144 |     check_cycle_generator()
145 |     #except:
146 |     #    print('Crashed while checking CycleGenerator. Maybe not implemented yet?')
147 | 


--------------------------------------------------------------------------------
/CycleGANSolution/a4-code-v2-updated/models.py:
--------------------------------------------------------------------------------
  1 | # CSC 321, Assignment 4
  2 | #
  3 | # This file contains the models used for both parts of the assignment:
  4 | #
  5 | #   - DCGenerator       --> Used in the vanilla GAN in Part 1
  6 | #   - CycleGenerator    --> Used in the CycleGAN in Part 2
  7 | #   - DCDiscriminator   --> Used in both the vanilla GAN and CycleGAN (Parts 1 and 2)
  8 | #
  9 | # For the assignment, you are asked to create the architectures of these three networks by
 10 | # filling in the __init__ methods in the DCGenerator, CycleGenerator, and DCDiscriminator classes.
 11 | # Note that the forward passes of these models are provided for you, so the only part you need to
 12 | # fill in is __init__.
 13 | 
 14 | import pdb
 15 | import torch
 16 | import torch.nn as nn
 17 | import torch.nn.functional as F
 18 | 
 19 | 
 20 | def deconv(in_channels, out_channels, kernel_size, stride=2, padding=1, batch_norm=True):
 21 |     """Creates a transposed-convolutional layer, with optional batch normalization.
 22 |     """
 23 |     layers = []
 24 |     layers.append(nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride, padding, bias=False))
 25 |     if batch_norm:
 26 |         layers.append(nn.BatchNorm2d(out_channels))
 27 |     return nn.Sequential(*layers)
 28 | 
 29 | 
 30 | def conv(in_channels, out_channels, kernel_size, stride=2, padding=1, batch_norm=True, init_zero_weights=False):
 31 |     """Creates a convolutional layer, with optional batch normalization.
 32 |     """
 33 |     layers = []
 34 |     conv_layer = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
 35 |     if init_zero_weights:
 36 |         conv_layer.weight.data = torch.randn(out_channels, in_channels, kernel_size, kernel_size) * 0.001
 37 |     layers.append(conv_layer)
 38 | 
 39 |     if batch_norm:
 40 |         layers.append(nn.BatchNorm2d(out_channels))
 41 |     return nn.Sequential(*layers)
 42 | 
 43 | 
 44 | class DCGenerator(nn.Module):
 45 |     def __init__(self, noise_size, conv_dim):
 46 |         super(DCGenerator, self).__init__()
 47 | 
 48 |         ###########################################
 49 |         ##   FILL THIS IN: CREATE ARCHITECTURE   ##
 50 |         ###########################################
 51 | 
 52 |         self.deconv1 = deconv(noise_size, 128, 4, stride=1, padding=0, batch_norm=True)
 53 |         self.deconv2 = deconv(128, 64, 4, stride=2, padding=1, batch_norm=True)
 54 |         self.deconv3 = deconv(64, 32, 4, stride=2, padding=1, batch_norm=True)
 55 |         self.deconv4 = deconv(32, 3, 4, stride=2, padding=1, batch_norm=False)
 56 | 
 57 |     def forward(self, z):
 58 |         """Generates an image given a sample of random noise.
 59 | 
 60 |             Input
 61 |             -----
 62 |                 z: BS x noise_size x 1 x 1   -->  16x100x1x1
 63 | 
 64 |             Output
 65 |             ------
 66 |                 out: BS x channels x image_width x image_height  -->  16x3x32x32
 67 |         """
 68 | 
 69 |         out = F.relu(self.deconv1(z))
 70 |         out = F.relu(self.deconv2(out))
 71 |         out = F.relu(self.deconv3(out))
 72 |         out = F.tanh(self.deconv4(out))
 73 |         return out
 74 | 
 75 | 
 76 | class ResnetBlock(nn.Module):
 77 |     def __init__(self, conv_dim):
 78 |         super(ResnetBlock, self).__init__()
 79 |         self.conv_layer = conv(in_channels=conv_dim, out_channels=conv_dim, kernel_size=3, stride=1, padding=1)
 80 | 
 81 |     def forward(self, x):
 82 |         out = x + self.conv_layer(x)
 83 |         return out
 84 | 
 85 | 
 86 | class CycleGenerator(nn.Module):
 87 |     """Defines the architecture of the generator network.
 88 |        Note: Both generators G_XtoY and G_YtoX have the same architecture in this assignment.
 89 |     """
 90 |     def __init__(self, conv_dim=64, init_zero_weights=False):
 91 |         super(CycleGenerator, self).__init__()
 92 | 
 93 |         ###########################################
 94 |         ##   FILL THIS IN: CREATE ARCHITECTURE   ##
 95 |         ###########################################
 96 | 
 97 |         # 1. Define the encoder part of the generator (that extracts features from the input image)
 98 |         self.conv1 = conv(3, 32, 4, stride=2, padding=1, batch_norm=True, init_zero_weights=init_zero_weights)
 99 |         self.conv2 = conv(32, 64, 4, stride=2, padding=1, batch_norm=True, init_zero_weights=init_zero_weights)
100 | 
101 |         # 2. Define the transformation part of the generator
102 |         self.resnet_block = ResnetBlock(64)
103 | 
104 |         # 3. Define the decoder part of the generator (that builds up the output image from features)
105 |         self.deconv1 = deconv(64, 32, 4, stride=2, padding=1, batch_norm=True)
106 |         self.deconv2 = deconv(32, 3, 4, stride=2, padding=1, batch_norm=False)
107 | 
108 |     def forward(self, x):
109 |         """Generates an image conditioned on an input image.
110 | 
111 |             Input
112 |             -----
113 |                 x: BS x 3 x 32 x 32
114 | 
115 |             Output
116 |             ------
117 |                 out: BS x 3 x 32 x 32
118 |         """
119 | 
120 |         out = F.relu(self.conv1(x))
121 |         out = F.relu(self.conv2(out))
122 | 
123 |         out = F.relu(self.resnet_block(out))
124 | 
125 |         out = F.relu(self.deconv1(out))
126 |         out = F.tanh(self.deconv2(out))
127 | 
128 |         return out
129 | 
130 | 
131 | class DCDiscriminator(nn.Module):
132 |     """Defines the architecture of the discriminator network.
133 |        Note: Both discriminators D_X and D_Y have the same architecture in this assignment.
134 |     """
135 |     def __init__(self, conv_dim=64):
136 |         super(DCDiscriminator, self).__init__()
137 | 
138 |         ###########################################
139 |         ##   FILL THIS IN: CREATE ARCHITECTURE   ##
140 |         ###########################################
141 | 
142 |         self.conv1 = conv(3, 32, 4, stride=2, padding=1, batch_norm=True, init_zero_weights=False)
143 |         self.conv2 = conv(32, 64, 4, stride=2, padding=1, batch_norm=True, init_zero_weights=False)
144 |         self.conv3 = conv(64, 128, 4, stride=2, padding=1, batch_norm=True, init_zero_weights=False)
145 |         self.conv4 = conv(128, 1, 4, stride=1, padding=0, batch_norm=False, init_zero_weights=False)
146 | 
147 |     def forward(self, x):
148 | 
149 |         out = F.relu(self.conv1(x))
150 |         out = F.relu(self.conv2(out))
151 |         out = F.relu(self.conv3(out))
152 | 
153 |         out = self.conv4(out).squeeze()
154 |         out = F.sigmoid(out)
155 |         return out
156 | 


--------------------------------------------------------------------------------
/CycleGANSolution/a4-code-v2-updated/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import torch
 4 | from torch.autograd import Variable
 5 | 
 6 | 
 7 | def to_var(x):
 8 |     """Converts numpy to variable."""
 9 |     if torch.cuda.is_available():
10 |         x = x.cuda()
11 |     return Variable(x)
12 | 
13 | 
14 | def to_data(x):
15 |     """Converts variable to numpy."""
16 |     if torch.cuda.is_available():
17 |         x = x.cpu()
18 |     return x.data.numpy()
19 | 
20 | 
21 | def create_dir(directory):
22 |     """Creates a directory if it does not already exist.
23 |     """
24 |     if not os.path.exists(directory):
25 |         os.makedirs(directory)
26 | 


--------------------------------------------------------------------------------
/CycleGANSolution/a4-handout.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/CycleGANSolution/a4-handout.pdf


--------------------------------------------------------------------------------
/GAN/README.md:
--------------------------------------------------------------------------------
 1 | # Vanilla GAN
 2 | Homework Assignment:
 3 | https://www.cs.toronto.edu/~rgrosse/courses/csc321_2018/assignments/a4-handout.pdf
 4 | 
 5 | Download Code:
 6 | ```
 7 | wget http://www.cs.toronto.edu/~rgrosse/courses/csc321_2018/assignments/a4-code.zip
 8 | unzip a4-code.zip
 9 | ```
10 | 


--------------------------------------------------------------------------------
/GAN/Wiley's/models.py:
--------------------------------------------------------------------------------
  1 | # CSC 321, Assignment 4
  2 | #
  3 | # This file contains the models used for both parts of the assignment:
  4 | #
  5 | #   - DCGenerator       --> Used in the vanilla GAN in Part 1
  6 | #   - CycleGenerator    --> Used in the CycleGAN in Part 2
  7 | #   - DCDiscriminator   --> Used in both the vanilla GAN and CycleGAN (Parts 1 and 2)
  8 | #
  9 | # For the assignment, you are asked to create the architectures of these three networks by
 10 | # filling in the __init__ methods in the DCGenerator, CycleGenerator, and DCDiscriminator classes.
 11 | # Note that the forward passes of these models are provided for you, so the only part you need to
 12 | # fill in is __init__.
 13 | 
 14 | import pdb
 15 | import torch
 16 | import torch.nn as nn
 17 | import torch.nn.functional as F
 18 | 
 19 | 
 20 | def deconv(
 21 |     in_channels, out_channels, kernel_size, stride=2, padding=1, batch_norm=True
 22 | ):
 23 |     """Creates a transposed-convolutional layer, with optional batch normalization.
 24 |     """
 25 |     layers = []
 26 |     layers.append(
 27 |         nn.ConvTranspose2d(
 28 |             in_channels, out_channels, kernel_size, stride, padding, bias=False
 29 |         )
 30 |     )
 31 |     if batch_norm:
 32 |         layers.append(nn.BatchNorm2d(out_channels))
 33 |     return nn.Sequential(*layers)
 34 | 
 35 | 
 36 | def conv(
 37 |     in_channels,
 38 |     out_channels,
 39 |     kernel_size,
 40 |     stride=2,
 41 |     padding=1,
 42 |     batch_norm=True,
 43 |     init_zero_weights=False,
 44 | ):
 45 |     """Creates a convolutional layer, with optional batch normalization.
 46 |     """
 47 |     layers = []
 48 |     conv_layer = nn.Conv2d(
 49 |         in_channels=in_channels,
 50 |         out_channels=out_channels,
 51 |         kernel_size=kernel_size,
 52 |         stride=stride,
 53 |         padding=padding,
 54 |         bias=False,
 55 |     )
 56 |     if init_zero_weights:
 57 |         conv_layer.weight.data = (
 58 |             torch.randn(out_channels, in_channels, kernel_size, kernel_size) * 0.001
 59 |         )
 60 |     layers.append(conv_layer)
 61 | 
 62 |     if batch_norm:
 63 |         layers.append(nn.BatchNorm2d(out_channels))
 64 |     return nn.Sequential(*layers)
 65 | 
 66 | 
 67 | class DCGenerator(nn.Module):
 68 |     def __init__(self, noise_size, conv_dim):
 69 |         super(DCGenerator, self).__init__()
 70 | 
 71 |         ###########################################
 72 |         ##   FILL THIS IN: CREATE ARCHITECTURE   ##
 73 |         ###########################################
 74 |         kernel_size = 4
 75 | 
 76 |         self.deconv1 = deconv(100, conv_dim * 4, kernel_size, padding=0)
 77 |         self.deconv2 = deconv(conv_dim * 4, conv_dim * 2, kernel_size)
 78 |         self.deconv3 = deconv(conv_dim * 2, conv_dim, kernel_size)
 79 |         self.deconv4 = deconv(conv_dim, 3, kernel_size, 2, batch_norm=False)
 80 | 
 81 |     def forward(self, z):
 82 |         """Generates an image given a sample of random noise.
 83 | 
 84 |             Input
 85 |             -----
 86 |                 z: BS x noise_size x 1 x 1   -->  16x100x1x1
 87 | 
 88 |             Output
 89 |             ------
 90 |                 out: BS x channels x image_width x image_height  -->  16x3x32x32
 91 |         """
 92 | 
 93 |         out = F.relu(self.deconv1(z))
 94 |         out = F.relu(self.deconv2(out))
 95 |         out = F.relu(self.deconv3(out))
 96 |         out = F.tanh(self.deconv4(out))
 97 |         return out
 98 | 
 99 | 
100 | class ResnetBlock(nn.Module):
101 |     def __init__(self, conv_dim):
102 |         super(ResnetBlock, self).__init__()
103 |         self.conv_layer = conv(
104 |             in_channels=conv_dim,
105 |             out_channels=conv_dim,
106 |             kernel_size=3,
107 |             stride=1,
108 |             padding=1,
109 |         )
110 | 
111 |     def forward(self, x):
112 |         out = x + self.conv_layer(x)
113 |         return out
114 | 
115 | 
116 | class CycleGenerator(nn.Module):
117 |     """Defines the architecture of the generator network.
118 |        Note: Both generators G_XtoY and G_YtoX have the same architecture in this assignment.
119 |     """
120 | 
121 |     def __init__(self, conv_dim=64, init_zero_weights=False):
122 |         super(CycleGenerator, self).__init__()
123 | 
124 |         ###########################################
125 |         ##   FILL THIS IN: CREATE ARCHITECTURE   ##
126 |         ###########################################
127 | 
128 |         kernel_size = 4
129 |         self.conv1 = conv(3, conv_dim, kernel_size)
130 |         self.conv2 = conv(conv_dim, conv_dim * 2, kernel_size)
131 | 
132 |         self.resnet_block = ResnetBlock(conv_dim * 2)
133 | 
134 |         self.deconv1 = deconv(conv_dim * 2, conv_dim, kernel_size)
135 |         self.deconv2 = deconv(conv_dim, 3, kernel_size, 2, batch_norm=False)
136 | 
137 | 
138 |         # 1. Define the encoder part of the generator (that extracts features from the input image)
139 |         # self.conv1 = conv(...)
140 |         # self.conv2 = conv(...)
141 | 
142 |         # 2. Define the transformation part of the generator
143 |         # self.resnet_block = ...
144 | 
145 |         # 3. Define the decoder part of the generator (that builds up the output image from features)
146 |         # self.deconv1 = deconv(...)
147 |         # self.deconv2 = deconv(...)
148 | 
149 |     def forward(self, x):
150 |         """Generates an image conditioned on an input image.
151 | 
152 |             Input
153 |             -----
154 |                 x: BS x 3 x 32 x 32
155 | 
156 |             Output
157 |             ------
158 |                 out: BS x 3 x 32 x 32
159 |         """
160 | 
161 |         out = F.relu(self.conv1(x))
162 |         out = F.relu(self.conv2(out))
163 | 
164 |         out = F.relu(self.resnet_block(out))
165 | 
166 |         out = F.relu(self.deconv1(out))
167 |         out = F.tanh(self.deconv2(out))
168 | 
169 |         return out
170 | 
171 | 
172 | class DCDiscriminator(nn.Module):
173 |     """Defines the architecture of the discriminator network.
174 |        Note: Both discriminators D_X and D_Y have the same architecture in this
175 |        assignment.
176 |     """
177 | 
178 |     def __init__(self, conv_dim=64):
179 |         super(DCDiscriminator, self).__init__()
180 | 
181 |         ###########################################
182 |         ##   FILL THIS IN: CREATE ARCHITECTURE   ##
183 |         ###########################################
184 | 
185 |         kernel_size = 4
186 |         self.conv1 = conv(3, conv_dim, kernel_size)
187 |         self.conv2 = conv(conv_dim, conv_dim * 2, kernel_size)
188 |         self.conv3 = conv(conv_dim * 2, conv_dim * 4, kernel_size)
189 |         self.conv4 = conv(conv_dim * 4, 1, kernel_size, 2, padding=0, batch_norm=False)
190 | 
191 |     def forward(self, x):
192 | 
193 |         out = F.relu(self.conv1(x))
194 |         out = F.relu(self.conv2(out))
195 |         out = F.relu(self.conv3(out))
196 | 
197 |         out = self.conv4(out).squeeze()
198 |         out = F.sigmoid(out)
199 |         return out
200 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Wiley
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/LoRA/lora_hello_world.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "private_outputs": true,
  7 |       "provenance": [],
  8 |       "gpuType": "T4",
  9 |       "authorship_tag": "ABX9TyPWmZoHOxQbf2DbGURay9eI",
 10 |       "include_colab_link": true
 11 |     },
 12 |     "kernelspec": {
 13 |       "name": "python3",
 14 |       "display_name": "Python 3"
 15 |     },
 16 |     "language_info": {
 17 |       "name": "python"
 18 |     },
 19 |     "accelerator": "GPU",
 20 |     "gpuClass": "standard"
 21 |   },
 22 |   "cells": [
 23 |     {
 24 |       "cell_type": "markdown",
 25 |       "metadata": {
 26 |         "id": "view-in-github",
 27 |         "colab_type": "text"
 28 |       },
 29 |       "source": [
 30 |         "<a href=\"https://colab.research.google.com/github/wileyw/DeepLearningDemos/blob/master/LoRA/lora_hello_world.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 31 |       ]
 32 |     },
 33 |     {
 34 |       "cell_type": "code",
 35 |       "source": [
 36 |         "!pip3 uninstall torch -y"
 37 |       ],
 38 |       "metadata": {
 39 |         "id": "EzfLQmy-c9fY"
 40 |       },
 41 |       "execution_count": null,
 42 |       "outputs": []
 43 |     },
 44 |     {
 45 |       "cell_type": "code",
 46 |       "source": [
 47 |         "!pip install torch==1.11.0"
 48 |       ],
 49 |       "metadata": {
 50 |         "id": "4-IPSPXGcXsP"
 51 |       },
 52 |       "execution_count": null,
 53 |       "outputs": []
 54 |     },
 55 |     {
 56 |       "cell_type": "code",
 57 |       "source": [
 58 |         "import torch\n",
 59 |         "print(torch.__version__)"
 60 |       ],
 61 |       "metadata": {
 62 |         "id": "Et1rcuQMeDjf"
 63 |       },
 64 |       "execution_count": null,
 65 |       "outputs": []
 66 |     },
 67 |     {
 68 |       "cell_type": "code",
 69 |       "execution_count": null,
 70 |       "metadata": {
 71 |         "id": "FK-QGuS3gsMZ"
 72 |       },
 73 |       "outputs": [],
 74 |       "source": [
 75 |         "print('test')"
 76 |       ]
 77 |     },
 78 |     {
 79 |       "cell_type": "code",
 80 |       "source": [
 81 |         "!sudo apt-get update"
 82 |       ],
 83 |       "metadata": {
 84 |         "id": "61kVAU1MirsS"
 85 |       },
 86 |       "execution_count": null,
 87 |       "outputs": []
 88 |     },
 89 |     {
 90 |       "cell_type": "code",
 91 |       "source": [
 92 |         "!sudo apt-get -y install git jq virtualenv"
 93 |       ],
 94 |       "metadata": {
 95 |         "id": "DOUW4eOIizbG"
 96 |       },
 97 |       "execution_count": null,
 98 |       "outputs": []
 99 |     },
100 |     {
101 |       "cell_type": "code",
102 |       "source": [
103 |         "!git clone https://github.com/microsoft/LoRA.git; cd LoRA"
104 |       ],
105 |       "metadata": {
106 |         "id": "DEUOudE_i4Yv"
107 |       },
108 |       "execution_count": null,
109 |       "outputs": []
110 |     },
111 |     {
112 |       "cell_type": "code",
113 |       "source": [
114 |         "!ls"
115 |       ],
116 |       "metadata": {
117 |         "id": "bokzXL8ei8UP"
118 |       },
119 |       "execution_count": null,
120 |       "outputs": []
121 |     },
122 |     {
123 |       "cell_type": "code",
124 |       "source": [
125 |         "!ls LoRA/examples/NLG"
126 |       ],
127 |       "metadata": {
128 |         "id": "WPU5_vxWjCvp"
129 |       },
130 |       "execution_count": null,
131 |       "outputs": []
132 |     },
133 |     {
134 |       "cell_type": "code",
135 |       "source": [
136 |         "!python3 -m pip install --upgrade pip"
137 |       ],
138 |       "metadata": {
139 |         "id": "7C80M_rujyM5"
140 |       },
141 |       "execution_count": null,
142 |       "outputs": []
143 |     },
144 |     {
145 |       "cell_type": "code",
146 |       "source": [
147 |         "!python3 -m pip install -r LoRA/examples/NLG/requirement.txt"
148 |       ],
149 |       "metadata": {
150 |         "id": "x8oqUQjHjmj4"
151 |       },
152 |       "execution_count": null,
153 |       "outputs": []
154 |     },
155 |     {
156 |       "cell_type": "code",
157 |       "source": [
158 |         "!python3 -m pip install transformers"
159 |       ],
160 |       "metadata": {
161 |         "id": "lS2ZmMO3klGw"
162 |       },
163 |       "execution_count": null,
164 |       "outputs": []
165 |     },
166 |     {
167 |       "cell_type": "code",
168 |       "source": [
169 |         "!python3 -m pip install spacy tqdm tensorboard progress"
170 |       ],
171 |       "metadata": {
172 |         "id": "l2uD_wpFk4gP"
173 |       },
174 |       "execution_count": null,
175 |       "outputs": []
176 |     },
177 |     {
178 |       "cell_type": "code",
179 |       "source": [
180 |         "import torch\n",
181 |         "print(torch.__version__)"
182 |       ],
183 |       "metadata": {
184 |         "id": "Q7dAMI4lkR2u"
185 |       },
186 |       "execution_count": null,
187 |       "outputs": []
188 |     },
189 |     {
190 |       "cell_type": "code",
191 |       "source": [
192 |         "%cd LoRA/examples/NLG"
193 |       ],
194 |       "metadata": {
195 |         "id": "YbDu2w3FlC9l"
196 |       },
197 |       "execution_count": null,
198 |       "outputs": []
199 |     },
200 |     {
201 |       "cell_type": "code",
202 |       "source": [
203 |         "!ls"
204 |       ],
205 |       "metadata": {
206 |         "id": "QVEol-7IlI1A"
207 |       },
208 |       "execution_count": null,
209 |       "outputs": []
210 |     },
211 |     {
212 |       "cell_type": "code",
213 |       "source": [
214 |         "!bash download_pretrained_checkpoints.sh"
215 |       ],
216 |       "metadata": {
217 |         "id": "hTaJ7ZYzlMsf"
218 |       },
219 |       "execution_count": null,
220 |       "outputs": []
221 |     },
222 |     {
223 |       "cell_type": "code",
224 |       "source": [
225 |         "!bash create_datasets.sh"
226 |       ],
227 |       "metadata": {
228 |         "id": "e2HL_HV-lQlb"
229 |       },
230 |       "execution_count": null,
231 |       "outputs": []
232 |     },
233 |     {
234 |       "cell_type": "code",
235 |       "source": [
236 |         "%cd ./eval"
237 |       ],
238 |       "metadata": {
239 |         "id": "JDyJhqaUlSsf"
240 |       },
241 |       "execution_count": null,
242 |       "outputs": []
243 |     },
244 |     {
245 |       "cell_type": "code",
246 |       "source": [
247 |         "!bash download_evalscript.sh"
248 |       ],
249 |       "metadata": {
250 |         "id": "K5Cw5xVRl6wM"
251 |       },
252 |       "execution_count": null,
253 |       "outputs": []
254 |     },
255 |     {
256 |       "cell_type": "code",
257 |       "source": [
258 |         "%cd .."
259 |       ],
260 |       "metadata": {
261 |         "id": "Smyk-DKyl8rP"
262 |       },
263 |       "execution_count": null,
264 |       "outputs": []
265 |     },
266 |     {
267 |       "cell_type": "code",
268 |       "source": [
269 |         "!ls"
270 |       ],
271 |       "metadata": {
272 |         "id": "n8rIjmXNmWj3"
273 |       },
274 |       "execution_count": null,
275 |       "outputs": []
276 |     },
277 |     {
278 |       "cell_type": "code",
279 |       "source": [
280 |         "!python3 -m pip install loralib"
281 |       ],
282 |       "metadata": {
283 |         "id": "gQX04dU0oGW1"
284 |       },
285 |       "execution_count": null,
286 |       "outputs": []
287 |     },
288 |     {
289 |       "cell_type": "code",
290 |       "source": [
291 |         "!python3 -m torch.distributed.launch --nproc_per_node=1 src/gpt2_ft.py \\\n",
292 |         "    --train_data ./data/e2e/train.jsonl \\\n",
293 |         "    --valid_data ./data/e2e/valid.jsonl \\\n",
294 |         "    --train_batch_size 4 \\\n",
295 |         "    --grad_acc 1 \\\n",
296 |         "    --valid_batch_size 2 \\\n",
297 |         "    --seq_len 512 \\\n",
298 |         "    --model_card gpt2.md \\\n",
299 |         "    --init_checkpoint ./pretrained_checkpoints/gpt2-medium-pytorch_model.bin \\\n",
300 |         "    --platform local \\\n",
301 |         "    --clip 0.0 \\\n",
302 |         "    --lr 0.0002 \\\n",
303 |         "    --weight_decay 0.01 \\\n",
304 |         "    --correct_bias \\\n",
305 |         "    --adam_beta2 0.999 \\\n",
306 |         "    --scheduler linear \\\n",
307 |         "    --warmup_step 500 \\\n",
308 |         "    --max_epoch 5 \\\n",
309 |         "    --save_interval 1000 \\\n",
310 |         "    --lora_dim 4 \\\n",
311 |         "    --lora_alpha 32 \\\n",
312 |         "    --lora_dropout 0.1 \\\n",
313 |         "    --label_smooth 0.1 \\\n",
314 |         "    --work_dir ./trained_models/GPT2_M/e2e \\\n",
315 |         "    --random_seed 110"
316 |       ],
317 |       "metadata": {
318 |         "id": "wWn2H2somOZ5"
319 |       },
320 |       "execution_count": null,
321 |       "outputs": []
322 |     }
323 |   ]
324 | }


--------------------------------------------------------------------------------
/LoRA/lora_hello_world2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "private_outputs": true,
  7 |       "provenance": [],
  8 |       "gpuType": "T4",
  9 |       "authorship_tag": "ABX9TyPfd2szk9I+NCou6SCoJGZw",
 10 |       "include_colab_link": true
 11 |     },
 12 |     "kernelspec": {
 13 |       "name": "python3",
 14 |       "display_name": "Python 3"
 15 |     },
 16 |     "language_info": {
 17 |       "name": "python"
 18 |     },
 19 |     "accelerator": "GPU",
 20 |     "gpuClass": "standard"
 21 |   },
 22 |   "cells": [
 23 |     {
 24 |       "cell_type": "markdown",
 25 |       "metadata": {
 26 |         "id": "view-in-github",
 27 |         "colab_type": "text"
 28 |       },
 29 |       "source": [
 30 |         "<a href=\"https://colab.research.google.com/github/wileyw/DeepLearningDemos/blob/master/LoRA/lora_hello_world2.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 31 |       ]
 32 |     },
 33 |     {
 34 |       "cell_type": "code",
 35 |       "source": [
 36 |         "!pip3 uninstall torch -y"
 37 |       ],
 38 |       "metadata": {
 39 |         "id": "EzfLQmy-c9fY"
 40 |       },
 41 |       "execution_count": null,
 42 |       "outputs": []
 43 |     },
 44 |     {
 45 |       "cell_type": "code",
 46 |       "source": [
 47 |         "!pip install torch==1.11.0"
 48 |       ],
 49 |       "metadata": {
 50 |         "id": "4-IPSPXGcXsP"
 51 |       },
 52 |       "execution_count": null,
 53 |       "outputs": []
 54 |     },
 55 |     {
 56 |       "cell_type": "code",
 57 |       "source": [
 58 |         "import torch\n",
 59 |         "print(torch.__version__)"
 60 |       ],
 61 |       "metadata": {
 62 |         "id": "Et1rcuQMeDjf"
 63 |       },
 64 |       "execution_count": null,
 65 |       "outputs": []
 66 |     },
 67 |     {
 68 |       "cell_type": "code",
 69 |       "execution_count": null,
 70 |       "metadata": {
 71 |         "id": "FK-QGuS3gsMZ"
 72 |       },
 73 |       "outputs": [],
 74 |       "source": [
 75 |         "print('test')"
 76 |       ]
 77 |     },
 78 |     {
 79 |       "cell_type": "code",
 80 |       "source": [
 81 |         "!sudo apt-get update"
 82 |       ],
 83 |       "metadata": {
 84 |         "id": "61kVAU1MirsS"
 85 |       },
 86 |       "execution_count": null,
 87 |       "outputs": []
 88 |     },
 89 |     {
 90 |       "cell_type": "code",
 91 |       "source": [
 92 |         "!sudo apt-get -y install git jq virtualenv"
 93 |       ],
 94 |       "metadata": {
 95 |         "id": "DOUW4eOIizbG"
 96 |       },
 97 |       "execution_count": null,
 98 |       "outputs": []
 99 |     },
100 |     {
101 |       "cell_type": "code",
102 |       "source": [
103 |         "!git clone https://github.com/microsoft/LoRA.git; cd LoRA"
104 |       ],
105 |       "metadata": {
106 |         "id": "DEUOudE_i4Yv"
107 |       },
108 |       "execution_count": null,
109 |       "outputs": []
110 |     },
111 |     {
112 |       "cell_type": "code",
113 |       "source": [
114 |         "!ls"
115 |       ],
116 |       "metadata": {
117 |         "id": "bokzXL8ei8UP"
118 |       },
119 |       "execution_count": null,
120 |       "outputs": []
121 |     },
122 |     {
123 |       "cell_type": "code",
124 |       "source": [
125 |         "!ls LoRA/examples/NLG"
126 |       ],
127 |       "metadata": {
128 |         "id": "WPU5_vxWjCvp"
129 |       },
130 |       "execution_count": null,
131 |       "outputs": []
132 |     },
133 |     {
134 |       "cell_type": "code",
135 |       "source": [
136 |         "!python3 -m pip install --upgrade pip"
137 |       ],
138 |       "metadata": {
139 |         "id": "7C80M_rujyM5"
140 |       },
141 |       "execution_count": null,
142 |       "outputs": []
143 |     },
144 |     {
145 |       "cell_type": "code",
146 |       "source": [
147 |         "!python3 -m pip install -r LoRA/examples/NLG/requirement.txt"
148 |       ],
149 |       "metadata": {
150 |         "id": "x8oqUQjHjmj4"
151 |       },
152 |       "execution_count": null,
153 |       "outputs": []
154 |     },
155 |     {
156 |       "cell_type": "code",
157 |       "source": [
158 |         "!python3 -m pip install transformers"
159 |       ],
160 |       "metadata": {
161 |         "id": "lS2ZmMO3klGw"
162 |       },
163 |       "execution_count": null,
164 |       "outputs": []
165 |     },
166 |     {
167 |       "cell_type": "code",
168 |       "source": [
169 |         "!python3 -m pip install spacy tqdm tensorboard progress"
170 |       ],
171 |       "metadata": {
172 |         "id": "l2uD_wpFk4gP"
173 |       },
174 |       "execution_count": null,
175 |       "outputs": []
176 |     },
177 |     {
178 |       "cell_type": "code",
179 |       "source": [
180 |         "import torch\n",
181 |         "print(torch.__version__)"
182 |       ],
183 |       "metadata": {
184 |         "id": "Q7dAMI4lkR2u"
185 |       },
186 |       "execution_count": null,
187 |       "outputs": []
188 |     },
189 |     {
190 |       "cell_type": "code",
191 |       "source": [
192 |         "%cd LoRA/examples/NLG"
193 |       ],
194 |       "metadata": {
195 |         "id": "YbDu2w3FlC9l"
196 |       },
197 |       "execution_count": null,
198 |       "outputs": []
199 |     },
200 |     {
201 |       "cell_type": "code",
202 |       "source": [
203 |         "!ls"
204 |       ],
205 |       "metadata": {
206 |         "id": "QVEol-7IlI1A"
207 |       },
208 |       "execution_count": null,
209 |       "outputs": []
210 |     },
211 |     {
212 |       "cell_type": "code",
213 |       "source": [
214 |         "!bash download_pretrained_checkpoints.sh"
215 |       ],
216 |       "metadata": {
217 |         "id": "hTaJ7ZYzlMsf"
218 |       },
219 |       "execution_count": null,
220 |       "outputs": []
221 |     },
222 |     {
223 |       "cell_type": "code",
224 |       "source": [
225 |         "!bash create_datasets2.sh"
226 |       ],
227 |       "metadata": {
228 |         "id": "e2HL_HV-lQlb"
229 |       },
230 |       "execution_count": null,
231 |       "outputs": []
232 |     },
233 |     {
234 |       "cell_type": "code",
235 |       "source": [
236 |         "%cd ./eval"
237 |       ],
238 |       "metadata": {
239 |         "id": "JDyJhqaUlSsf"
240 |       },
241 |       "execution_count": null,
242 |       "outputs": []
243 |     },
244 |     {
245 |       "cell_type": "code",
246 |       "source": [
247 |         "!bash download_evalscript.sh"
248 |       ],
249 |       "metadata": {
250 |         "id": "K5Cw5xVRl6wM"
251 |       },
252 |       "execution_count": null,
253 |       "outputs": []
254 |     },
255 |     {
256 |       "cell_type": "code",
257 |       "source": [
258 |         "%cd .."
259 |       ],
260 |       "metadata": {
261 |         "id": "Smyk-DKyl8rP"
262 |       },
263 |       "execution_count": null,
264 |       "outputs": []
265 |     },
266 |     {
267 |       "cell_type": "code",
268 |       "source": [
269 |         "!ls"
270 |       ],
271 |       "metadata": {
272 |         "id": "n8rIjmXNmWj3"
273 |       },
274 |       "execution_count": null,
275 |       "outputs": []
276 |     },
277 |     {
278 |       "cell_type": "code",
279 |       "source": [
280 |         "!python3 -m pip install loralib"
281 |       ],
282 |       "metadata": {
283 |         "id": "gQX04dU0oGW1"
284 |       },
285 |       "execution_count": null,
286 |       "outputs": []
287 |     },
288 |     {
289 |       "cell_type": "code",
290 |       "source": [
291 |         "!ls -l data/e2e"
292 |       ],
293 |       "metadata": {
294 |         "id": "5BvLJ03yZx_M"
295 |       },
296 |       "execution_count": null,
297 |       "outputs": []
298 |     },
299 |     {
300 |       "cell_type": "code",
301 |       "source": [
302 |         "!python3 -m torch.distributed.launch --nproc_per_node=1 src/gpt2_ft.py \\\n",
303 |         "    --train_data ./data/e2e/train.jsonl \\\n",
304 |         "    --valid_data ./data/e2e/valid.jsonl \\\n",
305 |         "    --train_batch_size 4 \\\n",
306 |         "    --grad_acc 1 \\\n",
307 |         "    --valid_batch_size 2 \\\n",
308 |         "    --seq_len 512 \\\n",
309 |         "    --model_card gpt2.md \\\n",
310 |         "    --init_checkpoint ./pretrained_checkpoints/gpt2-medium-pytorch_model.bin \\\n",
311 |         "    --platform local \\\n",
312 |         "    --clip 0.0 \\\n",
313 |         "    --lr 0.0002 \\\n",
314 |         "    --weight_decay 0.01 \\\n",
315 |         "    --correct_bias \\\n",
316 |         "    --adam_beta2 0.999 \\\n",
317 |         "    --scheduler linear \\\n",
318 |         "    --warmup_step 500 \\\n",
319 |         "    --max_epoch 5 \\\n",
320 |         "    --save_interval 1000 \\\n",
321 |         "    --lora_dim 4 \\\n",
322 |         "    --lora_alpha 32 \\\n",
323 |         "    --lora_dropout 0.1 \\\n",
324 |         "    --label_smooth 0.1 \\\n",
325 |         "    --work_dir ./trained_models/GPT2_M/e2e \\\n",
326 |         "    --random_seed 110"
327 |       ],
328 |       "metadata": {
329 |         "id": "wWn2H2somOZ5"
330 |       },
331 |       "execution_count": null,
332 |       "outputs": []
333 |     }
334 |   ]
335 | }


--------------------------------------------------------------------------------
/MachineTranslation/README.md:
--------------------------------------------------------------------------------
 1 | # Machine Translation
 2 | 
 3 | ```
 4 | # Download the dataset
 5 | wget https://www.manythings.org/anki/fra-eng.zip
 6 | 
 7 | unzip fra-eng.zip
 8 | ```
 9 | 
10 | ## Tutorials
11 | - https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html#
12 |     - [Link to Colab Notebook](https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/a60617788061539b5449701ae76aee56/seq2seq_translation_tutorial.ipynb)
13 |     - [TorchText and nn.Transformer](https://pytorch.org/tutorials/beginner/transformer_tutorial.html)
14 | - https://towardsdatascience.com/neural-machine-translation-15ecf6b0b
15 | - https://medium.com/analytics-vidhya/a-must-read-nlp-tutorial-on-neural-machine-translation-the-technique-powering-google-translate-c5c8d97d7587
16 | 
17 | ## Datasets
18 | - [Downloads](https://tatoeba.org/eng/downloads)
19 | - [Splitting language pairs into individual text files](https://www.manythings.org/anki/)
20 | 


--------------------------------------------------------------------------------
/MachineTranslation/ReferenceExample.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "ReferenceExample.ipynb",
  7 |       "provenance": [],
  8 |       "authorship_tag": "ABX9TyNfR2AxE7LGpIZL/UWgBjMn",
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "accelerator": "GPU"
 16 |   },
 17 |   "cells": [
 18 |     {
 19 |       "cell_type": "markdown",
 20 |       "metadata": {
 21 |         "id": "view-in-github",
 22 |         "colab_type": "text"
 23 |       },
 24 |       "source": [
 25 |         "<a href=\"https://colab.research.google.com/github/wileyw/DeepLearningDemos/blob/master/MachineTranslation/ReferenceExample.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 26 |       ]
 27 |     },
 28 |     {
 29 |       "cell_type": "code",
 30 |       "metadata": {
 31 |         "id": "CsFfALRqWl-r"
 32 |       },
 33 |       "source": [
 34 |         "# https://github.com/andrewpeng02/transformer-translation\n",
 35 |         "!git clone https://github.com/andrewpeng02/transformer-translation.git"
 36 |       ],
 37 |       "execution_count": null,
 38 |       "outputs": []
 39 |     },
 40 |     {
 41 |       "cell_type": "code",
 42 |       "metadata": {
 43 |         "id": "15vsyh8nXIyo"
 44 |       },
 45 |       "source": [
 46 |         "!python3 -m pip install --upgrade pip\n",
 47 |         "!python3 -m pip install click==7.0\n",
 48 |         "!python3 -m pip install dill==0.3.1.1 --use-feature=2020-resolver\n",
 49 |         "!python3 -m pip install einops==0.1.0\n",
 50 |         "!python3 -m pip install en-core-web-sm==2.1.0\n",
 51 |         "!python3 -m pip install fr-core-news-sm==2.1.0\n",
 52 |         "!python3 -m pip install joblib==0.13.2\n",
 53 |         "!python3 -m pip install torchtext==0.4.0"
 54 |       ],
 55 |       "execution_count": null,
 56 |       "outputs": []
 57 |     },
 58 |     {
 59 |       "cell_type": "code",
 60 |       "metadata": {
 61 |         "id": "R4pZuNrQX_Gq"
 62 |       },
 63 |       "source": [
 64 |         "!ls transformer-translation/data\n",
 65 |         "%cd transformer-translation/data\n",
 66 |         "!wget http://www.manythings.org/anki/fra-eng.zip\n",
 67 |         "!unzip fra-eng.zip\n",
 68 |         "%cd ../../\n",
 69 |         "!ls\n"
 70 |       ],
 71 |       "execution_count": null,
 72 |       "outputs": []
 73 |     },
 74 |     {
 75 |       "cell_type": "code",
 76 |       "metadata": {
 77 |         "id": "0kL2dvTwZxW1"
 78 |       },
 79 |       "source": [
 80 |         "cd transformer-translation"
 81 |       ],
 82 |       "execution_count": null,
 83 |       "outputs": []
 84 |     },
 85 |     {
 86 |       "cell_type": "code",
 87 |       "metadata": {
 88 |         "id": "bS3Kkzrlbmzr"
 89 |       },
 90 |       "source": [
 91 |         "!python3 -m spacy download en\n",
 92 |         "!python3 -m spacy download fr"
 93 |       ],
 94 |       "execution_count": null,
 95 |       "outputs": []
 96 |     },
 97 |     {
 98 |       "cell_type": "code",
 99 |       "metadata": {
100 |         "id": "rAyZTlUKYuun"
101 |       },
102 |       "source": [
103 |         "!python3 process-tatoeba-data.py\n",
104 |         "!python3 preprocess-data.py"
105 |       ],
106 |       "execution_count": null,
107 |       "outputs": []
108 |     },
109 |     {
110 |       "cell_type": "code",
111 |       "metadata": {
112 |         "id": "DH3jMH4pZM1d"
113 |       },
114 |       "source": [
115 |         "!ls\n",
116 |         "!echo -----------------\n",
117 |         "!ls data/processed\n",
118 |         "!echo -----------------\n",
119 |         "!ls -l data/processed/fr"
120 |       ],
121 |       "execution_count": null,
122 |       "outputs": []
123 |     },
124 |     {
125 |       "cell_type": "code",
126 |       "metadata": {
127 |         "id": "wMtzQQUQb6Bw"
128 |       },
129 |       "source": [
130 |         "!python3 train.py"
131 |       ],
132 |       "execution_count": null,
133 |       "outputs": []
134 |     },
135 |     {
136 |       "cell_type": "code",
137 |       "metadata": {
138 |         "id": "xse_C7t_prc3"
139 |       },
140 |       "source": [
141 |         "!python3 translate-sentence.py"
142 |       ],
143 |       "execution_count": null,
144 |       "outputs": []
145 |     }
146 |   ]
147 | }


--------------------------------------------------------------------------------
/NeuralArchitectureSearch/Autokeras.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Autokeras.ipynb",
  7 |       "provenance": [],
  8 |       "authorship_tag": "ABX9TyMXe9x8u7a1Wy8NJkVoxkF6",
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "accelerator": "GPU"
 16 |   },
 17 |   "cells": [
 18 |     {
 19 |       "cell_type": "markdown",
 20 |       "metadata": {
 21 |         "id": "view-in-github",
 22 |         "colab_type": "text"
 23 |       },
 24 |       "source": [
 25 |         "<a href=\"https://colab.research.google.com/github/wileyw/DeepLearningDemos/blob/master/NeuralArchitectureSearch/Autokeras.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 26 |       ]
 27 |     },
 28 |     {
 29 |       "cell_type": "markdown",
 30 |       "metadata": {
 31 |         "id": "tKCAuX00SO_k",
 32 |         "colab_type": "text"
 33 |       },
 34 |       "source": [
 35 |         "# AutoKeras\n",
 36 |         "\n",
 37 |         "[Paper](https://arxiv.org/pdf/1806.10282.pdf)"
 38 |       ]
 39 |     },
 40 |     {
 41 |       "cell_type": "markdown",
 42 |       "metadata": {
 43 |         "id": "0fQ17-24Vjbb",
 44 |         "colab_type": "text"
 45 |       },
 46 |       "source": [
 47 |         "In this example, we get an accuracy of 91.13% with only 3 trials and 3 epochs. The above minimal code AutoKeras example shows how simple and easy AutoKeras is to use.\n"
 48 |       ]
 49 |     },
 50 |     {
 51 |       "cell_type": "markdown",
 52 |       "metadata": {
 53 |         "id": "JeGbfKc8SAjv",
 54 |         "colab_type": "text"
 55 |       },
 56 |       "source": [
 57 |         "# Questions\n",
 58 |         "## 1. What is Edit-Distance in a Neural Network?\n",
 59 |         "Edit-distance is the number of operations needed to morph one architecture into another architecture."
 60 |       ]
 61 |     },
 62 |     {
 63 |       "cell_type": "code",
 64 |       "metadata": {
 65 |         "id": "b4q_-v76Lht7",
 66 |         "colab_type": "code",
 67 |         "colab": {}
 68 |       },
 69 |       "source": [
 70 |         "!python3 -m pip install autokeras\n",
 71 |         "!pip install tensorflow-gpu==2.1.0"
 72 |       ],
 73 |       "execution_count": 0,
 74 |       "outputs": []
 75 |     },
 76 |     {
 77 |       "cell_type": "code",
 78 |       "metadata": {
 79 |         "id": "s9icubivMy5Y",
 80 |         "colab_type": "code",
 81 |         "colab": {}
 82 |       },
 83 |       "source": [
 84 |         "%tensorflow_version 2.x"
 85 |       ],
 86 |       "execution_count": 0,
 87 |       "outputs": []
 88 |     },
 89 |     {
 90 |       "cell_type": "code",
 91 |       "metadata": {
 92 |         "id": "iDaqLMGGMj_y",
 93 |         "colab_type": "code",
 94 |         "colab": {}
 95 |       },
 96 |       "source": [
 97 |         "import tensorflow as tf\n",
 98 |         "print(tf.__version__)"
 99 |       ],
100 |       "execution_count": 0,
101 |       "outputs": []
102 |     },
103 |     {
104 |       "cell_type": "code",
105 |       "metadata": {
106 |         "id": "K1bwRM-ELrD1",
107 |         "colab_type": "code",
108 |         "colab": {}
109 |       },
110 |       "source": [
111 |         "#from tensorflow.keras.datasets import mnist\n",
112 |         "from tensorflow.keras.datasets import fashion_mnist as mnist\n",
113 |         "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
114 |         "print(x_train.shape) # (60000, 28, 28)\n",
115 |         "print(y_train.shape) # (60000,)\n",
116 |         "print(y_train[:3]) # array([7, 2, 1], dtype=uint8)"
117 |       ],
118 |       "execution_count": 0,
119 |       "outputs": []
120 |     },
121 |     {
122 |       "cell_type": "code",
123 |       "metadata": {
124 |         "id": "qheYaZkjL8kv",
125 |         "colab_type": "code",
126 |         "colab": {}
127 |       },
128 |       "source": [
129 |         "import autokeras as ak\n",
130 |         "\n",
131 |         "# Initialize the image classifier.\n",
132 |         "clf = ak.ImageClassifier(max_trials=3) # It tries 3 different models.\n",
133 |         "# Feed the image classifier with training data.\n",
134 |         "clf.fit(x_train, y_train,epochs=3)"
135 |       ],
136 |       "execution_count": 0,
137 |       "outputs": []
138 |     },
139 |     {
140 |       "cell_type": "code",
141 |       "metadata": {
142 |         "id": "h919aZ_7L-yX",
143 |         "colab_type": "code",
144 |         "colab": {}
145 |       },
146 |       "source": [
147 |         "# Predict with the best model.\n",
148 |         "predicted_y = clf.predict(x_test)\n",
149 |         "print(predicted_y)"
150 |       ],
151 |       "execution_count": 0,
152 |       "outputs": []
153 |     },
154 |     {
155 |       "cell_type": "code",
156 |       "metadata": {
157 |         "id": "djrTIS6kMAk-",
158 |         "colab_type": "code",
159 |         "colab": {}
160 |       },
161 |       "source": [
162 |         "# Evaluate the best model with testing data.\n",
163 |         "print(clf.evaluate(x_test, y_test))\n",
164 |         "model = clf.export_model()\n",
165 |         "print(model.summary())"
166 |       ],
167 |       "execution_count": 0,
168 |       "outputs": []
169 |     },
170 |     {
171 |       "cell_type": "markdown",
172 |       "metadata": {
173 |         "id": "R7fcbu9WT_7Z",
174 |         "colab_type": "text"
175 |       },
176 |       "source": [
177 |         "# Resources\n",
178 |         "- [Autokeras paper](https://arxiv.org/pdf/1806.10282.pdf)\n",
179 |         "- [Autokeras website](https://autokeras.com/)\n",
180 |         "- [Custom Autokeras Model](https://autokeras.com/tutorial/customized/)"
181 |       ]
182 |     }
183 |   ]
184 | }


--------------------------------------------------------------------------------
/NeuralArchitectureSearch/NeuralArchitectureSearch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "NeuralArchitectureSearch",
  7 |       "provenance": [],
  8 |       "authorship_tag": "ABX9TyP1Xhoo+Gdh4xH/x0QaPkqg",
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     }
 15 |   },
 16 |   "cells": [
 17 |     {
 18 |       "cell_type": "markdown",
 19 |       "metadata": {
 20 |         "id": "view-in-github",
 21 |         "colab_type": "text"
 22 |       },
 23 |       "source": [
 24 |         "<a href=\"https://colab.research.google.com/github/wileyw/DeepLearningDemos/blob/master/NeuralArchitectureSearch/NeuralArchitectureSearch.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 25 |       ]
 26 |     },
 27 |     {
 28 |       "cell_type": "markdown",
 29 |       "metadata": {
 30 |         "id": "8k2XRoS1zFOk",
 31 |         "colab_type": "text"
 32 |       },
 33 |       "source": [
 34 |         "# Neural Architecture Search\n",
 35 |         "\n",
 36 |         "## EfficientNet\n",
 37 |         "\n",
 38 |         "### What's the difference between MBConv1 versus MBConv6?\n",
 39 |         "- MBConv1 expands the number of channels by a factor of 1.\n",
 40 |         "- MBConv6 expands the number of channels by a factor of 6.\n",
 41 |         "\n",
 42 |         "[MBConv6](https://machinethink.net/blog/mobile-architectures/)\n",
 43 |         "\n",
 44 |         "## Do you need to do your own Neural Architecture Search?\n",
 45 |         "- No, you can use transfer learning to apply an existing neural net architecture and retrain the network on your own dataset\n",
 46 |         "- [AutoML and Neural Architecture Search](https://towardsdatascience.com/everything-you-need-to-know-about-automl-and-neural-architecture-search-8db1863682bf)\n",
 47 |         "\n",
 48 |         "\n",
 49 |         "## Goals\n",
 50 |         "1. Understand EfficientNet\n",
 51 |         "2. Understand why EfficientNet is better than Mobilenet V3, could be a blog post\n",
 52 |         "3. PyTorch EfficientDet\n",
 53 |         "4. Run EfficientNet interence\n",
 54 |         "5. Does EfficientNet produce a different network depending on the dataset?\n",
 55 |         "6. How does MNas work?"
 56 |       ]
 57 |     },
 58 |     {
 59 |       "cell_type": "markdown",
 60 |       "metadata": {
 61 |         "id": "ZCTfU5hmxFiW",
 62 |         "colab_type": "text"
 63 |       },
 64 |       "source": [
 65 |         "# Talking Points\n",
 66 |         "## 1. What are Inverted Residual Blocks?\n",
 67 |         "- 1x1 Convolution Expands the Filter Dimensions --> 3x3 Depthwise Convolution --> 1x1 Separable Convolution Reduces the number of Filter Dimensions\n",
 68 |         "- NOTE: The Blocks used for Skip Connections do not have non-linearities (e.g. activation functions)\n",
 69 |         "\n",
 70 |         "# How to build EfficientNet\n",
 71 |         "## Step 1.\n",
 72 |         "Find the baseline EfficientNet network using Neural Architecture Search\n",
 73 |         "\n",
 74 |         "## Step 2.\n",
 75 |         "Scale up the baseline network using grid search"
 76 |       ]
 77 |     },
 78 |     {
 79 |       "cell_type": "markdown",
 80 |       "metadata": {
 81 |         "id": "PMA8zOk-5kPD",
 82 |         "colab_type": "text"
 83 |       },
 84 |       "source": [
 85 |         "# Neural Architecture Search\n",
 86 |         "## Tunable Parameters\n",
 87 |         "- Depth, Width (Channels), Resolutions"
 88 |       ]
 89 |     },
 90 |     {
 91 |       "cell_type": "code",
 92 |       "metadata": {
 93 |         "id": "ewrzyHGT3i2x",
 94 |         "colab_type": "code",
 95 |         "colab": {}
 96 |       },
 97 |       "source": [
 98 |         "!python3 -m pip install efficientnet_pytorch\n",
 99 |         "!python3 -m pip install torchsummary"
100 |       ],
101 |       "execution_count": 0,
102 |       "outputs": []
103 |     },
104 |     {
105 |       "cell_type": "code",
106 |       "metadata": {
107 |         "id": "9FIc6zqIEQzY",
108 |         "colab_type": "code",
109 |         "colab": {}
110 |       },
111 |       "source": [
112 |         "!git clone https://github.com/lukemelas/EfficientNet-PyTorch"
113 |       ],
114 |       "execution_count": 0,
115 |       "outputs": []
116 |     },
117 |     {
118 |       "cell_type": "code",
119 |       "metadata": {
120 |         "id": "BicJzIuGESzR",
121 |         "colab_type": "code",
122 |         "colab": {}
123 |       },
124 |       "source": [
125 |         "!ls EfficientNet-PyTorch/examples\n",
126 |         "!ls EfficientNet-PyTorch/examples/simple/\n",
127 |         "!cp EfficientNet-PyTorch/examples/simple/img.jpg .\n",
128 |         "!cp EfficientNet-PyTorch/examples/simple/labels_map.txt ."
129 |       ],
130 |       "execution_count": 0,
131 |       "outputs": []
132 |     },
133 |     {
134 |       "cell_type": "code",
135 |       "metadata": {
136 |         "id": "g_0rOBICy6zP",
137 |         "colab_type": "code",
138 |         "colab": {}
139 |       },
140 |       "source": [
141 |         "import torch\n",
142 |         "import torchsummary\n",
143 |         "from efficientnet_pytorch import EfficientNet\n",
144 |         "\n",
145 |         "model = EfficientNet.from_name('efficientnet-b0')\n",
146 |         "\n",
147 |         "print(model)\n",
148 |         "torchsummary.summary(model, input_size=(3, 224, 224))"
149 |       ],
150 |       "execution_count": 0,
151 |       "outputs": []
152 |     },
153 |     {
154 |       "cell_type": "code",
155 |       "metadata": {
156 |         "id": "pbSWEHqzEFrn",
157 |         "colab_type": "code",
158 |         "colab": {}
159 |       },
160 |       "source": [
161 |         "import json\n",
162 |         "from PIL import Image\n",
163 |         "import torch\n",
164 |         "from torchvision import transforms\n",
165 |         "\n",
166 |         "from efficientnet_pytorch import EfficientNet\n",
167 |         "model = EfficientNet.from_pretrained('efficientnet-b0')\n",
168 |         "\n",
169 |         "# Preprocess image\n",
170 |         "tfms = transforms.Compose([transforms.Resize(224), transforms.ToTensor(),\n",
171 |         "    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),])\n",
172 |         "img = tfms(Image.open('img.jpg')).unsqueeze(0)\n",
173 |         "print(img.shape) # torch.Size([1, 3, 224, 224])\n",
174 |         "\n",
175 |         "# Load ImageNet class names\n",
176 |         "labels_map = json.load(open('labels_map.txt'))\n",
177 |         "labels_map = [labels_map[str(i)] for i in range(1000)]\n",
178 |         "\n",
179 |         "# Classify\n",
180 |         "model.eval()\n",
181 |         "with torch.no_grad():\n",
182 |         "    outputs = model(img)\n",
183 |         "\n",
184 |         "# Print predictions\n",
185 |         "print('-----')\n",
186 |         "for idx in torch.topk(outputs, k=5).indices.squeeze(0).tolist():\n",
187 |         "    prob = torch.softmax(outputs, dim=1)[0, idx].item()\n",
188 |         "    print('{label:<75} ({p:.2f}%)'.format(label=labels_map[idx], p=prob*100))"
189 |       ],
190 |       "execution_count": 0,
191 |       "outputs": []
192 |     },
193 |     {
194 |       "cell_type": "markdown",
195 |       "metadata": {
196 |         "id": "7WzcGARdzZjW",
197 |         "colab_type": "text"
198 |       },
199 |       "source": [
200 |         "# Resources\n",
201 |         "\n",
202 |         "* [EfficientNet Paper](https://arxiv.org/pdf/1905.11946.pdf)\n",
203 |         "* [EfficientDet Paper](https://arxiv.org/pdf/1911.09070.pdf)\n",
204 |         "* [Learning OpenCV EfficientNet](https://www.learnopencv.com/efficientnet-theory-code/)\n",
205 |         "* [Tensorflow EfficientNet Implementation](https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py)\n",
206 |         "* [PyTorch Implementation](https://github.com/lukemelas/EfficientNet-PyTorch)\n",
207 |         "* [MnasFPN : Learning Latency-aware Pyramid Architecture\n",
208 |         "for Object Detection on Mobile Devices](https://arxiv.org/pdf/1912.01106.pdf)\n",
209 |         "* [Yolo V4](https://arxiv.org/pdf/2004.10934.pdf)\n",
210 |         "* [EffResNetComparison](https://colab.research.google.com/github/rwightman/pytorch-image-models/blob/master/notebooks/EffResNetComparison.ipynb#scrollTo=SKA-MF-yShDW)\n"
211 |       ]
212 |     }
213 |   ]
214 | }


--------------------------------------------------------------------------------
/ProphetCode/main.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Quick Start
 3 | https://facebook.github.io/prophet/docs/quick_start.html#python-api
 4 | """
 5 | 
 6 | import pandas as pd
 7 | from prophet import Prophet
 8 | 
 9 | def main():
10 |     print('main')
11 |     # Python
12 |     df = pd.read_csv('https://raw.githubusercontent.com/facebook/prophet/main/examples/example_wp_log_peyton_manning.csv')
13 |     print(df.columns)
14 |     if True:
15 |         df = pd.read_csv('archive/GlobalLandTemperaturesByMajorCity.csv')
16 |         df = df.rename(columns={"dt": "ds", "AverageTemperature": "y"})
17 |         df = df[df['City'] == "New York"]
18 |         #df = df[df['City'] == "Sydney"]
19 |         #df = df[df['City'] == "Cape Town"]
20 |         #df = df[df['City'] == "New Delhi"]
21 |         df = df[df.y.notnull()]
22 |         print(df.columns)
23 |         print(df.head())
24 | 
25 |     # Python
26 |     m = Prophet()
27 |     m.fit(df)
28 | 
29 |     future = m.make_future_dataframe(periods=365)
30 |     print(future.tail())
31 | 
32 |     forecast = m.predict(future)
33 |     print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
34 | 
35 |     fig1 = m.plot(forecast)
36 |     fig1.savefig('test.png')
37 | 
38 |     fig2 = m.plot_components(forecast)
39 |     fig2.savefig('test2.png')
40 | 
41 | 
42 | if __name__ == '__main__':
43 |     main()
44 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # DeepLearningDemos
 2 | We're a team of Machine Learning Engineers that blog about advances in Deep Learning.
 3 | 
 4 | # 1. Blog Posts
 5 | 1. [Vision Transformers for Computer Vision](https://deepganteam.medium.com/vision-transformers-for-computer-vision-9f70418fe41a) (2021-09-07)
 6 | 1. [Few Shot Learning from Scratch](https://deepganteam.medium.com/few-shot-learning-from-scratch-a3422b111e05) (2021-07-06)
 7 | 1. [Basic Policy Gradients with the Reparameterization Trick](https://deepganteam.medium.com/basic-policy-gradients-with-the-reparameterization-trick-24312c7dbcd) (2021-04-13)
 8 | 1. [A Little Rusty? ML Refresher on Linear Regression](https://deepganteam.medium.com/a-little-rusty-ml-refresher-on-linear-regression-76ef4afc6474) (2021-02-19)
 9 | 1. [Language Translation with Transformers in PyTorch](https://chatbotslife.com/language-translation-with-transformers-in-pytorch-ff8b32cf848?gi=df7018b86372) (2021-01-22)
10 | 1. [What are Transformers?](https://medium.com/@deepganteam/what-are-transformers-b687f2bcdf49) (2020-09-02)
11 | 1. [Searching for Better Neural Architecture Search](https://medium.com/@deepganteam/searching-for-better-neural-architecture-search-ea91338caa11) (2020-06-17)
12 | 1. [Making SinGAN Double](https://medium.com/@deepganteam/making-singan-double-8568490b572e) (2020-04-15)
13 | 
14 | # 2. Notebooks
15 | 1. [AutoKeras Notebook](https://github.com/wileyw/DeepLearningDemos/blob/master/NeuralArchitectureSearch/Autokeras.ipynb) (2020-06-11)
16 | 
17 | # 3. Extra
18 | ## Project Ideas
19 | 1. https://www.cs.toronto.edu/~graves/handwriting.html
20 | 
21 | ## Specific Interesting Architectures
22 | Note:
23 | 1. Hour glass
24 | 1. U-net
25 | 1. Dense-net/Resnet
26 | 1. SqueezeNet
27 | 
28 | 


--------------------------------------------------------------------------------
/RL_from_human_feedback/RL_from_human_feedback.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "authorship_tag": "ABX9TyNv29WPr2hSjlJeI/CBi3nZ",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     },
 17 |     "accelerator": "GPU",
 18 |     "gpuClass": "standard"
 19 |   },
 20 |   "cells": [
 21 |     {
 22 |       "cell_type": "markdown",
 23 |       "metadata": {
 24 |         "id": "view-in-github",
 25 |         "colab_type": "text"
 26 |       },
 27 |       "source": [
 28 |         "<a href=\"https://colab.research.google.com/github/wileyw/DeepLearningDemos/blob/master/RL_from_human_feedback/RL_from_human_feedback.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 29 |       ]
 30 |     },
 31 |     {
 32 |       "cell_type": "code",
 33 |       "source": [
 34 |         "!git clone https://github.com/tatsu-lab/stanford_alpaca.git"
 35 |       ],
 36 |       "metadata": {
 37 |         "id": "Mcfmw95BVnkk"
 38 |       },
 39 |       "execution_count": null,
 40 |       "outputs": []
 41 |     },
 42 |     {
 43 |       "cell_type": "code",
 44 |       "source": [
 45 |         "%%python3 -m pip install -r stanford_alpaca/requirements.txt"
 46 |       ],
 47 |       "metadata": {
 48 |         "id": "6VQWwYyBWGQP"
 49 |       },
 50 |       "execution_count": null,
 51 |       "outputs": []
 52 |     },
 53 |     {
 54 |       "cell_type": "code",
 55 |       "source": [
 56 |         "!git clone https://github.com/huggingface/transformers.git"
 57 |       ],
 58 |       "metadata": {
 59 |         "id": "ghX-6yKfWoGe"
 60 |       },
 61 |       "execution_count": null,
 62 |       "outputs": []
 63 |     },
 64 |     {
 65 |       "cell_type": "code",
 66 |       "source": [
 67 |         "%cd transformers"
 68 |       ],
 69 |       "metadata": {
 70 |         "id": "E8iEJocrWtyz"
 71 |       },
 72 |       "execution_count": null,
 73 |       "outputs": []
 74 |     },
 75 |     {
 76 |       "cell_type": "code",
 77 |       "execution_count": null,
 78 |       "metadata": {
 79 |         "id": "I9OAmFjMQNYF"
 80 |       },
 81 |       "outputs": [],
 82 |       "source": [
 83 |         "import torch\n",
 84 |         "\n",
 85 |         "import sys\n",
 86 |         "\n",
 87 |         "sys.version"
 88 |       ]
 89 |     },
 90 |     {
 91 |       "cell_type": "code",
 92 |       "source": [
 93 |         "%cd .."
 94 |       ],
 95 |       "metadata": {
 96 |         "id": "0G3yWBv_XgrN"
 97 |       },
 98 |       "execution_count": null,
 99 |       "outputs": []
100 |     },
101 |     {
102 |       "cell_type": "code",
103 |       "source": [
104 |         "%ls stanford_alpaca/\n"
105 |       ],
106 |       "metadata": {
107 |         "id": "41QrI6gNQQLS"
108 |       },
109 |       "execution_count": null,
110 |       "outputs": []
111 |     },
112 |     {
113 |       "cell_type": "code",
114 |       "source": [
115 |         "%cd transformers"
116 |       ],
117 |       "metadata": {
118 |         "id": "DIjJ0b8WYy5J"
119 |       },
120 |       "execution_count": null,
121 |       "outputs": []
122 |     },
123 |     {
124 |       "cell_type": "code",
125 |       "source": [
126 |         "!python src/transformers/models/llama/convert_llama_weights_to_hf.py \\\n",
127 |         "    --input_dir /path/to/downloaded/llama/weights \\\n",
128 |         "    --model_size 7B \\\n",
129 |         "    --output_dir /output/path"
130 |       ],
131 |       "metadata": {
132 |         "id": "Y4hVfUwoZDJF"
133 |       },
134 |       "execution_count": null,
135 |       "outputs": []
136 |     },
137 |     {
138 |       "cell_type": "code",
139 |       "source": [
140 |         "%cd stanford_alpaca"
141 |       ],
142 |       "metadata": {
143 |         "id": "HXpP37VVXmd-"
144 |       },
145 |       "execution_count": null,
146 |       "outputs": []
147 |     },
148 |     {
149 |       "cell_type": "code",
150 |       "source": [
151 |         "mkdir output"
152 |       ],
153 |       "metadata": {
154 |         "id": "Rph7DmASX4wU"
155 |       },
156 |       "execution_count": null,
157 |       "outputs": []
158 |     },
159 |     {
160 |       "cell_type": "code",
161 |       "source": [
162 |         "!torchrun --nproc_per_node=1 --master_port=<your_random_port> train.py \\\n",
163 |         "    --model_name_or_path <your_path_to_hf_converted_llama_ckpt_and_tokenizer> \\\n",
164 |         "    --data_path ./alpaca_data.json \\\n",
165 |         "    --bf16 True \\\n",
166 |         "    --output_dir /content/stanford_alpaca \\\n",
167 |         "    --num_train_epochs 3 \\\n",
168 |         "    --per_device_train_batch_size 4 \\\n",
169 |         "    --per_device_eval_batch_size 4 \\\n",
170 |         "    --gradient_accumulation_steps 8 \\\n",
171 |         "    --evaluation_strategy \"no\" \\\n",
172 |         "    --save_strategy \"steps\" \\\n",
173 |         "    --save_steps 2000 \\\n",
174 |         "    --save_total_limit 1 \\\n",
175 |         "    --learning_rate 2e-5 \\\n",
176 |         "    --weight_decay 0. \\\n",
177 |         "    --warmup_ratio 0.03 \\\n",
178 |         "    --lr_scheduler_type \"cosine\" \\\n",
179 |         "    --logging_steps 1 \\\n",
180 |         "    --fsdp \"full_shard auto_wrap\" \\\n",
181 |         "    --fsdp_transformer_layer_cls_to_wrap 'LLaMADecoderLayer' \\\n",
182 |         "    --tf32 True"
183 |       ],
184 |       "metadata": {
185 |         "id": "U5NIKbFvXpPW"
186 |       },
187 |       "execution_count": null,
188 |       "outputs": []
189 |     }
190 |   ]
191 | }


--------------------------------------------------------------------------------
/ReinforcmentLearning/simple_example.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | env = gym.make("MsPacman-ram-v0")
 3 | observation = env.reset()
 4 | import time
 5 | for _ in range(1000):
 6 |   env.render()
 7 |   action = env.action_space.sample() # your agent here (this takes random actions)
 8 |   observation, reward, done, info = env.step(action)
 9 | 
10 |   if done:
11 |     observation = env.reset()
12 | env.close()
13 | 


--------------------------------------------------------------------------------
/SinGAN/SinGANOfficialImplementation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "SinGANOfficialImplementation.ipynb",
  7 |       "provenance": [],
  8 |       "private_outputs": true,
  9 |       "authorship_tag": "ABX9TyPERozuok0RMoHj/JkJzqSS",
 10 |       "include_colab_link": true
 11 |     },
 12 |     "kernelspec": {
 13 |       "name": "python3",
 14 |       "display_name": "Python 3"
 15 |     },
 16 |     "accelerator": "GPU"
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/wileyw/DeepLearningDemos/blob/master/SinGAN/SinGANOfficialImplementation.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "markdown",
 31 |       "metadata": {
 32 |         "id": "_teh4A4skGRT",
 33 |         "colab_type": "text"
 34 |       },
 35 |       "source": [
 36 |         "# SinGAN\n",
 37 |         "\n",
 38 |         "[Official SinGAN Repository](https://github.com/tamarott/SinGAN)\n",
 39 |         "\n",
 40 |         "In this notebook, we will implement and create a SinGAN homework assignment for other's to learn how to implement SinGAN as well."
 41 |       ]
 42 |     },
 43 |     {
 44 |       "cell_type": "code",
 45 |       "metadata": {
 46 |         "id": "TF-QL9B0z7sl",
 47 |         "colab_type": "code",
 48 |         "colab": {}
 49 |       },
 50 |       "source": [
 51 |         "!git clone https://github.com/tamarott/SinGAN.git"
 52 |       ],
 53 |       "execution_count": 0,
 54 |       "outputs": []
 55 |     },
 56 |     {
 57 |       "cell_type": "code",
 58 |       "metadata": {
 59 |         "id": "ul_FoSC40EjR",
 60 |         "colab_type": "code",
 61 |         "colab": {}
 62 |       },
 63 |       "source": [
 64 |         "%cd /content/SinGAN\n",
 65 |         "!ls\n",
 66 |         "!pwd\n",
 67 |         "!python3 main_train.py --input_name birds.png"
 68 |       ],
 69 |       "execution_count": 0,
 70 |       "outputs": []
 71 |     },
 72 |     {
 73 |       "cell_type": "code",
 74 |       "metadata": {
 75 |         "id": "ktEI7QEk1Wmf",
 76 |         "colab_type": "code",
 77 |         "colab": {}
 78 |       },
 79 |       "source": [
 80 |         "!python3 random_samples.py --input_name birds.png --mode random_samples_arbitrary_sizes --scale_h 1 --scale_v 1"
 81 |       ],
 82 |       "execution_count": 0,
 83 |       "outputs": []
 84 |     },
 85 |     {
 86 |       "cell_type": "code",
 87 |       "metadata": {
 88 |         "id": "VBErCpu1LKd8",
 89 |         "colab_type": "code",
 90 |         "colab": {}
 91 |       },
 92 |       "source": [
 93 |         "!ls\n",
 94 |         "!ls -l Output/RandomSamples/birds\n",
 95 |         "!ls -l Output/RandomSamples/birds/gen_start_scale=0"
 96 |       ],
 97 |       "execution_count": 0,
 98 |       "outputs": []
 99 |     },
100 |     {
101 |       "cell_type": "code",
102 |       "metadata": {
103 |         "id": "qTHAHNbnL5W6",
104 |         "colab_type": "code",
105 |         "colab": {}
106 |       },
107 |       "source": [
108 |         "import cv2\n",
109 |         "import glob\n",
110 |         "from google.colab.patches import cv2_imshow\n",
111 |         "\n",
112 |         "print('original image')\n",
113 |         "original_img_path = 'Input/Images/birds.png'\n",
114 |         "img = cv2.imread(original_img_path)\n",
115 |         "cv2_imshow(img)\n",
116 |         "\n",
117 |         "print('random sample')\n",
118 |         "img_paths = glob.glob('Output/RandomSamples/birds/gen_start_scale=0/*.png')\n",
119 |         "img = cv2.imread(img_paths[0])\n",
120 |         "cv2_imshow(img)"
121 |       ],
122 |       "execution_count": 0,
123 |       "outputs": []
124 |     },
125 |     {
126 |       "cell_type": "code",
127 |       "metadata": {
128 |         "id": "6jpfpY2_kFeX",
129 |         "colab_type": "code",
130 |         "colab": {}
131 |       },
132 |       "source": [
133 |         "import torch\n",
134 |         "\n",
135 |         "print('Implement SinGAN here')\n",
136 |         "print(torch)"
137 |       ],
138 |       "execution_count": 0,
139 |       "outputs": []
140 |     }
141 |   ]
142 | }
143 | 


--------------------------------------------------------------------------------
/SinGAN/SinGAN_on_custom_image.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "SinGANOfficialImplementation.ipynb",
  7 |       "provenance": [],
  8 |       "private_outputs": true,
  9 |       "authorship_tag": "ABX9TyOqSL8ngNwZVEvzOBFhFwfA",
 10 |       "include_colab_link": true
 11 |     },
 12 |     "kernelspec": {
 13 |       "name": "python3",
 14 |       "display_name": "Python 3"
 15 |     },
 16 |     "accelerator": "GPU"
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/wileyw/DeepLearningDemos/blob/master/SinGAN/SinGAN_on_custom_image.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "markdown",
 31 |       "metadata": {
 32 |         "id": "_teh4A4skGRT",
 33 |         "colab_type": "text"
 34 |       },
 35 |       "source": [
 36 |         "# SinGAN\n",
 37 |         "\n",
 38 |         "[Official SinGAN Repository](https://github.com/tamarott/SinGAN)\n",
 39 |         "\n",
 40 |         "In this notebook, we will implement and create a SinGAN homework assignment for other's to learn how to implement SinGAN as well."
 41 |       ]
 42 |     },
 43 |     {
 44 |       "cell_type": "code",
 45 |       "metadata": {
 46 |         "id": "UemrsEXyvxqQ",
 47 |         "colab_type": "code",
 48 |         "colab": {}
 49 |       },
 50 |       "source": [
 51 |         "def upload_files():\n",
 52 |         "  from google.colab import files\n",
 53 |         "  uploaded = files.upload()\n",
 54 |         "  for k, v in uploaded.items():\n",
 55 |         "    open(k, 'wb').write(v)\n",
 56 |         "  return list(uploaded.keys())\n",
 57 |         "upload_files()"
 58 |       ],
 59 |       "execution_count": 0,
 60 |       "outputs": []
 61 |     },
 62 |     {
 63 |       "cell_type": "code",
 64 |       "metadata": {
 65 |         "id": "TF-QL9B0z7sl",
 66 |         "colab_type": "code",
 67 |         "colab": {}
 68 |       },
 69 |       "source": [
 70 |         "!git clone https://github.com/tamarott/SinGAN.git"
 71 |       ],
 72 |       "execution_count": 0,
 73 |       "outputs": []
 74 |     },
 75 |     {
 76 |       "cell_type": "code",
 77 |       "metadata": {
 78 |         "id": "vZLLI4c7sJjv",
 79 |         "colab_type": "code",
 80 |         "colab": {}
 81 |       },
 82 |       "source": [
 83 |         "!ls /content/SinGAN/Input/Images/\n",
 84 |         "!cp carrots_whole.4BVRRZ6FNXYQN.png /content/SinGAN/Input/Images/custom.png"
 85 |       ],
 86 |       "execution_count": 0,
 87 |       "outputs": []
 88 |     },
 89 |     {
 90 |       "cell_type": "code",
 91 |       "metadata": {
 92 |         "id": "uKVDKOFetVEW",
 93 |         "colab_type": "code",
 94 |         "colab": {}
 95 |       },
 96 |       "source": [
 97 |         "%cd /content/SinGAN/Input/Images/\n",
 98 |         "!ls\n",
 99 |         "#import cv2\n",
100 |         "#custom = cv2.imread('custom.jpg')\n",
101 |         "#cv2.imwrite('custom.png', custom)"
102 |       ],
103 |       "execution_count": 0,
104 |       "outputs": []
105 |     },
106 |     {
107 |       "cell_type": "code",
108 |       "metadata": {
109 |         "id": "ul_FoSC40EjR",
110 |         "colab_type": "code",
111 |         "colab": {}
112 |       },
113 |       "source": [
114 |         "%cd /content/SinGAN\n",
115 |         "!ls\n",
116 |         "!pwd\n",
117 |         "!python3 main_train.py --input_name custom.png"
118 |       ],
119 |       "execution_count": 0,
120 |       "outputs": []
121 |     },
122 |     {
123 |       "cell_type": "code",
124 |       "metadata": {
125 |         "id": "ktEI7QEk1Wmf",
126 |         "colab_type": "code",
127 |         "colab": {}
128 |       },
129 |       "source": [
130 |         "!python3 random_samples.py --input_name custom.png --mode random_samples_arbitrary_sizes --scale_h 1 --scale_v 1"
131 |       ],
132 |       "execution_count": 0,
133 |       "outputs": []
134 |     },
135 |     {
136 |       "cell_type": "code",
137 |       "metadata": {
138 |         "id": "VBErCpu1LKd8",
139 |         "colab_type": "code",
140 |         "colab": {}
141 |       },
142 |       "source": [
143 |         "!ls\n",
144 |         "!ls -l Output/RandomSamples/custom\n",
145 |         "!ls -l Output/RandomSamples/custom/gen_start_scale=0"
146 |       ],
147 |       "execution_count": 0,
148 |       "outputs": []
149 |     },
150 |     {
151 |       "cell_type": "code",
152 |       "metadata": {
153 |         "id": "qTHAHNbnL5W6",
154 |         "colab_type": "code",
155 |         "colab": {}
156 |       },
157 |       "source": [
158 |         "import cv2\n",
159 |         "import glob\n",
160 |         "from google.colab.patches import cv2_imshow\n",
161 |         "\n",
162 |         "print('original image')\n",
163 |         "original_img_path = 'Input/Images/custom.png'\n",
164 |         "img = cv2.imread(original_img_path)\n",
165 |         "cv2_imshow(img)\n",
166 |         "\n",
167 |         "print('random sample')\n",
168 |         "img_paths = glob.glob('Output/RandomSamples/custom/gen_start_scale=0/*.png')\n",
169 |         "img = cv2.imread(img_paths[0])\n",
170 |         "cv2_imshow(img)"
171 |       ],
172 |       "execution_count": 0,
173 |       "outputs": []
174 |     },
175 |     {
176 |       "cell_type": "code",
177 |       "metadata": {
178 |         "id": "6jpfpY2_kFeX",
179 |         "colab_type": "code",
180 |         "colab": {}
181 |       },
182 |       "source": [
183 |         "import torch\n",
184 |         "\n",
185 |         "print('Implement SinGAN here')\n",
186 |         "print(torch)"
187 |       ],
188 |       "execution_count": 0,
189 |       "outputs": []
190 |     }
191 |   ]
192 | }


--------------------------------------------------------------------------------
/Transformers/README.md:
--------------------------------------------------------------------------------
1 | - [Huggingface Transformers](https://github.com/huggingface/transformers)
2 | 


--------------------------------------------------------------------------------
/Transformers/Transformers.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |   "nbformat": 4,
 3 |   "nbformat_minor": 0,
 4 |   "metadata": {
 5 |     "colab": {
 6 |       "name": "Transformers.ipynb",
 7 |       "provenance": [],
 8 |       "authorship_tag": "ABX9TyMq7zDv55D6nmR0jiBGxxXn",
 9 |       "include_colab_link": true
10 |     },
11 |     "kernelspec": {
12 |       "name": "python3",
13 |       "display_name": "Python 3"
14 |     }
15 |   },
16 |   "cells": [
17 |     {
18 |       "cell_type": "markdown",
19 |       "metadata": {
20 |         "id": "view-in-github",
21 |         "colab_type": "text"
22 |       },
23 |       "source": [
24 |         "<a href=\"https://colab.research.google.com/github/wileyw/DeepLearningDemos/blob/master/Transformers/Transformers.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
25 |       ]
26 |     },
27 |     {
28 |       "cell_type": "markdown",
29 |       "metadata": {
30 |         "id": "3oRmLL1owcS6",
31 |         "colab_type": "text"
32 |       },
33 |       "source": [
34 |         "# Transformers\n",
35 |         "\n",
36 |         "Initial commit of colab notebook for Transformers."
37 |       ]
38 |     },
39 |     {
40 |       "cell_type": "code",
41 |       "metadata": {
42 |         "id": "LllIDkqLwYuS",
43 |         "colab_type": "code",
44 |         "colab": {}
45 |       },
46 |       "source": [
47 |         "import torch\n",
48 |         "import tensorflow as tf"
49 |       ],
50 |       "execution_count": 2,
51 |       "outputs": []
52 |     },
53 |     {
54 |       "cell_type": "markdown",
55 |       "metadata": {
56 |         "id": "wnZMPSeHyG4J",
57 |         "colab_type": "text"
58 |       },
59 |       "source": [
60 |         "# Resources\n",
61 |         "- [Gelu Activation Function](https://mlfromscratch.com/activation-functions-explained/)"
62 |       ]
63 |     }
64 |   ]
65 | }


--------------------------------------------------------------------------------
/Transformers/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers
2 | 


--------------------------------------------------------------------------------
/Transformers/translation/train.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | """
 5 | Parameters
 6 | 
 7 |     d_model – the number of expected features in the encoder/decoder inputs (default=512).
 8 | 
 9 |     nhead – the number of heads in the multiheadattention models (default=8).
10 | 
11 |     num_encoder_layers – the number of sub-encoder-layers in the encoder (default=6).
12 | 
13 |     num_decoder_layers – the number of sub-decoder-layers in the decoder (default=6).
14 | 
15 |     dim_feedforward – the dimension of the feedforward network model (default=2048).
16 | 
17 |     dropout – the dropout value (default=0.1).
18 | 
19 |     activation – the activation function of encoder/decoder intermediate layer, relu or gelu (default=relu).
20 | 
21 |     custom_encoder – custom encoder (default=None).
22 | 
23 |     custom_decoder – custom decoder (default=None).
24 | """
25 | trfmr_config = {
26 |     'd_model': 256,  # number of features in embedding
27 |     'nhead': 8,  # number of attention heads
28 |     'num_encoder_layers': 8,
29 |     'num_decoder_layers': 8,
30 |     'dim_feedforward': 2048,
31 |     'activation': 'relu',
32 | }
33 | 
34 | opt_config = {
35 |     'lr': 3e-4,
36 |     'beta1': 0.5,
37 |     'beta2': 0.999,
38 |     'num_epochs': 300
39 | }
40 | 
41 | 
42 | def main():
43 |     # Initialize model.
44 |     trfm_model = torch.nn.Transformer(**trmfr_config)
45 | 
46 |     # Initialize optimizer.
47 |     opt = torch.optim.AdamW(trfm.parameters(), opt_config['lr'],
48 |                             [opt_config['beta1'], opt_config['beta2']])
49 | 
50 |     # Set loss function.
51 |     loss_fn = torch.nn.BCELoss
52 | 
53 |     # Load Data.
54 |     # TODO: implement data loading.
55 |     data = []
56 | 
57 |     # Training loop
58 |     for epoch in range in range(opt_config['num_epochs']):
59 |         for expected_out, batch in data:
60 |             opt.zero_grad()
61 |             actual_out = trfm_model(batch)
62 |             loss = torch.nn.BCELoss(actual_out, expected_out)
63 |             loss.backward()
64 |             opt.step()
65 | 
66 | 
67 | if __name__ == '__main__':
68 |     main()


--------------------------------------------------------------------------------
/handwriting-synthesis/.gitignore:
--------------------------------------------------------------------------------
1 | data/raw/ascii
2 | data/raw/lineStrokes
3 | data/raw/original
4 | data/processed
5 | 
6 | logs
7 | predictions
8 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | cache: pip
 3 | python:
 4 |     - 2.7
 5 |     - 3.6
 6 |     #- nightly
 7 |     #- pypy
 8 |     #- pypy3
 9 | matrix:
10 |     allow_failures:
11 |         - python: nightly
12 |         - python: pypy
13 |         - python: pypy3
14 | install:
15 |     #- pip install -r requirements.txt
16 |     - pip install flake8  # pytest  # add another testing frameworks later
17 | before_script:
18 |     # stop the build if there are Python syntax errors or undefined names
19 |     - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics
20 |     # exit-zero treats all errors as warnings.  The GitHub editor is 127 chars wide
21 |     - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
22 | script:
23 |     - true  # pytest --capture=sys  # add other tests here
24 | notifications:
25 |     on_success: change
26 |     on_failure: change  # `always` will be the setting once code changes slow down
27 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/checkpoints/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "model-17900"
2 | all_model_checkpoint_paths: "model-17900"
3 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/checkpoints/model-17900.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/checkpoints/model-17900.data-00000-of-00001


--------------------------------------------------------------------------------
/handwriting-synthesis/checkpoints/model-17900.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/checkpoints/model-17900.index


--------------------------------------------------------------------------------
/handwriting-synthesis/checkpoints/model-17900.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/checkpoints/model-17900.meta


--------------------------------------------------------------------------------
/handwriting-synthesis/data/blacklist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/data/blacklist.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/data/processed/.gitattributes:
--------------------------------------------------------------------------------
1 | *.npy filter=lfs diff=lfs merge=lfs -text
2 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/data/processed/c.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:c292920514ca4f12673a81b96ccdadbf99ee28ef4aa0f7b60a85706691c87abe
3 | size 871253
4 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/data/processed/c_len.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:afe6b3a07822d7f90b3974a08722b55017319fe1dcee24fc9424ee81a9683195
3 | size 11743
4 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/data/processed/w_id.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:02939b9bbc2347eca0faeb23e12a33453f69b868ef7ee410287fe0af4121d8d9
3 | size 23358
4 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/data/processed/x.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:c6b95c4b53f6ac656884e0bee483b7d3eb0a2e1352de4a102fae546cac3ae3e8
3 | size 167256128
4 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/data/processed/x_len.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:dde6dd72f5017c5608c40114d68657adf72be571e7c90493832f151da88f6ab1
3 | size 23358
4 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/data/raw/readme.md:
--------------------------------------------------------------------------------
 1 | ## Model Training Instructions
 2 | 
 3 | In order to train a model, data must be downloaded and placed in this directory.
 4 | 
 5 | Follow the download instructions here http://www.fki.inf.unibe.ch/databases/iam-on-line-handwriting-database.
 6 | 
 7 | Only a subset of the downloaded data is required.  Move the relevant download data so the directory structure is as folllows:
 8 | 
 9 | ```
10 | data/
11 | ├── raw/
12 | │   ├── ascii/
13 | │   ├── lineStrokes/
14 | │   ├── original/
15 | |   blacklist.npy
16 | ```
17 | 
18 | Once this is completed, run `prepare_data.py` extract the data and dump it to numpy files.
19 | 
20 | To train the model, run `rnn.py`.  This takes a couple days on a single Tesla K80.
21 | 
22 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/data_frame.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | 
  3 | import numpy as np
  4 | import pandas as pd
  5 | from sklearn.model_selection import train_test_split
  6 | 
  7 | 
  8 | class DataFrame(object):
  9 | 
 10 |     """Minimal pd.DataFrame analog for handling n-dimensional numpy matrices with additional
 11 |     support for shuffling, batching, and train/test splitting.
 12 | 
 13 |     Args:
 14 |         columns: List of names corresponding to the matrices in data.
 15 |         data: List of n-dimensional data matrices ordered in correspondence with columns.
 16 |             All matrices must have the same leading dimension.  Data can also be fed a list of
 17 |             instances of np.memmap, in which case RAM usage can be limited to the size of a
 18 |             single batch.
 19 |     """
 20 | 
 21 |     def __init__(self, columns, data):
 22 |         assert len(columns) == len(data), 'columns length does not match data length'
 23 | 
 24 |         lengths = [mat.shape[0] for mat in data]
 25 |         assert len(set(lengths)) == 1, 'all matrices in data must have same first dimension'
 26 | 
 27 |         self.length = lengths[0]
 28 |         self.columns = columns
 29 |         self.data = data
 30 |         self.dict = dict(zip(self.columns, self.data))
 31 |         self.idx = np.arange(self.length)
 32 | 
 33 |     def shapes(self):
 34 |         return pd.Series(dict(zip(self.columns, [mat.shape for mat in self.data])))
 35 | 
 36 |     def dtypes(self):
 37 |         return pd.Series(dict(zip(self.columns, [mat.dtype for mat in self.data])))
 38 | 
 39 |     def shuffle(self):
 40 |         np.random.shuffle(self.idx)
 41 | 
 42 |     def train_test_split(self, train_size, random_state=np.random.randint(1000), stratify=None):
 43 |         train_idx, test_idx = train_test_split(
 44 |             self.idx,
 45 |             train_size=train_size,
 46 |             random_state=random_state,
 47 |             stratify=stratify
 48 |         )
 49 |         train_df = DataFrame(copy.copy(self.columns), [mat[train_idx] for mat in self.data])
 50 |         test_df = DataFrame(copy.copy(self.columns), [mat[test_idx] for mat in self.data])
 51 |         return train_df, test_df
 52 | 
 53 |     def batch_generator(self, batch_size, shuffle=True, num_epochs=10000, allow_smaller_final_batch=False):
 54 |         epoch_num = 0
 55 |         while epoch_num < num_epochs:
 56 |             if shuffle:
 57 |                 self.shuffle()
 58 | 
 59 |             for i in range(0, self.length + 1, batch_size):
 60 |                 batch_idx = self.idx[i: i + batch_size]
 61 |                 if not allow_smaller_final_batch and len(batch_idx) != batch_size:
 62 |                     break
 63 |                 yield DataFrame(
 64 |                     columns=copy.copy(self.columns),
 65 |                     data=[mat[batch_idx].copy() for mat in self.data]
 66 |                 )
 67 | 
 68 |             epoch_num += 1
 69 | 
 70 |     def iterrows(self):
 71 |         for i in self.idx:
 72 |             yield self[i]
 73 | 
 74 |     def mask(self, mask):
 75 |         return DataFrame(copy.copy(self.columns), [mat[mask] for mat in self.data])
 76 | 
 77 |     def concat(self, other_df):
 78 |         mats = []
 79 |         for column in self.columns:
 80 |             mats.append(np.concatenate([self[column], other_df[column]], axis=0))
 81 |         return DataFrame(copy.copy(self.columns), mats)
 82 | 
 83 |     def items(self):
 84 |         return self.dict.items()
 85 | 
 86 |     def __iter__(self):
 87 |         return self.dict.items().__iter__()
 88 | 
 89 |     def __len__(self):
 90 |         return self.length
 91 | 
 92 |     def __getitem__(self, key):
 93 |         if isinstance(key, str):
 94 |             return self.dict[key]
 95 | 
 96 |         elif isinstance(key, int):
 97 |             return pd.Series(dict(zip(self.columns, [mat[self.idx[key]] for mat in self.data])))
 98 | 
 99 |     def __setitem__(self, key, value):
100 |         assert value.shape[0] == len(self), 'matrix first dimension does not match'
101 |         if key not in self.columns:
102 |             self.columns.append(key)
103 |             self.data.append(value)
104 |         self.dict[key] = value
105 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/demo.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import logging
  3 | 
  4 | import numpy as np
  5 | import svgwrite
  6 | 
  7 | import drawing
  8 | import lyrics
  9 | from rnn import rnn
 10 | 
 11 | 
 12 | class Hand(object):
 13 | 
 14 |     def __init__(self):
 15 |         os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 16 |         self.nn = rnn(
 17 |             log_dir='logs',
 18 |             checkpoint_dir='checkpoints',
 19 |             prediction_dir='predictions',
 20 |             learning_rates=[.0001, .00005, .00002],
 21 |             batch_sizes=[32, 64, 64],
 22 |             patiences=[1500, 1000, 500],
 23 |             beta1_decays=[.9, .9, .9],
 24 |             validation_batch_size=32,
 25 |             optimizer='rms',
 26 |             num_training_steps=100000,
 27 |             warm_start_init_step=17900,
 28 |             regularization_constant=0.0,
 29 |             keep_prob=1.0,
 30 |             enable_parameter_averaging=False,
 31 |             min_steps_to_checkpoint=2000,
 32 |             log_interval=20,
 33 |             logging_level=logging.CRITICAL,
 34 |             grad_clip=10,
 35 |             lstm_size=400,
 36 |             output_mixture_components=20,
 37 |             attention_mixture_components=10
 38 |         )
 39 |         self.nn.restore()
 40 | 
 41 |     def write(self, filename, lines, biases=None, styles=None, stroke_colors=None, stroke_widths=None):
 42 |         valid_char_set = set(drawing.alphabet)
 43 |         for line_num, line in enumerate(lines):
 44 |             if len(line) > 75:
 45 |                 raise ValueError(
 46 |                     (
 47 |                         "Each line must be at most 75 characters. "
 48 |                         "Line {} contains {}"
 49 |                     ).format(line_num, len(line))
 50 |                 )
 51 | 
 52 |             for char in line:
 53 |                 if char not in valid_char_set:
 54 |                     raise ValueError(
 55 |                         (
 56 |                             "Invalid character {} detected in line {}. "
 57 |                             "Valid character set is {}"
 58 |                         ).format(char, line_num, valid_char_set)
 59 |                     )
 60 | 
 61 |         strokes = self._sample(lines, biases=biases, styles=styles)
 62 |         self._draw(strokes, lines, filename, stroke_colors=stroke_colors, stroke_widths=stroke_widths)
 63 | 
 64 |     def _sample(self, lines, biases=None, styles=None):
 65 |         num_samples = len(lines)
 66 |         max_tsteps = 40*max([len(i) for i in lines])
 67 |         biases = biases if biases is not None else [0.5]*num_samples
 68 | 
 69 |         x_prime = np.zeros([num_samples, 1200, 3])
 70 |         x_prime_len = np.zeros([num_samples])
 71 |         chars = np.zeros([num_samples, 120])
 72 |         chars_len = np.zeros([num_samples])
 73 | 
 74 |         if styles is not None:
 75 |             for i, (cs, style) in enumerate(zip(lines, styles)):
 76 |                 x_p = np.load('styles/style-{}-strokes.npy'.format(style))
 77 |                 c_p = np.load('styles/style-{}-chars.npy'.format(style)).tostring().decode('utf-8')
 78 | 
 79 |                 c_p = str(c_p) + " " + cs
 80 |                 c_p = drawing.encode_ascii(c_p)
 81 |                 c_p = np.array(c_p)
 82 | 
 83 |                 x_prime[i, :len(x_p), :] = x_p
 84 |                 x_prime_len[i] = len(x_p)
 85 |                 chars[i, :len(c_p)] = c_p
 86 |                 chars_len[i] = len(c_p)
 87 | 
 88 |         else:
 89 |             for i in range(num_samples):
 90 |                 encoded = drawing.encode_ascii(lines[i])
 91 |                 chars[i, :len(encoded)] = encoded
 92 |                 chars_len[i] = len(encoded)
 93 | 
 94 |         [samples] = self.nn.session.run(
 95 |             [self.nn.sampled_sequence],
 96 |             feed_dict={
 97 |                 self.nn.prime: styles is not None,
 98 |                 self.nn.x_prime: x_prime,
 99 |                 self.nn.x_prime_len: x_prime_len,
100 |                 self.nn.num_samples: num_samples,
101 |                 self.nn.sample_tsteps: max_tsteps,
102 |                 self.nn.c: chars,
103 |                 self.nn.c_len: chars_len,
104 |                 self.nn.bias: biases
105 |             }
106 |         )
107 |         samples = [sample[~np.all(sample == 0.0, axis=1)] for sample in samples]
108 |         return samples
109 | 
110 |     def _draw(self, strokes, lines, filename, stroke_colors=None, stroke_widths=None):
111 |         stroke_colors = stroke_colors or ['black']*len(lines)
112 |         stroke_widths = stroke_widths or [2]*len(lines)
113 | 
114 |         line_height = 60
115 |         view_width = 1000
116 |         view_height = line_height*(len(strokes) + 1)
117 | 
118 |         dwg = svgwrite.Drawing(filename=filename)
119 |         dwg.viewbox(width=view_width, height=view_height)
120 |         dwg.add(dwg.rect(insert=(0, 0), size=(view_width, view_height), fill='white'))
121 | 
122 |         initial_coord = np.array([0, -(3*line_height / 4)])
123 |         for offsets, line, color, width in zip(strokes, lines, stroke_colors, stroke_widths):
124 | 
125 |             if not line:
126 |                 initial_coord[1] -= line_height
127 |                 continue
128 | 
129 |             offsets[:, :2] *= 1.5
130 |             strokes = drawing.offsets_to_coords(offsets)
131 |             strokes = drawing.denoise(strokes)
132 |             strokes[:, :2] = drawing.align(strokes[:, :2])
133 | 
134 |             strokes[:, 1] *= -1
135 |             strokes[:, :2] -= strokes[:, :2].min() + initial_coord
136 |             strokes[:, 0] += (view_width - strokes[:, 0].max()) / 2
137 | 
138 |             prev_eos = 1.0
139 |             p = "M{},{} ".format(0, 0)
140 |             for x, y, eos in zip(*strokes.T):
141 |                 p += '{}{},{} '.format('M' if prev_eos == 1.0 else 'L', x, y)
142 |                 prev_eos = eos
143 |             path = svgwrite.path.Path(p)
144 |             path = path.stroke(color=color, width=width, linecap='round').fill("none")
145 |             dwg.add(path)
146 | 
147 |             initial_coord[1] -= line_height
148 | 
149 |         dwg.save()
150 | 
151 | 
152 | if __name__ == '__main__':
153 |     hand = Hand()
154 | 
155 |     # usage demo
156 |     lines = [
157 |         "Now this is a story all about how",
158 |         "My life got flipped turned upside down",
159 |         "And I'd like to take a minute, just sit right there",
160 |         "I'll tell you how I became the prince of a town called Bel-Air",
161 |     ]
162 |     biases = [.75 for i in lines]
163 |     styles = [9 for i in lines]
164 |     stroke_colors = ['red', 'green', 'black', 'blue']
165 |     stroke_widths = [1, 2, 1, 2]
166 | 
167 |     hand.write(
168 |         filename='img/usage_demo.svg',
169 |         lines=lines,
170 |         biases=biases,
171 |         styles=styles,
172 |         stroke_colors=stroke_colors,
173 |         stroke_widths=stroke_widths
174 |     )
175 | 
176 |     # demo number 1 - fixed bias, fixed style
177 |     lines = lyrics.all_star.split("\n")
178 |     biases = [.75 for i in lines]
179 |     styles = [12 for i in lines]
180 | 
181 |     hand.write(
182 |         filename='img/all_star.svg',
183 |         lines=lines,
184 |         biases=biases,
185 |         styles=styles,
186 |     )
187 | 
188 |     # demo number 2 - fixed bias, varying style
189 |     lines = lyrics.downtown.split("\n")
190 |     biases = [.75 for i in lines]
191 |     styles = np.cumsum(np.array([len(i) for i in lines]) == 0).astype(int)
192 | 
193 |     hand.write(
194 |         filename='img/downtown.svg',
195 |         lines=lines,
196 |         biases=biases,
197 |         styles=styles,
198 |     )
199 | 
200 |     # demo number 3 - varying bias, fixed style
201 |     lines = lyrics.give_up.split("\n")
202 |     biases = .2*np.flip(np.cumsum([len(i) == 0 for i in lines]), 0)
203 |     styles = [7 for i in lines]
204 | 
205 |     hand.write(
206 |         filename='img/give_up.svg',
207 |         lines=lines,
208 |         biases=biases,
209 |         styles=styles,
210 |     )
211 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/drawing.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | from collections import defaultdict
  3 | 
  4 | import matplotlib.pyplot as plt
  5 | import numpy as np
  6 | from scipy.signal import savgol_filter
  7 | from scipy.interpolate import interp1d
  8 | 
  9 | 
 10 | alphabet = [
 11 |     '\x00', ' ', '!', '"', '#', "'", '(', ')', ',', '-', '.',
 12 |     '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
 13 |     '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
 14 |     'L', 'M', 'N', 'O', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'Y',
 15 |     'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
 16 |     'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
 17 |     'y', 'z'
 18 | ]
 19 | alphabet_ord = list(map(ord, alphabet))
 20 | alpha_to_num = defaultdict(int, list(map(reversed, enumerate(alphabet))))
 21 | num_to_alpha = dict(enumerate(alphabet_ord))
 22 | 
 23 | MAX_STROKE_LEN = 1200
 24 | MAX_CHAR_LEN = 75
 25 | 
 26 | 
 27 | def align(coords):
 28 |     """
 29 |     corrects for global slant/offset in handwriting strokes
 30 |     """
 31 |     coords = np.copy(coords)
 32 |     X, Y = coords[:, 0].reshape(-1, 1), coords[:, 1].reshape(-1, 1)
 33 |     X = np.concatenate([np.ones([X.shape[0], 1]), X], axis=1)
 34 |     offset, slope = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(Y).squeeze()
 35 |     theta = np.arctan(slope)
 36 |     rotation_matrix = np.array(
 37 |         [[np.cos(theta), -np.sin(theta)],
 38 |          [np.sin(theta), np.cos(theta)]]
 39 |     )
 40 |     coords[:, :2] = np.dot(coords[:, :2], rotation_matrix) - offset
 41 |     return coords
 42 | 
 43 | 
 44 | def skew(coords, degrees):
 45 |     """
 46 |     skews strokes by given degrees
 47 |     """
 48 |     coords = np.copy(coords)
 49 |     theta = degrees * np.pi/180
 50 |     A = np.array([[np.cos(-theta), 0], [np.sin(-theta), 1]])
 51 |     coords[:, :2] = np.dot(coords[:, :2], A)
 52 |     return coords
 53 | 
 54 | 
 55 | def stretch(coords, x_factor, y_factor):
 56 |     """
 57 |     stretches strokes along x and y axis
 58 |     """
 59 |     coords = np.copy(coords)
 60 |     coords[:, :2] *= np.array([x_factor, y_factor])
 61 |     return coords
 62 | 
 63 | 
 64 | def add_noise(coords, scale):
 65 |     """
 66 |     adds gaussian noise to strokes
 67 |     """
 68 |     coords = np.copy(coords)
 69 |     coords[1:, :2] += np.random.normal(loc=0.0, scale=scale, size=coords[1:, :2].shape)
 70 |     return coords
 71 | 
 72 | 
 73 | def encode_ascii(ascii_string):
 74 |     """
 75 |     encodes ascii string to array of ints
 76 |     """
 77 |     return np.array(list(map(lambda x: alpha_to_num[x], ascii_string)) + [0])
 78 | 
 79 | 
 80 | def denoise(coords):
 81 |     """
 82 |     smoothing filter to mitigate some artifacts of the data collection
 83 |     """
 84 |     coords = np.split(coords, np.where(coords[:, 2] == 1)[0] + 1, axis=0)
 85 |     new_coords = []
 86 |     for stroke in coords:
 87 |         if len(stroke) != 0:
 88 |             x_new = savgol_filter(stroke[:, 0], 7, 3, mode='nearest')
 89 |             y_new = savgol_filter(stroke[:, 1], 7, 3, mode='nearest')
 90 |             xy_coords = np.hstack([x_new.reshape(-1, 1), y_new.reshape(-1, 1)])
 91 |             stroke = np.concatenate([xy_coords, stroke[:, 2].reshape(-1, 1)], axis=1)
 92 |             new_coords.append(stroke)
 93 | 
 94 |     coords = np.vstack(new_coords)
 95 |     return coords
 96 | 
 97 | 
 98 | def interpolate(coords, factor=2):
 99 |     """
100 |     interpolates strokes using cubic spline
101 |     """
102 |     coords = np.split(coords, np.where(coords[:, 2] == 1)[0] + 1, axis=0)
103 |     new_coords = []
104 |     for stroke in coords:
105 | 
106 |         if len(stroke) == 0:
107 |             continue
108 | 
109 |         xy_coords = stroke[:, :2]
110 | 
111 |         if len(stroke) > 3:
112 |             f_x = interp1d(np.arange(len(stroke)), stroke[:, 0], kind='cubic')
113 |             f_y = interp1d(np.arange(len(stroke)), stroke[:, 1], kind='cubic')
114 | 
115 |             xx = np.linspace(0, len(stroke) - 1, factor*(len(stroke)))
116 |             yy = np.linspace(0, len(stroke) - 1, factor*(len(stroke)))
117 | 
118 |             x_new = f_x(xx)
119 |             y_new = f_y(yy)
120 | 
121 |             xy_coords = np.hstack([x_new.reshape(-1, 1), y_new.reshape(-1, 1)])
122 | 
123 |         stroke_eos = np.zeros([len(xy_coords), 1])
124 |         stroke_eos[-1] = 1.0
125 |         stroke = np.concatenate([xy_coords, stroke_eos], axis=1)
126 |         new_coords.append(stroke)
127 | 
128 |     coords = np.vstack(new_coords)
129 |     return coords
130 | 
131 | 
132 | def normalize(offsets):
133 |     """
134 |     normalizes strokes to median unit norm
135 |     """
136 |     offsets = np.copy(offsets)
137 |     offsets[:, :2] /= np.median(np.linalg.norm(offsets[:, :2], axis=1))
138 |     return offsets
139 | 
140 | 
141 | def coords_to_offsets(coords):
142 |     """
143 |     convert from coordinates to offsets
144 |     """
145 |     offsets = np.concatenate([coords[1:, :2] - coords[:-1, :2], coords[1:, 2:3]], axis=1)
146 |     offsets = np.concatenate([np.array([[0, 0, 1]]), offsets], axis=0)
147 |     return offsets
148 | 
149 | 
150 | def offsets_to_coords(offsets):
151 |     """
152 |     convert from offsets to coordinates
153 |     """
154 |     return np.concatenate([np.cumsum(offsets[:, :2], axis=0), offsets[:, 2:3]], axis=1)
155 | 
156 | 
157 | def draw(
158 |         offsets,
159 |         ascii_seq=None,
160 |         align_strokes=True,
161 |         denoise_strokes=True,
162 |         interpolation_factor=None,
163 |         save_file=None
164 | ):
165 |     strokes = offsets_to_coords(offsets)
166 | 
167 |     if denoise_strokes:
168 |         strokes = denoise(strokes)
169 | 
170 |     if interpolation_factor is not None:
171 |         strokes = interpolate(strokes, factor=interpolation_factor)
172 | 
173 |     if align_strokes:
174 |         strokes[:, :2] = align(strokes[:, :2])
175 | 
176 |     fig, ax = plt.subplots(figsize=(12, 3))
177 | 
178 |     stroke = []
179 |     for x, y, eos in strokes:
180 |         stroke.append((x, y))
181 |         if eos == 1:
182 |             coords = zip(*stroke)
183 |             ax.plot(coords[0], coords[1], 'k')
184 |             stroke = []
185 |     if stroke:
186 |         coords = zip(*stroke)
187 |         ax.plot(coords[0], coords[1], 'k')
188 |         stroke = []
189 | 
190 |     ax.set_xlim(-50, 600)
191 |     ax.set_ylim(-40, 40)
192 | 
193 |     ax.set_aspect('equal')
194 |     plt.tick_params(
195 |         axis='both',
196 |         left='off',
197 |         top='off',
198 |         right='off',
199 |         bottom='off',
200 |         labelleft='off',
201 |         labeltop='off',
202 |         labelright='off',
203 |         labelbottom='off'
204 |     )
205 | 
206 |     if ascii_seq is not None:
207 |         if not isinstance(ascii_seq, str):
208 |             ascii_seq = ''.join(list(map(chr, ascii_seq)))
209 |         plt.title(ascii_seq)
210 | 
211 |     if save_file is not None:
212 |         plt.savefig(save_file)
213 |         print('saved to {}'.format(save_file))
214 |     else:
215 |         plt.show()
216 |     plt.close('all')
217 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/lyrics.py:
--------------------------------------------------------------------------------
  1 | """lyrics taken from https://www.azlyrics.com/"""
  2 | 
  3 | all_star = """Somebody once told me the world is gonna roll me
  4 | I ain't the sharpest tool in the shed
  5 | She was looking kind of dumb with her finger and her thumb
  6 | In the shape of an "L" on her forehead
  7 | 
  8 | Well, the years start coming and they don't stop coming
  9 | Fed to the rules and I hit the ground running
 10 | Didn't make sense not to live for fun
 11 | Your brain gets smart but your head gets dumb
 12 | 
 13 | So much to do, so much to see
 14 | So what's wrong with taking the back streets?
 15 | You'll never know if you don't go
 16 | You'll never shine if you don't glow
 17 | 
 18 | Hey, now, you're an All Star, get your game on, go play
 19 | Hey, now, you're a Rock Star, get the show on, get paid
 20 | And all that glitters is gold
 21 | Only shooting stars break the mold
 22 | 
 23 | It's a cool place and they say it gets colder
 24 | You're bundled up now wait 'til you get older
 25 | But the meteor men beg to differ
 26 | Judging by the hole in the satellite picture
 27 | 
 28 | The ice we skate is getting pretty thin
 29 | The water's getting warm so you might as well swim
 30 | My world's on fire. How about yours?
 31 | That's the way I like it and I'll never get bored.
 32 | 
 33 | Somebody once asked could I spare some change for gas
 34 | I need to get myself away from this place
 35 | I said yep, what a concept
 36 | I could use a little fuel myself
 37 | And we could all use a little change
 38 | 
 39 | Well, the years start coming and they don't stop coming
 40 | Fed to the rules and I hit the ground running
 41 | Didn't make sense not to live for fun
 42 | Your brain gets smart but your head gets dumb
 43 | 
 44 | So much to do, so much to see
 45 | So what's wrong with taking the back streets?
 46 | You'll never know if you don't go
 47 | You'll never shine if you don't glow.
 48 | 
 49 | And all that glitters is gold
 50 | Only shooting stars break the mold"""
 51 | 
 52 | downtown = """Making my way downtown
 53 | Walking fast
 54 | Faces pass
 55 | And I'm home-bound
 56 | 
 57 | Staring blankly ahead
 58 | Just making my way
 59 | Making a way
 60 | Through the crowd
 61 | 
 62 | And I need you
 63 | And I miss you
 64 | And now I wonder
 65 | 
 66 | If I could fall into the sky
 67 | Do you think time would pass me by?
 68 | 'Cause you know I'd walk a thousand miles
 69 | If I could just see you tonight
 70 | 
 71 | It's always times like these
 72 | When I think of you
 73 | And I wonder if you ever think of me
 74 | 'Cause everything's so wrong
 75 | And I don't belong
 76 | Living in your precious memory
 77 | 
 78 | 'Cause I need you
 79 | And I miss you
 80 | And now I wonder
 81 | 
 82 | If I could fall into the sky
 83 | Do you think time would pass me by?
 84 | 'Cause you know I'd walk a thousand miles
 85 | If I could just see you tonight
 86 | 
 87 | And I, I don't wanna let you know
 88 | I, I drown in your memory
 89 | I, I don't wanna let this go
 90 | I, I don't
 91 | 
 92 | Making my way downtown
 93 | Walking fast
 94 | Faces pass
 95 | And I'm home-bound
 96 | 
 97 | Staring blankly ahead
 98 | Just making my way
 99 | Making a way
100 | Through the crowd
101 | 
102 | And I still need you
103 | And I still miss you
104 | And now I wonder
105 | 
106 | If I could fall into the sky
107 | Do you think time would pass us by?
108 | 'Cause you know I'd walk a thousand miles
109 | If I could just see you
110 | 
111 | If I could fall into the sky
112 | Do you think time would pass me by?
113 | 'Cause you know I'd walk a thousand miles
114 | If I could just see you
115 | If I could just hold you tonight"""
116 | 
117 | give_up = """We're no strangers to love
118 | You know the rules and so do I
119 | A full commitment's what I'm thinking of
120 | You wouldn't get this from any other guy
121 | 
122 | I just wanna tell you how I'm feeling
123 | Gotta make you understand
124 | 
125 | Never gonna give you up
126 | Never gonna let you down
127 | Never gonna run around and desert you
128 | Never gonna make you cry
129 | Never gonna say goodbye
130 | Never gonna tell a lie and hurt you
131 | 
132 | We've known each other for so long
133 | Your heart's been aching, but
134 | You're too shy to say it
135 | Inside, we both know what's been going on
136 | We know the game and we're gonna play it
137 | 
138 | And if you ask me how I'm feeling
139 | Don't tell me you're too blind to see
140 | 
141 | Never gonna give you up
142 | Never gonna let you down
143 | Never gonna run around and desert you
144 | Never gonna make you cry
145 | Never gonna say goodbye
146 | Never gonna tell a lie and hurt you
147 | 
148 | Never gonna give you up
149 | Never gonna let you down
150 | Never gonna run around and desert you
151 | Never gonna make you cry
152 | Never gonna say goodbye
153 | Never gonna tell a lie and hurt you
154 | 
155 | (Ooh, give you up)
156 | (Ooh, give you up)
157 | Never gonna give, never gonna give
158 | (Give you up)
159 | Never gonna give, never gonna give
160 | (Give you up)
161 | 
162 | We've known each other for so long
163 | Your heart's been aching, but
164 | You're too shy to say it
165 | Inside, we both know what's been going on
166 | We know the game and we're gonna play it
167 | 
168 | I just wanna tell you how I'm feeling
169 | Gotta make you understand
170 | 
171 | Never gonna give you up
172 | Never gonna let you down
173 | Never gonna run around and desert you
174 | Never gonna make you cry
175 | Never gonna say goodbye
176 | Never gonna tell a lie and hurt you
177 | 
178 | Never gonna give you up
179 | Never gonna let you down
180 | Never gonna run around and desert you
181 | Never gonna make you cry
182 | Never gonna say goodbye
183 | Never gonna tell a lie and hurt you
184 | 
185 | Never gonna give you up
186 | Never gonna let you down
187 | Never gonna run around and desert you
188 | Never gonna make you cry
189 | Never gonna say goodbye
190 | Never gonna tell a lie and hurt you"""
191 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/prepare_data.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import os
  3 | from xml.etree import ElementTree
  4 | 
  5 | import numpy as np
  6 | 
  7 | import drawing
  8 | 
  9 | 
 10 | def get_stroke_sequence(filename):
 11 |     tree = ElementTree.parse(filename).getroot()
 12 |     strokes = [i for i in tree if i.tag == 'StrokeSet'][0]
 13 | 
 14 |     coords = []
 15 |     for stroke in strokes:
 16 |         for i, point in enumerate(stroke):
 17 |             coords.append([
 18 |                 int(point.attrib['x']),
 19 |                 -1*int(point.attrib['y']),
 20 |                 int(i == len(stroke) - 1)
 21 |             ])
 22 |     coords = np.array(coords)
 23 | 
 24 |     coords = drawing.align(coords)
 25 |     coords = drawing.denoise(coords)
 26 |     offsets = drawing.coords_to_offsets(coords)
 27 |     offsets = offsets[:drawing.MAX_STROKE_LEN]
 28 |     offsets = drawing.normalize(offsets)
 29 |     return offsets
 30 | 
 31 | 
 32 | def get_ascii_sequences(filename):
 33 |     sequences = open(filename, 'r').read()
 34 |     sequences = sequences.replace(r'%%%%%%%%%%%', '\n')
 35 |     sequences = [i.strip() for i in sequences.split('\n')]
 36 |     lines = sequences[sequences.index('CSR:') + 2:]
 37 |     lines = [line.strip() for line in lines if line.strip()]
 38 |     lines = [drawing.encode_ascii(line)[:drawing.MAX_CHAR_LEN] for line in lines]
 39 |     return lines
 40 | 
 41 | 
 42 | def collect_data():
 43 |     fnames = []
 44 |     for dirpath, dirnames, filenames in os.walk('data/raw/ascii/'):
 45 |         if dirnames:
 46 |             continue
 47 |         for filename in filenames:
 48 |             if filename.startswith('.'):
 49 |                 continue
 50 |             fnames.append(os.path.join(dirpath, filename))
 51 | 
 52 |     # low quality samples (selected by collecting samples to
 53 |     # which the trained model assigned very low likelihood)
 54 |     blacklist = set(np.load('data/blacklist.npy'))
 55 | 
 56 |     stroke_fnames, transcriptions, writer_ids = [], [], []
 57 |     for i, fname in enumerate(fnames):
 58 |         print(i, fname)
 59 |         if fname == 'data/raw/ascii/z01/z01-000/z01-000z.txt':
 60 |             continue
 61 | 
 62 |         head, tail = os.path.split(fname)
 63 |         last_letter = os.path.splitext(fname)[0][-1]
 64 |         last_letter = last_letter if last_letter.isalpha() else ''
 65 | 
 66 |         line_stroke_dir = head.replace('ascii', 'lineStrokes')
 67 |         line_stroke_fname_prefix = os.path.split(head)[-1] + last_letter + '-'
 68 | 
 69 |         if not os.path.isdir(line_stroke_dir):
 70 |             continue
 71 |         line_stroke_fnames = sorted([f for f in os.listdir(line_stroke_dir)
 72 |                                      if f.startswith(line_stroke_fname_prefix)])
 73 |         if not line_stroke_fnames:
 74 |             continue
 75 | 
 76 |         original_dir = head.replace('ascii', 'original')
 77 |         original_xml = os.path.join(original_dir, 'strokes' + last_letter + '.xml')
 78 |         tree = ElementTree.parse(original_xml)
 79 |         root = tree.getroot()
 80 | 
 81 |         general = root.find('General')
 82 |         if general is not None:
 83 |             writer_id = int(general[0].attrib.get('writerID', '0'))
 84 |         else:
 85 |             writer_id = int('0')
 86 | 
 87 |         ascii_sequences = get_ascii_sequences(fname)
 88 |         assert len(ascii_sequences) == len(line_stroke_fnames)
 89 | 
 90 |         for ascii_seq, line_stroke_fname in zip(ascii_sequences, line_stroke_fnames):
 91 |             if line_stroke_fname in blacklist:
 92 |                 continue
 93 | 
 94 |             stroke_fnames.append(os.path.join(line_stroke_dir, line_stroke_fname))
 95 |             transcriptions.append(ascii_seq)
 96 |             writer_ids.append(writer_id)
 97 | 
 98 |     return stroke_fnames, transcriptions, writer_ids
 99 | 
100 | 
101 | if __name__ == '__main__':
102 |     print('traversing data directory...')
103 |     stroke_fnames, transcriptions, writer_ids = collect_data()
104 | 
105 |     print('dumping to numpy arrays...')
106 |     x = np.zeros([len(stroke_fnames), drawing.MAX_STROKE_LEN, 3], dtype=np.float32)
107 |     x_len = np.zeros([len(stroke_fnames)], dtype=np.int16)
108 |     c = np.zeros([len(stroke_fnames), drawing.MAX_CHAR_LEN], dtype=np.int8)
109 |     c_len = np.zeros([len(stroke_fnames)], dtype=np.int8)
110 |     w_id = np.zeros([len(stroke_fnames)], dtype=np.int16)
111 |     valid_mask = np.zeros([len(stroke_fnames)], dtype=np.bool)
112 | 
113 |     for i, (stroke_fname, c_i, w_id_i) in enumerate(zip(stroke_fnames, transcriptions, writer_ids)):
114 |         if i % 200 == 0:
115 |             print(i, '\t', '/', len(stroke_fnames))
116 |         x_i = get_stroke_sequence(stroke_fname)
117 |         valid_mask[i] = ~np.any(np.linalg.norm(x_i[:, :2], axis=1) > 60)
118 | 
119 |         x[i, :len(x_i), :] = x_i
120 |         x_len[i] = len(x_i)
121 | 
122 |         c[i, :len(c_i)] = c_i
123 |         c_len[i] = len(c_i)
124 | 
125 |         w_id[i] = w_id_i
126 | 
127 |     if not os.path.isdir('data/processed'):
128 |         os.makedirs('data/processed')
129 | 
130 |     np.save('data/processed/x.npy', x[valid_mask])
131 |     np.save('data/processed/x_len.npy', x_len[valid_mask])
132 |     np.save('data/processed/c.npy', c[valid_mask])
133 |     np.save('data/processed/c_len.npy', c_len[valid_mask])
134 |     np.save('data/processed/w_id.npy', w_id[valid_mask])
135 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/readme.md:
--------------------------------------------------------------------------------
 1 | ![](img/banner.svg)
 2 | # Handwriting Synthesis
 3 | Implementation of the handwriting synthesis experiments in the paper <a href="https://arxiv.org/abs/1308.0850">Generating Sequences with Recurrent Neural Networks</a> by Alex Graves.  The implementation closely follows the original paper, with a few slight deviations, and the generated samples are of similar quality to those presented in the paper.
 4 | 
 5 | Web demo is available <a href="https://seanvasquez.com/handwriting-generation/">here</a>.
 6 | 
 7 | ## Usage
 8 | ```python
 9 | lines = [
10 |     "Now this is a story all about how",
11 |     "My life got flipped turned upside down",
12 |     "And I'd like to take a minute, just sit right there",
13 |     "I'll tell you how I became the prince of a town called Bel-Air",
14 | ]
15 | biases = [.75 for i in lines]
16 | styles = [9 for i in lines]
17 | stroke_colors = ['red', 'green', 'black', 'blue']
18 | stroke_widths = [1, 2, 1, 2]
19 | 
20 | hand = Hand()
21 | hand.write(
22 |     filename='img/usage_demo.svg',
23 |     lines=lines,
24 |     biases=biases,
25 |     styles=styles,
26 |     stroke_colors=stroke_colors,
27 |     stroke_widths=stroke_widths
28 | )
29 | ```
30 | ![](img/usage_demo.svg)
31 | 
32 | Currently, the `Hand` class must be imported from `demo.py`.  If someone would like to package this project to make it more usable, please [contribute](#contribute).
33 | 
34 | A pretrained model is included, but if you'd like to train your own, read <a href='https://github.com/sjvasquez/handwriting-synthesis/tree/master/data/raw'>these instructions</a>.
35 | 
36 | ## Demonstrations
37 | Below are a few hundred samples from the model, including some samples demonstrating the effect of priming and biasing the model.  Loosely speaking, biasing controls the neatness of the samples and priming controls the style of the samples. The code for these demonstrations can be found in `demo.py`.
38 | 
39 | ### Demo #1:
40 | The following samples were generated with a fixed style and fixed bias.
41 | 
42 | **Smash Mouth – All Star (<a href="https://www.azlyrics.com/lyrics/smashmouth/allstar.html">lyrics</a>)**
43 | ![](img/all_star.svg)
44 | 
45 | ### Demo #2
46 | The following samples were generated with varying style and fixed bias.  Each verse is generated in a different style.
47 | 
48 | **Vanessa Carlton – A Thousand Miles (<a href="https://www.azlyrics.com/lyrics/vanessacarlton/athousandmiles.html">lyrics</a>)**
49 | ![](img/downtown.svg)
50 | 
51 | ### Demo #3
52 | The following samples were generated with a fixed style and varying bias.  Each verse has a lower bias than the previous, with the last verse being unbiased.
53 | 
54 | **Leonard Cohen – Hallelujah (<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ">lyrics</a>)**
55 | ![](img/give_up.svg)
56 | 
57 | ## Contribute
58 | This project was intended to serve as a reference implementation for a research paper, but since the results are of decent quality, it may be worthwile to make the project more broadly usable.  I plan to continue focusing on the machine learning side of things.  That said, I'd welcome contributors who can:
59 | 
60 |   - Package this, and otherwise make it look more like a usable software project and less like research code.
61 |   - Add support for more sophisticated drawing, animations, or anything else in this direction.  Currently, the project only creates some simple svg files.
62 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib>=2.1.0
2 | pandas>= 0.22.0
3 | scikit-learn>=0.19.1
4 | scipy>=1.0.0
5 | svgwrite>=1.1.12
6 | tensorflow==2.11.1
7 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/rnn_cell.py:
--------------------------------------------------------------------------------
  1 | from collections import namedtuple
  2 | 
  3 | import tensorflow as tf
  4 | import tensorflow.contrib.distributions as tfd
  5 | import numpy as np
  6 | 
  7 | from tf_utils import dense_layer, shape
  8 | 
  9 | 
 10 | LSTMAttentionCellState = namedtuple(
 11 |     'LSTMAttentionCellState',
 12 |     ['h1', 'c1', 'h2', 'c2', 'h3', 'c3', 'alpha', 'beta', 'kappa', 'w', 'phi']
 13 | )
 14 | 
 15 | 
 16 | class LSTMAttentionCell(tf.nn.rnn_cell.RNNCell):
 17 | 
 18 |     def __init__(
 19 |         self,
 20 |         lstm_size,
 21 |         num_attn_mixture_components,
 22 |         attention_values,
 23 |         attention_values_lengths,
 24 |         num_output_mixture_components,
 25 |         bias,
 26 |         reuse=None,
 27 |     ):
 28 |         self.reuse = reuse
 29 |         self.lstm_size = lstm_size
 30 |         self.num_attn_mixture_components = num_attn_mixture_components
 31 |         self.attention_values = attention_values
 32 |         self.attention_values_lengths = attention_values_lengths
 33 |         self.window_size = shape(self.attention_values, 2)
 34 |         self.char_len = tf.shape(attention_values)[1]
 35 |         self.batch_size = tf.shape(attention_values)[0]
 36 |         self.num_output_mixture_components = num_output_mixture_components
 37 |         self.output_units = 6*self.num_output_mixture_components + 1
 38 |         self.bias = bias
 39 | 
 40 |     @property
 41 |     def state_size(self):
 42 |         return LSTMAttentionCellState(
 43 |             self.lstm_size,
 44 |             self.lstm_size,
 45 |             self.lstm_size,
 46 |             self.lstm_size,
 47 |             self.lstm_size,
 48 |             self.lstm_size,
 49 |             self.num_attn_mixture_components,
 50 |             self.num_attn_mixture_components,
 51 |             self.num_attn_mixture_components,
 52 |             self.window_size,
 53 |             self.char_len,
 54 |         )
 55 | 
 56 |     @property
 57 |     def output_size(self):
 58 |         return self.lstm_size
 59 | 
 60 |     def zero_state(self, batch_size, dtype):
 61 |         return LSTMAttentionCellState(
 62 |             tf.zeros([batch_size, self.lstm_size]),
 63 |             tf.zeros([batch_size, self.lstm_size]),
 64 |             tf.zeros([batch_size, self.lstm_size]),
 65 |             tf.zeros([batch_size, self.lstm_size]),
 66 |             tf.zeros([batch_size, self.lstm_size]),
 67 |             tf.zeros([batch_size, self.lstm_size]),
 68 |             tf.zeros([batch_size, self.num_attn_mixture_components]),
 69 |             tf.zeros([batch_size, self.num_attn_mixture_components]),
 70 |             tf.zeros([batch_size, self.num_attn_mixture_components]),
 71 |             tf.zeros([batch_size, self.window_size]),
 72 |             tf.zeros([batch_size, self.char_len]),
 73 |         )
 74 | 
 75 |     def __call__(self, inputs, state, scope=None):
 76 |         with tf.variable_scope(scope or type(self).__name__, reuse=tf.AUTO_REUSE):
 77 | 
 78 |             # lstm 1
 79 |             s1_in = tf.concat([state.w, inputs], axis=1)
 80 |             cell1 = tf.contrib.rnn.LSTMCell(self.lstm_size)
 81 |             s1_out, s1_state = cell1(s1_in, state=(state.c1, state.h1))
 82 | 
 83 |             # attention
 84 |             attention_inputs = tf.concat([state.w, inputs, s1_out], axis=1)
 85 |             attention_params = dense_layer(attention_inputs, 3*self.num_attn_mixture_components, scope='attention')
 86 |             alpha, beta, kappa = tf.split(tf.nn.softplus(attention_params), 3, axis=1)
 87 |             kappa = state.kappa + kappa / 25.0
 88 |             beta = tf.clip_by_value(beta, .01, np.inf)
 89 | 
 90 |             kappa_flat, alpha_flat, beta_flat = kappa, alpha, beta
 91 |             kappa, alpha, beta = tf.expand_dims(kappa, 2), tf.expand_dims(alpha, 2), tf.expand_dims(beta, 2)
 92 | 
 93 |             enum = tf.reshape(tf.range(self.char_len), (1, 1, self.char_len))
 94 |             u = tf.cast(tf.tile(enum, (self.batch_size, self.num_attn_mixture_components, 1)), tf.float32)
 95 |             phi_flat = tf.reduce_sum(alpha*tf.exp(-tf.square(kappa - u) / beta), axis=1)
 96 | 
 97 |             phi = tf.expand_dims(phi_flat, 2)
 98 |             sequence_mask = tf.cast(tf.sequence_mask(self.attention_values_lengths, maxlen=self.char_len), tf.float32)
 99 |             sequence_mask = tf.expand_dims(sequence_mask, 2)
100 |             w = tf.reduce_sum(phi*self.attention_values*sequence_mask, axis=1)
101 | 
102 |             # lstm 2
103 |             s2_in = tf.concat([inputs, s1_out, w], axis=1)
104 |             cell2 = tf.contrib.rnn.LSTMCell(self.lstm_size)
105 |             s2_out, s2_state = cell2(s2_in, state=(state.c2, state.h2))
106 | 
107 |             # lstm 3
108 |             s3_in = tf.concat([inputs, s2_out, w], axis=1)
109 |             cell3 = tf.contrib.rnn.LSTMCell(self.lstm_size)
110 |             s3_out, s3_state = cell3(s3_in, state=(state.c3, state.h3))
111 | 
112 |             new_state = LSTMAttentionCellState(
113 |                 s1_state.h,
114 |                 s1_state.c,
115 |                 s2_state.h,
116 |                 s2_state.c,
117 |                 s3_state.h,
118 |                 s3_state.c,
119 |                 alpha_flat,
120 |                 beta_flat,
121 |                 kappa_flat,
122 |                 w,
123 |                 phi_flat,
124 |             )
125 | 
126 |             return s3_out, new_state
127 | 
128 |     def output_function(self, state):
129 |         params = dense_layer(state.h3, self.output_units, scope='gmm', reuse=tf.AUTO_REUSE)
130 |         pis, mus, sigmas, rhos, es = self._parse_parameters(params)
131 |         mu1, mu2 = tf.split(mus, 2, axis=1)
132 |         mus = tf.stack([mu1, mu2], axis=2)
133 |         sigma1, sigma2 = tf.split(sigmas, 2, axis=1)
134 | 
135 |         covar_matrix = [tf.square(sigma1), rhos*sigma1*sigma2,
136 |                         rhos*sigma1*sigma2, tf.square(sigma2)]
137 |         covar_matrix = tf.stack(covar_matrix, axis=2)
138 |         covar_matrix = tf.reshape(covar_matrix, (self.batch_size, self.num_output_mixture_components, 2, 2))
139 | 
140 |         mvn = tfd.MultivariateNormalFullCovariance(loc=mus, covariance_matrix=covar_matrix)
141 |         b = tfd.Bernoulli(probs=es)
142 |         c = tfd.Categorical(probs=pis)
143 | 
144 |         sampled_e = b.sample()
145 |         sampled_coords = mvn.sample()
146 |         sampled_idx = c.sample()
147 | 
148 |         idx = tf.stack([tf.range(self.batch_size), sampled_idx], axis=1)
149 |         coords = tf.gather_nd(sampled_coords, idx)
150 |         return tf.concat([coords, tf.cast(sampled_e, tf.float32)], axis=1)
151 | 
152 |     def termination_condition(self, state):
153 |         char_idx = tf.cast(tf.argmax(state.phi, axis=1), tf.int32)
154 |         final_char = char_idx >= self.attention_values_lengths - 1
155 |         past_final_char = char_idx >= self.attention_values_lengths
156 |         output = self.output_function(state)
157 |         es = tf.cast(output[:, 2], tf.int32)
158 |         is_eos = tf.equal(es, np.ones_like(es))
159 |         return tf.logical_or(tf.logical_and(final_char, is_eos), past_final_char)
160 | 
161 |     def _parse_parameters(self, gmm_params, eps=1e-8, sigma_eps=1e-4):
162 |         pis, sigmas, rhos, mus, es = tf.split(
163 |             gmm_params,
164 |             [
165 |                 1*self.num_output_mixture_components,
166 |                 2*self.num_output_mixture_components,
167 |                 1*self.num_output_mixture_components,
168 |                 2*self.num_output_mixture_components,
169 |                 1
170 |             ],
171 |             axis=-1
172 |         )
173 |         pis = pis*(1 + tf.expand_dims(self.bias, 1))
174 |         sigmas = sigmas - tf.expand_dims(self.bias, 1)
175 | 
176 |         pis = tf.nn.softmax(pis, axis=-1)
177 |         pis = tf.where(pis < .01, tf.zeros_like(pis), pis)
178 |         sigmas = tf.clip_by_value(tf.exp(sigmas), sigma_eps, np.inf)
179 |         rhos = tf.clip_by_value(tf.tanh(rhos), eps - 1.0, 1.0 - eps)
180 |         es = tf.clip_by_value(tf.nn.sigmoid(es), eps, 1.0 - eps)
181 |         es = tf.where(es < .01, tf.zeros_like(es), es)
182 | 
183 |         return pis, mus, sigmas, rhos, es
184 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-0-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-0-chars.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-0-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-0-strokes.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-1-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-1-chars.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-1-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-1-strokes.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-1.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-10-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-10-chars.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-10-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-10-strokes.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-11-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-11-chars.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-11-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-11-strokes.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-12-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-12-chars.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-12-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-12-strokes.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-2-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-2-chars.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-2-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-2-strokes.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-2.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-3-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-3-chars.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-3-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-3-strokes.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-4-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-4-chars.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-4-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-4-strokes.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-5-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-5-chars.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-5-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-5-strokes.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-6-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-6-chars.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-6-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-6-strokes.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-7-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-7-chars.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-7-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-7-strokes.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-8-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-8-chars.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-8-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-8-strokes.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-9-chars.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-9-chars.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/styles/style-9-strokes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting-synthesis/styles/style-9-strokes.npy


--------------------------------------------------------------------------------
/handwriting-synthesis/test_example.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import os
  3 | 
  4 | import numpy as np
  5 | 
  6 | import drawing
  7 | from data_frame import DataFrame
  8 | from drawing import alphabet
  9 | 
 10 | import svgwrite
 11 | 
 12 | 
 13 | class DataReader(object):
 14 | 
 15 |     def __init__(self, data_dir):
 16 |         data_cols = ['x', 'x_len', 'c', 'c_len']
 17 |         data = [np.load(os.path.join(data_dir, '{}.npy'.format(i))) for i in data_cols]
 18 | 
 19 |         self.test_df = DataFrame(columns=data_cols, data=data)
 20 |         self.train_df, self.val_df = self.test_df.train_test_split(train_size=0.95, random_state=2018)
 21 | 
 22 |         print('train size', len(self.train_df))
 23 |         print('val size', len(self.val_df))
 24 |         print('test size', len(self.test_df))
 25 | 
 26 |     def train_batch_generator(self, batch_size):
 27 |         return self.batch_generator(
 28 |             batch_size=batch_size,
 29 |             df=self.train_df,
 30 |             shuffle=True,
 31 |             num_epochs=10000,
 32 |             mode='train'
 33 |         )
 34 | 
 35 |     def val_batch_generator(self, batch_size):
 36 |         return self.batch_generator(
 37 |             batch_size=batch_size,
 38 |             df=self.val_df,
 39 |             shuffle=True,
 40 |             num_epochs=10000,
 41 |             mode='val'
 42 |         )
 43 | 
 44 |     def test_batch_generator(self, batch_size):
 45 |         return self.batch_generator(
 46 |             batch_size=batch_size,
 47 |             df=self.test_df,
 48 |             shuffle=False,
 49 |             num_epochs=1,
 50 |             mode='test'
 51 |         )
 52 | 
 53 |     def batch_generator(self, batch_size, df, shuffle=True, num_epochs=10000, mode='train'):
 54 |         gen = df.batch_generator(
 55 |             batch_size=batch_size,
 56 |             shuffle=shuffle,
 57 |             num_epochs=num_epochs,
 58 |             allow_smaller_final_batch=(mode == 'test')
 59 |         )
 60 |         for batch in gen:
 61 |             batch['x_len'] = batch['x_len'] - 1
 62 |             max_x_len = np.max(batch['x_len'])
 63 |             max_c_len = np.max(batch['c_len'])
 64 |             batch['y'] = batch['x'][:, 1:max_x_len + 1, :]
 65 |             batch['x'] = batch['x'][:, :max_x_len, :]
 66 |             batch['c'] = batch['c'][:, :max_c_len]
 67 |             yield batch
 68 | 
 69 | 
 70 | def _draw(strokes, lines, filename, stroke_colors=None, stroke_widths=None):
 71 |     stroke_colors = stroke_colors or ['black']*len(lines)
 72 |     stroke_widths = stroke_widths or [2]*len(lines)
 73 | 
 74 |     line_height = 60
 75 |     view_width = 1000
 76 |     view_height = line_height*(len(strokes) + 1)
 77 | 
 78 |     dwg = svgwrite.Drawing(filename=filename)
 79 |     dwg.viewbox(width=view_width, height=view_height)
 80 |     dwg.add(dwg.rect(insert=(0, 0), size=(view_width, view_height), fill='white'))
 81 | 
 82 |     initial_coord = np.array([0, -(3*line_height / 4)])
 83 |     for offsets, line, color, width in zip(strokes, lines, stroke_colors, stroke_widths):
 84 | 
 85 |         if not line:
 86 |             initial_coord[1] -= line_height
 87 |             continue
 88 | 
 89 |         offsets[:, :2] *= 1.5
 90 |         strokes = drawing.offsets_to_coords(offsets)
 91 |         strokes = drawing.denoise(strokes)
 92 |         strokes[:, :2] = drawing.align(strokes[:, :2])
 93 | 
 94 |         strokes[:, 1] *= -1
 95 |         strokes[:, :2] -= strokes[:, :2].min() + initial_coord
 96 |         strokes[:, 0] += (view_width - strokes[:, 0].max()) / 2
 97 | 
 98 |         prev_eos = 1.0
 99 |         p = "M{},{} ".format(0, 0)
100 |         for x, y, eos in zip(*strokes.T):
101 |             p += '{}{},{} '.format('M' if prev_eos == 1.0 else 'L', x, y)
102 |             prev_eos = eos
103 |         path = svgwrite.path.Path(p)
104 |         path = path.stroke(color=color, width=width, linecap='round').fill("none")
105 |         dwg.add(path)
106 | 
107 |         initial_coord[1] -= line_height
108 | 
109 |     dwg.save()
110 | 
111 | 
112 | def num_to_string(c, c_len):
113 |     indices = c[:c_len - 1]
114 |     str_out = ''.join([alphabet[x] for x in indices])
115 |     return str_out
116 | 
117 | 
118 | if __name__ == '__main__':
119 |     dr = DataReader(data_dir='data/processed/')
120 |     # import ipdb; ipdb.set_trace()
121 | 
122 |     stroke_colors = ['red', 'green', 'black', 'blue']
123 |     stroke_widths = [1, 2, 1, 2]
124 | 
125 |     lines = [
126 |         num_to_string(dr.test_df['c'][0], dr.test_df['c_len'][0]),
127 |         num_to_string(dr.test_df['c'][1], dr.test_df['c_len'][1]),
128 |     ]
129 |     strokes = [
130 |         dr.test_df['x'][0][:dr.test_df['x_len'][0]],
131 |         dr.test_df['x'][1][:dr.test_df['x_len'][1]],
132 |     ]
133 | 
134 |     import ipdb; ipdb.set_trace()
135 | 
136 |     _draw(strokes, lines, "test.svg", stroke_colors=stroke_colors, stroke_widths=stroke_widths)
137 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/tf_utils.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def dense_layer(inputs, output_units, bias=True, activation=None, batch_norm=None,
 5 |                 dropout=None, scope='dense-layer', reuse=False):
 6 |     """
 7 |     Applies a dense layer to a 2D tensor of shape [batch_size, input_units]
 8 |     to produce a tensor of shape [batch_size, output_units].
 9 |     Args:
10 |         inputs: Tensor of shape [batch size, input_units].
11 |         output_units: Number of output units.
12 |         activation: activation function.
13 |         dropout: dropout keep prob.
14 |     Returns:
15 |         Tensor of shape [batch size, output_units].
16 |     """
17 |     with tf.variable_scope(scope, reuse=reuse):
18 |         W = tf.get_variable(
19 |             name='weights',
20 |             initializer=tf.contrib.layers.variance_scaling_initializer(),
21 |             shape=[shape(inputs, -1), output_units]
22 |         )
23 |         z = tf.matmul(inputs, W)
24 |         if bias:
25 |             b = tf.get_variable(
26 |                 name='biases',
27 |                 initializer=tf.constant_initializer(),
28 |                 shape=[output_units]
29 |             )
30 |             z = z + b
31 | 
32 |         if batch_norm is not None:
33 |             z = tf.layers.batch_normalization(z, training=batch_norm, reuse=reuse)
34 | 
35 |         z = activation(z) if activation else z
36 |         z = tf.nn.dropout(z, dropout) if dropout is not None else z
37 |         return z
38 | 
39 | 
40 | def time_distributed_dense_layer(
41 |         inputs, output_units, bias=True, activation=None, batch_norm=None,
42 |         dropout=None, scope='time-distributed-dense-layer', reuse=False):
43 |     """
44 |     Applies a shared dense layer to each timestep of a tensor of shape
45 |     [batch_size, max_seq_len, input_units] to produce a tensor of shape
46 |     [batch_size, max_seq_len, output_units].
47 | 
48 |     Args:
49 |         inputs: Tensor of shape [batch size, max sequence length, ...].
50 |         output_units: Number of output units.
51 |         activation: activation function.
52 |         dropout: dropout keep prob.
53 | 
54 |     Returns:
55 |         Tensor of shape [batch size, max sequence length, output_units].
56 |     """
57 |     with tf.variable_scope(scope, reuse=reuse):
58 |         W = tf.get_variable(
59 |             name='weights',
60 |             initializer=tf.contrib.layers.variance_scaling_initializer(),
61 |             shape=[shape(inputs, -1), output_units]
62 |         )
63 |         z = tf.einsum('ijk,kl->ijl', inputs, W)
64 |         if bias:
65 |             b = tf.get_variable(
66 |                 name='biases',
67 |                 initializer=tf.constant_initializer(),
68 |                 shape=[output_units]
69 |             )
70 |             z = z + b
71 | 
72 |         if batch_norm is not None:
73 |             z = tf.layers.batch_normalization(z, training=batch_norm, reuse=reuse)
74 | 
75 |         z = activation(z) if activation else z
76 |         z = tf.nn.dropout(z, dropout) if dropout is not None else z
77 |         return z
78 | 
79 | 
80 | def shape(tensor, dim=None):
81 |     """Get tensor shape/dimension as list/int"""
82 |     if dim is None:
83 |         return tensor.shape.as_list()
84 |     else:
85 |         return tensor.shape.as_list()[dim]
86 | 
87 | 
88 | def rank(tensor):
89 |     """Get tensor rank as python list"""
90 |     return len(tensor.shape.as_list())
91 | 


--------------------------------------------------------------------------------
/handwriting-synthesis/upgrade_tf2.sh:
--------------------------------------------------------------------------------
1 | # NOTE: tf_upgrade_v2 does not successfully convert all these files to tf2
2 | tf_upgrade_v2 --infile rnn_cell.py --outfile rnn_cell.py
3 | tf_upgrade_v2 --infile rnn.py --outfile rnn.py
4 | tf_upgrade_v2 --infile rnn_ops.py --outfile rnn_ops.py
5 | tf_upgrade_v2 --infile tf_utils.py --outfile tf_utils.py
6 | 


--------------------------------------------------------------------------------
/handwriting_generator/saved.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/handwriting_generator/saved.tgz


--------------------------------------------------------------------------------
/minGPT/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints/
2 | __pycache__/
3 | 


--------------------------------------------------------------------------------
/minGPT/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT) Copyright (c) 2020 Andrej Karpathy
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 | 


--------------------------------------------------------------------------------
/minGPT/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # minGPT
  3 | 
  4 | ![mingpt](mingpt.jpg)
  5 | 
  6 | A PyTorch re-implementation of [GPT](https://github.com/openai/gpt-3) training. minGPT tries to be small, clean, interpretable and educational, as most of the currently available ones are a bit sprawling. GPT is not a complicated model and this implementation is appropriately about 300 lines of code, including boilerplate and a totally unnecessary custom causal self-attention module. Anyway, all that's going on is that a sequence of indices goes into a sequence of transformer blocks, and a probability distribution of the next index comes out. The rest of the complexity is just being clever with batching (both across examples and over sequence length) so that training is efficient.
  7 | 
  8 | The core minGPT "library" (hah) is two files: `mingpt/model.py` contains the actual Transformer model definition and `mingpt/trainer.py` is (GPT-independent) PyTorch boilerplate that trains the model. The attached Jupyter notebooks then show how the "library" (hah) can be used to train sequence models:
  9 | 
 10 | - `play_math.ipynb` trains a GPT focused on addition (inspired by the addition section in the GPT-3 paper)
 11 | - `play_char.ipynb` trains a GPT to be a character-level language model on arbitrary text, similar to my older char-rnn but with a transformer instead of an RNN
 12 | - `play_image.ipynb` trains a GPT on (small) images (CIFAR-10), showing that we can model images just as text, as both can be reduced to just a sequence of integers
 13 | - `play_words.ipynb` a BPE version that does not yet exist
 14 | 
 15 | With a bpe encoder, distributed training and maybe fp16 this implementation may be able to reproduce GPT-1/GPT-2 results, though I haven't tried $$$. GPT-3 is likely out of reach as my understanding is that it does not fit into GPU memory and requires a more careful model-parallel treatment.
 16 | 
 17 | ### Example usage
 18 | 
 19 | This code is simple enough to just hack inline, not "used", but current API looks something like:
 20 | 
 21 | ```python
 22 | 
 23 | # you're on your own to define a class that returns individual examples as PyTorch LongTensors
 24 | from torch.utils.data import Dataset
 25 | train_dataset = MyDataset(...)
 26 | test_dataset = MyDataset(...)
 27 | 
 28 | # construct a GPT model
 29 | from mingpt.model import GPT, GPTConfig
 30 | mconf = GPTConfig(vocab_size, block_size, n_layer=12, n_head=12, n_embd=768) # a GPT-1
 31 | model = GPT(mconf)
 32 | 
 33 | # construct a trainer
 34 | from mingpt.trainer import Trainer, TrainerConfig
 35 | tconf = TrainerConfig(max_epochs=10, batch_size=256)
 36 | trainer = Trainer(model, train_dataset, test_dataset, tconf)
 37 | trainer.train()
 38 | # (... enjoy the show for a while... )
 39 | 
 40 | # sample from the model (the [None, ...] and [0] are to push/pop a needed dummy batch dimension)
 41 | from mingpt.utils import sample
 42 | x = torch.tensor([1, 2, 3], dtype=torch.long)[None, ...] # context conditioning
 43 | y = sample(model, x, steps=30, temperature=1.0, sample=True, top_k=5)[0]
 44 | print(y) # our model filled in the integer sequence with 30 additional likely integers
 45 | ```
 46 | 
 47 | ### References
 48 | 
 49 | Code:
 50 | 
 51 | - [openai/gpt-2](https://github.com/openai/gpt-2) has the model but not the training code, and in TensorFlow
 52 | - [openai/image-gpt](https://github.com/openai/image-gpt) has some more modern gpt-3 like modification in its code, good reference as well
 53 | - huggingface/transformers has a [language-modeling example](https://github.com/huggingface/transformers/tree/master/examples/language-modeling). It is full-featured but as a result also somewhat challenging to trace. E.g. some large functions have as much as 90% unused code behind various branching statements that is unused in the default setting of simple language modeling.
 54 | 
 55 | Papers + some implementation notes:
 56 | 
 57 | #### Improving Language Understanding by Generative Pre-Training (GPT-1)
 58 | 
 59 | - Our model largely follows the original transformer work
 60 | - We trained a 12-layer decoder-only transformer with masked self-attention heads (768 dimensional states and 12 attention heads). For the position-wise feed-forward networks, we used 3072 dimensional inner states.
 61 | - Adam max learning rate of 2.5e-4. (later GPT-3 for this model size uses 6e-4)
 62 | - LR decay: increased linearly from zero over the first 2000 updates and annealed to 0 using a cosine schedule
 63 | - We train for 100 epochs on minibatches of 64 randomly sampled, contiguous sequences of 512 tokens.
 64 | - Since layernorm is used extensively throughout the model, a simple weight initialization of N(0, 0.02) was sufficient
 65 | - bytepair encoding (BPE) vocabulary with 40,000 merges
 66 | - residual, embedding, and attention dropouts with a rate of 0.1 for regularization.
 67 | - modified version of L2 regularization proposed in (37), with w = 0.01 on all non bias or gain weights
 68 | - For the activation function, we used the Gaussian Error Linear Unit (GELU).
 69 | - We used learned position embeddings instead of the sinusoidal version proposed in the original work
 70 | - For finetuning: We add dropout to the classifier with a rate of 0.1. learning rate of 6.25e-5 and a batchsize of 32. 3 epochs. We use a linear learning rate decay schedule with warmup over 0.2% of training. λ was set to 0.5.
 71 | - GPT-1 model is 12 layers and d_model 768, ~117M params
 72 | 
 73 | #### Language Models are Unsupervised Multitask Learners (GPT-2)
 74 | 
 75 | - LayerNorm was moved to the input of each sub-block, similar to a pre-activation residual network
 76 | - an additional layer normalization was added after the final self-attention block.
 77 | - modified initialization which accounts for the accumulation on the residual path with model depth is used. We scale the weights of residual layers at initialization by a factor of 1/√N where N is the number of residual layers. (weird because in their released code i can only find a simple use of the old 0.02... in their release of image-gpt I found it used for c_proj, and even then only for attn, not for mlp. huh. https://github.com/openai/image-gpt/blob/master/src/model.py)
 78 | - the vocabulary is expanded to 50,257
 79 | - increase the context size from 512 to 1024 tokens
 80 | - larger batchsize of 512 is used
 81 | - GPT-2 used 48 layers and d_model 1600 (vs. original 12 layers and d_model 768). ~1.542B params
 82 | 
 83 | #### Language Models are Few-Shot Learners (GPT-3)
 84 | 
 85 | - GPT-3: 96 layers, 96 heads, with d_model of 12,288 (175B parameters).
 86 | - GPT-1-like: 12 layers, 12 heads, d_model 768 (125M)
 87 | - We use the same model and architecture as GPT-2, including the modified initialization, pre-normalization, and reversible tokenization described therein
 88 | - we use alternating dense and locally banded sparse attention patterns in the layers of the transformer, similar to the Sparse Transformer
 89 | - we always have the feedforward layer four times the size of the bottleneck layer, dff = 4 ∗ dmodel
 90 | - all models use a context window of nctx = 2048 tokens.
 91 | - Adam with β1 = 0.9, β2 = 0.95, and eps = 10−8
 92 | - All models use weight decay of 0.1 to provide a small amount of regularization. (NOTE: GPT-1 used 0.01 I believe, see above)
 93 | - clip the global norm of the gradient at 1.0
 94 | - Linear LR warmup over the first 375 million tokens. Then use cosine decay for learning rate down to 10% of its value, over 260 billion tokens.
 95 | - gradually increase the batch size linearly from a small value (32k tokens) to the full value over the first 4-12 billion tokens of training, depending on the model size.
 96 | - full 2048-sized time context window is always used, with a special END OF DOCUMENT token delimiter
 97 | 
 98 | #### Generative Pretraining from Pixels (Image GPT)
 99 | 
100 | - When working with images, we pick the identity permutation πi = i for 1 ≤ i ≤ n, also known as raster order.
101 | - we create our own 9-bit color palette by clustering (R, G, B) pixel values using k-means with k = 512.
102 | - Our largest model, iGPT-XL, contains L = 60 layers and uses an embedding size of d = 3072 for a total of 6.8B parameters.
103 | - Our next largest model, iGPT-L, is essentially identical to GPT-2 with L = 48 layers, but contains a slightly smaller embedding size of d = 1536 (vs 1600) for a total of 1.4M parameters.
104 | - We use the same model code as GPT-2, except that we initialize weights in the layerdependent fashion as in Sparse Transformer (Child et al., 2019) and zero-initialize all projections producing logits.
105 | - We also train iGPT-M, a 455M parameter model with L = 36 and d = 1024
106 | - iGPT-S, a 76M parameter model with L = 24 and d = 512 (okay, and how many heads? looks like the Github code claims 8)
107 | - When pre-training iGPT-XL, we use a batch size of 64 and train for 2M iterations, and for all other models we use a batch size of 128 and train for 1M iterations.
108 | - Adam with β1 = 0.9 and β2 = 0.95
109 | - The learning rate is warmed up for one epoch, and then decays to 0
110 | - We did not use weight decay because applying a small weight decay of 0.01 did not change representation quality.
111 | - iGPT-S lr 0.003
112 | - No dropout is used.
113 | 
114 | ### License
115 | 
116 | MIT
117 | 


--------------------------------------------------------------------------------
/minGPT/mingpt.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/minGPT/mingpt.jpg


--------------------------------------------------------------------------------
/minGPT/mingpt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/minGPT/mingpt/__init__.py


--------------------------------------------------------------------------------
/minGPT/mingpt/trainer.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Simple training loop; Boilerplate that could apply to any arbitrary neural network,
  3 | so nothing in this file really has anything to do with GPT specifically.
  4 | """
  5 | 
  6 | import math
  7 | import logging
  8 | 
  9 | from tqdm import tqdm
 10 | import numpy as np
 11 | 
 12 | import torch
 13 | import torch.optim as optim
 14 | from torch.optim.lr_scheduler import LambdaLR
 15 | from torch.utils.data.dataloader import DataLoader
 16 | 
 17 | logger = logging.getLogger(__name__)
 18 | 
 19 | class TrainerConfig:
 20 |     # optimization parameters
 21 |     max_epochs = 10
 22 |     batch_size = 64
 23 |     learning_rate = 3e-4
 24 |     betas = (0.9, 0.95)
 25 |     grad_norm_clip = 1.0
 26 |     weight_decay = 0.1 # only applied on matmul weights
 27 |     # learning rate decay params: linear warmup followed by cosine decay to 10% of original
 28 |     lr_decay = False
 29 |     warmup_tokens = 375e6 # these two numbers come from the GPT-3 paper, but may not be good defaults elsewhere
 30 |     final_tokens = 260e9 # (at what point we reach 10% of original LR)
 31 |     # checkpoint settings
 32 |     ckpt_path = None
 33 |     num_workers = 0 # for DataLoader
 34 | 
 35 |     def __init__(self, **kwargs):
 36 |         for k,v in kwargs.items():
 37 |             setattr(self, k, v)
 38 | 
 39 | class Trainer:
 40 | 
 41 |     def __init__(self, model, train_dataset, test_dataset, config):
 42 |         self.model = model
 43 |         self.train_dataset = train_dataset
 44 |         self.test_dataset = test_dataset
 45 |         self.config = config
 46 | 
 47 |         # take over whatever gpus are on the system
 48 |         self.device = 'cpu'
 49 |         if torch.cuda.is_available():
 50 |             self.device = torch.cuda.current_device()
 51 |             self.model = torch.nn.DataParallel(self.model).to(self.device)
 52 | 
 53 |     def save_checkpoint(self):
 54 |         # DataParallel wrappers keep raw model object in .module attribute
 55 |         raw_model = self.model.module if hasattr(self.model, "module") else self.model
 56 |         logger.info("saving %s", self.config.ckpt_path)
 57 |         torch.save(raw_model.state_dict(), self.config.ckpt_path)
 58 | 
 59 |     def train(self):
 60 |         model, config = self.model, self.config
 61 |         raw_model = model.module if hasattr(self.model, "module") else model
 62 |         optimizer = raw_model.configure_optimizers(config)
 63 | 
 64 |         def run_epoch(split):
 65 |             is_train = split == 'train'
 66 |             model.train(is_train)
 67 |             data = self.train_dataset if is_train else self.test_dataset
 68 |             loader = DataLoader(data, shuffle=True, pin_memory=True,
 69 |                                 batch_size=config.batch_size,
 70 |                                 num_workers=config.num_workers)
 71 | 
 72 |             losses = []
 73 |             pbar = tqdm(enumerate(loader), total=len(loader)) if is_train else enumerate(loader)
 74 |             for it, (x, y) in pbar:
 75 | 
 76 |                 # place data on the correct device
 77 |                 x = x.to(self.device)
 78 |                 y = y.to(self.device)
 79 | 
 80 |                 # forward the model
 81 |                 with torch.set_grad_enabled(is_train):
 82 |                     logits, loss = model(x, y)
 83 |                     loss = loss.mean() # collapse all losses if they are scattered on multiple gpus
 84 |                     losses.append(loss.item())
 85 | 
 86 |                 if is_train:
 87 | 
 88 |                     # backprop and update the parameters
 89 |                     model.zero_grad()
 90 |                     loss.backward()
 91 |                     torch.nn.utils.clip_grad_norm_(model.parameters(), config.grad_norm_clip)
 92 |                     optimizer.step()
 93 | 
 94 |                     # decay the learning rate based on our progress
 95 |                     if config.lr_decay:
 96 |                         self.tokens += (y >= 0).sum() # number of tokens processed this step (i.e. label is not -100)
 97 |                         if self.tokens < config.warmup_tokens:
 98 |                             # linear warmup
 99 |                             lr_mult = float(self.tokens) / float(max(1, config.warmup_tokens))
100 |                         else:
101 |                             # cosine learning rate decay
102 |                             progress = float(self.tokens - config.warmup_tokens) / float(max(1, config.final_tokens - config.warmup_tokens))
103 |                             lr_mult = max(0.1, 0.5 * (1.0 + math.cos(math.pi * progress)))
104 |                         lr = config.learning_rate * lr_mult
105 |                         for param_group in optimizer.param_groups:
106 |                             param_group['lr'] = lr
107 |                     else:
108 |                         lr = config.learning_rate
109 | 
110 |                     # report progress
111 |                     pbar.set_description(f"epoch {epoch+1} iter {it}: train loss {loss.item():.5f}. lr {lr:e}")
112 | 
113 |             if not is_train:
114 |                 test_loss = float(np.mean(losses))
115 |                 logger.info("test loss: %f", test_loss)
116 |                 return test_loss
117 | 
118 |         best_loss = float('inf')
119 |         self.tokens = 0 # counter used for learning rate decay
120 |         for epoch in range(config.max_epochs):
121 | 
122 |             run_epoch('train')
123 |             if self.test_dataset is not None:
124 |                 test_loss = run_epoch('test')
125 | 
126 |             # supports early stopping based on the test loss, or just save always if no test set is provided
127 |             good_model = self.test_dataset is None or test_loss < best_loss
128 |             if self.config.ckpt_path is not None and good_model:
129 |                 best_loss = test_loss
130 |                 self.save_checkpoint()
131 | 


--------------------------------------------------------------------------------
/minGPT/mingpt/utils.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import numpy as np
 3 | import torch
 4 | import torch.nn as nn
 5 | from torch.nn import functional as F
 6 | 
 7 | def set_seed(seed):
 8 |     random.seed(seed)
 9 |     np.random.seed(seed)
10 |     torch.manual_seed(seed)
11 |     torch.cuda.manual_seed_all(seed)
12 | 
13 | def top_k_logits(logits, k):
14 |     v, ix = torch.topk(logits, k)
15 |     out = logits.clone()
16 |     out[out < v[:, [-1]]] = -float('Inf')
17 |     return out
18 | 
19 | @torch.no_grad()
20 | def sample(model, x, steps, temperature=1.0, sample=False, top_k=None):
21 |     """
22 |     take a conditioning sequence of indices in x (of shape (b,t)) and predict the next token in
23 |     the sequence, feeding the predictions back into the model each time. Clearly the sampling
24 |     has quadratic complexity unlike an RNN that is only linear, and has a finite context window
25 |     of block_size, unlike an RNN that has an infinite context window.
26 |     """
27 |     block_size = model.get_block_size()
28 |     model.eval()
29 |     for k in range(steps):
30 |         x_cond = x if x.size(1) <= block_size else x[:, -block_size:] # crop context if needed
31 |         logits, _ = model(x_cond)
32 |         # pluck the logits at the final step and scale by temperature
33 |         logits = logits[:, -1, :] / temperature
34 |         # optionally crop probabilities to only the top k options
35 |         if top_k is not None:
36 |             logits = top_k_logits(logits, top_k)
37 |         # apply softmax to convert to probabilities
38 |         probs = F.softmax(logits, dim=-1)
39 |         # sample from the distribution or take the most likely
40 |         if sample:
41 |             ix = torch.multinomial(probs, num_samples=1)
42 |         else:
43 |             _, ix = torch.topk(probs, k=1, dim=-1)
44 |         # append to the sequence and continue
45 |         x = torch.cat((x, ix), dim=1)
46 | 
47 |     return x
48 | 


--------------------------------------------------------------------------------
/sound/preprocess/README.md:
--------------------------------------------------------------------------------
1 | # Preprocess
2 | ```
3 | python3 to_16000_wav.py INPUT_DIR OUTPUT_DIR
4 | python3 trim.py INPUT_DIR OUTPUT_DIR
5 | ```
6 | 


--------------------------------------------------------------------------------
/sound/preprocess/mp3_to_wav.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | import os
 5 | import glob
 6 | import sys
 7 | 
 8 | def main():
 9 |     if len(sys.argv) < 3:
10 |         print('Usage: python3 mp3_to_wav.py INPUT_DIR OUTPUT_DIR')
11 |         return
12 | 
13 |     INPUT_DIR = sys.argv[1]
14 |     OUTPUT_DIR = sys.argv[2]
15 |     if not os.path.exists(OUTPUT_DIR):
16 |         os.makedirs(OUTPUT_DIR)
17 | 
18 |     for mp3_path in glob.glob(os.path.join(INPUT_DIR, '*.mp3')):
19 |         name = os.path.split(mp3_path)[1][:-len('.mp3')]
20 |         output_path = os.path.join(OUTPUT_DIR, name + '.wav')
21 |         os.system('ffmpeg -i "{}" -ar 16000 "{}"'.format(mp3_path, output_path))
22 |         print(mp3_path)
23 |         print(output_path)
24 | 
25 | if __name__ == '__main__':
26 |     main()
27 | 


--------------------------------------------------------------------------------
/sound/preprocess/to_16000_wav.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | import os
 5 | import glob
 6 | import sys
 7 | 
 8 | def main():
 9 |     if len(sys.argv) < 3:
10 |         print('Usage: python3 mp3_to_wav.py INPUT_DIR OUTPUT_DIR')
11 |         return
12 | 
13 |     INPUT_DIR = sys.argv[1]
14 |     OUTPUT_DIR = sys.argv[2]
15 |     if not os.path.exists(OUTPUT_DIR):
16 |         os.makedirs(OUTPUT_DIR)
17 | 
18 |     for wav_path in glob.glob(os.path.join(INPUT_DIR, '*.wav')):
19 |         name = os.path.split(wav_path)[1][:-len('.wav')]
20 |         output_path = os.path.join(OUTPUT_DIR, name + '.wav')
21 |         os.system('ffmpeg -i "{}" -ar 16000 "{}"'.format(wav_path, output_path))
22 |         print(wav_path)
23 |         print(output_path)
24 | 
25 | if __name__ == '__main__':
26 |     main()
27 | 


--------------------------------------------------------------------------------
/sound/preprocess/trim.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Convert to .wav
 3 | ffmpeg -i input.m4a output.wav
 4 | 
 5 | Split .wav files with ffmpeg:
 6 | NAME=name.wav
 7 | ffmpeg -i $NAME.wav -f segment -segment_time 2 -c copy one_second/$NAME%03d.wav
 8 | 
 9 | python3 to_16000_wav.py INPUT_DIR OUTPUT_DIR
10 | python3 trim.py INPUT_DIR OUTPUT_DIR
11 | 
12 | https://petewarden.com/2017/07/17/a-quick-hack-to-align-single-word-audio-recordings/
13 | 
14 | NOTE: Run make from the extract_loudest_section repo before running this script
15 | """
16 | import glob
17 | import os
18 | import sys
19 | 
20 | def main():
21 |     if len(sys.argv) < 3:
22 |         print('Usage: python3 trim.py INPUT_DIR OUTPUT_DIR')
23 |         return
24 | 
25 |     if not os.path.exists(sys.argv[2]):
26 |         os.makedirs(sys.argv[2])
27 | 
28 |     file_names = glob.glob(os.path.join(sys.argv[1], '*.wav'))
29 |     for filename in file_names:
30 |         print(filename)
31 |         os.system('/tmp/extract_loudest_section/gen/bin/extract_loudest_section "{}" "{}"'.format(filename, sys.argv[2]))
32 | 
33 | if __name__ == '__main__':
34 |     main()
35 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/.DS_Store


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # How to Contribute
 2 | 
 3 | We'd love to accept your patches and contributions to this project. There are
 4 | just a few small guidelines you need to follow.
 5 | 
 6 | ## Contributor License Agreement
 7 | 
 8 | Contributions to this project must be accompanied by a Contributor License
 9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 | 
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 | 
18 | ## Code reviews
19 | 
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 | 
25 | ## Community Guidelines
26 | 
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google.com/conduct/).
29 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/Icon:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/Icon


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/audio_recorder.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2019 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     https://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Interface to asynchronously capture continuous audio from PyAudio.
 16 | 
 17 | 
 18 | This module requires pyaudio. See here for installation instructions:
 19 | http://people.csail.mit.edu/hubert/pyaudio/
 20 | 
 21 | This module provides one class, AudioRecorder, which buffers chunks of audio
 22 | from PyAudio.
 23 | """
 24 | 
 25 | from __future__ import absolute_import
 26 | from __future__ import division
 27 | from __future__ import print_function
 28 | 
 29 | import logging
 30 | 
 31 | import math
 32 | import time
 33 | 
 34 | import numpy as np
 35 | import pyaudio
 36 | import queue
 37 | 
 38 | logger = logging.getLogger(__name__)
 39 | 
 40 | 
 41 | class TimeoutError(Exception):
 42 |   """A timeout while waiting for pyaudio to buffer samples."""
 43 |   pass
 44 | 
 45 | 
 46 | class AudioRecorder(object):
 47 |   """Asynchronously record and buffer audio using pyaudio.
 48 | 
 49 |   This class wraps the pyaudio interface. It contains a queue.Queue object to
 50 |   hold chunks of raw audio, and a callback function _enqueue_audio() which
 51 |   places raw audio into this queue. This allows the pyaudio.Stream object to
 52 |   record asynchronously at low latency.
 53 | 
 54 |   The class acts as a context manager. When entering the context it creates a
 55 |   pyaudio.Stream object and starts recording; it stops recording on exit. The
 56 |   Stream saves all of its audio to the Queue as two-tuples of
 57 |   (timestamp, raw_audio). The raw_audio is available from the queue as a numpy
 58 |   array using the get_audio() function.
 59 | 
 60 |   This class uses the term "frame" in the same sense that PortAudio does, so
 61 |   "frame" means something different here than elsewhere in the daredevil stack.
 62 |   A frame in PortAudio is one audio sample across all channels, so one frame of
 63 |   16-bit stereo audio is four bytes of data as two 16-bit integers.
 64 |   """
 65 |   pyaudio_format = pyaudio.paInt16
 66 |   numpy_format = np.int16
 67 |   num_channels = 1
 68 | 
 69 |   # How many frames of audio PyAudio will fetch at once.
 70 |   # Higher numbers will increase the latancy.
 71 |   frames_per_chunk = 2**9
 72 | 
 73 |   # Limit queue to this number of audio chunks.
 74 |   max_queue_chunks = 1200
 75 | 
 76 |   # Timeout if we can't get a chunk from the queue for timeout_factor times the
 77 |   # chunk duration.
 78 |   timeout_factor = 8
 79 | 
 80 |   def __init__(self, raw_audio_sample_rate_hz=48000,
 81 |                      downsample_factor=3,
 82 |                      device_index=None):
 83 |     self._downsample_factor = downsample_factor
 84 |     self._raw_audio_sample_rate_hz = raw_audio_sample_rate_hz
 85 |     self.audio_sample_rate_hz = self._raw_audio_sample_rate_hz // self._downsample_factor
 86 |     self._raw_audio_queue = queue.Queue(self.max_queue_chunks)
 87 |     self._audio = pyaudio.PyAudio()
 88 |     self._print_input_devices()
 89 |     self._device_index = device_index
 90 | 
 91 |   def __enter__(self):
 92 |     if self._device_index is None:
 93 |       self._device_index = self._audio.get_default_input_device_info()["index"]
 94 |     kwargs = {
 95 |         "input_device_index": self._device_index
 96 |     }
 97 |     device_info = self._audio.get_device_info_by_host_api_device_index(
 98 |         0, self._device_index)
 99 |     if device_info.get("maxInputChannels") <= 0:
100 |       raise ValueError("Audio device has insufficient input channels.")
101 |     print("Using audio device '%s' for index %d" % (
102 |         device_info["name"], device_info["index"]))
103 |     self._stream = self._audio.open(
104 |         format=self.pyaudio_format,
105 |         channels=self.num_channels,
106 |         rate=self._raw_audio_sample_rate_hz,
107 |         input=True,
108 |         output=False,
109 |         frames_per_buffer=self.frames_per_chunk,
110 |         start=True,
111 |         stream_callback=self._enqueue_raw_audio,
112 |         **kwargs)
113 |     logger.info("Started audio stream.")
114 |     return self
115 | 
116 |   def __exit__(self, exception_type, exception_value, traceback):
117 |     self._stream.stop_stream()
118 |     self._stream.close()
119 |     logger.info("Stopped and closed audio stream.")
120 | 
121 |   def __del__(self):
122 |     self._audio.terminate()
123 |     logger.info("Terminated PyAudio/PortAudio.")
124 | 
125 |   @property
126 |   def is_active(self):
127 |     return self._stream.is_active()
128 | 
129 |   @property
130 |   def bytes_per_sample(self):
131 |     return pyaudio.get_sample_size(self.pyaudio_format)
132 | 
133 |   @property
134 |   def _chunk_duration_seconds(self):
135 |     return self.frames_per_chunk / self._raw_audio_sample_rate_hz
136 | 
137 |   def _print_input_devices(self):
138 |     info = self._audio.get_host_api_info_by_index(0)
139 |     print("\nInput microphone devices:")
140 |     for i in range(0, info.get("deviceCount")):
141 |       device_info = self._audio.get_device_info_by_host_api_device_index(0, i)
142 |       if device_info.get("maxInputChannels") <= 0: continue
143 |       print("  ID: ", i, " - ", device_info.get("name"))
144 | 
145 |   def _enqueue_raw_audio(self, in_data, *_):  # unused args to match expected
146 |     try:
147 |       self._raw_audio_queue.put((in_data, time.time()), block=False)
148 |       return None, pyaudio.paContinue
149 |     except queue.Full:
150 |       error_message = "Raw audio buffer full."
151 |       logger.critical(error_message)
152 |       raise TimeoutError(error_message)
153 | 
154 |   def _get_chunk(self, timeout=None):
155 |     raw_data, timestamp = self._raw_audio_queue.get(timeout=timeout)
156 |     array_data = np.fromstring(raw_data, self.numpy_format).reshape(
157 |         -1, self.num_channels)
158 |     return array_data, timestamp
159 | 
160 |   def get_audio_device_info(self):
161 |     if self._device_index is None:
162 |       return self._audio.get_default_input_device_info()
163 |     else:
164 |       return self._audio.get_device_info_by_index(self._device_index)
165 | 
166 |   def sample_duration_seconds(self, num_samples):
167 |     return num_samples / self.audio_sample_rate_hz / self.num_channels
168 | 
169 |   def clear_queue(self):
170 |     logger.debug("Purging %d chunks from queue.", self._raw_audio_queue.qsize())
171 |     while not self._raw_audio_queue.empty():
172 |       self._raw_audio_queue.get()
173 | 
174 |   def get_audio(self, num_audio_frames):
175 |     """Grab at least num_audio_frames frames of audio.
176 | 
177 |     Record at least num_audio_frames of audio and transform it into a
178 |     numpy array. The term "frame" is in the sense used by PortAudio; see the
179 |     note in the class docstring for details.
180 | 
181 |     Audio returned will be the earliest audio in the queue; it could be from
182 |     before this function was called.
183 | 
184 |     Args:
185 |       num_audio_frames: minimum number of samples of audio to grab.
186 | 
187 |     Returns:
188 |       A tuple of (audio, first_timestamp, last_timestamp).
189 |     """
190 |     num_audio_chunks = int(math.ceil(num_audio_frames *
191 |                     self._downsample_factor / self.frames_per_chunk))
192 |     logger.debug("Capturing %d chunks to get at least %d frames.",
193 |                  num_audio_chunks, num_audio_frames)
194 |     if num_audio_chunks < 1:
195 |       num_audio_chunks = 1
196 |     try:
197 |       timeout = self.timeout_factor * self._chunk_duration_seconds
198 |       chunks, timestamps = zip(
199 |           *[self._get_chunk(timeout=timeout) for _ in range(num_audio_chunks)])
200 |     except queue.Empty:
201 |       error_message = "Audio capture timed out after %.1f seconds." % timeout
202 |       logger.critical(error_message)
203 |       raise TimeoutError(error_message)
204 | 
205 |     assert len(chunks) == num_audio_chunks
206 |     logger.debug("Got %d chunks. Chunk 0 has shape %s and dtype %s.",
207 |                  len(chunks), chunks[0].shape, chunks[0].dtype)
208 |     if self._raw_audio_queue.qsize() > (0.8 * self.max_queue_chunks):
209 |       logger.warning("%d chunks remain in the queue.",
210 |                      self._raw_audio_queue.qsize())
211 |     else:
212 |       logger.debug("%d chunks remain in the queue.",
213 |                    self._raw_audio_queue.qsize())
214 | 
215 |     audio = np.concatenate(chunks)
216 |     if self._downsample_factor != 1:
217 |       audio = audio[::self._downsample_factor]
218 |     logging.debug("Audio array has shape %s and dtype %s.", audio.shape,
219 |                   audio.dtype)
220 |     return audio * 0.5, timestamps[0], timestamps[-1]
221 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/config/Icon:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/config/Icon


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/config/commands_v2.txt:
--------------------------------------------------------------------------------
 1 | volume_up,up,
 2 | volume_down,down,
 3 | next_song,shift+n,
 4 | next_video,shift+n,
 5 | next_game,shift+n,
 6 | last_song,shift+p,
 7 | last_video,shift+p,
 8 | last_game,shift+p,
 9 | random_song,r,
10 | random_video,r,
11 | pause_song, ,
12 | pause_video, , 
13 | pause_game, ,
14 | stop_song, ,
15 | stop_video, ,
16 | start_song, ,
17 | start_video, ,
18 | previous_song,shift+p,
19 | previous_video,shift+p,
20 | move_backwards,left,
21 | move_forwards,right,
22 | go_backwards,left,
23 | go_forwards,right,0.8
24 | position_zero,0,
25 | position_one,1,
26 | position_two,2,
27 | position_three,3,
28 | position_four,4,
29 | position_five,5,
30 | position_six,6,
31 | position_seven,7,
32 | position_eight,8,
33 | position_nine,9,
34 | mute,m,
35 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/config/commands_v2_snake.txt:
--------------------------------------------------------------------------------
 1 | start_application,go,
 2 | start_game,go,
 3 | start_program,go,
 4 | start_task,go,
 5 | begin_window,go,
 6 | begin_application,go,
 7 | begin_game,go,
 8 | begin_program,go,
 9 | begin_task,go,
10 | launch_window,go,
11 | launch_application,go,
12 | launch_game,go,
13 | launch_program,go,
14 | launch_task,go,
15 | close_window,stop,
16 | close_application,stop,
17 | close_game,stop,
18 | close_program,stop,
19 | close_task,stop,
20 | stop_window,stop,
21 | stop_application,stop,
22 | stop_game,stop,
23 | stop_program,stop,
24 | stop_task,stop,
25 | exit_window,stop,
26 | exit_application,stop,
27 | exit_game,stop,
28 | exit_program,stop,
29 | exit_task,stop,
30 | kill_window,stop,
31 | kill_application,stop,
32 | kill_game,stop,
33 | kill_program,stop,
34 | kill_task,stop,
35 | kill_tab,stop,
36 | engage,go,
37 | switch_on,go,
38 | switch_off,stop,
39 | move_up,up,
40 | move_down,down,
41 | move_left,left,
42 | move_right,right,
43 | turn_up,up,
44 | turn_down,down,
45 | turn_left,left,
46 | turn_right,right,
47 | go_up,up,
48 | go_down,down,
49 | go_left,left,
50 | go_right,right,
51 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/config/labels_gc2.raw.txt:
--------------------------------------------------------------------------------
  1 | what_can_i_say
  2 | what_can_you_do
  3 | yes
  4 | no
  5 | start_window
  6 | start_application
  7 | start_game
  8 | start_program
  9 | start_task
 10 | start_tab
 11 | begin_window
 12 | begin_application
 13 | begin_game
 14 | begin_program
 15 | begin_task
 16 | begin_tab
 17 | launch_window
 18 | launch_application
 19 | launch_game
 20 | launch_program
 21 | launch_task
 22 | launch_tab
 23 | open_window
 24 | open_application
 25 | open_game
 26 | open_program
 27 | open_task
 28 | open_tab
 29 | close_window
 30 | close_application
 31 | close_game
 32 | close_program
 33 | close_task
 34 | close_tab
 35 | stop_window
 36 | stop_application
 37 | stop_game
 38 | stop_program
 39 | stop_task
 40 | stop_tab
 41 | terminate_window
 42 | terminate_application
 43 | terminate_game
 44 | terminate_program
 45 | terminate_task
 46 | terminate_tab
 47 | exit_window
 48 | exit_application
 49 | exit_game
 50 | exit_program
 51 | exit_task
 52 | exit_tab
 53 | kill_window
 54 | kill_application
 55 | kill_game
 56 | kill_program
 57 | kill_task
 58 | kill_tab
 59 | engage
 60 | target
 61 | switch_on
 62 | switch_off
 63 | pick_up
 64 | volume_up
 65 | volume_down
 66 | remove
 67 | delete
 68 | mute
 69 | unmute
 70 | silence
 71 | reverse
 72 | next_song
 73 | next_video
 74 | next_game
 75 | last_song
 76 | last_video
 77 | last_game
 78 | random_song
 79 | random_video
 80 | random_game
 81 | pause_song
 82 | pause_video
 83 | pause_game
 84 | stop_song
 85 | stop_video
 86 | start_song
 87 | start_video
 88 | previous_song
 89 | previous_video
 90 | insert
 91 | select
 92 | unselect
 93 | move_up
 94 | move_down
 95 | move_left
 96 | move_right
 97 | move_backwards
 98 | move_forwards
 99 | turn_up
100 | turn_down
101 | turn_left
102 | turn_right
103 | turn_backwards
104 | turn_forwards
105 | go_up
106 | go_down
107 | go_left
108 | go_right
109 | go_backwards
110 | go_forwards
111 | channel_zero
112 | position_zero
113 | one_o_clock
114 | channel_one
115 | position_one
116 | two_o_clock
117 | channel_two
118 | position_two
119 | three_o_clock
120 | channel_three
121 | position_three
122 | four_o_clock
123 | channel_four
124 | position_four
125 | five_o_clock
126 | channel_five
127 | position_five
128 | six_o_clock
129 | channel_six
130 | position_six
131 | seven_o_clock
132 | channel_seven
133 | position_seven
134 | eight_o_clock
135 | channel_eight
136 | position_eight
137 | nine_o_clock
138 | channel_nine
139 | position_nine
140 | ten_o_clock
141 | channel_ten
142 | position_ten
143 | eleven_o_clock
144 | channel_eleven
145 | position_eleven
146 | twelve_o_clock
147 | channel_twelve
148 | position_twelve
149 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/config/labels_simple_audio.txt:
--------------------------------------------------------------------------------
1 | cough
2 | unknown
3 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/features.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2019 The TensorFlow Authors All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | 
 16 | """Feature computation for YAMNet."""
 17 | 
 18 | import numpy as np
 19 | import tensorflow as tf
 20 | 
 21 | 
 22 | def waveform_to_log_mel_spectrogram_patches(waveform, params):
 23 |   """Compute log mel spectrogram patches of a 1-D waveform."""
 24 |   with tf.name_scope('log_mel_features'):
 25 |     # waveform has shape [<# samples>]
 26 | 
 27 |     # Convert waveform into spectrogram using a Short-Time Fourier Transform.
 28 |     # Note that tf.signal.stft() uses a periodic Hann window by default.
 29 |     window_length_samples = int(
 30 |       round(params.sample_rate * params.stft_window_seconds))
 31 |     hop_length_samples = int(
 32 |       round(params.sample_rate * params.stft_hop_seconds))
 33 |     fft_length = 2 ** int(np.ceil(np.log(window_length_samples) / np.log(2.0)))
 34 |     num_spectrogram_bins = fft_length // 2 + 1
 35 |     if params.tflite_compatible:
 36 |       magnitude_spectrogram = _tflite_stft_magnitude(
 37 |           signal=waveform,
 38 |           frame_length=window_length_samples,
 39 |           frame_step=hop_length_samples,
 40 |           fft_length=fft_length)
 41 |     else:
 42 |       magnitude_spectrogram = tf.abs(tf.signal.stft(
 43 |           signals=waveform,
 44 |           frame_length=window_length_samples,
 45 |           frame_step=hop_length_samples,
 46 |           fft_length=fft_length))
 47 |     # magnitude_spectrogram has shape [<# STFT frames>, num_spectrogram_bins]
 48 | 
 49 |     # Convert spectrogram into log mel spectrogram.
 50 |     linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
 51 |         num_mel_bins=params.mel_bands,
 52 |         num_spectrogram_bins=num_spectrogram_bins,
 53 |         sample_rate=params.sample_rate,
 54 |         lower_edge_hertz=params.mel_min_hz,
 55 |         upper_edge_hertz=params.mel_max_hz)
 56 |     mel_spectrogram = tf.matmul(
 57 |       magnitude_spectrogram, linear_to_mel_weight_matrix)
 58 |     log_mel_spectrogram = tf.math.log(mel_spectrogram + params.log_offset)
 59 |     # log_mel_spectrogram has shape [<# STFT frames>, params.mel_bands]
 60 | 
 61 |     # Frame spectrogram (shape [<# STFT frames>, params.mel_bands]) into patches
 62 |     # (the input examples). Only complete frames are emitted, so if there is
 63 |     # less than params.patch_window_seconds of waveform then nothing is emitted
 64 |     # (to avoid this, zero-pad before processing).
 65 |     spectrogram_hop_length_samples = int(
 66 |       round(params.sample_rate * params.stft_hop_seconds))
 67 |     spectrogram_sample_rate = params.sample_rate / spectrogram_hop_length_samples
 68 |     patch_window_length_samples = int(
 69 |       round(spectrogram_sample_rate * params.patch_window_seconds))
 70 |     patch_hop_length_samples = int(
 71 |       round(spectrogram_sample_rate * params.patch_hop_seconds))
 72 |     features = tf.signal.frame(
 73 |         signal=log_mel_spectrogram,
 74 |         frame_length=patch_window_length_samples,
 75 |         frame_step=patch_hop_length_samples,
 76 |         axis=0)
 77 |     # features has shape [<# patches>, <# STFT frames in an patch>, params.mel_bands]
 78 | 
 79 |     return log_mel_spectrogram, features
 80 | 
 81 | 
 82 | def pad_waveform(waveform, params):
 83 |   """Pads waveform with silence if needed to get an integral number of patches."""
 84 |   # In order to produce one patch of log mel spectrogram input to YAMNet, we
 85 |   # need at least one patch window length of waveform plus enough extra samples
 86 |   # to complete the final STFT analysis window.
 87 |   min_waveform_seconds = (
 88 |       params.patch_window_seconds +
 89 |       params.stft_window_seconds - params.stft_hop_seconds)
 90 |   min_num_samples = tf.cast(min_waveform_seconds * params.sample_rate, tf.int32)
 91 |   num_samples = tf.shape(waveform)[0]
 92 |   num_padding_samples = tf.maximum(0, min_num_samples - num_samples)
 93 | 
 94 |   # In addition, there might be enough waveform for one or more additional
 95 |   # patches formed by hopping forward. If there are more samples than one patch,
 96 |   # round up to an integral number of hops.
 97 |   num_samples = tf.maximum(num_samples, min_num_samples)
 98 |   num_samples_after_first_patch = num_samples - min_num_samples
 99 |   hop_samples = tf.cast(params.patch_hop_seconds * params.sample_rate, tf.int32)
100 |   num_hops_after_first_patch = tf.cast(tf.math.ceil(
101 |           tf.cast(num_samples_after_first_patch, tf.float32) /
102 |           tf.cast(hop_samples, tf.float32)), tf.int32)
103 |   num_padding_samples += (
104 |       hop_samples * num_hops_after_first_patch - num_samples_after_first_patch)
105 | 
106 |   padded_waveform = tf.pad(waveform, [[0, num_padding_samples]],
107 |                            mode='CONSTANT', constant_values=0.0)
108 |   return padded_waveform
109 | 
110 | 
111 | def _tflite_stft_magnitude(signal, frame_length, frame_step, fft_length):
112 |   """TF-Lite-compatible version of tf.abs(tf.signal.stft())."""
113 |   def _hann_window():
114 |     return tf.reshape(
115 |       tf.constant(
116 |           (0.5 - 0.5 * np.cos(2 * np.pi * np.arange(0, 1.0, 1.0 / frame_length))
117 |           ).astype(np.float32),
118 |           name='hann_window'), [1, frame_length])
119 | 
120 |   def _dft_matrix(dft_length):
121 |     """Calculate the full DFT matrix in NumPy."""
122 |     # See https://en.wikipedia.org/wiki/DFT_matrix
123 |     omega = (0 + 1j) * 2.0 * np.pi / float(dft_length)
124 |     # Don't include 1/sqrt(N) scaling, tf.signal.rfft doesn't apply it.
125 |     return np.exp(omega * np.outer(np.arange(dft_length), np.arange(dft_length)))
126 | 
127 |   def _rdft(framed_signal, fft_length):
128 |     """Implement real-input Discrete Fourier Transform by matmul."""
129 |     # We are right-multiplying by the DFT matrix, and we are keeping only the
130 |     # first half ("positive frequencies").  So discard the second half of rows,
131 |     # but transpose the array for right-multiplication.  The DFT matrix is
132 |     # symmetric, so we could have done it more directly, but this reflects our
133 |     # intention better.
134 |     complex_dft_matrix_kept_values = _dft_matrix(fft_length)[:(
135 |         fft_length // 2 + 1), :].transpose()
136 |     real_dft_matrix = tf.constant(
137 |         np.real(complex_dft_matrix_kept_values).astype(np.float32),
138 |         name='real_dft_matrix')
139 |     imag_dft_matrix = tf.constant(
140 |         np.imag(complex_dft_matrix_kept_values).astype(np.float32),
141 |         name='imaginary_dft_matrix')
142 |     signal_frame_length = tf.shape(framed_signal)[-1]
143 |     half_pad = (fft_length - signal_frame_length) // 2
144 |     padded_frames = tf.pad(
145 |         framed_signal,
146 |         [
147 |             # Don't add any padding in the frame dimension.
148 |             [0, 0],
149 |             # Pad before and after the signal within each frame.
150 |             [half_pad, fft_length - signal_frame_length - half_pad]
151 |         ],
152 |         mode='CONSTANT',
153 |         constant_values=0.0)
154 |     real_stft = tf.matmul(padded_frames, real_dft_matrix)
155 |     imag_stft = tf.matmul(padded_frames, imag_dft_matrix)
156 |     return real_stft, imag_stft
157 | 
158 |   def _complex_abs(real, imag):
159 |     return tf.sqrt(tf.add(real * real, imag * imag))
160 | 
161 |   framed_signal = tf.signal.frame(signal, frame_length, frame_step)
162 |   windowed_signal = framed_signal * _hann_window()
163 |   real_stft, imag_stft = _rdft(windowed_signal, fft_length)
164 |   stft_magnitude = _complex_abs(real_stft, imag_stft)
165 |   return stft_magnitude
166 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/hearing_snake_metadata.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": 1.0,
 3 |     "best_scores": [
 4 |         169,
 5 |         34,
 6 |         21,
 7 |         0,
 8 |         0,
 9 |         0,
10 |         0,
11 |         0,
12 |         0,
13 |         0
14 |     ]
15 | }


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/install_requirements.sh:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #    https://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #!/bin/bash
15 | sudo apt-get install -y python3 python3-pyaudio python3-numpy python3-scipy 
16 | 
17 | sudo apt-get install -y python3-dev libsdl-image1.2-dev libsdl-mixer1.2-dev libsdl-ttf2.0-dev   libsdl1.2-dev libsmpeg-dev python-numpy subversion libportmidi-dev ffmpeg libswscale-dev libavformat-dev libavcodec-dev  libfreetype6-dev
18 | 
19 | sudo apt-get install -y python3-pyaudio
20 | 
21 | pip3 install pygame
22 | 
23 | pip3 install PyUserInput
24 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/media/Icon:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/media/Icon


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/media/startscreen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/media/startscreen.png


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/models/Icon:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/models/Icon


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/models/model-backup1.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/models/model-backup1.tflite


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/models/model.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/models/model.tflite


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/models/model_quantized_edgetpu.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/models/model_quantized_edgetpu.tflite


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/models/voice_commands_v0.7_edgetpu.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/models/voice_commands_v0.7_edgetpu.tflite


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/models/voice_commands_v0.8_edgetpu.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/models/voice_commands_v0.8_edgetpu.tflite


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/params.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 The TensorFlow Authors All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Hyperparameters for YAMNet."""
17 | 
18 | from dataclasses import dataclass
19 | 
20 | # The following hyperparameters (except patch_hop_seconds) were used to train YAMNet,
21 | # so expect some variability in performance if you change these. The patch hop can
22 | # be changed arbitrarily: a smaller hop should give you more patches from the same
23 | # clip and possibly better performance at a larger computational cost.
24 | @dataclass(frozen=True)  # Instances of this class are immutable.
25 | class Params:
26 |   sample_rate: float = 16000.0
27 |   stft_window_seconds: float = 0.025
28 |   stft_hop_seconds: float = 0.010
29 |   mel_bands: int = 64
30 |   mel_min_hz: float = 125.0
31 |   mel_max_hz: float = 7500.0
32 |   log_offset: float = 0.001
33 |   patch_window_seconds: float = 0.96
34 |   patch_hop_seconds: float = 0.48
35 | 
36 |   @property
37 |   def patch_frames(self):
38 |     return int(round(self.patch_window_seconds / self.stft_hop_seconds))
39 | 
40 |   @property
41 |   def patch_bands(self):
42 |     return self.mel_bands
43 | 
44 |   num_classes: int = 521
45 |   conv_padding: str = 'same'
46 |   batchnorm_center: bool = True
47 |   batchnorm_scale: bool = False
48 |   batchnorm_epsilon: float = 1e-4
49 |   classifier_activation: str = 'sigmoid'
50 | 
51 |   tflite_compatible: bool = False
52 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/pygame_images/Icon:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/pygame_images/Icon


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/pygame_images/apple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/pygame_images/apple.png


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/pygame_images/bg.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/pygame_images/bg.jpg


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/pygame_images/snake_head_with_ears.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/pygame_images/snake_head_with_ears.png


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/pygame_images/snake_tail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wileyw/DeepLearningDemos/efa7e1bc2caabad488c8420b90bb617b9af5c424/sound/project-keyword-spotter/pygame_images/snake_tail.png


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/run_model.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     https://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Runs a model on the edgetpu.
16 | 
17 | Useage:
18 | python3 run_model.py --model_file model_edgetpu.tflite
19 | """
20 | from __future__ import absolute_import
21 | from __future__ import division
22 | from __future__ import print_function
23 | 
24 | import argparse
25 | import sys
26 | import model
27 | import numpy as np
28 | 
29 | 
30 | def print_results(result, commands, labels, top=1):
31 |   """Example callback function that prints the passed detections."""
32 |   top_results = np.argsort(-result)[:top]
33 |   for p in range(top):
34 |     l = labels[top_results[p]]
35 |     if l in commands.keys():
36 |       threshold = commands[labels[top_results[p]]]["conf"]
37 |     else:
38 |       threshold = 0.5
39 |     if top_results[p] and result[top_results[p]] > threshold:
40 |       sys.stdout.write("\033[1m\033[93m*%15s*\033[0m (%.3f)" %
41 |                        (l, result[top_results[p]]))
42 |     elif result[top_results[p]] > 0.005:
43 |       sys.stdout.write(" %15s (%.3f)" % (l, result[top_results[p]]))
44 |   sys.stdout.write("\n")
45 | 
46 | 
47 | def main():
48 |   parser = argparse.ArgumentParser()
49 |   model.add_model_flags(parser)
50 |   args = parser.parse_args()
51 |   interpreter = model.make_interpreter(args.model_file)
52 |   interpreter.allocate_tensors()
53 |   mic = args.mic if args.mic is None else int(args.mic)
54 |   model.classify_audio(mic, interpreter,
55 |                        labels_file="config/labels_simple_audio.txt",
56 |                        result_callback=print_results,
57 |                        sample_rate_hz=int(args.sample_rate_hz),
58 |                        num_frames_hop=int(args.num_frames_hop))
59 | 
60 | if __name__ == "__main__":
61 |   main()
62 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/run_model_yamnet.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     https://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Runs a model on the edgetpu.
16 | 
17 | Useage:
18 | python3 run_model.py --model_file model_edgetpu.tflite
19 | """
20 | from __future__ import absolute_import
21 | from __future__ import division
22 | from __future__ import print_function
23 | 
24 | import argparse
25 | import sys
26 | import model_yamnet
27 | import numpy as np
28 | 
29 | 
30 | def print_results(result, commands, labels, top=1):
31 |   """Example callback function that prints the passed detections."""
32 |   top_results = np.argsort(-result)[:top]
33 |   for p in range(top):
34 |     l = labels[top_results[p]]
35 |     if l in commands.keys():
36 |       threshold = commands[labels[top_results[p]]]["conf"]
37 |     else:
38 |       threshold = 0.5
39 |     if top_results[p] and result[top_results[p]] > threshold:
40 |       sys.stdout.write("\033[1m\033[93m*%15s*\033[0m (%.3f)" %
41 |                        (l, result[top_results[p]]))
42 |     elif result[top_results[p]] > 0.005:
43 |       sys.stdout.write(" %15s (%.3f)" % (l, result[top_results[p]]))
44 |   sys.stdout.write("\n")
45 | 
46 | 
47 | def main():
48 |   parser = argparse.ArgumentParser()
49 |   model_yamnet.add_model_flags(parser)
50 |   args = parser.parse_args()
51 |   interpreter = model_yamnet.make_interpreter(args.model_file)
52 |   interpreter.allocate_tensors()
53 |   mic = args.mic if args.mic is None else int(args.mic)
54 |   model_yamnet.classify_audio(mic, interpreter,
55 |                        labels_file="config/labels_simple_audio.txt",
56 |                        result_callback=print_results,
57 |                        sample_rate_hz=int(args.sample_rate_hz),
58 |                        num_frames_hop=int(args.num_frames_hop))
59 | 
60 | if __name__ == "__main__":
61 |   main()
62 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/run_snake.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2019 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #    https://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | export DISPLAY="${DISPLAY:-:0}"
16 | python3 run_hearing_snake.py
17 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/run_yt_voice_control.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     https://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Controls a YouTube using voice commands.
16 | 
17 | 
18 | Usage:
19 | Requires YouTube to be running in a browser tab and focus to be on the
20 | YouTube player.
21 | 
22 | python3 run_yt_voice_control.py
23 | """
24 | from __future__ import absolute_import
25 | from __future__ import division
26 | from __future__ import print_function
27 | 
28 | import argparse
29 | import sys
30 | import model
31 | from pykeyboard import PyKeyboard
32 | 
33 | 
34 | class YoutubeControl(object):
35 |   """Maps voice command detections to youtube controls."""
36 | 
37 |   def __init__(self):
38 |     """Creates an instance of `YoutubeControl`."""
39 |     self._keyboard = PyKeyboard()
40 |     self._command_lookup = {
41 |         "left": self._keyboard.left_key,
42 |         "right": self._keyboard.right_key,
43 |         "up": self._keyboard.up_key,
44 |         "down": self._keyboard.down_key,
45 |         "shift": self._keyboard.shift_key
46 |     }
47 | 
48 |   def run_command(self, command):
49 |     """Parses and excecuted a command."""
50 |     if len(command) == 1:
51 |       self._keyboard.tap_key(command)
52 |     elif command in self._command_lookup.keys():
53 |       self._keyboard.tap_key(self._command_lookup[command])
54 |     elif "+" in command:
55 |       keys = command.split("+")
56 |       press_list = []
57 |       for key in keys:
58 |         if len(key) == 1:
59 |           press_list.append(key)
60 |         elif key in self._command_lookup.keys():
61 |           press_list.append(self._command_lookup[key])
62 |         else:
63 |           print("Can't parse: ", command)
64 |           return
65 |         self._keyboard.press_keys(press_list)
66 | 
67 | 
68 | def main():
69 |   parser = argparse.ArgumentParser()
70 |   model.add_model_flags(parser)
71 |   args = parser.parse_args()
72 |   interpreter = model.make_interpreter(args.model_file)
73 |   interpreter.allocate_tensors()
74 |   mic = args.mic if args.mic is None else int(args.mic)
75 |   yt_control = YoutubeControl()
76 |   sys.stdout.write("--------------------\n")
77 |   sys.stdout.write("This script will control Youtube.\n")
78 |   sys.stdout.write("Just ensure that focus is on the YouTube player.\n")
79 |   sys.stdout.write("--------------------\n")
80 | 
81 |   model.classify_audio(mic, interpreter,
82 |                        labels_file="config/labels_gc2.raw.txt",
83 |                        commands_file="config/commands_v2.txt",
84 |                        dectection_callback=yt_control.run_command,
85 |                        sample_rate_hz=int(args.sample_rate_hz),
86 |                        num_frames_hop=int(args.num_frames_hop))
87 | 
88 | 
89 | if __name__ == "__main__":
90 |   main()
91 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/run_yt_voice_control.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2019 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #    https://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | export DISPLAY="${DISPLAY:-:0}"
16 | python3 run_yt_voice_control.py
17 | 


--------------------------------------------------------------------------------
/sound/project-keyword-spotter/yamnet.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2019 The TensorFlow Authors All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | 
 16 | """Core model definition of YAMNet."""
 17 | 
 18 | import csv
 19 | 
 20 | import numpy as np
 21 | import tensorflow as tf
 22 | from tensorflow.keras import Model, layers
 23 | 
 24 | import features as features_lib
 25 | 
 26 | 
 27 | def _batch_norm(name, params):
 28 |   def _bn_layer(layer_input):
 29 |     return layers.BatchNormalization(
 30 |       name=name,
 31 |       center=params.batchnorm_center,
 32 |       scale=params.batchnorm_scale,
 33 |       epsilon=params.batchnorm_epsilon)(layer_input)
 34 |   return _bn_layer
 35 | 
 36 | 
 37 | def _conv(name, kernel, stride, filters, params):
 38 |   def _conv_layer(layer_input):
 39 |     output = layers.Conv2D(name='{}/conv'.format(name),
 40 |                            filters=filters,
 41 |                            kernel_size=kernel,
 42 |                            strides=stride,
 43 |                            padding=params.conv_padding,
 44 |                            use_bias=False,
 45 |                            activation=None)(layer_input)
 46 |     output = _batch_norm('{}/conv/bn'.format(name), params)(output)
 47 |     output = layers.ReLU(name='{}/relu'.format(name))(output)
 48 |     return output
 49 |   return _conv_layer
 50 | 
 51 | 
 52 | def _separable_conv(name, kernel, stride, filters, params):
 53 |   def _separable_conv_layer(layer_input):
 54 |     output = layers.DepthwiseConv2D(name='{}/depthwise_conv'.format(name),
 55 |                                     kernel_size=kernel,
 56 |                                     strides=stride,
 57 |                                     depth_multiplier=1,
 58 |                                     padding=params.conv_padding,
 59 |                                     use_bias=False,
 60 |                                     activation=None)(layer_input)
 61 |     output = _batch_norm('{}/depthwise_conv/bn'.format(name), params)(output)
 62 |     output = layers.ReLU(name='{}/depthwise_conv/relu'.format(name))(output)
 63 |     output = layers.Conv2D(name='{}/pointwise_conv'.format(name),
 64 |                            filters=filters,
 65 |                            kernel_size=(1, 1),
 66 |                            strides=1,
 67 |                            padding=params.conv_padding,
 68 |                            use_bias=False,
 69 |                            activation=None)(output)
 70 |     output = _batch_norm('{}/pointwise_conv/bn'.format(name), params)(output)
 71 |     output = layers.ReLU(name='{}/pointwise_conv/relu'.format(name))(output)
 72 |     return output
 73 |   return _separable_conv_layer
 74 | 
 75 | 
 76 | _YAMNET_LAYER_DEFS = [
 77 |     # (layer_function, kernel, stride, num_filters)
 78 |     (_conv,          [3, 3], 2,   32),
 79 |     (_separable_conv, [3, 3], 1,   64),
 80 |     (_separable_conv, [3, 3], 2,  128),
 81 |     (_separable_conv, [3, 3], 1,  128),
 82 |     (_separable_conv, [3, 3], 2,  256),
 83 |     (_separable_conv, [3, 3], 1,  256),
 84 |     (_separable_conv, [3, 3], 2,  512),
 85 |     (_separable_conv, [3, 3], 1,  512),
 86 |     (_separable_conv, [3, 3], 1,  512),
 87 |     (_separable_conv, [3, 3], 1,  512),
 88 |     (_separable_conv, [3, 3], 1,  512),
 89 |     (_separable_conv, [3, 3], 1,  512),
 90 |     (_separable_conv, [3, 3], 2, 1024),
 91 |     (_separable_conv, [3, 3], 1, 1024)
 92 | ]
 93 | 
 94 | 
 95 | def yamnet(features, params):
 96 |   """Define the core YAMNet mode in Keras."""
 97 |   net = layers.Reshape(
 98 |       (params.patch_frames, params.patch_bands, 1),
 99 |       input_shape=(params.patch_frames, params.patch_bands))(features)
100 |   for (i, (layer_fun, kernel, stride, filters)) in enumerate(_YAMNET_LAYER_DEFS):
101 |     net = layer_fun('layer{}'.format(i + 1), kernel, stride, filters, params)(net)
102 |   embeddings = layers.GlobalAveragePooling2D()(net)
103 |   logits = layers.Dense(units=params.num_classes, use_bias=True)(embeddings)
104 |   predictions = layers.Activation(activation=params.classifier_activation)(logits)
105 |   return predictions, embeddings
106 | 
107 | 
108 | def yamnet_frames_model(params):
109 |   """Defines the YAMNet waveform-to-class-scores model.
110 | 
111 |   Args:
112 |     params: An instance of Params containing hyperparameters.
113 | 
114 |   Returns:
115 |     A model accepting (num_samples,) waveform input and emitting:
116 |     - predictions: (num_patches, num_classes) matrix of class scores per time frame
117 |     - embeddings: (num_patches, embedding size) matrix of embeddings per time frame
118 |     - log_mel_spectrogram: (num_spectrogram_frames, num_mel_bins) spectrogram feature matrix
119 |   """
120 |   waveform = layers.Input(batch_shape=(None,), dtype=tf.float32)
121 |   waveform_padded = features_lib.pad_waveform(waveform, params)
122 |   log_mel_spectrogram, features = features_lib.waveform_to_log_mel_spectrogram_patches(
123 |       waveform_padded, params)
124 |   predictions, embeddings = yamnet(features, params)
125 |   frames_model = Model(
126 |       name='yamnet_frames', inputs=waveform,
127 |       outputs=[predictions, embeddings, log_mel_spectrogram])
128 |   return frames_model
129 | 
130 | 
131 | def class_names(class_map_csv):
132 |   """Read the class name definition file and return a list of strings."""
133 |   if tf.is_tensor(class_map_csv):
134 |     class_map_csv = class_map_csv.numpy()
135 |   with open(class_map_csv) as csv_file:
136 |     reader = csv.reader(csv_file)
137 |     next(reader)   # Skip header
138 |     return np.array([display_name for (_, _, display_name) in reader])
139 | 


--------------------------------------------------------------------------------
/sound/sound.ipynb:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------