├── .fluxbot ├── Manifest.toml └── Project.toml ├── .github ├── FUNDING.yml └── workflows │ └── fluxbot.jl ├── .gitignore ├── .gitlab-ci.yml ├── .gitpod.Dockerfile ├── .gitpod.yml ├── LICENSE.md ├── README.md ├── contrib ├── README.md ├── audio │ └── speech-blstm │ │ ├── 00-data.jl │ │ ├── 01-speech-blstm.jl │ │ ├── Manifest.toml │ │ ├── Project.toml │ │ ├── README.md │ │ ├── TIMIT │ │ └── README.md │ │ ├── test │ │ └── README.md │ │ └── train │ │ └── README.md ├── games │ └── differentiable-programming │ │ ├── cartpole │ │ ├── DQN.jl │ │ ├── DiffRL.jl │ │ ├── Manifest.toml │ │ ├── Project.toml │ │ └── cuda │ │ │ ├── Manifest.toml │ │ │ └── Project.toml │ │ ├── pendulum │ │ ├── DDPG.jl │ │ ├── DiffRL.jl │ │ ├── Manifest.toml │ │ ├── Project.toml │ │ └── cuda │ │ │ ├── Manifest.toml │ │ │ └── Project.toml │ │ └── trebuchet │ │ ├── DDPG.jl │ │ ├── DiffRL.jl │ │ ├── Manifest.toml │ │ ├── Project.toml │ │ └── cuda │ │ ├── Manifest.toml │ │ └── Project.toml └── meta-learning │ ├── Manifest.toml │ ├── MetaLearning.jl │ ├── Project.toml │ ├── fomaml_grad.png │ ├── linear.jl │ ├── reptile_grad.png │ └── utils.jl ├── other ├── autoregressive-process │ ├── Manifest.toml │ ├── Project.toml │ ├── README.md │ ├── loss.png │ ├── model.jl │ └── utils.jl ├── bitstring-parity │ ├── Manifest.toml │ ├── Project.toml │ ├── README.md │ ├── data.jl │ ├── xor1.jl │ ├── xor2.jl │ └── xor3.jl ├── fizzbuzz │ ├── Manifest.toml │ ├── Project.toml │ └── fizzbuzz.jl ├── flux-next │ ├── Manifest.toml │ ├── Project.toml │ └── intro.jl ├── housing │ ├── .gitignore │ ├── Manifest.toml │ ├── Project.toml │ ├── README.md │ ├── housing.jl │ └── img │ │ └── singleneuron.svg └── iris │ ├── Manifest.toml │ ├── Project.toml │ ├── README.md │ └── iris.jl ├── script ├── Manifest.toml ├── Notebooks.toml ├── Project.toml ├── convert.jl └── notebook.jl ├── text ├── char-rnn │ ├── .gitignore │ ├── Manifest.toml │ ├── Project.toml │ ├── README.md │ ├── char-rnn.jl │ └── docs │ │ └── rnn-train.png ├── lang-detection │ ├── .gitignore │ ├── Manifest.toml │ ├── Project.toml │ ├── README.md │ ├── model.jl │ └── scrape.jl ├── nanogpt │ ├── Manifest.toml │ ├── Project.toml │ ├── README.md │ ├── docs │ │ └── Full_GPT_architecture.svg │ └── gpt.jl ├── phonemes │ ├── 0-data.jl │ ├── 1-model.jl │ ├── Manifest.toml │ └── Project.toml └── treebank │ ├── Manifest.toml │ ├── Project.toml │ ├── README.md │ ├── data.jl │ └── recursive.jl ├── tutorials ├── 60-minute-blitz │ ├── 60-minute-blitz.jl │ ├── Manifest.toml │ └── Project.toml ├── dataloader │ ├── Manifest.toml │ ├── Project.toml │ ├── README.md │ └── dataloader-image-data.jl └── transfer_learning │ ├── .gitignore │ ├── Manifest.toml │ ├── Project.toml │ ├── README.md │ └── transfer_learning.jl └── vision ├── cdcgan_mnist ├── Manifest.toml ├── Project.toml ├── README.md ├── cGAN_mnist.jl └── output │ ├── cgan_steps_000000.png │ ├── cgan_steps_001000.png │ ├── cgan_steps_002000.png │ ├── cgan_steps_003000.png │ ├── cgan_steps_004000.png │ ├── cgan_steps_005000.png │ ├── cgan_steps_006000.png │ ├── cgan_steps_007000.png │ ├── cgan_steps_008000.png │ ├── cgan_steps_009000.png │ ├── cgan_steps_010000.png │ ├── cgan_steps_011000.png │ ├── cgan_steps_011725.png │ └── img_for_readme.png ├── conv_mnist ├── .gitignore ├── Manifest.toml ├── Project.toml ├── README.md ├── conv_mnist.jl └── docs │ └── LeNet-5.png ├── convmixer_cifar10 ├── README.md ├── convmixer.jl └── doc │ └── convmixerarchi.png ├── dcgan_mnist ├── .gitignore ├── Manifest.toml ├── Project.toml ├── README.md ├── dcgan_mnist.jl └── output │ 
├── dcgan_generator_discriminator.png │ ├── dcgan_steps_000000.png │ ├── dcgan_steps_001000.png │ ├── dcgan_steps_002000.png │ ├── dcgan_steps_003000.png │ ├── dcgan_steps_004000.png │ ├── dcgan_steps_005000.png │ ├── dcgan_steps_006000.png │ ├── dcgan_steps_007000.png │ ├── dcgan_steps_008000.png │ ├── dcgan_steps_009000.png │ └── dcgan_steps_009380.png ├── diffusion_mnist ├── .DS_Store ├── Manifest.toml ├── Project.toml ├── README.md ├── diffusion_mnist.jl ├── diffusion_plot.jl ├── diffusion_test.jl └── docs │ ├── diff_eq_em.gif │ ├── diff_eq_em_images.jpeg │ ├── diff_eq_em_plot.png │ ├── diff_eq_ode.gif │ ├── diff_eq_ode_images.jpeg │ ├── diff_eq_ode_plot.png │ ├── em_images.jpeg │ ├── loss.png │ ├── pc_images.jpeg │ ├── sampled_noise.jpeg │ ├── sde.png │ └── unet.png ├── mlp_mnist ├── .gitignore ├── Manifest.toml ├── Project.toml ├── README.md ├── docs │ └── mlp.svg └── mlp_mnist.jl ├── spatial_transformer ├── Manifest.toml ├── Project.toml ├── README.md ├── images │ └── stn_example.png └── spatial_transformer.jl ├── vae_mnist ├── .gitignore ├── Manifest.toml ├── Project.toml ├── README.md ├── docs │ ├── clustering.png │ ├── epoch_10.png │ ├── epoch_20.png │ ├── epoch_5.png │ ├── manifold.png │ ├── original.png │ └── vae.png ├── output │ ├── clustering.png │ ├── epoch_1.png │ ├── epoch_10.png │ ├── epoch_11.png │ ├── epoch_12.png │ ├── epoch_13.png │ ├── epoch_14.png │ ├── epoch_15.png │ ├── epoch_16.png │ ├── epoch_17.png │ ├── epoch_18.png │ ├── epoch_19.png │ ├── epoch_2.png │ ├── epoch_20.png │ ├── epoch_3.png │ ├── epoch_4.png │ ├── epoch_5.png │ ├── epoch_6.png │ ├── epoch_7.png │ ├── epoch_8.png │ ├── epoch_9.png │ ├── manifold.png │ └── original.png ├── vae_mnist.jl └── vae_plot.jl └── vgg_cifar10 ├── Manifest.toml ├── Project.toml ├── README.md ├── docs └── vgg.png └── vgg_cifar10.jl /.fluxbot/Manifest.toml: -------------------------------------------------------------------------------- 1 | # This file is machine-generated - editing it directly is not advised 2 | 3 | [[Artifacts]] 4 | deps = ["Pkg"] 5 | git-tree-sha1 = "c30985d8821e0cd73870b17b0ed0ce6dc44cb744" 6 | uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" 7 | version = "1.3.0" 8 | 9 | [[Base64]] 10 | uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" 11 | 12 | [[Dates]] 13 | deps = ["Printf"] 14 | uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" 15 | 16 | [[Distributed]] 17 | deps = ["Random", "Serialization", "Sockets"] 18 | uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" 19 | 20 | [[FluxBot]] 21 | deps = ["GitHub", "Glob", "Pkg", "Sockets"] 22 | git-tree-sha1 = "7c5fedc22b3e9ca4d7e891c43f91df382bded7d5" 23 | repo-rev = "zoo" 24 | repo-url = "https://github.com/dhairyagandhi96/FluxBot.jl" 25 | uuid = "352bd040-0f98-11ea-1faf-6f930ca83554" 26 | version = "0.1.0" 27 | 28 | [[GitHub]] 29 | deps = ["Base64", "Dates", "HTTP", "JSON", "MbedTLS", "Sockets", "SodiumSeal"] 30 | git-tree-sha1 = "a4f61fc1b1724e6eec1d9333eac2d4b01d8fcc8f" 31 | uuid = "bc5e4493-9b4d-5f90-b8aa-2b2bcaad7a26" 32 | version = "5.4.0" 33 | 34 | [[Glob]] 35 | git-tree-sha1 = "4df9f7e06108728ebf00a0a11edee4b29a482bb2" 36 | uuid = "c27321d9-0574-5035-807b-f59d2c89b15c" 37 | version = "1.3.0" 38 | 39 | [[HTTP]] 40 | deps = ["Base64", "Dates", "IniFile", "MbedTLS", "Sockets", "URIs"] 41 | git-tree-sha1 = "63055ee44b5c2b95ec1921edcf856c60124ff0c3" 42 | uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" 43 | version = "0.9.2" 44 | 45 | [[IniFile]] 46 | deps = ["Test"] 47 | git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8" 48 | uuid = 
"83e8ac13-25f8-5344-8a64-a9f2b223428f" 49 | version = "0.5.0" 50 | 51 | [[InteractiveUtils]] 52 | deps = ["Markdown"] 53 | uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" 54 | 55 | [[JLLWrappers]] 56 | git-tree-sha1 = "a431f5f2ca3f4feef3bd7a5e94b8b8d4f2f647a0" 57 | uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" 58 | version = "1.2.0" 59 | 60 | [[JSON]] 61 | deps = ["Dates", "Mmap", "Parsers", "Unicode"] 62 | git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" 63 | uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" 64 | version = "0.21.1" 65 | 66 | [[LibGit2]] 67 | deps = ["Printf"] 68 | uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" 69 | 70 | [[Libdl]] 71 | uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" 72 | 73 | [[Logging]] 74 | uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" 75 | 76 | [[Markdown]] 77 | deps = ["Base64"] 78 | uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" 79 | 80 | [[MbedTLS]] 81 | deps = ["Dates", "MbedTLS_jll", "Random", "Sockets"] 82 | git-tree-sha1 = "1c38e51c3d08ef2278062ebceade0e46cefc96fe" 83 | uuid = "739be429-bea8-5141-9913-cc70e7f3736d" 84 | version = "1.0.3" 85 | 86 | [[MbedTLS_jll]] 87 | deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] 88 | git-tree-sha1 = "0eef589dd1c26a3ac9d753fe1a8bcad63f956fa6" 89 | uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" 90 | version = "2.16.8+1" 91 | 92 | [[Mmap]] 93 | uuid = "a63ad114-7e13-5084-954f-fe012c677804" 94 | 95 | [[Parsers]] 96 | deps = ["Dates"] 97 | git-tree-sha1 = "50c9a9ed8c714945e01cd53a21007ed3865ed714" 98 | uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" 99 | version = "1.0.15" 100 | 101 | [[Pkg]] 102 | deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] 103 | uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" 104 | 105 | [[Printf]] 106 | deps = ["Unicode"] 107 | uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" 108 | 109 | [[REPL]] 110 | deps = ["InteractiveUtils", "Markdown", "Sockets"] 111 | uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" 112 | 113 | [[Random]] 114 | deps = ["Serialization"] 115 | uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 116 | 117 | [[SHA]] 118 | uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" 119 | 120 | [[Serialization]] 121 | uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" 122 | 123 | [[Sockets]] 124 | uuid = "6462fe0b-24de-5631-8697-dd941f90decc" 125 | 126 | [[SodiumSeal]] 127 | deps = ["Base64", "Libdl", "libsodium_jll"] 128 | git-tree-sha1 = "80cef67d2953e33935b41c6ab0a178b9987b1c99" 129 | uuid = "2133526b-2bfb-4018-ac12-889fb3908a75" 130 | version = "0.1.1" 131 | 132 | [[Test]] 133 | deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] 134 | uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 135 | 136 | [[URIs]] 137 | git-tree-sha1 = "7855809b88d7b16e9b029afd17880930626f54a2" 138 | uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" 139 | version = "1.2.0" 140 | 141 | [[UUIDs]] 142 | deps = ["Random", "SHA"] 143 | uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" 144 | 145 | [[Unicode]] 146 | uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" 147 | 148 | [[libsodium_jll]] 149 | deps = ["Libdl", "Pkg"] 150 | git-tree-sha1 = "7127f5f40332ccfa43ee07dcd0c4d81a27d9bb23" 151 | uuid = "a9144af2-ca23-56d9-984f-0d03f7b5ccf8" 152 | version = "1.0.18+1" 153 | -------------------------------------------------------------------------------- /.fluxbot/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | FluxBot = "352bd040-0f98-11ea-1faf-6f930ca83554" 3 | 
-------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [JuliaLang] 2 | -------------------------------------------------------------------------------- /.github/workflows/fluxbot.jl: -------------------------------------------------------------------------------- 1 | name: FluxBot 2 | 3 | on: 4 | issue_comment: 5 | types: [created, edited] 6 | 7 | jobs: 8 | build: 9 | if: contains(github.event.comment.body, '@ModelZookeeper') 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | julia-version: [1.5.0] 14 | julia-arch: [x86] 15 | os: [ubuntu-latest] 16 | steps: 17 | - uses: actions/checkout@af513c7a016048ae468971c52ed77d9562c7c819 # v1.0.0 18 | - uses: julia-actions/setup-julia@v1 19 | with: 20 | version: ${{ matrix.julia-version }} 21 | - name: Install dependencies 22 | run: julia --project=.fluxbot/ -e 'using Pkg; Pkg.instantiate()' 23 | - name: FluxBot.respond 24 | env: 25 | FLUXBOT_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 26 | BOT_SECRET: ${{ secrets.BOT_SECRET }} 27 | MODELZOO_TRIGGER_TOKEN: ${{ secrets.MODELZOO_TRIGGER_TOKEN }} 28 | run: julia --project=.fluxbot -e 'using FluxBot; FluxBot.trial()' 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | notebooks 2 | .vscode -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | include: 2 | - 'https://raw.githubusercontent.com/JuliaGPU/gitlab-ci/master/templates/v6.yml' 3 | 4 | image: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 5 | 6 | .modelzoo: 7 | stage: deploy 8 | 9 | script: 10 | - julia --project=script -e 'using Pkg; Pkg.instantiate()' 11 | # if triggered by FluxBot 12 | - > 13 | if [ "$FLUXBOT" == "true" ]; then 14 | git clone https://github.com/FluxML/Flux.jl 15 | cd Flux.jl && git fetch origin pull/$PRID/head:test_$PRID 16 | git checkout test_$PRID 17 | julia --project -e 'using Pkg; 18 | Pkg.instantiate();' 19 | cd .. 
20 |         export FLUX="$PWD/Flux.jl"
21 |         export JULIA_LOAD_PATH=".:$FLUX::"
22 |       fi
23 |   # end
24 | 
25 |   - julia --project -e 'using Pkg;
26 |       Pkg.instantiate();
27 |       get(ENV, "FLUXBOT", "") == "true" && Pkg.develop(PackageSpec(path = ENV["FLUX"]));
28 |       Pkg.resolve();
29 |       Pkg.API.precompile();'
30 | 
31 |   - cd script
32 |   - julia --project convert.jl $TESTSUITE
33 | 
34 |   after_script:
35 |     - apt-get -qq -o=Dpkg::Use-Pty=0 -y install unzip
36 |     - julia --project=script -e 'using Pkg; Pkg.instantiate();'
37 |     - julia --project=script -e 'using FluxBot; FluxBot.respond();'
38 | 
39 |   only:
40 |     variables:
41 |       - $PRID
42 |       - $TESTSUITE
43 |       - $FLUXBOT
44 |       - $REPO_NAME
45 |   artifacts:
46 |     paths:
47 |       - notebooks/*.ipynb
48 | 
49 | zoo:1.0:
50 |   extends:
51 |     - .julia:1.0
52 |     - .modelzoo
53 |   tags:
54 |     - nvidia
55 | 
56 | zoo:1.1:
57 |   extends:
58 |     - .julia:1.1
59 |     - .modelzoo
60 |   tags:
61 |     - nvidia
62 | 
63 | zoo:1.2:
64 |   extends:
65 |     - .julia:1.2
66 |     - .modelzoo
67 |   tags:
68 |     - nvidia
69 | 
70 | zoo:1.3:
71 |   extends:
72 |     - .julia:1.3
73 |     - .modelzoo
74 |   tags:
75 |     - nvidia
76 | 
77 | zoo:nightly:
78 |   extends:
79 |     - .julia:nightly
80 |     - .modelzoo
81 |   tags:
82 |     - nvidia
83 |   allow_failure: true
84 | 
85 | 
-------------------------------------------------------------------------------- /.gitpod.Dockerfile: --------------------------------------------------------------------------------
1 | FROM gitpod/workspace-full
2 | 
3 | USER gitpod
4 | 
5 | # Install Julia
6 | RUN sudo apt-get update \
7 |     && sudo apt-get install -y \
8 |         libatomic1 \
9 |         gfortran \
10 |         perl \
11 |         wget \
12 |         m4 \
13 |         pkg-config \
14 |         julia \
15 |     && sudo rm -rf /var/lib/apt/lists/*
16 | 
-------------------------------------------------------------------------------- /.gitpod.yml: --------------------------------------------------------------------------------
1 | image:
2 |   file: .gitpod.Dockerfile
3 | 
4 | vscode:
5 |   extensions:
6 |     - julialang.language-julia@0.12.3:lgRyBd8rjwUpMGG0C5GAig==
-------------------------------------------------------------------------------- /LICENSE.md: --------------------------------------------------------------------------------
1 | These examples are licensed under the MIT "Expat" License:
2 | 
3 | > Copyright (c) 2017 by Flux contributors;
4 | > https://github.com/FluxML/model-zoo/graphs/contributors
5 | >
6 | > Permission is hereby granted, free of charge, to any person obtaining a copy
7 | > of this software and associated documentation files (the "Software"), to deal
8 | > in the Software without restriction, including without limitation the rights
9 | > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | > copies of the Software, and to permit persons to whom the Software is
11 | > furnished to do so, subject to the following conditions:
12 | >
13 | > The above copyright notice and this permission notice shall be included in all
14 | > copies or substantial portions of the Software.
15 | >
16 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | > SOFTWARE.
23 | >
24 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | 
2 | 
3 | # Flux Model Zoo
4 | 
5 | This repository contains various demonstrations of the [Flux](http://fluxml.github.io/) machine learning library. Any of these may freely be used as a starting point for your own models.
6 | 
7 | The models are broadly categorised into the folders [vision](/vision) (e.g. large convolutional neural networks (CNNs)), [text](/text) (e.g. various recurrent neural networks (RNNs) and natural language processing (NLP) models), and [games](/contrib/games) (reinforcement learning / RL). See the READMEs of the respective models for more information.
8 | 
9 | ## Usage
10 | 
11 | Each model comes with its own [Julia project](https://pkgdocs.julialang.org/v1/environments/#Using-someone-else's-project). To use this, open Julia in the project folder, and enter
12 | 
13 | ```julia
14 | using Pkg; Pkg.activate("."); Pkg.instantiate()
15 | ```
16 | 
17 | This will install all needed packages, at the exact versions recorded when the model was last updated. Then you can run the model code with `include("<model-name>.jl")`, or by running the model script line by line.
18 | 
19 | Models may also be run with NVIDIA GPU support, if you have CUDA installed. Most models have this capability by default, enabled by calls to `gpu` in the model code.
20 | 
21 | ### Gitpod Online IDE
22 | 
23 | Each model can be used in [Gitpod](https://www.gitpod.io/); just [open the repository in Gitpod](https://gitpod.io/#https://github.com/FluxML/model-zoo).
24 | 
25 | * Based on [Gitpod's policies](https://www.gitpod.io/pricing/), free access is limited.
26 | * All of your work is stored in Gitpod's cloud.
27 | * It isn't an officially maintained feature.
28 | 
29 | ## Contributing
30 | 
31 | We welcome contributions of new models and documentation.
32 | 
33 | ### Share a new model
34 | 
35 | If you want to share a new model, we suggest you follow these guidelines:
36 | 
37 | * Models should be in a folder with a project and manifest file to pin all relevant packages.
38 | * Models should include a README(.md) to explain what the model is about, how to run it, and what results it achieves (if applicable).
39 | * Models should ideally be CPU/GPU agnostic and not depend directly on GPU functionality.
40 | * Please keep the code short, clean, and self-explanatory, with as little boilerplate as possible.
41 | 
42 | ### Create or improve documentation
43 | 
44 | You can contribute in one of the following ways:
45 | 
46 | * Add or improve documentation for existing models, including the following information:
47 |   * Give a brief introduction to the model’s architecture and the goal it achieves.
48 |   * Describe the Flux API that the model demonstrates (high-level API, AD, custom operations, custom layers, etc.).
49 |   * Add literature background for the model. More specifically, add articles, blog posts, videos, and any other resource that is helpful to better understand the model.
50 |   * Mention the technique being demonstrated: briefly describe the learning task (computer vision, regression, NLP, time series, etc.).
51 | * Write in-depth tutorials for a model: you can further extend the documentation of a model and create a tutorial that explains in more detail the architecture, the training routine, how to use your own data, and so forth.
After you write a tutorial, create a PR with it for the [Tutorials](https://fluxml.ai/tutorials/) section on the [FluxML](https://fluxml.ai/) website.
52 | 
53 | ### Update a model
54 | 
55 | Each example lists the version of Flux for which it was most recently updated.
56 | Bringing them up to the latest is a great way to learn!
57 | Flux has a [NEWS page](https://github.com/FluxML/Flux.jl/blob/master/NEWS.md) listing important changes.
58 | (For other packages, see their releases pages: [MLUtils](https://github.com/JuliaML/MLUtils.jl/releases), [MLDatasets](https://github.com/JuliaML/MLDatasets.jl/releases), etc.)
59 | 
60 | To run the old examples, Flux v0.11 can be installed and run on [Julia 1.6, the LTS version](https://julialang.org/downloads/#long_term_support_release).
61 | Flux v0.12 works on Julia 1.8.
62 | Flux v0.14 is the latest right now; this and v0.13 are marked with ☀️. Models upgraded to use
63 | explicit gradients (v0.13.9+ or v0.14) have a `+`.
64 | 
65 | ## Examples in the Model Zoo
66 | 
67 | **Vision**
68 | * MNIST
69 |   * [Simple multi-layer perceptron](vision/mlp_mnist) ☀️ v0.13 +
70 |   * [Simple ConvNet (LeNet)](vision/conv_mnist) ☀️ v0.14
71 |   * [Variational Auto-Encoder](vision/vae_mnist) ☀️ v0.13 +
72 |   * [Deep Convolutional Generative Adversarial Networks](vision/dcgan_mnist) ☀️ v0.13 +
73 |   * [Conditional Deep Convolutional Generative Adversarial Networks](vision/cdcgan_mnist) ☀️ v0.13
74 |   * [Score-Based Generative Modeling (Diffusion Model)](vision/diffusion_mnist) ☀️ v0.13
75 |   * [Spatial Transformer](vision/spatial_transformer) ☀️ v0.13 +
76 | * CIFAR10
77 |   * [VGG 16/19](vision/vgg_cifar10) ☀️ v0.13 +
78 |   * [ConvMixer "Patches are all you need?"](vision/convmixer_cifar10/) ☀️ v0.13
79 | 
80 | **Text**
81 | * [CharRNN](text/char-rnn) ☀️ v0.13 +
82 | * [NanoGPT](text/nanogpt) ☀️ v0.14
83 | * [Character-level language detection](text/lang-detection) ☀️ v0.13 +
84 | * [Seq2Seq phoneme detection on CMUDict](text/phonemes) ⛅️ v0.11
85 | * [Recursive net on IMDB sentiment treebank](text/treebank) ⛅️ v0.11
86 | 
87 | **Other** & contributed models
88 | * [Logistic Regression Iris](other/iris/iris.jl) ☀️ v0.13 +
89 | * [Autoregressive Model](other/autoregressive-process/) ☀️ v0.13 +
90 | * [BitString Parity Challenge](other/bitstring-parity) ⛅️ v0.11
91 | * [MLP on housing data](other/housing/) (low level API) ⛅️ v0.11
92 | * [FizzBuzz](other/fizzbuzz/fizzbuzz.jl) ☀️ v0.13 +
93 | * [Meta-Learning](contrib/meta-learning/MetaLearning.jl) ❄️ v0.7
94 | * [Speech recognition](contrib/audio/speech-blstm) ❄️ v0.6
95 | 
96 | **Tutorials**
97 | * [A 60 Minute Blitz](tutorials/60-minute-blitz/60-minute-blitz.jl) ⛅️ v0.11
98 | * [DataLoader example with image data](tutorials/dataloader) ⛅️ v0.11
99 | * [Transfer Learning](tutorials/transfer_learning/transfer_learning.jl) ☀️ v0.13 +
100 | 
101 | ## Examples Elsewhere
102 | 
103 | **MLJFlux** is a bridge to [MLJ.jl](https://github.com/alan-turing-institute/MLJ.jl), a package for mostly non-neural-network machine learning.
They have some examples of interest, which, like the model zoo's examples, each include a local Project & Manifest file:
104 | 
105 | * [Iris](https://github.com/FluxML/MLJFlux.jl/tree/dev/examples/iris) ⛅️ v0.11
106 | * [Boston](https://github.com/FluxML/MLJFlux.jl/tree/dev/examples/boston) ⛅️ v0.11
107 | * [MNIST](https://github.com/FluxML/MLJFlux.jl/tree/dev/examples/mnist) ⛅️ v0.11
108 | 
-------------------------------------------------------------------------------- /contrib/README.md: --------------------------------------------------------------------------------
1 | # Model Contributions
2 | 
3 | The models here have been contributed by community members to showcase models that can be used in different fields.
4 | 
5 | These models come with their own environments (the Project.toml and Manifest.toml files), so please make sure to use them when working with these models.
6 | 
-------------------------------------------------------------------------------- /contrib/audio/speech-blstm/00-data.jl: --------------------------------------------------------------------------------
1 | # 00-data.jl
2 | # Extracts audio features from TIMIT to be used in speech recognition
3 | 
4 | using Flux: onehotbatch
5 | using WAV
6 | using BSON
7 | 
8 | # This uses wookay's fork of MFCC, updated to work with Julia v0.7/1.0:
9 | # https://github.com/wookay/MFCC.jl
10 | using MFCC
11 | 
12 | # Define constants that will be used
13 | const TRAINING_DATA_DIR = "TIMIT/TRAIN"
14 | const TEST_DATA_DIR = "TIMIT/TEST"
15 | 
16 | const TRAINING_OUT_DIR = "train"
17 | const TEST_OUT_DIR = "test"
18 | 
19 | # Make dictionary to map from phones to class numbers
20 | const PHONES = split("h# q eh dx iy r ey ix tcl sh ow z s hh aw m t er l w aa hv ae dcl y axr d kcl k ux ng gcl g ao epi ih p ay v n f jh ax en oy dh pcl ah bcl el zh uw pau b uh th ax-h em ch nx eng")
21 | translations = Dict(phone=>i for (i, phone) in enumerate(PHONES))
22 | translations["sil"] = translations["h#"]
23 | const PHONE_TRANSLATIONS = translations
24 | 
25 | # Make dictionary to perform class folding
26 | const FOLDINGS = Dict(
27 |     "ao" => "aa",
28 |     "ax" => "ah",
29 |     "ax-h" => "ah",
30 |     "axr" => "er",
31 |     "hv" => "hh",
32 |     "ix" => "ih",
33 |     "el" => "l",
34 |     "em" => "m",
35 |     "en" => "n",
36 |     "nx" => "n",
37 |     "eng" => "ng",
38 |     "zh" => "sh",
39 |     "pcl" => "sil",
40 |     "tcl" => "sil",
41 |     "kcl" => "sil",
42 |     "bcl" => "sil",
43 |     "dcl" => "sil",
44 |     "gcl" => "sil",
45 |     "h#" => "sil",
46 |     "pau" => "sil",
47 |     "epi" => "sil",
48 |     "ux" => "uw"
49 | )
50 | 
51 | FRAME_LENGTH = 0.025 # s (25 ms)
52 | FRAME_INTERVAL = 0.010 # s (10 ms)
53 | 
54 | """
55 |     makeFeatures(phnFname, wavFname)
56 | 
57 | Extracts Mel filterbanks and associated labels from `wavFname` and `phnFname`.
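Returns a tuple `(features, labelSequence)`: the matrix of per-frame MFCCs with their deltas appended, and the corresponding sequence of phone labels.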
58 | """ 59 | function makeFeatures(phnFname, wavFname) 60 | samps, sr = wavread(wavFname) 61 | samps = vec(samps) 62 | 63 | mfccs, _, _ = mfcc(samps, sr, :rasta; wintime=FRAME_LENGTH, steptime=FRAME_INTERVAL) 64 | 65 | local lines 66 | open(phnFname, "r") do f 67 | lines = readlines(f) 68 | end 69 | 70 | boundaries = Vector() 71 | labels = Vector() 72 | 73 | # first field in the file is the beginning sample number, which isn't 74 | # needed for calculating where the labels are 75 | for line in lines 76 | _, boundary, label = split(line) 77 | boundary = parse(Int64, boundary) 78 | push!(boundaries, boundary) 79 | push!(labels, label) 80 | end 81 | 82 | labelInfo = collect(zip(boundaries, labels)) 83 | labelInfoIdx = 1 84 | boundary, label = labelInfo[labelInfoIdx] 85 | nSegments = length(labelInfo) 86 | 87 | frameLengthSamples = FRAME_LENGTH * sr 88 | frameIntervalSamples = FRAME_INTERVAL * sr 89 | halfFrameLength = FRAME_LENGTH / 2 90 | 91 | # Begin generating sequence labels by looping through the MFCC 92 | # frames 93 | 94 | labelSequence = Vector() # Holds the sequence of labels 95 | 96 | idxsToDelete = Vector() # To store indices for frames labeled as 'q' 97 | for i=1:size(mfccs, 1) 98 | win_end = frameLengthSamples + (i-1)*frameIntervalSamples 99 | 100 | # Move on to next label if current frame of samples is more than half 101 | # way into next labeled section and there are still more labels to 102 | # iterate through 103 | if labelInfoIdx < nSegments && win_end - boundary > halfFrameLength 104 | 105 | labelInfoIdx += 1 106 | boundary, label = labelInfo[labelInfoIdx] 107 | end 108 | 109 | if label == "q" 110 | push!(idxsToDelete, i) 111 | continue 112 | end 113 | 114 | push!(labelSequence, label) 115 | end 116 | 117 | # Remove the frames that were labeld as 'q' 118 | mfccs = mfccs[[i for i in 1:size(mfccs,1) if !(i in Set(idxsToDelete))],:] 119 | 120 | mfccDeltas = deltas(mfccs, 2) 121 | features = hcat(mfccs, mfccDeltas) 122 | return (features, labelSequence) 123 | end 124 | 125 | """ 126 | createData(data_dir, out_dir) 127 | 128 | Extracts data from files in `data_dir` and saves results in `out_dir`. 129 | """ 130 | function createData(data_dir, out_dir) 131 | 132 | ! isdir(out_dir) && mkdir(out_dir) 133 | 134 | for (root, dirs, files) in walkdir(data_dir) 135 | 136 | # Exclude the files that are part of the speaker accent readings 137 | files = [x for x in files if ! occursin("SA", x)] 138 | 139 | phnFnames = [x for x in files if occursin("PHN", x)] 140 | wavFnames = [x for x in files if occursin("WAV", x)] 141 | 142 | one_dir_up = basename(root) 143 | print("$(root)\r") 144 | 145 | for (wavFname, phnFname) in zip(wavFnames, phnFnames) 146 | phn_path = joinpath(root, phnFname) 147 | wav_path = joinpath(root, wavFname) 148 | 149 | x, y = makeFeatures(phn_path, wav_path) 150 | 151 | # Generate class nums; there are 61 total classes, but only 39 are 152 | # used after folding. 
153 | y = [PHONE_TRANSLATIONS[x] for x in y] 154 | class_nums = [n for n in 1:61] 155 | y = onehotbatch(y, class_nums) 156 | 157 | base, _ = splitext(phnFname) 158 | dat_name = one_dir_up * base * ".bson" 159 | dat_path = joinpath(out_dir, dat_name) 160 | BSON.@save dat_path x y 161 | end 162 | end 163 | println() 164 | end 165 | 166 | createData(TRAINING_DATA_DIR, TRAINING_OUT_DIR) 167 | createData(TEST_DATA_DIR, TEST_OUT_DIR) 168 | -------------------------------------------------------------------------------- /contrib/audio/speech-blstm/01-speech-blstm.jl: -------------------------------------------------------------------------------- 1 | # 01-speech-blstm.jl 2 | # 3 | # See Graves & Schmidhuber ([Graves, A., & 4 | # Schmidhuber, J. (2005). Framewise phoneme classification with 5 | # bidirectional LSTM and other neural network architectures. Neural 6 | # Networks, 18(5-6), 602-610.]). 7 | 8 | using Flux 9 | using Flux: crossentropy, softmax, flip, sigmoid, LSTM, @epochs 10 | using BSON 11 | using Random 12 | 13 | # Paths to the training and test data directories 14 | const TRAINDIR = "train" 15 | const TESTDIR = "test" 16 | const EPOCHS = 20 17 | 18 | # Component layers of the bidirectional LSTM layer 19 | forward = LSTM(26, 93) 20 | backward = LSTM(26, 93) 21 | output = Dense(186, 61) 22 | 23 | """ 24 | BLSTM(x) 25 | 26 | BLSTM layer using above LSTM layers 27 | 28 | # Parameters 29 | * **x** A 2-tuple containing the forward and backward time samples; 30 | the first is from processing the sequence forward, and the second 31 | is from processing it backward 32 | 33 | # Returns 34 | * The concatenation of the forward and backward LSTM predictions 35 | """ 36 | BLSTM(x) = vcat.(forward.(x), flip(backward, x)) 37 | 38 | """ 39 | model(x) 40 | 41 | The chain of functions representing the trained model. 
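Applies the BLSTM layer and the dense output layer, then takes a softmax over the phone classes at each time step.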
42 | 
43 | # Parameters
44 | * **x** The utterance that the model should process
45 | 
46 | # Returns
47 | * The model's predictions for each time step in `x`
48 | """
49 | model(x) = softmax.(output.(BLSTM(x)))
50 | 
51 | """
52 |     loss(x, y)
53 | 
54 | Calculates the categorical cross-entropy loss for an utterance
55 | 
56 | # Parameters
57 | * **x** Iterable containing the frames to classify
58 | * **y** Iterable containing the labels corresponding to the frames
59 |   in `x`
60 | 
61 | # Returns
62 | * The calculated loss value
63 | 
64 | # Side-effects
65 | * Resets the state in the BLSTM layer
66 | """
67 | function loss(x, y)
68 |     l = sum(crossentropy.(model(x), y))
69 |     Flux.reset!((forward, backward))
70 |     return l
71 | end
72 | 
73 | """
74 |     readData(dataDir)
75 | 
76 | Reads in the data contained in a specified directory
77 | 
78 | # Parameters
79 | * **dataDir** String of the path to the directory containing the data
80 | 
81 | # Returns
82 | * **Xs** Vector where each element is a vector of the frames for
83 |   one utterance
84 | * **Ys** A vector where each element is a vector of the labels for
85 |   the frames for one utterance
86 | """
87 | function readData(dataDir)
88 |     fnames = readdir(dataDir)
89 | 
90 |     Xs = Vector()
91 |     Ys = Vector()
92 | 
93 |     for (i, fname) in enumerate(fnames)
94 |         print(string(i) * "/" * string(length(fnames)) * "\r")
95 |         BSON.@load joinpath(dataDir, fname) x y
96 |         x = [x[i,:] for i in 1:size(x,1)]
97 |         y = [y[:,i] for i in 1:size(y,2)]
98 |         push!(Xs, x)
99 |         push!(Ys, y)
100 |     end
101 | 
102 |     return (Xs, Ys)
103 | end
104 | 
105 | """
106 |     evaluateAccuracy(data)
107 | 
108 | Evaluates the accuracy of the model on a set of data; can be used
109 | either for validation or test accuracy
110 | 
111 | # Parameters
112 | * **data** An iterable of paired values where the first element is
113 |   all the frames for a single utterance, and the second is the
114 |   associated frame labels to compare the model's predictions against
115 | 
116 | # Returns
117 | * The accuracy, as the proportion of correct predictions over the
118 |   total number of predictions made
119 | """
120 | function evaluateAccuracy(data)
121 |     correct = Vector()
122 |     for (x, y) in data
123 |         y = argmax.(y)
124 |         ŷ = argmax.(model(x))
125 |         Flux.reset!((forward, backward))
126 |         append!(correct, [ŷ_n == y_n for (ŷ_n, y_n) in zip(ŷ, y)])
127 |     end
128 |     sum(correct) / length(correct)
129 | end
130 | 
131 | function main()
132 | 
133 |     println("Loading files")
134 |     Xs, Ys = readData(TRAINDIR)
135 |     data = collect(zip(Xs, Ys))
136 | 
137 |     valData = data[1:184]
138 |     data = data[185:end]
139 | 
140 |     # Begin training
141 |     println("Beginning training")
142 | 
143 |     opt = Momentum(params((forward, backward, output)), 10.0^-5; ρ=0.9)
144 | 
145 |     i = 0
146 | 
147 |     @epochs EPOCHS begin
148 | 
149 |         i += 1
150 | 
151 |         shuffle!(data)
152 |         valData = valData[shuffle(1:length(valData))]
153 | 
154 |         Flux.train!(loss, data, opt)
155 | 
156 |         BSON.@save "model_epoch$(i).bson" forward backward output
157 | 
158 |         print("Validating\r")
159 |         val_acc = evaluateAccuracy(valData)
160 |         println("Val acc. " * string(val_acc))
161 |         println()
162 |     end
163 | 
164 |     # Clean up some memory
165 |     valData = nothing
166 |     data = nothing
167 |     Xs = nothing
168 |     Ys = nothing
169 |     GC.gc()
170 | 
171 |     # Test model
172 |     print("Testing\r")
173 |     Xs_test, Ys_test = readData(TESTDIR)
174 |     test_data = collect(zip(Xs_test, Ys_test))
175 |     test_acc = evaluateAccuracy(test_data)
176 |     println("Test acc. " * string(test_acc))
177 |     println()
178 | end
179 | 
180 | main()
181 | 
-------------------------------------------------------------------------------- /contrib/audio/speech-blstm/Project.toml: --------------------------------------------------------------------------------
1 | name = "FramewiseSpeechNetwork"
2 | authors = ["Matthew C. Kelley"]
3 | 
4 | [deps]
5 | BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
6 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
7 | MFCC = "ca7b5df7-6146-5dcc-89ec-36256279a339"
8 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
9 | WAV = "8149f6b0-98f6-5db9-b78f-408fbbb8ef88"
10 | 
-------------------------------------------------------------------------------- /contrib/audio/speech-blstm/README.md: --------------------------------------------------------------------------------
1 | This model is an implementation of the neural network for speech recognition described in Graves & Schmidhuber (2005). It takes in frames of frequency information derived from the waveform, and it predicts which phone class the frame belongs to, among a reduced set of English phones. The training is run using the [TIMIT data set (Garofolo et al., 1993)](https://catalog.ldc.upenn.edu/LDC93S1).
2 | 
3 | # How to use these scripts
4 | 
5 | This implementation is broken down into two separate scripts. The first, `00-data.jl`, extracts the appropriate speech features from the data in TIMIT and saves them to file. It assumes that you have the TIMIT speech corpus extracted, [converted into RIFF WAV file format](https://web.archive.org/web/20180528013655/https://stackoverflow.com/questions/47370167/change-huge-amount-of-data-from-nist-to-riff-wav-file), and in the same directory as the script itself. It takes no arguments, and is run as
6 | 
7 | ```bash
8 | julia 00-data.jl
9 | ```
10 | 
11 | It will print out which directory it is working on as it goes, so you can track the progress as it extracts the training and testing data.
12 | 
13 | The second script, `01-speech-blstm.jl`, trains the network. It loads in the speech data extracted by `00-data.jl` and runs it through the network for 20 epochs, which is on average how long Graves & Schmidhuber needed to train the network. (The number of epochs can be changed by modifying the value of the `EPOCHS` variable in the script.) The script is run as
14 | 
15 | ```bash
16 | julia 01-speech-blstm.jl
17 | ```
18 | 
19 | At the end of each epoch, the script prints out the validation accuracy and saves a BSON file with the model's current weights. After running through all the epochs, the script prints out the testing accuracy on the default holdout test set.
20 | 
21 | # Using a trained model
22 | 
23 | It is simple to use the model once it's been trained. Simply load in the model from the BSON file, and use the `model(x)` function from `01-speech-blstm.jl` on some data prepared using the same procedure as in `00-data.jl`. The phoneme class numbers can be determined by using `argmax`. The `Flux` and `BSON` packages will need to be loaded in beforehand.
24 | 
25 | ```julia
26 | using Flux, BSON
27 | using Flux: flip, softmax
28 | BSON.@load "model_epoch20.bson" forward backward output
29 | BLSTM(x) = vcat.(forward.(x), flip(backward, x))
30 | model(x) = softmax.(output.(BLSTM(x)))
31 | ŷ = model(x) # where x is the utterance you want to be transcribed
32 | phonemes = argmax.(ŷ)
33 | ```
34 | 
35 | # References
36 | 
37 | Garofolo, J. S., Lamel, L. F., Fisher, W. M., Fiscus, J. G., Pallett, D. S., & Dahlgren, N. L. (1993).
The DARPA TIMIT acoustic-phonetic continuous speech corpus CD-ROM. Linguistic Data Consortium.
38 | 
39 | Graves, A., & Schmidhuber, J. (2005). Framewise phoneme classification with bidirectional LSTM and other neural network architectures. *Neural Networks, 18*(5-6), 602-610.
40 | 
-------------------------------------------------------------------------------- /contrib/audio/speech-blstm/TIMIT/README.md: --------------------------------------------------------------------------------
1 | This is the folder where the TIMIT data should be placed after downloading it from the [Linguistic Data Consortium](https://www.ldc.upenn.edu/). It is not included in this repository for copyright and, secondarily, space restrictions.
2 | 
-------------------------------------------------------------------------------- /contrib/audio/speech-blstm/test/README.md: --------------------------------------------------------------------------------
1 | This is the folder where the TIMIT data should be placed after downloading it from the [Linguistic Data Consortium](https://www.ldc.upenn.edu/). It is not included in this repository for copyright and, secondarily, space restrictions.
2 | 
-------------------------------------------------------------------------------- /contrib/audio/speech-blstm/train/README.md: --------------------------------------------------------------------------------
1 | This is the folder where the TIMIT data should be placed after downloading it from the [Linguistic Data Consortium](https://www.ldc.upenn.edu/). It is not included in this repository for copyright and, secondarily, space restrictions.
2 | 
-------------------------------------------------------------------------------- /contrib/games/differentiable-programming/cartpole/DQN.jl: --------------------------------------------------------------------------------
1 | using Flux, Gym, Printf, Zygote
2 | using Statistics: mean
3 | using DataStructures: CircularBuffer
4 | using Distributions: sample
5 | #using CuArrays
6 | 
7 | # Load game environment
8 | env = make("CartPole-v0")
9 | reset!(env)
10 | 
11 | # ----------------------------- Parameters -------------------------------------
12 | 
13 | STATE_SIZE = length(state(env)) # 4
14 | ACTION_SIZE = length(env._env.action_space) # 2
15 | MEM_SIZE = 100_000
16 | BATCH_SIZE = 64
17 | γ = 1f0 # discount rate
18 | 
19 | # Exploration params
20 | ϵ = 1f0 # Initial exploration rate
21 | ϵ_MIN = 1f-2 # Final exploration rate
22 | ϵ_DECAY = 995f-3
23 | 
24 | # Optimiser params
25 | η = 1f-2 # Learning rate
26 | η_decay = 1f-3
27 | 
28 | memory = CircularBuffer{Any}(MEM_SIZE) # Used to remember past results
29 | 
30 | # ------------------------------ Model Architecture ----------------------------
31 | 
32 | model = Chain(Dense(STATE_SIZE, 24, tanh),
33 |               Dense(24, 48, tanh),
34 |               Dense(48, ACTION_SIZE)) |> gpu
35 | 
36 | loss(x, y) = Flux.mse(model(x), y)
37 | 
38 | opt = Flux.Optimiser(ADAM(η), InvDecay(η_decay))
39 | 
40 | # ----------------------------- Helper Functions -------------------------------
41 | 
42 | get_ϵ(e) = max(ϵ_MIN, min(ϵ, 1f0 - log10(e * ϵ_DECAY)))
43 | 
44 | remember(state, action, reward, next_state, done) =
45 |     push!(memory, (state, action, reward, next_state, done))
46 | 
47 | function action(state, train=true)
48 |     train && rand() ≤ get_ϵ(e) && (return Gym.sample(env._env.action_space))
49 |     act_values = model(state |> gpu)
50 |     return Flux.onecold(act_values)
51 | end
52 | 
53 | function replay()
54 |     global ϵ
55 |     batch_size = min(BATCH_SIZE, length(memory))
56 |     minibatch = 
sample(memory, batch_size, replace = false) 57 | 58 | x = [] 59 | y = [] 60 | for (iter, (state, action, reward, next_state, done)) in enumerate(minibatch) 61 | target = reward 62 | if !done 63 | target += γ * maximum(model(next_state |> gpu)) 64 | end 65 | 66 | target_f = model(state |> gpu) 67 | target_f[action] = target 68 | 69 | push!(x, state) 70 | push!(y, target_f) 71 | end 72 | x = hcat(x...) |> gpu 73 | y = hcat(y...) |> gpu 74 | 75 | grads = Zygote.gradient(()->loss(x, y), params(model)) 76 | Flux.Optimise.update!(opt, params(model), grads) 77 | 78 | ϵ *= ϵ > ϵ_MIN ? ϵ_DECAY : 1.0f0 79 | end 80 | 81 | function episode!(env) 82 | reset!(env) 83 | while !game_over(env) 84 | #render(env) 85 | s = state(env) 86 | a = action(s, trainable(env)) 87 | s′, r, done, _ = step!(env, a) 88 | trainable(env) && remember(s, a, r, s′, done) 89 | end 90 | 91 | env.total_reward 92 | end 93 | 94 | # -------------------------------- Testing ------------------------------------- 95 | 96 | function test(env::EnvWrapper) 97 | score_mean = 0f0 98 | testmode!(env) 99 | for _=1:100 100 | total_reward = episode!(env) 101 | score_mean += total_reward / 100 102 | end 103 | testmode!(env, false) 104 | return score_mean 105 | end 106 | 107 | # ------------------------------ Training -------------------------------------- 108 | 109 | e = 1 110 | while true 111 | global e 112 | total_reward = @sprintf "%6.2f" episode!(env) 113 | print("Episode: $e | Score: $total_reward | ") 114 | replay() 115 | 116 | score_mean = test(env) 117 | score_mean_str = @sprintf "%6.2f" score_mean 118 | print("Mean score over 100 test episodes: " * score_mean_str) 119 | 120 | println() 121 | 122 | if score_mean > env.reward_threshold 123 | println("CartPole-v0 solved!") 124 | break 125 | end 126 | e += 1 127 | end 128 | -------------------------------------------------------------------------------- /contrib/games/differentiable-programming/cartpole/DiffRL.jl: -------------------------------------------------------------------------------- 1 | using Flux, Gym, Printf, Zygote 2 | using Zygote: @adjoint 3 | using Flux.Optimise: update! 
4 | using Statistics: mean 5 | #using CuArrays 6 | 7 | import Base.sign 8 | 9 | #Load game environment 10 | 11 | env = make("CartPole-v0") 12 | reset!(env) 13 | 14 | #ctx = Ctx(env) 15 | 16 | #display(ctx.s) 17 | #using Blink# when not on Juno 18 | #body!(Blink.Window(), ctx.s) 19 | 20 | # ----------------------------- Parameters ------------------------------------- 21 | 22 | STATE_SIZE = length(env._env.state) 23 | ACTION_SIZE = length(env._env.action_space) 24 | MAX_TRAIN_REWARD = env._env.x_threshold * env._env.θ_threshold_radians 25 | SEQ_LEN = 8 26 | 27 | # Optimiser params 28 | η = 3f-2 29 | # ------------------------------ Model Architecture ---------------------------- 30 | sign(x) = Base.sign.(x) 31 | @adjoint sign(x) = sign(x), x̄ -> (x̄,) 32 | 33 | model = Chain(Dense(STATE_SIZE, 24, relu), 34 | Dense(24, 48, relu), 35 | Dense(48, 1, tanh), x->sign(x)) |> gpu 36 | 37 | opt = ADAM(η) 38 | 39 | action(state) = state |> model |> (model_output) -> (3 .+ model_output) / 2 40 | 41 | loss(rewards) = Flux.mse(rewards, MAX_TRAIN_REWARD) 42 | 43 | # ----------------------------- Helper Functions ------------------------------- 44 | 45 | function train_reward(env::EnvWrapper) 46 | s = env._env.state 47 | x, ẋ, θ, θ̇ = s 48 | # Custom reward for training 49 | # Product of Triangular function over x-axis and θ-axis 50 | # Min reward = 0, Max reward = env.x_threshold * env.θ_threshold_radians 51 | x_upper = env._env.x_threshold - x 52 | x_lower = env._env.x_threshold + x 53 | 54 | r_x = max(0f0, min(x_upper, x_lower)) 55 | 56 | θ_upper = env._env.θ_threshold_radians - θ 57 | θ_lower = env._env.θ_threshold_radians + θ 58 | 59 | r_θ = max(0f0, min(θ_upper, θ_lower)) 60 | 61 | return r_x * r_θ 62 | end 63 | 64 | function μEpisode(env::EnvWrapper) 65 | l = 0 66 | for frames ∈ 1:SEQ_LEN 67 | #render(env, ctx) 68 | #sleep(0.01) 69 | a = action(env._env.state) 70 | s′, r, done, _ = step!(env, a) 71 | 72 | if trainable(env) 73 | l += loss(train_reward(env)) 74 | end 75 | game_over(env) && break 76 | end 77 | return l 78 | end 79 | 80 | function episode!(env::EnvWrapper) 81 | reset!(env) 82 | while !game_over(env) 83 | if trainable(env) 84 | grads = gradient(()->μEpisode(env), params(model)) 85 | update!(opt, params(model), grads) 86 | else 87 | μEpisode(env) 88 | end 89 | end 90 | env.total_reward 91 | end 92 | 93 | # -------------------------------- Testing ------------------------------------- 94 | 95 | function test(env::EnvWrapper) 96 | score_mean = 0f0 97 | testmode!(env) 98 | for _=1:100 99 | total_reward = episode!(env) 100 | score_mean += total_reward / 100 101 | end 102 | testmode!(env, false) 103 | return score_mean 104 | end 105 | 106 | # ------------------------------ Training -------------------------------------- 107 | 108 | e = 1 109 | while true 110 | global e 111 | total_reward = @sprintf "%6.2f" episode!(env) 112 | print("Episode: $e | Score: $total_reward | ") 113 | 114 | score_mean = test(env) 115 | score_mean_str = @sprintf "%6.2f" score_mean 116 | print("Mean score over 100 test episodes: " * score_mean_str) 117 | 118 | println() 119 | 120 | if score_mean > env.reward_threshold 121 | println("CartPole-v0 solved!") 122 | break 123 | end 124 | e += 1 125 | end 126 | -------------------------------------------------------------------------------- /contrib/games/differentiable-programming/cartpole/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" 3 | DataStructures = 
"864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" 4 | Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" 5 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 6 | Gym = "56b9baea-2481-11e9-37ae-75904354ad8c" 7 | IRTools = "7869d1d1-7146-5819-86e3-90919afe41df" 8 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" 9 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 10 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 11 | Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" 12 | -------------------------------------------------------------------------------- /contrib/games/differentiable-programming/cartpole/cuda/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" 3 | CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae" 4 | DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" 5 | Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" 6 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 7 | Gym = "56b9baea-2481-11e9-37ae-75904354ad8c" 8 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" 9 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 10 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 11 | -------------------------------------------------------------------------------- /contrib/games/differentiable-programming/pendulum/DDPG.jl: -------------------------------------------------------------------------------- 1 | using Flux, Gym, Printf, Zygote 2 | using Flux.Tracker: data 3 | using Flux.Optimise: update! 4 | using Statistics: mean 5 | using DataStructures: CircularBuffer 6 | using Distributions: sample 7 | 8 | #using CuArrays 9 | 10 | #Load game environment 11 | 12 | env = make("Pendulum-v0") 13 | reset!(env) 14 | 15 | # ----------------------------- Parameters ------------------------------------- 16 | 17 | STATE_SIZE = length(state(env)) 18 | ACTION_SIZE = 1#length(env.actions) 19 | ACTION_BOUND = env._env.action_space.high[1] 20 | MAX_EP = 50_000 21 | MAX_EP_LENGTH = 200 22 | 23 | BATCH_SIZE = 64 24 | MEM_SIZE = 100_000 25 | MIN_EXP_SIZE = 50_000 26 | 27 | γ = 99f-2 # discount rate 28 | 29 | τ = 1f-3 # for running average while updating target networks 30 | η_act = 1f-4 # Learning rate 31 | η_crit = 1f-3 32 | L2_DECAY = 1f-2 33 | 34 | # Ornstein-Uhlenbeck Noise params 35 | μ = 0f0 36 | θ = 15f-2 37 | σ = 2f-1 38 | 39 | # --------------------------------- Memory ------------------------------------ 40 | 41 | memory = CircularBuffer{Any}(MEM_SIZE) 42 | 43 | function getData(batch_size = BATCH_SIZE) 44 | # Getting data in shape 45 | minibatch = sample(memory, batch_size) 46 | x = hcat(minibatch...) 47 | 48 | s = hcat(x[1, :]...) |> gpu 49 | a = hcat(x[2, :]...) |> gpu 50 | r = hcat(x[3, :]...) |> gpu 51 | s′ = hcat(x[4, :]...) |> gpu 52 | s_mask = .!hcat(x[5, :]...) |> gpu 53 | 54 | return s, a, r, s′, s_mask 55 | end 56 | 57 | # -------------------------------- Action Noise -------------------------------- 58 | 59 | struct OUNoise 60 | μ 61 | θ 62 | σ 63 | X 64 | end 65 | 66 | ou = OUNoise(μ, θ, σ, zeros(Float32, ACTION_SIZE) |> gpu) 67 | 68 | function sample_noise(ou::OUNoise) 69 | dx = ou.θ * (ou.μ .- ou.X) 70 | dx .+= ou.σ * randn(Float32, length(ou.X)) |> gpu 71 | ou.X .+= dx 72 | end 73 | 74 | # Noise scale 75 | τ_ = 25 76 | ϵ = exp(-1f0 / τ_) 77 | noise_scale = 1f0 / ACTION_BOUND 78 | 79 | # ----------------------------- Model Architecture ----------------------------- 80 | 81 | w_init(dims...) = 6f-3rand(Float32, dims...) 
.- 3f-3 82 | 83 | actor = Chain(Dense(STATE_SIZE, 400, relu), 84 | Dense(400, 300, relu), 85 | Dense(300, ACTION_SIZE, tanh, initW=w_init), 86 | x -> x * ACTION_BOUND) |> gpu 87 | actor_target = deepcopy(actor) 88 | 89 | # Critic model 90 | struct crit 91 | state_crit 92 | act_crit 93 | sa_crit 94 | end 95 | 96 | Flux.@treelike crit 97 | 98 | function (c::crit)(state, action) 99 | s = c.state_crit(state) 100 | a = c.act_crit(action) 101 | c.sa_crit(relu.(s .+ a)) 102 | end 103 | 104 | Base.deepcopy(c::crit) = crit(deepcopy(c.state_crit), 105 | deepcopy(c.act_crit), 106 | deepcopy(c.sa_crit)) 107 | 108 | critic = crit(Chain(Dense(STATE_SIZE, 400, relu), Dense(400, 300)) |> gpu, 109 | Dense(ACTION_SIZE, 300) |> gpu, 110 | Dense(300, 1, initW=w_init) |> gpu) 111 | critic_target = deepcopy(critic) 112 | 113 | # ---------------------- Param Update Functions -------------------------------- 114 | 115 | function update_target!(target, model; τ = 1f0) 116 | for (p_t, p_m) in zip(params(target), params(model)) 117 | p_t.data .= (1f0 - τ) * p_t.data .+ τ * p_m.data 118 | end 119 | end 120 | 121 | function update_model!(model, opt, loss, inp...) 122 | grads = gradient(()->loss(inp...), params(model)) 123 | update!(opt, params(model), grads) 124 | end 125 | 126 | # ---------------------------------- Training ---------------------------------- 127 | ## Losses 128 | function L2_loss(model) 129 | l2_loss = sum(map(p->sum(p.^2), params(model))) 130 | return L2_DECAY * l2_loss 131 | end 132 | 133 | loss_crit(y, s, a) = Flux.mse(critic(s, a), y) #+ L2_loss(critic) 134 | 135 | function loss_act(s) 136 | actions = actor(s) 137 | crit_out = critic(s, actions) 138 | return -sum(crit_out) 139 | end 140 | 141 | ## Optimizers 142 | opt_crit = ADAM(η_crit) 143 | opt_act = ADAM(η_act) 144 | 145 | 146 | function replay() 147 | s, a, r, s′, s_mask = getData() 148 | 149 | a′ = actor_target(s′) 150 | v′ = critic_target(s′, a′) 151 | y = data(r .+ γ * v′ .* s_mask) # set v′ to 0 where s_ is terminal state 152 | 153 | 154 | update_model!(critic, opt_crit, loss_crit, y, s, a) 155 | update_model!(actor, opt_act, loss_act, s) 156 | 157 | # Update Target models 158 | update_target!(actor_target, actor; τ = τ) 159 | update_target!(critic_target, critic; τ = τ) 160 | end 161 | 162 | # ---------------------------- Helper Functions -------------------------------- 163 | 164 | # Stores tuple of state, action, reward, next_state, and done 165 | remember(state, action, reward, next_state, done) = 166 | push!(memory, [data.((state, action, reward, next_state))..., done]) 167 | 168 | # Choose action according to policy PendulumPolicy 169 | function action(state, train=true) 170 | state = reshape(state, size(state)..., 1) 171 | act_pred = actor(state |> gpu) 172 | if train 173 | act_pred = act_pred .+ noise_scale * sample_noise(ou) 174 | end 175 | clamp.(act_pred, -ACTION_BOUND, ACTION_BOUND) # returns action 176 | end 177 | 178 | function episode!(env::EnvWrapper) 179 | reset!(env) 180 | for ep=1:MAX_EP_LENGTH 181 | s = state(env) 182 | a = action(s, trainable(env)) 183 | s′, r, done, _ = step!(env, a) 184 | if trainable(env) 185 | remember(s, a, r, s′, done) 186 | replay() 187 | end 188 | end 189 | env.total_reward 190 | end 191 | 192 | # -------------------------------- Testing ------------------------------------- 193 | 194 | # Returns average score over 100 episodes 195 | 196 | function test(env::EnvWrapper) 197 | score_mean = 0f0 198 | testmode!(env) 199 | for e=1:100 200 | total_reward = episode!(env) 201 | score_mean += 
total_reward / 100 202 | end 203 | testmode!(env, false) 204 | return score_mean 205 | end 206 | 207 | # ------------------------------ Training -------------------------------------- 208 | 209 | # Populate memory with random actions 210 | 211 | s = reset!(env) 212 | for e=1:MIN_EXP_SIZE 213 | global s 214 | a = 2rand(Float32) * ACTION_BOUND - ACTION_BOUND 215 | s′, r, done, _ = step!(env, a) 216 | remember(s, a, r, s′, done) 217 | s = s′ 218 | end 219 | 220 | for e=1:MAX_EP 221 | total_reward = episode!(env) 222 | total_reward = @sprintf "%9.3f" total_reward 223 | print("Episode: $e | Score: $total_reward | ") 224 | score_mean = test(env) 225 | score_mean = @sprintf "%9.3f" score_mean 226 | println("Mean score over 100 test episodes: $score_mean") 227 | end 228 | -------------------------------------------------------------------------------- /contrib/games/differentiable-programming/pendulum/DiffRL.jl: -------------------------------------------------------------------------------- 1 | using Flux, Gym, Printf, Zygote 2 | using Flux.Optimise: update! 3 | using Statistics: mean 4 | #using CuArrays 5 | 6 | #Load game environment 7 | 8 | env = make("Pendulum-v0") 9 | reset!(env) 10 | # ----------------------------- Parameters ------------------------------------- 11 | 12 | 13 | STATE_SIZE = length(state(env)) # returns state from obs space 14 | ACTION_SIZE = 1#length(env.actions) 15 | ACTION_BOUND = env._env.action_space.high[1] 16 | MAX_REWARD = 0f0 # Max reward in a timestep 17 | MAX_EP = 10 18 | MAX_EP_LENGTH = 1000 19 | SEQ_LEN = 4 20 | 21 | # ------------------------------ Model Architecture ---------------------------- 22 | 23 | model = Chain(Dense(STATE_SIZE, 24, relu), 24 | Dense(24, 48, relu), 25 | Dense(48, ACTION_SIZE)) |> gpu 26 | 27 | η = 3f-2 28 | 29 | opt = ADAM(η) 30 | 31 | loss(r) = Flux.mse(r, MAX_REWARD) 32 | 33 | # ----------------------------- Helper Functions ------------------------------- 34 | 35 | function μEpisode(env::EnvWrapper) 36 | l = 0 37 | for frames ∈ 1:SEQ_LEN 38 | #render(env, ctx) 39 | #sleep(0.01) 40 | a = model(state(env)) 41 | s, r, done, _ = step!(env, a) 42 | if trainable(env) 43 | l += loss(r) 44 | end 45 | 46 | game_over(env) && break 47 | end 48 | return l 49 | end 50 | 51 | 52 | function episode!(env::EnvWrapper) 53 | reset!(env) 54 | while !game_over(env) 55 | if trainable(env) 56 | grads = gradient(()->μEpisode(env), params(model)) 57 | update!(opt, params(model), grads) 58 | else 59 | μEpisode(env) 60 | end 61 | end 62 | 63 | env.total_reward 64 | end 65 | 66 | # -------------------------------- Testing ------------------------------------- 67 | 68 | function test(env::EnvWrapper) 69 | score_mean = 0f0 70 | testmode!(env) 71 | for e=1:100 72 | total_reward = episode!(env) 73 | score_mean += total_reward / 100 74 | end 75 | testmode!(env, false) 76 | return score_mean 77 | end 78 | 79 | # ------------------------------ Training -------------------------------------- 80 | 81 | for e=1:MAX_EP 82 | total_reward = episode!(env) 83 | total_reward = @sprintf "%9.3f" total_reward 84 | print("Episode: $e | Score: $total_reward | ") 85 | score_mean = test(env) 86 | score_mean = @sprintf "%9.3f" score_mean 87 | println("Mean score over 100 test episodes: $score_mean") 88 | end 89 | -------------------------------------------------------------------------------- /contrib/games/differentiable-programming/pendulum/Project.toml: -------------------------------------------------------------------------------- 1 | name = "pendulum" 2 | uuid = 
"948af14c-2d17-11e9-24e0-0541b265729a" 3 | authors = ["Tejan Karmali "] 4 | version = "0.1.0" 5 | 6 | [deps] 7 | DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" 8 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 9 | Gym = "56b9baea-2481-11e9-37ae-75904354ad8c" 10 | IRTools = "7869d1d1-7146-5819-86e3-90919afe41df" 11 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 12 | Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" 13 | -------------------------------------------------------------------------------- /contrib/games/differentiable-programming/pendulum/cuda/Project.toml: -------------------------------------------------------------------------------- 1 | name = "pendulum" 2 | uuid = "948af14c-2d17-11e9-24e0-0541b265729a" 3 | authors = ["Tejan Karmali "] 4 | version = "0.1.0" 5 | 6 | [deps] 7 | CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae" 8 | DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" 9 | Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" 10 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 11 | Gym = "56b9baea-2481-11e9-37ae-75904354ad8c" 12 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 13 | -------------------------------------------------------------------------------- /contrib/games/differentiable-programming/trebuchet/DiffRL.jl: -------------------------------------------------------------------------------- 1 | using Flux, Trebuchet 2 | using Zygote: forwarddiff 3 | using Statistics: mean 4 | using Random 5 | 6 | lerp(x, lo, hi) = x*(hi-lo)+lo 7 | 8 | function shoot(wind, angle, weight) 9 | Trebuchet.shoot((wind, Trebuchet.deg2rad(angle), weight))[2] 10 | end 11 | 12 | shoot(ps) = forwarddiff(p -> shoot(p...), ps) 13 | 14 | Random.seed!(0) 15 | 16 | model = Chain(Dense(2, 16, σ), 17 | Dense(16, 64, σ), 18 | Dense(64, 16, σ), 19 | Dense(16, 2)) |> f64 20 | 21 | θ = params(model) 22 | 23 | function aim(wind, target) 24 | angle, weight = model([wind, target]) 25 | angle = σ(angle)*90 26 | weight = weight + 200 27 | angle, weight 28 | end 29 | 30 | distance(wind, target) = 31 | shoot(collect([wind, aim(wind, target)...])) 32 | 33 | function loss(wind, target) 34 | (distance(wind, target) - target)^2 35 | end 36 | 37 | DIST = (20, 100) # Maximum target distance 38 | SPEED = 5 # Maximum wind speed 39 | 40 | target() = (randn() * SPEED, lerp(rand(), DIST...)) 41 | 42 | meanloss() = mean(sqrt(loss(target()...)) for i = 1:100) 43 | 44 | opt = ADAM() 45 | 46 | dataset = (target() for i = 1:100_000) 47 | cb = Flux.throttle(() -> @show(meanloss()), 10) 48 | 49 | Flux.train!(loss, θ, dataset, opt, cb = cb) 50 | -------------------------------------------------------------------------------- /contrib/games/differentiable-programming/trebuchet/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" 3 | DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" 4 | Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" 5 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 6 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" 7 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 8 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 9 | Trebuchet = "98b73d46-197d-11e9-11eb-69a6ff759d3a" 10 | WebIO = "0f1e0344-ec1d-5b48-a673-e5cf874b6c29" 11 | Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" 12 | -------------------------------------------------------------------------------- /contrib/games/differentiable-programming/trebuchet/cuda/Project.toml: 
-------------------------------------------------------------------------------- 1 | [deps] 2 | BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" 3 | CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae" 4 | DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" 5 | Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" 6 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 7 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" 8 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 9 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 10 | Trebuchet = "98b73d46-197d-11e9-11eb-69a6ff759d3a" 11 | WebIO = "0f1e0344-ec1d-5b48-a673-e5cf874b6c29" 12 | -------------------------------------------------------------------------------- /contrib/meta-learning/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" 3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 4 | Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" 5 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" 6 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 7 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 8 | -------------------------------------------------------------------------------- /contrib/meta-learning/fomaml_grad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/contrib/meta-learning/fomaml_grad.png -------------------------------------------------------------------------------- /contrib/meta-learning/linear.jl: -------------------------------------------------------------------------------- 1 | function xavier_uniform(dims...) 2 | bound = sqrt(1 / dims[2]) 3 | return Float32.(rand(Uniform(-bound, bound), dims...)) 4 | end 5 | 6 | struct Linear{F,S,T} 7 | W::S 8 | b::T 9 | σ::F 10 | end 11 | 12 | Linear(W, b) = Linear(W, b, identity) 13 | 14 | function Linear(in::Integer, out::Integer, σ = identity; 15 | initW = xavier_uniform, initb = nothing) 16 | if initb == nothing 17 | bias_bound = 1 / sqrt(in) 18 | initb = (out) -> Float32.(rand(Uniform(-bias_bound, bias_bound), out)) 19 | end 20 | return Linear(param(initW(out, in)), param(initb(out)), σ) 21 | end 22 | 23 | Flux.@treelike Linear 24 | 25 | function (a::Linear)(x::AbstractArray) 26 | W, b, σ = a.W, a.b, a.σ 27 | σ.(W*x .+ b) 28 | end 29 | 30 | function Base.show(io::IO, l::Linear) 31 | print(io, "Linear(", size(l.W, 2), ", ", size(l.W, 1)) 32 | l.σ == identity || print(io, ", ", l.σ) 33 | print(io, ")") 34 | end 35 | -------------------------------------------------------------------------------- /contrib/meta-learning/reptile_grad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/contrib/meta-learning/reptile_grad.png -------------------------------------------------------------------------------- /contrib/meta-learning/utils.jl: -------------------------------------------------------------------------------- 1 | """ 2 | ``` 3 | eval_model(model, x::AbstractArray, testx::AbstractArray, task=SineWave(); 4 | opt=Descent(1e-2), updates=32) 5 | ``` 6 | Evaluates the `model` on a sine wave `task` training to sample `x` with `updates` 7 | amount of gradient steps using `opt`. 8 | Evaluation loss is calculated based on the mean squared error 9 | between model predictions and sine wave values on `testx`. 
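Returns a NamedTuple with the sampled points, the test points, their ground-truth values, the model's predictions before and after finetuning, and the per-step test losses; the model's weights are restored to their pre-finetuning state before returning.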
10 | """ 11 | function eval_model(model, x::AbstractArray, testx::AbstractArray, task=SineWave(); 12 | opt=Descent(0.02), updates=32) 13 | weights = params(model) 14 | prev_weights = deepcopy(Flux.data.(weights)) 15 | 16 | y = task(x) 17 | testy = task(testx) 18 | init_preds = model(testx') 19 | test_loss = Flux.mse(init_preds, testy') 20 | 21 | test_losses = Float32[] 22 | push!(test_losses, Flux.data(test_loss)) 23 | 24 | print(task, "\n") 25 | @printf("Before finetuning, Loss = %f\n", test_loss) 26 | for i in 1:updates 27 | l = Flux.mse(model(x'), y') 28 | Flux.back!(l) 29 | Flux.Optimise._update_params!(opt, weights) 30 | test_loss = Flux.mse(model(testx'), testy') 31 | push!(test_losses, Flux.data(test_loss)) 32 | @printf("After %d fits, Loss = %f\n", i, test_loss) 33 | end 34 | final_preds = model(testx') 35 | 36 | # reset weights to state before finetune 37 | Flux.loadparams!(model, prev_weights) 38 | 39 | return (x=x, testx=testx, y=y, testy=testy, 40 | initial_predictions=Array(Flux.data(init_preds)'), 41 | final_predictions=Array(Flux.data(final_preds)'), 42 | test_losses=test_losses) 43 | end 44 | 45 | function plot_eval_data(data::NamedTuple, title="") 46 | return plot([data.x, data.testx, data.testx, data.testx], 47 | [data.y, data.testy, data.initial_predictions, data.final_predictions], 48 | line=[:scatter :path :path :path], 49 | label=["Sampled points", "Ground truth", "Before finetune", "After finetune"], 50 | foreground_color_legend=:white, background_color_legend=:transparent, 51 | title=title, 52 | xlim=(-5.5, 5.5)) 53 | end 54 | -------------------------------------------------------------------------------- /other/autoregressive-process/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 3 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 4 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 5 | 6 | [compat] 7 | Flux = "0.13.9, 0.14" 8 | julia = "1.6" 9 | -------------------------------------------------------------------------------- /other/autoregressive-process/README.md: -------------------------------------------------------------------------------- 1 | # Autoregressive Model 2 | 3 | An [autoregressive (AR) process](https://en.wikipedia.org/wiki/Autoregressive_model) is a stochastic process with an autoregressive structure, i.e., past realizations influence its future realizations. 4 | 5 | This model-zoo example illustrates how to use Flux's recurrent layers to model an AR process. 6 | 7 | The example contains the following files: 8 | + [utils.jl](utils.jl): 9 | + `generate_process`: generates an AR process 10 | + `batch_timeseries`: transforms a vector into the proper format for recurrent layers in Flux and allows to batch the time series as required. 11 | 12 | + [model.jl](model.jl): creates and trains the recurrent model to predict the generated AR process. 13 | 14 | ## Example loss 15 | 16 | Running the model with the hyperparameters currently given in the example, we obtain the following train and test losses. We see that the model begins to overfit after around 30 epochs. 
17 | 18 | ![loss](loss.png) -------------------------------------------------------------------------------- /other/autoregressive-process/loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/other/autoregressive-process/loss.png -------------------------------------------------------------------------------- /other/autoregressive-process/model.jl: -------------------------------------------------------------------------------- 1 | using Flux 2 | using Random 3 | using Statistics 4 | include("utils.jl") 5 | 6 | # Hyperparameters and configuration of AR process 7 | @Base.kwdef mutable struct Args 8 | seed::Int = 72 # Random seed 9 | # AR process parameters 10 | ϕ::Vector{Float32} = [.3f0, .2f0, -.5f0] # AR coefficients (=> AR(3)) 11 | proclen::Int = 750 # Process length 12 | # Recurrent net parameters 13 | dev = cpu # Device: cpu or gpu 14 | opt = ADAM # Optimizer 15 | η::Float64 = 2e-3 # Learning rate 16 | hidden_nodes::Int = 64 # Number of hidden nodes 17 | hidden_layers::Int = 2 # Number of hidden layers 18 | layer = LSTM # Type of layer, should be one of LSTM, GRU, RNN 19 | epochs::Int = 100 # Number of epochs 20 | seqlen::Int = 10 # Sequence length to use as input 21 | seqshift::Int = 10 # Shift between sequences (see utils.jl) 22 | train_ratio::Float64 = .7 # Percentage of data in the train set 23 | verbose::Bool = true # Whether we log the results during training or not 24 | end 25 | 26 | # Creates a model according to the pre-defined hyperparameters `args` 27 | function build_model(args) 28 | Chain( 29 | args.layer(1, args.hidden_nodes), 30 | [args.layer(args.hidden_nodes, args.hidden_nodes) for _ ∈ 1:args.hidden_layers-1]..., 31 | Dense(args.hidden_nodes, 1, identity) 32 | ) |> args.dev 33 | end 34 | 35 | # Creates training and testing samples according to hyperparameters `args` 36 | function generate_train_test_data(args) 37 | # Generate full AR process 38 | data = generate_process(args.ϕ, args.proclen) 39 | # Create input X and output y (series shifted by 1) 40 | X, y = data[1:end-1], data[2:end] 41 | # Split data into training and testing sets 42 | idx = round(Int, args.train_ratio * length(X)) 43 | Xtrain, Xtest = X[1:idx], X[idx+1:end] 44 | ytrain, ytest = y[1:idx], y[idx+1:end] 45 | # Transform data to time series batches and return 46 | map(x -> batch_timeseries(x, args.seqlen, args.seqshift) |> args.dev, 47 | (Xtrain, Xtest, ytrain, ytest)) 48 | end 49 | 50 | function mse_loss(model, x, y) 51 | # Warm up recurrent model on first observation 52 | model(x[1]) 53 | # Compute mean squared error loss on the rest of the sequence 54 | mean(Flux.Losses.mse.([model(xᵢ) for xᵢ ∈ x[2:end]], y[2:end])) 55 | end 56 | 57 | # Trains and outputs the model according to the chosen hyperparameters `args` 58 | function train_model(args) 59 | Random.seed!(args.seed) 60 | # Create recurrent model 61 | model = build_model(args) 62 | # Get data 63 | Xtrain, Xtest, ytrain, ytest = generate_train_test_data(args) 64 | 65 | opt = Flux.setup(args.opt(args.η), model) 66 | # Training loop 67 | for i ∈ 1:args.epochs 68 | Flux.reset!(model) # Reset hidden state of the recurrent model 69 | # Compute the gradients of the loss function 70 | (∇m,) = gradient(model) do m 71 | mse_loss(m, Xtrain, ytrain) 72 | end 73 | Flux.update!(opt, model, ∇m) # Update model parameters 74 | if args.verbose && i % 10 == 0 # Log results every 10 epochs 75 | # Compute loss on train and test set for 
logging (important: the model must be reset!) 76 | Flux.reset!(model) 77 | train_loss = mse_loss(model, Xtrain, ytrain) 78 | Flux.reset!(model) 79 | test_loss = mse_loss(model, Xtest, ytest) 80 | @info "Epoch $i / $(args.epochs), train loss: $(round(train_loss, digits=3)) | test loss: $(round(test_loss, digits=3))" 81 | end 82 | end 83 | return model 84 | end 85 | 86 | cd(@__DIR__) 87 | 88 | args = Args() # Set up hyperparameters 89 | m = train_model(args) # Train and output model 90 | -------------------------------------------------------------------------------- /other/autoregressive-process/utils.jl: -------------------------------------------------------------------------------- 1 | # Generates an AR(p) process with coefficients `ϕ`. 2 | # `ϕ` should be provided as a vector and it represents the coefficients of the AR model. 3 | # Hence the order of the generated process is equal to the length of `ϕ`. 4 | # `s` indicates the total length of the series to be generated. 5 | function generate_process(ϕ::AbstractVector{Float32}, s::Int) 6 | s > 0 || error("s must be positive") 7 | # Generate white noise 8 | ϵ = randn(Float32, s) 9 | # Initialize time series 10 | X = zeros(Float32, s) 11 | p = length(ϕ) 12 | X[1] = ϵ[1] 13 | # Reverse the order of the coefficients for multiplication later on 14 | ϕ = reverse(ϕ) 15 | # Fill first p observations 16 | for t ∈ 1:p-1 17 | X[t+1] = X[1:t]'ϕ[1:t] + ϵ[t+1] 18 | end 19 | # Compute values iteratively 20 | for t ∈ p+1:s 21 | X[t] = X[t-p:t-1]'ϕ + ϵ[t] 22 | end 23 | X 24 | end 25 | 26 | # Create batches of a time series `X` by splitting the series into 27 | # sequences of length `s`. Each new sequence is shifted by `r` steps. 28 | # When s == r, the series is split into non-overlapping batches. 29 | function batch_timeseries(X, s::Int, r::Int) 30 | r > 0 || error("r must be positive") 31 | # If X is passed in format T×1, reshape it 32 | if isa(X, AbstractVector) 33 | X = permutedims(X) 34 | end 35 | T = size(X, 2) 36 | s ≤ T || error("s cannot be longer than the total series") 37 | # Ensure uniform sequence lengths by dropping the first observations until 38 | # the total sequence length matches a multiple of the batchsize 39 | X = X[:, ((T - s) % r)+1:end] 40 | [X[:, t:r:end-s+t] for t ∈ 1:s] # Output 41 | end 42 | 43 | -------------------------------------------------------------------------------- /other/bitstring-parity/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 3 | Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" 4 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 5 | 6 | [compat] 7 | Flux = "0.11.4" 8 | julia = "1.5" 9 | -------------------------------------------------------------------------------- /other/bitstring-parity/README.md: -------------------------------------------------------------------------------- 1 | From https://blog.openai.com/requests-for-research-2/. 2 | 3 | ⭐ Train an LSTM to solve the XOR problem: that is, given a sequence of bits, determine its parity. The LSTM should consume the sequence, one bit at a time, and then output the correct answer at the sequence’s end. Test the two approaches below: 4 | 5 | - Generate a dataset of random 100,000 binary strings of length 50. Train the LSTM; what performance do you get? 6 | - Generate a dataset of random 100,000 binary strings, where the length of each string is independently and randomly chosen between 1 and 50. Train the LSTM. Does it succeed? 
What explains the difference? 7 | 8 | ## Files 9 | 10 | * [2 bit strings](./xor1.jl) 11 | * [2000 1 to 10 length strings](./xor2.jl) 12 | * [100,000 1 to 50 length strings](./xor3.jl) 13 | -------------------------------------------------------------------------------- /other/bitstring-parity/data.jl: -------------------------------------------------------------------------------- 1 | using Flux: onehot, onehotbatch 2 | using Random 3 | 4 | const alphabet = [false, true] # 0, 1 5 | 6 | parity(x) = reduce(xor, x) 7 | 8 | gendata(n::Int, k::Int) = gendata(n, k:k) 9 | 10 | function gendata(n::Int, k::UnitRange{Int}) 11 | X = bitrand.(rand(k, n)) 12 | return [(onehotbatch(x, alphabet), onehot(y, alphabet)) for (x, y) in zip(X, parity.(X))] 13 | end 14 | -------------------------------------------------------------------------------- /other/bitstring-parity/xor1.jl: -------------------------------------------------------------------------------- 1 | include("data.jl") 2 | using Flux, Statistics 3 | using Flux: onehot, onehotbatch, throttle, logitcrossentropy, reset!, onecold 4 | using Parameters: @with_kw 5 | 6 | @with_kw mutable struct Args 7 | lr::Float64 = 1e-3 # Learning rate 8 | epochs::Int = 20 # Number of epochs for training 9 | train_len::Int = 100 # Length of training data to be generated 10 | val_len::Int = 10 # Length of Validation Data 11 | throttle::Int = 10 # Throttle timeout 12 | end 13 | 14 | function getdata(args) 15 | # Using gendata function defined in data.jl 16 | train = gendata(args.train_len, 2) 17 | val = gendata(args.val_len, 2) 18 | return train, val 19 | end 20 | 21 | function build_model() 22 | scanner = LSTM(length(alphabet), 20) 23 | encoder = Dense(20, length(alphabet)) 24 | return scanner, encoder 25 | end 26 | 27 | function model(x, scanner, encoder) 28 | state = scanner.(x.data)[end] 29 | reset!(scanner) 30 | encoder(state) 31 | end 32 | 33 | function train(; kws...) 34 | # Initialize the parameters 35 | args = Args(; kws...) 36 | 37 | # Load Data 38 | train_data, val_data = getdata(args) 39 | 40 | @info("Constructing Model...") 41 | scanner,encoder = build_model() 42 | 43 | loss(x, y) = logitcrossentropy(model(x, scanner, encoder), y) 44 | batch_loss(data) = mean(loss(d...) 
for d in data) 45 | 46 | opt = ADAM(args.lr) 47 | ps = params(scanner, encoder) 48 | evalcb = () -> @show batch_loss(val_data) 49 | 50 | @info("Training...") 51 | for i=1:args.epochs 52 | Flux.train!(loss, ps, train_data, opt, cb=throttle(evalcb, args.throttle)) 53 | end 54 | return scanner, encoder 55 | end 56 | 57 | function test(scanner, encoder) 58 | # sanity test 59 | tx = map(c -> onehotbatch(c, alphabet), [ 60 | [false, true], # 01 -> 1 61 | [true, false], # 10 -> 1 62 | [false, false], # 00 -> 0 63 | [true, true]]) # 11 -> 0 64 | @info("Test...") 65 | out = [onecold(model(x, scanner, encoder)) - 1 for x in tx] 66 | input = [[0,1],[1,0],[0,0],[1,1]] 67 | for i in 1:length(tx) 68 | print(input[i]," => ",out[i],"\n") 69 | end 70 | end 71 | 72 | cd(@__DIR__) 73 | scanner, encoder = train() 74 | test(scanner, encoder) 75 | -------------------------------------------------------------------------------- /other/bitstring-parity/xor2.jl: -------------------------------------------------------------------------------- 1 | include("data.jl") 2 | using Flux, Statistics 3 | using Flux: onehot, onehotbatch, throttle, logitcrossentropy, reset!, onecold 4 | using Parameters: @with_kw 5 | 6 | @with_kw mutable struct Args 7 | lr::Float64 = 1e-3 # Learning rate 8 | epochs::Int = 20 # Number of epochs for training 9 | train_len::Int = 2000 # Length of training data to be generated 10 | val_len::Int = 100 # Length of Validation Data 11 | throttle::Int = 10 # Throttle timeout 12 | end 13 | 14 | function getdata(args) 15 | # training data of bit strings from length 1 to 10 16 | train = gendata(args.train_len, 1:10) 17 | # validation data of bit strings of length 10 18 | val = gendata(args.val_len, 10) 19 | return train, val 20 | end 21 | 22 | function build_model() 23 | scanner = LSTM(length(alphabet), 20) 24 | encoder = Dense(20, length(alphabet)) 25 | return scanner, encoder 26 | end 27 | 28 | function model(x, scanner, encoder) 29 | state = scanner.(x.data)[end] 30 | reset!(scanner) 31 | encoder(state) 32 | end 33 | 34 | function train(; kws...) 35 | # Initialize the parameters 36 | args = Args(; kws...) 37 | 38 | # Load Data 39 | train_data, val_data = getdata(args) 40 | 41 | @info("Constructing Model...") 42 | scanner,encoder = build_model() 43 | 44 | loss(x, y) = logitcrossentropy(model(x, scanner, encoder), y) 45 | batch_loss(data) = mean(loss(d...) for d in data) 46 | 47 | opt = ADAM(args.lr) 48 | ps = params(scanner, encoder) 49 | evalcb = () -> @show batch_loss(val_data) 50 | 51 | @info("Training...") 52 | for i=1:args.epochs 53 | Flux.train!(loss, ps, train_data, opt, cb=throttle(evalcb, args.throttle)) 54 | end 55 | 56 | # Try running the model on strings of length 50. 57 | # 58 | # Even though the model has only been trained with 59 | # much shorter strings, it has learned the 60 | # parity function and will remain accurate on longer strings. 
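# (Why this generalises: parity needs only one bit of running state, the XOR of
# the bits seen so far, so once the LSTM has learned to track that bit the
# length of the input no longer matters.)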
61 | function t50() 62 | l = batch_loss(gendata(1000, 50)) 63 | println("Batch loss for length-50 strings: ", l,"\n") 64 | end 65 | t50() 66 | return scanner, encoder 67 | end 68 | 69 | function test(scanner, encoder) 70 | # sanity test 71 | tx = map(c -> onehotbatch(c, alphabet), [ 72 | [false, true], # 01 -> 1 73 | [true, false], # 10 -> 1 74 | [false, false], # 00 -> 0 75 | [true, true]]) # 11 -> 0 76 | @info("Test...") 77 | out = [onecold(model(x, scanner, encoder)) - 1 for x in tx] 78 | input = [[0,1],[1,0],[0,0],[1,1]] 79 | for i in 1:length(tx) 80 | print(input[i]," => ",out[i],"\n") 81 | end 82 | end 83 | 84 | cd(@__DIR__) 85 | scanner, encoder = train() 86 | test(scanner, encoder) 87 | -------------------------------------------------------------------------------- /other/bitstring-parity/xor3.jl: -------------------------------------------------------------------------------- 1 | include("data.jl") 2 | using Flux, Statistics 3 | using Flux: onehot, onehotbatch, throttle, logitcrossentropy, reset!, onecold 4 | using Parameters: @with_kw 5 | 6 | @with_kw mutable struct Args 7 | lr::Float64 = 1e-3 # Learning rate 8 | epochs::Int = 20 # Number of epochs for training 9 | train_len::Int = 100000 # Length of training data to be generated 10 | val_len::Int = 1000 # Length of Validation Data 11 | throttle::Int = 10 # Throttle timeout 12 | end 13 | 14 | function getdata(args) 15 | # training data of bit strings from length 1 to 50 16 | train = gendata(args.train_len, 1:50) 17 | # validation data of bit strings of length 50 18 | val = gendata(args.val_len, 50) 19 | return train, val 20 | end 21 | 22 | function build_model() 23 | scanner = LSTM(length(alphabet), 20) 24 | encoder = Dense(20, length(alphabet)) 25 | return scanner, encoder 26 | end 27 | 28 | function model(x, scanner, encoder) 29 | state = scanner.(x.data)[end] 30 | reset!(scanner) 31 | encoder(state) 32 | end 33 | 34 | function train(; kws...) 35 | # Initialize the parameters 36 | args = Args(; kws...) 37 | 38 | # Load Data 39 | train_data, val_data = getdata(args) 40 | 41 | @info("Constructing Model...") 42 | scanner,encoder = build_model() 43 | 44 | loss(x, y) = logitcrossentropy(model(x, scanner, encoder), y) 45 | batch_loss(data) = mean(loss(d...) 
for d in data) 46 | 47 | opt = ADAM(args.lr) 48 | ps = params(scanner, encoder) 49 | evalcb = () -> @show batch_loss(val_data) 50 | 51 | @info("Training...") 52 | for i=1:args.epochs 53 | Flux.train!(loss, ps, train_data, opt, cb=throttle(evalcb, args.throttle)) 54 | end 55 | 56 | return scanner, encoder 57 | end 58 | 59 | function test(scanner, encoder) 60 | # sanity test 61 | tx = map(c -> onehotbatch(c, alphabet), [ 62 | [false, true], # 01 -> 1 63 | [true, false], # 10 -> 1 64 | [false, false], # 00 -> 0 65 | [true, true]]) # 11 -> 0 66 | @info("Test...") 67 | out = [onecold(model(x, scanner, encoder)) - 1 for x in tx] 68 | input = [[0,1],[1,0],[0,0],[1,1]] 69 | for i in 1:length(tx) 70 | print(input[i]," => ",out[i],"\n") 71 | end 72 | end 73 | 74 | cd(@__DIR__) 75 | scanner, encoder = train() 76 | test(scanner, encoder) 77 | -------------------------------------------------------------------------------- /other/fizzbuzz/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 3 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 4 | 5 | [compat] 6 | Flux = "0.13.9, 0.14" 7 | julia = "1.6" 8 | -------------------------------------------------------------------------------- /other/fizzbuzz/fizzbuzz.jl: -------------------------------------------------------------------------------- 1 | # Inspired by "Fizz Buzz in Tensorflow" blog by Joel Grus 2 | # http://joelgrus.com/2016/05/23/fizz-buzz-in-tensorflow/ 3 | 4 | using Flux 5 | using Flux: onehotbatch, train!, setup, logitcrossentropy 6 | using Test 7 | 8 | # Data preparation 9 | function fizzbuzz(x::Int) 10 | is_divisible_by_three = x % 3 == 0 11 | is_divisible_by_five = x % 5 == 0 12 | 13 | if is_divisible_by_three & is_divisible_by_five 14 | return "fizzbuzz" 15 | elseif is_divisible_by_three 16 | return "fizz" 17 | elseif is_divisible_by_five 18 | return "buzz" 19 | else 20 | return "else" 21 | end 22 | end 23 | 24 | const LABELS = ("fizz", "buzz", "fizzbuzz", "else"); 25 | 26 | # Feature engineering 27 | features(x) = float.([x % 3, x % 5, x % 15]) 28 | features(x::AbstractArray) = reduce(hcat, features.(x)) 29 | 30 | function getdata() 31 | 32 | @test fizzbuzz.((3, 5, 15, 98)) == LABELS 33 | 34 | raw_x = 1:100; 35 | raw_y = fizzbuzz.(raw_x); 36 | 37 | X = features(raw_x); 38 | y = onehotbatch(raw_y, LABELS); 39 | return X, y 40 | end 41 | 42 | function train(; epochs::Int=500, dim::Int=20, eta::Real=0.001) 43 | 44 | # Get Data 45 | X, y = getdata() 46 | 47 | # Model 48 | m = Chain(Dense(3 => dim, relu), Dense(dim => 4)) 49 | loss(m, x, y) = logitcrossentropy(m(x), y) 50 | 51 | # Helpers 52 | deepbuzz(x) = (a = argmax(m(features(x))); a == 4 ? 
x : LABELS[a]) 53 | 54 | function monitor(e) 55 | print("epoch $(lpad(e, 4)): loss = $(round(loss(m,X,y); digits=4)) | ") 56 | @show deepbuzz.([3, 5, 15, 98]) 57 | end 58 | 59 | # Training 60 | opt = setup(Adam(eta), m) 61 | for e in 0:epochs 62 | if e % 50 == 0 63 | monitor(e) 64 | end 65 | train!(loss, m, [(X, y)], opt) 66 | end 67 | 68 | return m 69 | end 70 | 71 | train() 72 | -------------------------------------------------------------------------------- /other/flux-next/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" 3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 4 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0" 5 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 6 | Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" 7 | -------------------------------------------------------------------------------- /other/flux-next/intro.jl: -------------------------------------------------------------------------------- 1 | # Flux vNext 2 | # ========== 3 | 4 | # Optimisation Basics 5 | # ------------------- 6 | 7 | using Flux 8 | using Flux: step! 9 | 10 | # Using Flux is very simple. You write a program, and we'll make tweaks to that 11 | # program so that it gets gradually better. 12 | 13 | # What does it mean to make a program "better"? That's up to you – your program 14 | # returns a score, called the "loss", which determines how well the program 15 | # is doing. Flux's job is to minimise that score. For example, let's take the 16 | # simplest possible program, one that simply returns a constant. 17 | 18 | w = 1 19 | loss = () -> w 20 | 21 | loss() 22 | 23 | # This program doesn't look very interesting, but we can still do something 24 | # interesting with it. The core function that optimises programs for us is 25 | # `step!`. We have to pass `step!` an *optimiser* called `Descent`; this basically 26 | # tells Flux how aggressive to be, but we'll talk more about that later. 27 | 28 | opt = Descent(1) 29 | step!(loss, opt) 30 | 31 | # `step!` returns the same loss as before, `1`. But something more interesting 32 | # has happened; try running `loss()` again. 33 | 34 | loss() 35 | 36 | # It went down! And if we keep calling `step!` in a loop, it'll keep going down. 37 | 38 | for i = 1:10 39 | @show step!(loss, opt) 40 | end 41 | 42 | loss() 43 | 44 | # Of course, this case is pretty easy: we can always improve the parameter by 45 | # making `w` smaller. 46 | 47 | w 48 | 49 | # Here's something harder; now our `loss` is always positive, so it can't keep 50 | # improving indefinitely. Things will stop improving once we hit the *minimum* 51 | # of this function (which we happen to know is at $w = 0$, where $loss = 0$.) 52 | 53 | w = 1 54 | loss = () -> w^2 55 | 56 | opt = Descent(0.2) 57 | for i = 1:10 58 | @show step!(loss, opt) 59 | end 60 | 61 | w 62 | 63 | # You can see that our loss gradually tends towards $0$, and so does $w$. Note, 64 | # however, that Flux will never say: "Ok, we're done here, here's the best value 65 | # for $w$." Though there are tools that can do this in simple cases, Flux is 66 | # designed to scale to extremely complex problems where this is no longer 67 | # possible. So we only make tweaks and it's up to you when to finish. 68 | 69 | # Let's put these ideas towards something a little more interesting. Say we want 70 | # to solve $5x = 10$, to find an $x$ that makes this true. What's our program? 71 | # Well, to start with we want to take $f(x) = 5x$. 
Then our loss should be something like 72 | # $f(x) - 10$, so that it measures how far $f(x)$ is from where we want it to 73 | # be. This doesn't quite work, however, since the loss will be low (negative) if 74 | # $f(x)$ is `-Inf`! So we can use our squaring trick again here, to make 75 | # sure that $f(x) - 10$ tends to zero. 76 | 77 | x = 1 # Our initial guess 78 | f = x -> 5x 79 | 80 | opt = Descent(0.01) 81 | 82 | for i = 1:10 83 | l = step!(opt) do 84 | (f(x) - 10)^2 85 | end 86 | @show l 87 | end 88 | 89 | # Our loss ended up being pretty low. How's our function looking? 90 | 91 | 5x 92 | 93 | # That looks pretty good. So we're beginning to be able to use Flux to solve 94 | # problems where we know what the *output* should look like, but we're not 95 | # sure what the *input* should be to get there. 96 | 97 | # You now arguably understand everything you need to do productive ML. But let's 98 | # look over a few more examples to see how it looks in practice. 99 | 100 | # Optimising Colours 101 | # ------------------ 102 | 103 | # Just like Julia more generally, Flux has good support for custom types. 104 | # This means we can carry out optimisation on things like colours! 105 | 106 | # This example uses the excellent Colors.jl. Colors contains, among other 107 | # things, a `colordiff` function which uses fancy colour theory algorithms to 108 | # estimate the *perceptual* difference between two colours. We can use this 109 | # directly in our loss function. 110 | 111 | using Colors 112 | 113 | target = RGB(1, 0, 0) 114 | colour = RGB(1, 1, 1) 115 | [target, colour] 116 | #- 117 | opt = Descent(0.01) 118 | 119 | for i = 1:10 120 | step!(opt, target) do y 121 | colordiff(colour, y) 122 | end 123 | end 124 | 125 | [colour, target] 126 | 127 | # `colour` started out white and is now red. That makes sense, as we've 128 | # minimised the distance between the two colours. But we can also *maximise* 129 | # with a simple minus sign. 130 | 131 | colour1 = RGB(1, 1, 1) 132 | 133 | for i = 1:10 134 | step!(opt, target) do y 135 | -colordiff(colour1, y) 136 | end 137 | end 138 | 139 | [colour1, target] 140 | 141 | # Now we have green, a colour that's arguably very different from red. However, 142 | # there's a subtlety here; notice what happens if we use a different colour as 143 | # our starting point. 144 | 145 | colour2 = RGB(0, 0, 1) 146 | 147 | for i = 1:10 148 | step!(opt, target) do y 149 | -colordiff(colour2, y) 150 | end 151 | end 152 | 153 | [colour2, target] 154 | 155 | # Now we have a dark blue! If we look directly at `colordiff` we'll see that 156 | # green is better. 157 | 158 | colordiff(target, colour1), colordiff(target, colour2) 159 | 160 | # So why do we get blue here? This is another case where it's important that 161 | # Flux optimises programs through a series of small tweaks. In this case, even 162 | # though green is better overall, making our colour slightly more green actually 163 | # makes our score worse temporarily. 164 | 165 | colordiff(target, RGB(0, 0, 0.4)), colordiff(target, RGB(0, 0.1, 0.4)) 166 | 167 | # This is known as a *local optimum*. It's important to understand how Flux 168 | # optimises programs and what this means for you, so we'll cover this in more 169 | # detail in future. 170 | 171 | # Keras in 5 lines 172 | # ----------------- 173 | 174 | # [Working on making this an MNIST demo, but here's the gist of it.] 175 | 176 | # Dummy data. 177 | 178 | x = rand(10) 179 | y = [1, 0] 180 | 181 | # Logistic regression. 
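# (The idea in brief: model the class probabilities as softmax(W * x .+ b) and
# minimise the cross-entropy between those probabilities and the one-hot target
# y, which is exactly what the lines below do.)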
182 | 183 | using Flux: crossentropy 184 | 185 | W = randn(2, 10) 186 | b = zeros(2) 187 | 188 | predict = x -> softmax(W * x .+ b) 189 | 190 | opt = Descent(0.1) 191 | 192 | loss = (x, y) -> crossentropy(predict(x), y) 193 | 194 | step!(loss, opt, x, y) 195 | 196 | # Multi-layer perceptron. 197 | 198 | function dense(in, out, σ = identity) 199 | W = randn(out, in) 200 | b = zeros(out) 201 | x -> σ.(W * x .+ b) 202 | end 203 | 204 | chain(fs...) = x -> foldl((x, m) -> m(x), fs, init = x) 205 | 206 | model = chain(dense(10, 5, relu), dense(5, 2), softmax) 207 | 208 | # Doesn't quite work yet. 209 | 210 | ## step!(opt, x, y) do x, y 211 | ## crossentropy(model(x), y) 212 | ## end 213 | -------------------------------------------------------------------------------- /other/housing/.gitignore: -------------------------------------------------------------------------------- 1 | housing.data 2 | -------------------------------------------------------------------------------- /other/housing/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" 3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 4 | Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" 5 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 6 | 7 | [compat] 8 | Flux = "0.11.4" 9 | julia = "1.5" 10 | -------------------------------------------------------------------------------- /other/housing/README.md: -------------------------------------------------------------------------------- 1 | # Housing data example 2 | 3 | ![singleneuron](img/singleneuron.svg) 4 | 5 | Source: [Dive into Deep Learning](http://d2l.ai/chapter_linear-networks/linear-regression.html#from-linear-regression-to-deep-networks) 6 | 7 | ## Model Info 8 | 9 | In this example, we create a linear regression model that predicts housing data. It replicates the housing data example from the [Knet.jl readme](https://github.com/denizyuret/Knet.jl). Although we could have reused more of Flux (see the MNIST example), the library's abstractions are very lightweight and don't force you into any particular strategy. 10 | 11 | A linear model can be created as a neural network with a single layer. The number of inputs is the same as the features that the data has. Each input is connected to a single output with no activation function. Then, the output of the model is a linear function that predicts unseen data. 12 | 13 | ## Training 14 | To run this example: 15 | 16 | ```script 17 | cd other/housing/ 18 | julia --project housing.jl 19 | ``` 20 | 21 | ## Reference 22 | 23 | * [Aston Zhang, Zachary C. Lipton, Mu Li and Alexander J. Smola, "Dive into Deep Learning", 2022](http://d2l.ai/chapter_linear-networks/linear-regression.html#from-linear-regression-to-deep-networks) 24 | -------------------------------------------------------------------------------- /other/housing/housing.jl: -------------------------------------------------------------------------------- 1 | # # Housing data 2 | 3 | # In this example, we create a linear regression model that predicts housing data. 4 | # It replicates the housing data example from the [Knet.jl readme](https://github.com/denizyuret/Knet.jl). 5 | # Although we could have reused more of Flux (see the MNIST example), the library's abstractions are very 6 | # lightweight and don't force you into any particular strategy. 7 | 8 | # A linear model can be created as a neural network with a single layer. 
9 | # The number of inputs is the same as the features that the data has. 10 | # Each input is connected to a single output with no activation function. 11 | # Then, the output of the model is a linear function that predicts unseen data. 12 | 13 | # ![singleneuron](img/singleneuron.svg) 14 | 15 | # Source: [Dive into Deep Learning](http://d2l.ai/chapter_linear-networks/linear-regression.html#from-linear-regression-to-deep-networks) 16 | 17 | # To run this example, we need the following packages: 18 | 19 | using Flux 20 | using Flux: gradient 21 | using Flux.Optimise: update! 22 | using DelimitedFiles, Statistics 23 | using Parameters: @with_kw 24 | 25 | 26 | # We set default values for the learning rate (for the training routine) and the percentage of 27 | # the data that we use when testing the model: 28 | 29 | @with_kw mutable struct Hyperparams 30 | ## Learning rate 31 | lr::Float64 = 0.1 32 | ## Train Test split ratio, define percentage of data to be used as Test data 33 | split_ratio::Float64 = 0.1 34 | end 35 | 36 | 37 | # ## Data 38 | 39 | # We create the function `get_processed_data` to load the housing data, normalize it, 40 | # and finally split it into train and test datasets: 41 | 42 | 43 | function get_processed_data(args) 44 | isfile("housing.data") || 45 | download("https://raw.githubusercontent.com/MikeInnes/notebooks/master/housing.data", 46 | "housing.data") 47 | 48 | rawdata = readdlm("housing.data")' 49 | 50 | ## The last feature is our target -- the price of the house. 51 | split_ratio = args.split_ratio ## For the train test split 52 | 53 | x = rawdata[1:13,:] 54 | y = rawdata[14:14,:] 55 | 56 | ## Normalise the data 57 | x = (x .- mean(x, dims = 2)) ./ std(x, dims = 2) 58 | 59 | ## Split into train and test sets 60 | split_index = floor(Int,size(x,2)*split_ratio) 61 | x_train = x[:,1:split_index] 62 | y_train = y[:,1:split_index] 63 | x_test = x[:,split_index+1:size(x,2)] 64 | y_test = y[:,split_index+1:size(x,2)] 65 | 66 | train_data = (x_train, y_train) 67 | test_data = (x_test, y_test) 68 | 69 | return train_data,test_data 70 | end 71 | 72 | # This function performs the following tasks: 73 | 74 | # 1. Downloads the housing data. The original size of the data is 505 rows and 14 columns. 75 | # 2. Loads the data as a 14x505 matrix. This is the shape that Flux expects. 76 | # 3. Splits the data into features and a target. Notice that the 14th row corresponds to the target for each example. 77 | # 4. Normalizes the data. For more information on normalizing data, see [How to Use StandardScaler and MinMaxScaler Transforms in Python](https://machinelearningmastery.com/standardscaler-and-minmaxscaler-transforms-in-python/). 78 | # 5. Splits the data into train and test datasets. 79 | 80 | 81 | # ## Model 82 | # We use a struct to define the model’s parameters. 83 | # It contains an array for holding the weights *W* and a vector for the bias term *b*: 84 | 85 | mutable struct model 86 | W::AbstractArray 87 | b::AbstractVector 88 | end 89 | 90 | # Also, we create the function `predict` to compute the model’s output: 91 | 92 | predict(x, m) = m.W*x .+ m.b 93 | 94 | # Notice that the function `predict` takes as an argument the model struct we defined above. 95 | 96 | # ## Loss function 97 | 98 | # The most commonly used loss function for Linear Regression is Mean Squared Error (MSE). 
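# For predictions ŷ and targets y over n observations, it is
# MSE(ŷ, y) = (1/n) * Σᵢ (ŷᵢ - yᵢ)².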
99 | # We define the MSE function as: 100 | 101 | meansquarederror(ŷ, y) = sum((ŷ .- y).^2)/size(y, 2) 102 | 103 | # **Note:** An implementation of the MSE function is also available in 104 | # [Flux](https://fluxml.ai/Flux.jl/stable/models/losses/#Flux.Losses.mse). 105 | 106 | # ## Train function 107 | # Finally, we define the `train` function so that the model learns the best parameters (*W* and *b*): 108 | 109 | 110 | function train(; kws...) 111 | ## Initialize the Hyperparameters 112 | args = Hyperparams(; kws...) 113 | 114 | ## Load the data 115 | (x_train,y_train),(x_test,y_test) = get_processed_data(args) 116 | 117 | ## The model 118 | m = model((randn(1,13)),[0.]) 119 | 120 | loss(x, y) = meansquarederror(predict(x, m), y) 121 | 122 | ## Training 123 | η = args.lr 124 | θ = params(m.W, m.b) 125 | 126 | for i = 1:500 127 | g = gradient(() -> loss(x_train, y_train), θ) 128 | for x in θ 129 | update!(x, g[x]*η) 130 | end 131 | if i%100==0 132 | @show loss(x_train, y_train) 133 | end 134 | end 135 | 136 | ## Compute the MSE on the test set 137 | err = meansquarederror(predict(x_test, m),y_test) 138 | println(err) 139 | end 140 | 141 | # The function above initializes the model’s parameters *W* and *b* randomly. 142 | # Then, it sets the learning rate η and θ as a 143 | # [params object](https://fluxml.ai/Flux.jl/stable/training/training/#Flux.params) 144 | # that points to W and b. Also, it sets a 145 | # [custom training loop](https://fluxml.ai/Flux.jl/stable/training/training/#Custom-Training-loops) 146 | # which is the [Gradient descent algorithm](https://en.wikipedia.org/wiki/Gradient_descent). 147 | # Finally, it computes the MSE for the test set. 148 | 149 | # ## Run the example 150 | # We call the `train` function to run the Housing data example: 151 | 152 | cd(@__DIR__) 153 | train() 154 | -------------------------------------------------------------------------------- /other/iris/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" 3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 4 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" 5 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 6 | 7 | [compat] 8 | DataFrames = "1.4.3" 9 | Flux = "0.13.9, 0.14" 10 | MLDatasets = "0.7.6" 11 | julia = "1.6" 12 | -------------------------------------------------------------------------------- /other/iris/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Use Flux to do Logistic Regression on the Iris dataset 4 | 5 | This is a very simple model, with a single layer that outputs to softmax. 6 | 7 | Logistic regression can basically be thought of as a [single layer neural network](https://sebastianraschka.com/faq/docs/logisticregr-neuralnet.html). 8 | 9 | ## Data Source 10 | 11 | The data source is Fisher's classic dataset, retrieved from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/iris). 12 | 13 | ## Usage 14 | 15 | `cd` into `model-zoo/other/iris`, start the Julia REPL and instantiate the environment: 16 | 17 | ```julia 18 | 19 | julia> using Pkg; Pkg.activate("."); Pkg.instantiate() 20 | 21 | ``` 22 | 23 | Then train and evaluate the model: 24 | 25 | ```julia 26 | 27 | julia> include("iris.jl") 28 | Starting training. 
29 | 30 | Accuracy: 0.94 31 | 32 | Confusion Matrix: 33 | 34 | 3×3 Array{Int64,2}: 35 | 16 0 0 36 | 0 16 1 37 | 0 2 15 38 | 39 | julia> 40 | 41 | ``` 42 | -------------------------------------------------------------------------------- /other/iris/iris.jl: -------------------------------------------------------------------------------- 1 | # # Iris data 2 | 3 | # In this example, we create a logistic regression model that classifies iris flowers. 4 | # It consists of a [single-layer neural network](https://sebastianraschka.com/faq/docs/logisticregr-neuralnet.html) 5 | # that outputs **three** probabilities (one for each species of iris flowers). 6 | # We use Fisher's classic dataset to train the model. This dataset is retrieved from 7 | # the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/iris). 8 | 9 | # In Machine Learning, a classification task can be performed by a logistic regression model. 10 | # However, we can also create a logistic regression model as a single-layer neural network. 11 | # This neural network has the following characteristics: 12 | 13 | # * Uses the [logitcrossentropy](https://fluxml.ai/Flux.jl/stable/models/losses/#Flux.Losses.logitcrossentropy) loss function. 14 | # * Expects the class labels of the iris flowers encoded using [One-Hot encoding](https://fluxml.ai/Flux.jl/stable/data/onehot/#One-Hot-Encoding). 15 | # * Outputs the index in the output vector with the highest value as the class label using 16 | # [onecold](https://fluxml.ai/Flux.jl/stable/data/onehot/#Flux.onecold) which is the inverse operation of One-Hot encoding. 17 | 18 | # To run this example, we need the following packages: 19 | 20 | # Suggested in the documentation README; uncomment if the packages need to be installed 21 | # import Pkg 22 | # Pkg.activate(".") # activate in the folder of iris 23 | # Pkg.instantiate() # installs required packages for the example 24 | 25 | using Flux, MLDatasets, DataFrames 26 | using Flux: logitcrossentropy, normalise, onecold, onehotbatch 27 | using Statistics: mean 28 | 29 | # We set default values for the learning rate *lr* (for the training routine) and the number of 30 | # times that we repeat the train data (more information below): 31 | 32 | Base.@kwdef mutable struct Args 33 | lr::Float64 = 0.5 34 | repeat::Int = 110 35 | end 36 | 37 | # ## Data 38 | 39 | # We create the function `get_processed_data` to load the iris data, preprocess 40 | # it (normalize and One-Hot encode the class labels), and split it into train and test datasets. 41 | 42 | 43 | function get_processed_data(args::Args) 44 | 45 | iris = Iris(as_df=false) 46 | labels = iris.targets |> vec 47 | features = iris.features 48 | 49 | ## Subtract mean, divide by std dev for normed mean of 0 and std dev of 1. 50 | normed_features = normalise(features, dims=2) 51 | 52 | klasses = sort(unique(labels)) 53 | onehot_labels = onehotbatch(labels, klasses) 54 | 55 | ## Split into training and test sets, 2/3 for training, 1/3 for test. 56 | train_indices = [1:3:150 ; 2:3:150] 57 | 58 | X_train = normed_features[:, train_indices] 59 | y_train = onehot_labels[:, train_indices] 60 | 61 | X_test = normed_features[:, 3:3:150] 62 | y_test = onehot_labels[:, 3:3:150] 63 | 64 | ## Repeat the data `args.repeat` times 65 | train_data = Iterators.repeated((X_train, y_train), args.repeat) 66 | test_data = (X_test,y_test) 67 | 68 | return train_data, test_data 69 | end 70 | 71 | # The iris data is a 4×150 matrix. 
This means that the iris data has 150 examples, 72 | # and each example has four features as well as a class label. 73 | # After normalizing and encoding the data, the `get_processed_data` function divides it into train and test data. 74 | # Also, it repeats the examples in the train data so that we have more data to train the neural network. 75 | 76 | 77 | # ## Metrics 78 | 79 | # We use two functions to assess the output of the model: `accuracy` and `confusion matrix`. 80 | # The [accuracy function](https://developers.google.com/machine-learning/crash-course/classification/accuracy) 81 | # measures the percentage of the labels that the model classified correctly. 82 | # On the other hand, the [confusion matrix](https://machinelearningmastery.com/confusion-matrix-machine-learning/) 83 | # is a table that summarises how good the model is for predicting data. 84 | 85 | 86 | accuracy(model, x, y) = mean(onecold(model(x)) .== onecold(y)) 87 | 88 | 89 | function confusion_matrix(model, X, y) 90 | ŷ = onehotbatch(onecold(model(X)), 1:3) 91 | y * transpose(ŷ) 92 | end 93 | 94 | # ## Train function 95 | 96 | # We define the `train` function that builds the model and trains it: 97 | 98 | function train(; kws...) 99 | ## Initialize hyperparameter arguments 100 | args = Args(; kws...) 101 | 102 | ## Load processed data 103 | train_data, test_data = get_processed_data(args) 104 | 105 | ## Declare model taking 4 features as inputs and outputting 3 probabilities, 106 | ## one for each species of iris. 107 | model = Chain(Dense(4, 3)) 108 | 109 | ## Define loss function to be used in training 110 | ## For numerical stability, we use here logitcrossentropy 111 | loss(m, x, y) = logitcrossentropy(m(x), y) 112 | 113 | ## Training 114 | ## Gradient descent optimiser with learning rate `args.lr` 115 | optimiser = Descent(args.lr) 116 | ## For any other optimiser, we would need e.g. 117 | ## opt_state = Flux.setup(Momentum(args.lr), model) 118 | 119 | println("Starting training.") 120 | Flux.train!(loss, model, train_data, optimiser) 121 | 122 | return model, test_data 123 | end 124 | 125 | # The function above loads the train and test data. 126 | # Then, it creates the model as a single-layer network that expects as an input 127 | # a four-element vector (features) and outputs a three-element vector 128 | # (the number of classes of species of iris flowers). 129 | # Also, it sets [logitcrossentropy](https://fluxml.ai/Flux.jl/stable/models/losses/#Flux.Losses.logitcrossentropy) 130 | # as the loss function and the Gradient descent optimiser 131 | # [Descent](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.Descent). 132 | # Finally, it runs a training step with the 133 | # [train! function](https://fluxml.ai/Flux.jl/stable/training/training/#Flux.Optimise.train!). 134 | 135 | # ## Test function 136 | 137 | # After training the model, we define the `test` function that 138 | # computes the model performance on the test data. 139 | # It calls the `accuracy` function and displays the confusion matrix. 140 | # This function validates that the model achieves at least a 0.8 accuracy score. 141 | 142 | 143 | function test(model, test) 144 | ## Testing model performance on test data 145 | X_test, y_test = test 146 | accuracy_score = accuracy(model, X_test, y_test) 147 | 148 | println("\nAccuracy: $accuracy_score") 149 | 150 | ## Sanity check. 
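## (With the default hyperparameters this typically lands around 0.94, as in the
## README, comfortably above the 0.8 threshold.)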
151 | @assert accuracy_score > 0.8 152 | 153 | ## To avoid confusion, here is the definition of a 154 | ## Confusion Matrix: https://en.wikipedia.org/wiki/Confusion_matrix 155 | println("\nConfusion Matrix:\n") 156 | display(confusion_matrix(model, X_test, y_test)) 157 | end 158 | 159 | # ## Run the example 160 | 161 | # We call the `train` function to run the iris data example and compute the model performance: 162 | 163 | cd(@__DIR__) 164 | model, test_data = train() 165 | test(model, test_data) 166 | -------------------------------------------------------------------------------- /script/Manifest.toml: -------------------------------------------------------------------------------- 1 | # This file is machine-generated - editing it directly is not advised 2 | 3 | [[Base64]] 4 | uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" 5 | 6 | [[BinaryProvider]] 7 | deps = ["Libdl", "SHA"] 8 | git-tree-sha1 = "5b08ed6036d9d3f0ee6369410b830f8873d4024c" 9 | uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232" 10 | version = "0.5.8" 11 | 12 | [[Compat]] 13 | deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] 14 | git-tree-sha1 = "ed2c4abadf84c53d9e58510b5fc48912c2336fbb" 15 | uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" 16 | version = "2.2.0" 17 | 18 | [[Dates]] 19 | deps = ["Printf"] 20 | uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" 21 | 22 | [[DelimitedFiles]] 23 | deps = ["Mmap"] 24 | uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" 25 | 26 | [[Distributed]] 27 | deps = ["Random", "Serialization", "Sockets"] 28 | uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" 29 | 30 | [[FluxBot]] 31 | deps = ["GitHub", "Glob", "Pkg", "Sockets"] 32 | git-tree-sha1 = "74ace680acd73d83cea4f98fe4be087a48e9a278" 33 | repo-rev = "master" 34 | repo-url = "https://github.com/dhairyagandhi96/FluxBot.jl.git" 35 | uuid = "352bd040-0f98-11ea-1faf-6f930ca83554" 36 | version = "0.1.0" 37 | 38 | [[GitHub]] 39 | deps = ["Base64", "Dates", "HTTP", "JSON", "MbedTLS", "Sockets"] 40 | git-tree-sha1 = "f8f9c05004861b6680c1bd363e7e2fcff602a283" 41 | uuid = "bc5e4493-9b4d-5f90-b8aa-2b2bcaad7a26" 42 | version = "5.1.4" 43 | 44 | [[Glob]] 45 | deps = ["Compat", "Test"] 46 | git-tree-sha1 = "c72f1fcb7d17426de1e8af2e948dfb3de1116eed" 47 | uuid = "c27321d9-0574-5035-807b-f59d2c89b15c" 48 | version = "1.2.0" 49 | 50 | [[HTTP]] 51 | deps = ["Base64", "Dates", "IniFile", "MbedTLS", "Sockets"] 52 | git-tree-sha1 = "5c49dab19938b119fe204fd7d7e8e174f4e9c68b" 53 | uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" 54 | version = "0.8.8" 55 | 56 | [[IniFile]] 57 | deps = ["Test"] 58 | git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8" 59 | uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f" 60 | version = "0.5.0" 61 | 62 | [[InteractiveUtils]] 63 | deps = ["Markdown"] 64 | uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" 65 | 66 | [[JSON]] 67 | deps = ["Dates", "Mmap", "Parsers", "Unicode"] 68 | git-tree-sha1 = "b34d7cef7b337321e97d22242c3c2b91f476748e" 69 | uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" 70 | version = "0.21.0" 71 | 72 | [[LibGit2]] 73 | uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" 74 | 75 | [[Libdl]] 76 | uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" 77 | 78 | [[LinearAlgebra]] 79 | deps = ["Libdl"] 80 | uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" 81 | 82 | [[Literate]] 83 | deps = ["Base64", "JSON", "REPL", "Test"] 84 | 
git-tree-sha1 = "71cdca07ffe1731c0ccbef801913e4ab8f9aac2b" 85 | pinned = true 86 | uuid = "98b081ad-f1c9-55d3-8b20-4c87d4299306" 87 | version = "1.0.2" 88 | 89 | [[Logging]] 90 | uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" 91 | 92 | [[Markdown]] 93 | deps = ["Base64"] 94 | uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" 95 | 96 | [[MbedTLS]] 97 | deps = ["BinaryProvider", "Dates", "Distributed", "Libdl", "Random", "Sockets", "Test"] 98 | git-tree-sha1 = "2d94286a9c2f52c63a16146bb86fd6cdfbf677c6" 99 | uuid = "739be429-bea8-5141-9913-cc70e7f3736d" 100 | version = "0.6.8" 101 | 102 | [[Mmap]] 103 | uuid = "a63ad114-7e13-5084-954f-fe012c677804" 104 | 105 | [[Parsers]] 106 | deps = ["Dates", "Test"] 107 | git-tree-sha1 = "d112c19ccca00924d5d3a38b11ae2b4b268dda39" 108 | uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" 109 | version = "0.3.11" 110 | 111 | [[Pkg]] 112 | deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] 113 | uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" 114 | 115 | [[Printf]] 116 | deps = ["Unicode"] 117 | uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" 118 | 119 | [[REPL]] 120 | deps = ["InteractiveUtils", "Markdown", "Sockets"] 121 | uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" 122 | 123 | [[Random]] 124 | deps = ["Serialization"] 125 | uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 126 | 127 | [[SHA]] 128 | uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" 129 | 130 | [[Serialization]] 131 | uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" 132 | 133 | [[SharedArrays]] 134 | deps = ["Distributed", "Mmap", "Random", "Serialization"] 135 | uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" 136 | 137 | [[Sockets]] 138 | uuid = "6462fe0b-24de-5631-8697-dd941f90decc" 139 | 140 | [[SparseArrays]] 141 | deps = ["LinearAlgebra", "Random"] 142 | uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" 143 | 144 | [[Statistics]] 145 | deps = ["LinearAlgebra", "SparseArrays"] 146 | uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 147 | 148 | [[Test]] 149 | deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] 150 | uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 151 | 152 | [[UUIDs]] 153 | deps = ["Random", "SHA"] 154 | uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" 155 | 156 | [[Unicode]] 157 | uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" 158 | -------------------------------------------------------------------------------- /script/Notebooks.toml: -------------------------------------------------------------------------------- 1 | [MNIST] 2 | path = "vision/mnist" 3 | notebook = ["mlp.jl", "autoencoder.jl", "vae.jl"] 4 | 5 | [CIFAR10] 6 | path = "vision/cifar10" 7 | notebook = "cifar10.jl" 8 | 9 | [char-rnn] 10 | path = "text/char-rnn" 11 | notebook = "char-rnn.jl" 12 | 13 | [lang-detection] 14 | path = "text/lang-detection" 15 | notebook = "model.jl" 16 | deps = ["scrape.jl"] 17 | 18 | [phonemes] 19 | path = "text/phonemes" 20 | notebook = "1-model.jl" 21 | deps = ["0-data.jl"] 22 | 23 | [treebank] 24 | path = "text/treebank" 25 | notebook = "recursive.jl" 26 | deps = ["data.jl"] 27 | 28 | [diffeq] 29 | path = "other/diffeq" 30 | notebook = "diffeq.jl" 31 | 32 | #[bitstring-parity] 33 | #path = "other/bitstring-parity" 34 | #notebook = ["xor1.jl", "xor2.jl", "xor3.jl"] 35 | #deps = "data.jl" 36 | 37 | [fizzbuzz] 38 | path = "other/fizzbuzz" 39 | notebook = "fizzbuzz.jl" 40 | 41 | [housing] 42 | path = "other/housing" 43 | notebook = "housing.jl" 44 | 45 | [tutorials] 46 | path="tutorials" 47 | notebook="60-minute-blitz.jl" 48 | 
-------------------------------------------------------------------------------- /script/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | FluxBot = "352bd040-0f98-11ea-1faf-6f930ca83554" 3 | Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" 4 | -------------------------------------------------------------------------------- /script/convert.jl: -------------------------------------------------------------------------------- 1 | using Pkg, Pkg.TOML 2 | 3 | root = joinpath(@__DIR__, "..") 4 | 5 | meta = TOML.parsefile(joinpath(@__DIR__, "Notebooks.toml")) 6 | meta = meta[ARGS[1]] 7 | 8 | path = meta["path"] 9 | deps = get(meta, "deps", []) 10 | deps = deps isa String ? [deps] : deps 11 | 12 | for d in ["Project.toml", "Manifest.toml", ".gitignore"] 13 | isfile(joinpath(root, path, d)) && push!(deps, d) 14 | end 15 | 16 | mkpath(joinpath(root, "notebooks", path)) 17 | for dep in deps 18 | cp(joinpath(root, path, dep), joinpath(root, "notebooks", path, dep), force = true) 19 | end 20 | 21 | pushfirst!(LOAD_PATH, @__DIR__) 22 | Pkg.activate(joinpath(root, "notebooks", path)) 23 | 24 | using Literate 25 | 26 | function postprocess_nb(content) 27 | content = replace(content, r"\s*using CUDA" => "## using CUDA") 28 | return content 29 | end 30 | 31 | function preprocess_nb(content) 32 | content = replace(content, r"#\s*using CUDA" => "using CUDA") 33 | content = "using Pkg; Pkg.activate(\".\"); Pkg.instantiate();\n\n" * content 34 | return content 35 | end 36 | 37 | function init_nb(content) 38 | content = "using Pkg; Pkg.activate(\"$root\"); Pkg.status();\n\n" * content 39 | return content 40 | end 41 | 42 | scripts = meta["notebook"] 43 | scripts isa String && (scripts = [scripts]) 44 | 45 | for script in scripts 46 | Literate.notebook(joinpath(root, path, script), 47 | joinpath(root, "notebooks", path), 48 | credit = false, preprocess = preprocess_nb, 49 | postprocess = postprocess_nb) 50 | end 51 | 52 | scripts = map(x -> x[1:end - 3] * ".ipynb", scripts) 53 | nbs = filter(x -> endswith(x, ".ipynb"), readdir(joinpath(root, path))) 54 | keep = union(deps, scripts, nbs) 55 | files = readdir(joinpath(root, "notebooks", path)) 56 | 57 | for r in files 58 | r in keep || rm(joinpath(root, "notebooks", path, r), force = true) 59 | end 60 | -------------------------------------------------------------------------------- /script/notebook.jl: -------------------------------------------------------------------------------- 1 | using Pkg 2 | Pkg.activate(@__DIR__) 3 | Pkg.instantiate() 4 | 5 | using Pkg.TOML 6 | meta = length(ARGS) > 0 ? 
ARGS : 7 | keys(TOML.parsefile(joinpath(@__DIR__, "Notebooks.toml"))) 8 | 9 | convertjl = joinpath(@__DIR__, "convert.jl") 10 | 11 | for proj in meta 12 | run(`$(Base.julia_cmd()) $convertjl $proj`) 13 | end 14 | -------------------------------------------------------------------------------- /text/char-rnn/.gitignore: -------------------------------------------------------------------------------- 1 | input.txt 2 | -------------------------------------------------------------------------------- /text/char-rnn/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 3 | OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f" 4 | StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" 5 | 6 | [compat] 7 | Flux = "0.13.9, 0.14" -------------------------------------------------------------------------------- /text/char-rnn/README.md: -------------------------------------------------------------------------------- 1 | # Character-Level RNN 2 | 3 | ![char-rnn](../char-rnn/docs/rnn-train.png) 4 | 5 | [Source](https://d2l.ai/chapter_recurrent-neural-networks/rnn.html#rnn-based-character-level-language-models) 6 | 7 | ## Model Information 8 | 9 | A recurrent neural network (RNN) outputs a prediction and a hidden state at each step of the computation. The hidden state captures historical information of a sequence (i.e., the neural network has memory) and the output is the final prediction of the model. We use this type of neural network to model sequences such as text or time series. 10 | 11 | 12 | ## Training 13 | 14 | ```shell 15 | cd text/char-rnn 16 | julia --project char-rnn.jl 17 | ``` 18 | 19 | ## References 20 | 21 | * [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/) 22 | * [Understanding LSTM Networks](https://colah.github.io/posts/2015-08-Understanding-LSTMs/) 23 | * [Aston Zhang, Zachary C. Lipton, Mu Li and Alexander J. 
Smola, "Dive into Deep Learning", 2020](https://d2l.ai/chapter_recurrent-neural-networks/rnn.html#rnn-based-character-level-language-models) 24 | 25 | -------------------------------------------------------------------------------- /text/char-rnn/docs/rnn-train.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/text/char-rnn/docs/rnn-train.png -------------------------------------------------------------------------------- /text/lang-detection/.gitignore: -------------------------------------------------------------------------------- 1 | corpus 2 | -------------------------------------------------------------------------------- /text/lang-detection/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Cascadia = "54eefc05-d75b-58de-a785-1a3403f0919f" 3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 4 | Gumbo = "708ec375-b3d6-5a57-a7ce-8257bf98657a" 5 | HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" 6 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 7 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 8 | Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" 9 | 10 | [compat] 11 | Cascadia = "1" 12 | Flux = "0.13.9, 0.14" 13 | Gumbo = "0.8" 14 | HTTP = "1.7" 15 | julia = "1.6" 16 | -------------------------------------------------------------------------------- /text/lang-detection/README.md: -------------------------------------------------------------------------------- 1 | # Language detection model 2 | 3 | This model uses an LSTM for character-level language detection. Given a sentence of text, each character is fed into the model and the final output determines which of five languages the sentence was written in. 4 | 5 | First run `scrape.jl` to download a Wikipedia data set. `model.jl` contains the actual model and training code. 6 | 7 | ## Training 8 | 9 | ```shell 10 | cd text/lang-detection 11 | julia scrape.jl 12 | julia --project model.jl 13 | ``` 14 | 15 | ## References 16 | 17 | * [Aston Zhang, Zachary C. Lipton, Mu Li and Alexander J. Smola, "Dive into Deep Learning", 2020](https://d2l.ai/chapter_recurrent-modern/lstm.html) 18 | -------------------------------------------------------------------------------- /text/lang-detection/model.jl: -------------------------------------------------------------------------------- 1 | # # Language detection (character-level) 2 | 3 | # In this example, we create a character-level language detection model. Given a sentence (text), each character is fed into an [LSTM](https://d2l.ai/chapter_recurrent-modern/lstm.html) and then the final output determines in which language the text is written. 4 | 5 | # This example illustrates the preprocessing of text data before feeding it into the model as well as the use of a scanner and an encoder for a language model. 
6 | 7 | # If you need more information about how LSTMs work and related technical concepts, 8 | # check out the following resources: 9 | 10 | # * [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/) 11 | # * [Understanding LSTM Networks](https://colah.github.io/posts/2015-08-Understanding-LSTMs/) 12 | # * [Illustrated Guide to Recurrent Neural Networks: Understanding the Intuition](https://www.youtube.com/watch?v=LHXXI4-IEns) 13 | 14 | # To run this example, we need the following packages: 15 | 16 | using Flux 17 | using Flux: onehot, onecold, onehotbatch, logitcrossentropy, reset! 18 | using Statistics: mean 19 | using Random 20 | using Unicode 21 | 22 | # We set default values for hyperparameters: 23 | 24 | Base.@kwdef mutable struct Args 25 | lr::Float64 = 1e-3 ## Learning rate 26 | N::Int = 15 ## Number of neurons in the hidden layer 27 | epochs::Int = 3 ## Number of epochs 28 | test_len::Int = 100 ## Length of the test data 29 | langs_len::Int = 0 ## Number of different languages in the corpora 30 | alphabet_len::Int = 0 ## Total number of characters possible in the corpora 31 | throttle::Int = 10 ## Throttle timeout 32 | end 33 | 34 | # ## Load dataset 35 | 36 | # Before running this example, you need to obtain the data by running the script `scrape.jl`. 37 | # It downloads articles from Wikipedia in five different languages (English, Italian, French, Spanish, and Danish). 38 | # Also, it creates the folder `corpus` that contains five text files (one per language). 39 | 40 | # The function `get_processed_data` reads the text files and creates the data set for training the model. 41 | # First, it loads the raw text into a dictionary. 42 | # Then, it defines the alphabet and the characters that will be represented as unknown. 43 | # Finally, it one-hot encodes the text and its corresponding labels (the language in which it is written) 44 | # before splitting the data into train and test data sets. 45 | 46 | 47 | function get_processed_data(args) 48 | corpora = Dict() 49 | 50 | for file in readdir("corpus") 51 | lang = Symbol(match(r"(.*)\.txt", file).captures[1]) 52 | corpus = split(String(read("corpus/$file")), ".") 53 | corpus = strip.(Unicode.normalize.(corpus, casefold=true, stripmark=true)) 54 | corpus = filter(!isempty, corpus) 55 | corpora[lang] = corpus 56 | end 57 | 58 | langs = collect(keys(corpora)) 59 | args.langs_len = length(langs) 60 | alphabet = ['a':'z'; '0':'9'; ' '; '\n'; '_'] 61 | args.alphabet_len = length(alphabet) 62 | 63 | ## See which chars will be represented as "unknown" 64 | unk_chars = unique(filter(∉(alphabet), join(vcat(values(corpora)...)))) 65 | dataset = [(onehotbatch(s, alphabet, '_'), onehot(l, langs)) for l in langs for s in corpora[l]] |> shuffle 66 | 67 | train, test = dataset[1:end-args.test_len], dataset[end-args.test_len+1:end] 68 | testX, testY = first.(test), last.(test) 69 | return train, testX, testY, langs 70 | end 71 | 72 | # ## Create the model 73 | 74 | # The model consists of an **encoder** and a **classifier**. The **encoder** reads the sentence one character 75 | # at a time using one [dense](https://fluxml.ai/Flux.jl/stable/models/layers/#Flux.Dense) 76 | # layer and one [LSTM](https://fluxml.ai/Flux.jl/stable/models/layers/#Flux.LSTM) layer, and encodes it through 77 | # the state of its last character. 78 | # The **classifier** takes this encoding as input and outputs the predicted language for the sentence.
79 | # The model is defined as a [Custom model](https://fluxml.ai/Flux.jl/stable/models/advanced/) 80 | 81 | struct EncoderClassifier{E, C} 82 | encoder::E 83 | classifier::C 84 | end 85 | 86 | function build_model(args) 87 | encoder = Chain(Dense(args.alphabet_len, args.N, σ), LSTM(args.N, args.N)) 88 | classifier = Dense(args.N, args.langs_len) 89 | return EncoderClassifier(encoder, classifier) 90 | end 91 | 92 | # Notice that we use the function [reset!](https://fluxml.ai/Flux.jl/stable/models/layers/#Flux.reset!) 93 | # when computing the model's prediction to reset the hidden state of an LSTM layer back to its original value. 94 | 95 | function (m::EncoderClassifier)(x) 96 | state = m.encoder(x)[:, end] 97 | Flux.reset!(m.encoder) 98 | m.classifier(state) 99 | end 100 | 101 | Flux.@functor EncoderClassifier 102 | 103 | # ## Train the model 104 | 105 | # The function `train` executes one training step for the model 106 | # using Flux’s [train!](https://fluxml.ai/Flux.jl/stable/training/training/#Flux.Optimise.train!). 107 | # It uses the loss function 108 | # [logitcrossentropy](https://fluxml.ai/Flux.jl/stable/models/losses/#Flux.Losses.logitcrossentropy) 109 | # and the [ADAM](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.ADAM) optimizer. 110 | 111 | function train(; kws...) 112 | ## Initialize Hyperparameters 113 | args = Args(; kws...) 114 | 115 | ## Load Data 116 | train_data, test_X, test_Y, langs = get_processed_data(args) 117 | 118 | @info("Constructing Model...") 119 | model = build_model(args) 120 | loss(model, x, y) = logitcrossentropy(model(x), y) 121 | opt = Flux.setup(ADAM(args.lr), model) 122 | 123 | @info("Training...") 124 | for epoch in 1:args.epochs 125 | Flux.train!(loss, model, train_data, opt) 126 | test_loss = mean(loss(model, x, y) for (x, y) in zip(test_X, test_Y)) 127 | @show epoch, test_loss 128 | end 129 | 130 | test_predictions = [onecold(model(x), langs) for x in test_X] 131 | accuracy = mean(test_predictions .== [onecold(y, langs) for y in test_Y]) 132 | @show accuracy 133 | end 134 | 135 | cd(@__DIR__) 136 | train() 137 | -------------------------------------------------------------------------------- /text/lang-detection/scrape.jl: -------------------------------------------------------------------------------- 1 | using Cascadia, Gumbo, HTTP 2 | 3 | pages = Dict( 4 | :en => ["Wikipedia", "Osama_bin_Laden_(elephant)", "List_of_lists_of_lists", "Josephine_Butler", "Canadian_football", "Judaism"], 5 | :it => ["Wikipedia", "Ludovico_Einaudi", "Filosofia_della_scienza", "Pizza", "Effie_Gray", "Galeazzo_Maria_Sforza", "Ebraismo"], 6 | :fr => ["Wikipedia", "Philosophie_des_sciences", "Seconde_Guerre_mondiale", "Eric_Hakonsson"], 7 | :es => ["Wikipedia", "Chorizo", "Historia_de_Barcelona", "Espania", "Las_Vegas_Strip", "Judaismo"], 8 | :da => ["Wikipedia", "H.C._Andersen", "L.A._Ring", "Jiangxi", "NATO", "Thomas_Edison", "Bangladesh"]) 9 | 10 | rawpage(url) = parsehtml(String(HTTP.get(url).body)).root 11 | 12 | content(url) = join((collect(nodeText(m) for m in eachmatch(sel".mw-parser-output > p", rawpage(url)))), "\n") 13 | 14 | cd(@__DIR__) 15 | mkpath("corpus") 16 | 17 | for (lang, ps) in pages 18 | open("corpus/$lang.txt", "w") do io 19 | for p in ps 20 | write(io, content("https://$lang.wikipedia.org/wiki/$p")) 21 | end 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /text/nanogpt/Project.toml: -------------------------------------------------------------------------------- 1 | 
[deps] 2 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" 3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 4 | JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" 5 | MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" 6 | ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" 7 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 8 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 9 | StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" 10 | cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" 11 | -------------------------------------------------------------------------------- /text/nanogpt/README.md: -------------------------------------------------------------------------------- 1 | # Generative pre-trained transformer 2 | 3 | ![GPT architecture](docs/Full_GPT_architecture.svg) 4 | 5 | [Source](https://en.wikipedia.org/wiki/Generative_pre-trained_transformer) 6 | 7 | ## Model Information 8 | 9 | GPT is built on a multi-head attention architecture. We offer here a very small instance based on Andrej Karpathy's [nanoGPT](https://github.com/karpathy/nanoGPT). The default parameters give a model much smaller than nanoGPT, tuned for fastest convergence on a very small data set (Shakespeare). 10 | 11 | This model takes as input a sequence of existing text (context) and produces as output the predicted next character. In fact, it produces the predicted next character for each initial sub-sequence of the input, in effect giving an extra degree of parallelism for the purposes of training. 12 | 13 | For the attention mechanism, we use [Flux.MultiHeadAttention](https://fluxml.ai/Flux.jl/stable/reference/models/layers/#MultiHeadAttention). 14 | 15 | 16 | ## Training 17 | 18 | ```shell 19 | cd text/nanogpt 20 | julia --project gpt.jl 21 | ``` 22 | 23 | ## Example output 24 | 25 | After one epoch: 26 | 27 | generate(model, "_", 50) = "_me, but plept fairs, And heards, verchean my word" 28 | generate(model, "_", 50) = "_ows know yought, This alce! totether him. weliest" 29 | generate(model, "The", 50) = "These prurd passtion? CINCESSIT: He eloucy I must" 30 | generate(model, "The", 50) = "The bitherse dresic in to so shall with a his the " 31 | 32 | After 20 epochs: 33 | 34 | generate(model, "_", 50) = "_ething a calling do me diseases Of, on he's to th" 35 | generate(model, "_", 50) = "_ ragg Thou flatters all in wators the selfsarut o" 36 | generate(model, "The", 50) = "The Mirtouggake Go: For my mischance lords his sea" 37 | generate(model, "The", 50) = "The oll-gakemoremo his dead: All this man make gen" 38 | 39 | ## References 40 | 41 | * [Attention is all you need](https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf) 42 | * [Youtube (3blue1brown): Attention in transformers, visually explained](https://www.youtube.com/watch?v=eMlx5fFNoYc) 43 | * [Youtube (Karpathy): Let's build GPT: from scratch, in code, spelled out](https://www.youtube.com/watch?v=kCc8FmEb1nY) 44 | * [nanoGPT](https://github.com/karpathy/nanoGPT) 45 | -------------------------------------------------------------------------------- /text/phonemes/0-data.jl: -------------------------------------------------------------------------------- 1 | using Flux, Flux.Data.CMUDict 2 | using Flux: onehot, batchseq 3 | using Base.Iterators: partition 4 | 5 | tokenise(s, α) = [onehot(c, α) for c in s] 6 | 7 | function getdata(args) 8 | dict = cmudict() 9 | alphabet = [:end, CMUDict.alphabet()...] 10 | args.Nin = length(alphabet) 11 | 12 | phones = [:start, :end, CMUDict.symbols()...]
13 | args.phones_len = length(phones) 14 | 15 | # Turn a word into a sequence of vectors 16 | tokenise("PHYLOGENY", alphabet) 17 | # Same for phoneme lists 18 | tokenise(dict["PHYLOGENY"], phones) 19 | 20 | words = sort(collect(keys(dict)), by = length) 21 | 22 | # Finally, create iterators for our inputs and outputs. 23 | batches(xs, p) = [batchseq(b, p) for b in partition(xs, 50)] 24 | 25 | Xs = batches([tokenise(word, alphabet) for word in words], 26 | onehot(:end, alphabet)) 27 | 28 | Ys = batches([tokenise([dict[word]..., :end], phones) for word in words], 29 | onehot(:end, phones)) 30 | 31 | Yo = batches([tokenise([:start, dict[word]...], phones) for word in words], 32 | onehot(:end, phones)) 33 | 34 | data = collect(zip(Xs, Yo, Ys)) 35 | return data, alphabet, phones 36 | end 37 | -------------------------------------------------------------------------------- /text/phonemes/1-model.jl: -------------------------------------------------------------------------------- 1 | # Based on https://arxiv.org/abs/1409.0473 2 | include("0-data.jl") 3 | using Flux: flip, logitcrossentropy, reset!, throttle 4 | using Parameters: @with_kw 5 | using StatsBase: wsample 6 | 7 | @with_kw mutable struct Args 8 | lr::Float64 = 1e-3 # learning rate 9 | Nin::Int = 0 # size of input layer, will be assigned as length(alphabet) 10 | Nh::Int = 30 # size of hidden layer 11 | phones_len::Int = 0 # length of phonemes 12 | throttle::Int = 30 # throttle timeout 13 | end 14 | 15 | function build_model(args) 16 | # A recurrent model which takes a token and returns a context-dependent 17 | # annotation. 18 | forward = LSTM(args.Nin, args.Nh÷2) 19 | backward = LSTM(args.Nin, args.Nh÷2) 20 | encode(tokens) = vcat.(forward.(tokens), flip(backward, tokens)) 21 | 22 | alignnet = Dense(2*args.Nh, 1) 23 | 24 | # A recurrent model which takes a sequence of annotations, attends, and returns 25 | # a predicted output token. 26 | recur = LSTM(args.Nh+args.phones_len, args.Nh) 27 | toalpha = Dense(args.Nh, args.phones_len) 28 | return (forward, backward, alignnet, recur, toalpha), encode 29 | end 30 | 31 | align(s, t, alignnet) = alignnet(vcat(t, s .* Int.(ones(1, size(t, 2))))) 32 | 33 | function asoftmax(xs) 34 | xs = [exp.(x) for x in xs] 35 | s = sum(xs) 36 | return [x ./ s for x in xs] 37 | end 38 | 39 | function decode1(tokens, phone, state) 40 | # Unpack models 41 | forward, backward, alignnet, recur, toalpha = state 42 | weights = asoftmax([align(recur.state[2], t, alignnet) for t in tokens]) 43 | context = sum(map((a, b) -> a .* b, weights, tokens)) 44 | y = recur(vcat(Float32.(phone), context)) 45 | return toalpha(y) 46 | end 47 | 48 | decode(tokens, phones, state) = [decode1(tokens, phone, state) for phone in phones] 49 | 50 | function model(x, y, state, encode) 51 | # Unpack models 52 | forward, backward, alignnet, recur, toalpha = state 53 | ŷ = decode(encode(x), y, state) 54 | reset!(state) 55 | return ŷ 56 | end 57 | 58 | function predict(s, state, encode, alphabet, phones) 59 | ts = encode(tokenise(s, alphabet)) 60 | ps = Any[:start] 61 | for i = 1:50 62 | dist = softmax(decode1(ts, onehot(ps[end], phones), state)) 63 | next = wsample(phones, vec(dist)) 64 | next == :end && break 65 | push!(ps, next) 66 | end 67 | reset!(state) 68 | return ps[2:end] 69 | end 70 | 71 | function train(; kws...) 72 | # Initialize Hyperparameters 73 | args = Args(; kws...) 
74 | @info("Loading Data...") 75 | data,alphabet,phones = getdata(args) 76 | 77 | # The full model 78 | # state = (forward, backward, alignnet, recur, toalpha) 79 | @info("Constructing Model...") 80 | state, encode = build_model(args) 81 | 82 | loss(x, yo, y) = sum(logitcrossentropy.(model(x, yo, state, encode), y)) 83 | evalcb = () -> @show loss(data[500]...) 84 | opt = ADAM(args.lr) 85 | @info("Training...") 86 | Flux.train!(loss, params(state), data, opt, cb = throttle(evalcb, args.throttle)) 87 | return state, encode, alphabet, phones 88 | end 89 | 90 | cd(@__DIR__) 91 | state, encode, alphabet, phones = train() 92 | @info("Testing...") 93 | predict("PHYLOGENY", state, encode, alphabet, phones) 94 | -------------------------------------------------------------------------------- /text/phonemes/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 3 | Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" 4 | StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" 5 | 6 | [compat] 7 | Flux = "0.11.4" 8 | julia = "1.5" 9 | -------------------------------------------------------------------------------- /text/treebank/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 3 | Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" 4 | 5 | [compat] 6 | Flux = "0.11.4" 7 | julia = "1.5" 8 | -------------------------------------------------------------------------------- /text/treebank/README.md: -------------------------------------------------------------------------------- 1 | # Recursive net on IMDB sentiment treebank 2 | 3 | ![treebank](../treebank/docs/treebank.png) 4 | 5 | [Source](https://nlp.stanford.edu/~socherr/EMNLP2013_RNTN.pdf) 6 | 7 | ## Model information 8 | 9 | A recursive neural network can be used for learning tree-like structures (directed acyclic graphs). It computes compositional vector representations for prhases of variable length which are used as features for performing classification. 10 | 11 | This example uses the [Standford Sentiment Treebank dataset (SST)](https://nlp.stanford.edu/sentiment/index.html) which is often used as one of the benchmark datasets to test new language models. It has five different classes (very negative to very positive) and the goal is to perform sentiment analysis. 12 | 13 | 14 | ## Training 15 | 16 | ```shell 17 | cd text/treebank 18 | julia --project recursive.jl 19 | ``` 20 | 21 | ## References 22 | 23 | * [Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank](https://nlp.stanford.edu/~socherr/EMNLP2013_RNTN.pdf) 24 | -------------------------------------------------------------------------------- /text/treebank/data.jl: -------------------------------------------------------------------------------- 1 | using Flux 2 | using Flux: onehot 3 | using Flux.Data.Sentiment 4 | using Flux.Data: Tree, leaves 5 | 6 | function getdata() 7 | traintrees = Sentiment.train() 8 | 9 | ## Get the raw labels and phrases as separate trees. 10 | labels = map.(x -> x[1], traintrees) 11 | phrases = map.(x -> x[2], traintrees) 12 | 13 | ## All tokens in the training set. 14 | tokens = vcat(map(leaves, phrases)...) 15 | 16 | ## Count how many times each token appears. 17 | freqs = Dict{String,Int}() 18 | for t in tokens 19 | freqs[t] = get(freqs, t, 0) + 1 20 | end 21 | 22 | ## Replace singleton tokens with an "unknown" marker. 
23 | ## This roughly cuts our "alphabet" of tokens in half. 24 | phrases = map.(t -> get(freqs, t, 0) == 1 ? "UNK" : t, phrases) 25 | 26 | ## Our alphabet of tokens. 27 | alphabet = unique(vcat(map(leaves, phrases)...)) 28 | 29 | ## One-hot-encode our training data with respect to the alphabet. 30 | phrases_e = map.(t -> t == nothing ? t : onehot(t, alphabet), phrases) 31 | labels_e = map.(t -> onehot(t, 0:4), labels) 32 | 33 | train = map.(tuple, phrases_e, labels_e) 34 | return train, alphabet 35 | end 36 | -------------------------------------------------------------------------------- /text/treebank/recursive.jl: -------------------------------------------------------------------------------- 1 | # # Recursive net on IMDB sentiment treebank 2 | 3 | # In this example, we create a recursive neural network to perform sentiment analysis using 4 | # IMDB data. 5 | # This type of model can be used 6 | # for learning tree-like structures (directed acyclic graphs). 7 | # It computes compositional vector representations for phrases of variable length 8 | # which are used as features for performing classification. 9 | 10 | # ![treebank](../treebank/docs/treebank.png) 11 | 12 | # [Source](https://nlp.stanford.edu/~socherr/EMNLP2013_RNTN.pdf) 13 | 14 | # This example uses the [Stanford Sentiment Treebank dataset 15 | # (SST)](https://nlp.stanford.edu/sentiment/index.html) which is often used 16 | # as one of the benchmark datasets to test new language models. 17 | # It has five different classes (very negative to very positive) and the 18 | # goal is to perform sentiment analysis. 19 | 20 | # To run this example, we need the following packages: 21 | 22 | using Flux 23 | using Flux: logitcrossentropy, throttle 24 | using Flux.Data: Tree, children, isleaf 25 | using Parameters: @with_kw 26 | 27 | # The script `data.jl` contains the function `getdata` that obtains 28 | # and processes the SST dataset. 29 | 30 | include("data.jl") 31 | 32 | # We set default values for the hyperparameters: 33 | 34 | @with_kw mutable struct Args 35 | lr::Float64 = 1e-3 ## Learning rate 36 | N::Int = 300 37 | throttle::Int = 10 ## Throttle timeout 38 | end 39 | 40 | # ## Build the model 41 | 42 | # The function `train` loads the data, builds and trains the model. 43 | # For more information on how the recursive neural network works, see 44 | # section 4 of [Recursive Deep Models for Semantic Compositionality 45 | # Over a Sentiment Treebank](https://nlp.stanford.edu/~socherr/EMNLP2013_RNTN.pdf). 46 | 47 | function train(; kws...) 48 | ## Initialize HyperParameters 49 | args = Args(; kws...)
50 | 51 | ## Load data 52 | @info("Loading Data...") 53 | train_data, alphabet = getdata() 54 | 55 | @info("Constructing model...") 56 | embedding = randn(Float32, args.N, length(alphabet)) 57 | 58 | @info "Size of the embedding" size(embedding) 59 | 60 | W = Dense(2*args.N, args.N, tanh) 61 | combine(a, b) = W([a; b]) 62 | 63 | sentiment = Chain(Dense(args.N, 5)) 64 | 65 | function forward(tree) 66 | if isleaf(tree) 67 | token, sent = tree.value 68 | phrase = embedding * token 69 | phrase, logitcrossentropy(sentiment(phrase), sent) 70 | else 71 | _, sent = tree.value 72 | c1, l1 = forward(tree[1]) 73 | c2, l2 = forward(tree[2]) 74 | phrase = combine(c1, c2) 75 | phrase, l1 + l2 + logitcrossentropy(sentiment(phrase), sent) 76 | end 77 | end 78 | 79 | loss(tree) = forward(tree)[2] 80 | 81 | opt = ADAM(args.lr) 82 | ps = params(embedding, W, sentiment) 83 | evalcb = () -> @show loss(train_data[1]) 84 | @info("Training Model...") 85 | Flux.train!(loss, ps, zip(train_data), opt, cb = throttle(evalcb, args.throttle)) 86 | end 87 | 88 | # ## Train the model 89 | 90 | cd(@__DIR__) 91 | train() 92 | -------------------------------------------------------------------------------- /tutorials/60-minute-blitz/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 3 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0" 4 | Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc" 5 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 6 | 7 | [compat] 8 | Flux = "0.11.5" 9 | julia = "1.5" 10 | -------------------------------------------------------------------------------- /tutorials/dataloader/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 3 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" 4 | Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" 5 | 6 | [compat] 7 | Flux = "0.13" 8 | MLDatasets = "0.6" 9 | -------------------------------------------------------------------------------- /tutorials/dataloader/README.md: -------------------------------------------------------------------------------- 1 | # Using Flux DataLoader with image data 2 | 3 | In this example, we show how to load image data in Flux DataLoader and process it in mini-batches. We use the [DataLoader](https://fluxml.ai/Flux.jl/stable/data/dataloader/#Flux.Data.DataLoader) type to handle iteration over mini-batches of data. For this example, we load the [MNIST dataset](https://juliaml.github.io/MLDatasets.jl/latest/datasets/MNIST/) using the [MLDatasets](https://juliaml.github.io/MLDatasets.jl/latest/) package. 4 | 5 | Before we start, make sure you have installed the following packages: 6 | 7 | * [Flux](https://github.com/FluxML/Flux.jl) 8 | * [MLDatasets](https://juliaml.github.io/MLDatasets.jl/latest/) 9 | 10 | To install these packages, run the following in the REPL: 11 | 12 | ```julia 13 | using Pkg; Pkg.add("Flux") 14 | Pkg.add("MLDatasets") 15 | ``` 16 | 17 |
18 | 19 | Load the packages we'll need: 20 | 21 | ```julia 22 | using MLDatasets: MNIST 23 | using Flux.Data: DataLoader 24 | using Flux: onehotbatch 25 | ``` 26 | 27 | ## Step 1: Loading the MNIST data set 28 | 29 | We load the MNIST train and test data from MLDatasets: 30 | 31 | ```julia-repl 32 | julia> train_data = MNIST(:train) 33 | dataset MNIST: 34 | metadata => Dict{String, Any} with 3 entries 35 | split => :train 36 | features => 28×28×60000 Array{Float32, 3} 37 | targets => 60000-element Vector{Int64} 38 | 39 | julia> train_x, train_y = train_data[:]; 40 | 41 | julia> test_x, test_y = MNIST(:test)[:]; 42 | ``` 43 |
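As a quick sanity check, we can inspect the shapes (these values are standard for MNIST; the exact output formatting may vary with your MLDatasets version):

```julia-repl
julia> size(train_x), size(train_y)
((28, 28, 60000), (60000,))

julia> size(test_x), size(test_y)
((28, 28, 10000), (10000,))
```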
44 | 45 | This code loads the MNIST train and test images as Float32 arrays, along with their labels. The data set `train_x` is a 28×28×60000 multi-dimensional array: it holds 60000 images, each stored as a 28x28 array whose entries are grayscale pixel intensities. Likewise, `train_y` is a 60000-element vector in which each element is the label, i.e. the actual digit (0 to 9), of the corresponding handwritten image. 46 | 47 | ## Step 2: Loading the dataset onto DataLoader 48 | 49 | Before we load the data onto a DataLoader, we need to reshape it so that it has the correct shape for Flux. For this example, the MNIST train data must match the dimensions that our model's input and output layers expect. 50 | 51 | For example, if our model's input layer expects a 28x28x1 multi-dimensional array, we need to reshape the train and test data as follows: 52 | 53 | ```julia 54 | train_x = reshape(train_x, 28, 28, 1, :) 55 | test_x = reshape(test_x, 28, 28, 1, :) 56 | ``` 57 |
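After the reshape, both arrays carry an explicit singleton channel dimension, matching Flux's WHCN (width, height, channel, batch) convention:

```julia-repl
julia> size(train_x), size(test_x)
((28, 28, 1, 60000), (28, 28, 1, 10000))
```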
58 | 59 | Also, the MNIST labels must be encoded as a vector with the same dimension as the number of categories (unique handwritten digits) in the data set. To encode the labels, we use the [Flux's onehotbatch](https://fluxml.ai/Flux.jl/stable/data/onehot/#Batches-1) function: 60 | 61 | ```julia 62 | train_y, test_y = onehotbatch(train_y, 0:9), onehotbatch(test_y, 0:9) 63 | ``` 64 |
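Each label is now a one-hot column of length 10, so the encoded label arrays have one row per digit class:

```julia-repl
julia> size(train_y), size(test_y)
((10, 60000), (10, 10000))
```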
65 | 66 | >**Note:** For more information on other encoding methods, see [Handling Data in Flux](https://fluxml.ai/Flux.jl/stable/data/onehot/). 67 | 68 | Now, we load the train images and their labels onto a DataLoader object: 69 | 70 | ```julia 71 | data_loader = DataLoader((train_x, train_y), batchsize=128, shuffle=true) 72 | ``` 73 |
74 | 75 | Notice that we set the DataLoader `batchsize` to 128. This will enable us to iterate over the data in batches of size 128. Also, by setting `shuffle=true` the DataLoader will shuffle the observations each time that iterations are re-started. 76 | 77 | ## Step 3: Iterating over the data 78 | 79 | Finally, we can iterate over the 60000 MNIST train data in mini-batches (most of them of size 128) using the Dataloader that we created in the previous step. Each element of the DataLoader is a tuple `(x, y)` in which `x` represents a 28x28x1 array and `y` a vector that encodes the corresponding label of the image. 80 | 81 | ```julia 82 | for (x, y) in data_loader 83 | @assert size(x) == (28, 28, 1, 128) || size(x) == (28, 28, 1, 96) 84 | @assert size(y) == (10, 128) || size(y) == (10, 96) 85 | ... 86 | end 87 | ``` 88 | 89 |
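Putting the pieces together, a minimal training loop over `data_loader` could look like the sketch below. The model, optimiser, and hyperparameters are illustrative placeholders rather than part of this tutorial, and the explicit-gradient `Flux.setup`/`Flux.train!` API assumes Flux 0.13.9 or newer:

```julia
using Flux
using Flux.Losses: logitcrossentropy

# A deliberately simple classifier: flatten each 28x28x1 image to 784 features.
model = Chain(Flux.flatten, Dense(784 => 10))

loss(m, x, y) = logitcrossentropy(m(x), y)
opt_state = Flux.setup(Adam(1e-3), model)

for epoch in 1:5
    # train! iterates the DataLoader, calling loss(model, x, y) per batch.
    Flux.train!(loss, model, data_loader, opt_state)
end
```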
90 | 91 | 92 | Now, we can create a full model and train it using the `data_loader` we just created, as sketched above. For more information on building models in Flux, see [Model-Building Basics](https://fluxml.ai/Flux.jl/stable/models/basics/#Model-Building-Basics-1). 93 | -------------------------------------------------------------------------------- /tutorials/dataloader/dataloader-image-data.jl: -------------------------------------------------------------------------------- 1 | # # An example of DataLoader using image data 2 | 3 | 4 | # In this example, we show how to load image data in Flux DataLoader and process it in mini-batches. 5 | # We use the [DataLoader](https://fluxml.ai/Flux.jl/stable/data/dataloader/#Flux.Data.DataLoader) type 6 | # to handle iteration over mini-batches of data. 7 | # Moreover, we load the [MNIST dataset](https://juliaml.github.io/MLDatasets.jl/stable/datasets/MNIST/) 8 | # using the [MLDatasets](https://juliaml.github.io/MLDatasets.jl/stable/) package. 9 | 10 | 11 | # Load the packages we need: 12 | 13 | using MLDatasets: MNIST 14 | using Flux.Data: DataLoader 15 | using Flux: onehotbatch 16 | using Parameters: @with_kw 17 | 18 | # We set a default value for the size of the mini-batches: 19 | 20 | @with_kw mutable struct Args 21 | minibatch_size::Int = 128 ## Size of the mini-batches 22 | end 23 | 24 | # ## Data 25 | 26 | # We create the function `get_data` to get, prepare and load the data onto a DataLoader object. 27 | 28 | function get_data(args) 29 | 30 | ## Load the MNIST train and test data from MLDatasets 31 | train_x, train_y = MNIST(:train)[:] 32 | test_x, test_y = MNIST(:test)[:] 33 | 34 | ## Reshape data to 28x28x1 multi-dimensional array 35 | train_x = reshape(train_x, 28, 28, 1, :) 36 | test_x = reshape(test_x, 28, 28, 1, :) 37 | 38 | ## Labels must be encoded as a vector with the same dimension 39 | ## as the number of categories (unique handwritten digits) in the data set 40 | train_y, test_y = onehotbatch(train_y, 0:9), onehotbatch(test_y, 0:9) 41 | 42 | ## Now, we load the train and test images and their labels onto a DataLoader object 43 | data_loader_train = DataLoader((train_x, train_y), batchsize=args.minibatch_size, shuffle=true) 44 | data_loader_test = DataLoader((test_x, test_y), batchsize=args.minibatch_size, shuffle=true) 45 | 46 | return data_loader_train, data_loader_test 47 | 48 | end 49 | 50 | # This function performs the following tasks: 51 | # * Loads the MNIST train and test images as Float32 arrays, as well as their labels. The dataset `train_x` is a 28×28×60000 multi-dimensional array, 52 | # holding 60000 images that are each stored as a 28x28 array of grayscale pixel intensities. 53 | # Likewise, `train_y` is a 60000-element vector in which each element is the label or actual value (0 to 9) of the corresponding handwritten digit. 54 | # * Reshapes the train and test data to a 28x28x1 multi-dimensional array. 55 | # * One-hot encodes the train and test labels. It creates a batch of one-hot vectors so we can pass the labels of the data as arguments for the loss function. 56 | # * Creates two DataLoader objects that handle data mini-batches of the size defined above. 57 | 58 | # ## Iterate over data 59 | 60 | # Now, we can iterate over the train data during the training routine we want to define. 61 | 62 | function train(; kws...) 63 | args = Args(; kws...)
64 | 65 | @info("Loading data...") 66 | data_loader_train, data_loader_test = get_data(args) 67 | 68 | ## Iterating over train data 69 | for (x, y) in data_loader_train 70 | @assert size(x) == (28, 28, 1, 128) || size(x) == (28, 28, 1, 96) 71 | @assert size(y) == (10, 128) || size(y) == (10, 96) 72 | end 73 | end 74 | 75 | # ## Run the example 76 | 77 | # We call the `train` function: 78 | 79 | cd(@__DIR__) 80 | train() 81 | -------------------------------------------------------------------------------- /tutorials/transfer_learning/.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /tutorials/transfer_learning/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | DataAugmentation = "88a5189c-e7ff-4f85-ac6b-e6158070f02e" 3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 4 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0" 5 | Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc" 6 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 7 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 8 | 9 | [compat] 10 | Flux = "0.13, 0.14" 11 | julia = "1.6" 12 | -------------------------------------------------------------------------------- /tutorials/transfer_learning/transfer_learning.jl: -------------------------------------------------------------------------------- 1 | # load packages 2 | using Random: shuffle! 3 | import Base: length, getindex 4 | using Images 5 | using Flux, CUDA 6 | using Flux: update! 7 | using DataAugmentation 8 | using Metalhead 9 | 10 | device = CUDA.functional() ? gpu : cpu 11 | # device = cpu 12 | 13 | ## Custom DataLoader 14 | const CATS = readdir(abspath(joinpath("data", "animals", "cats")), join = true) 15 | const DOGS = readdir(abspath(joinpath("data", "animals", "dogs")), join = true) 16 | const PANDA = readdir(abspath(joinpath("data", "animals", "panda")), join = true) 17 | 18 | struct ImageContainer{T<:Vector} 19 | img::T 20 | end 21 | 22 | imgs = [CATS..., DOGS..., PANDA...] 23 | shuffle!(imgs) 24 | data = ImageContainer(imgs) 25 | 26 | length(data::ImageContainer) = length(data.img) 27 | 28 | const im_size = (224, 224) 29 | tfm = DataAugmentation.compose(ScaleKeepAspect(im_size), CenterCrop(im_size)) 30 | name_to_idx = Dict{String,Int32}("cats" => 1, "dogs" => 2, "panda" => 3) 31 | 32 | const mu = [0.485f0, 0.456f0, 0.406f0] 33 | const sigma = [0.229f0, 0.224f0, 0.225f0] 34 | 35 | function getindex(data::ImageContainer, idx::Int) 36 | path = data.img[idx] 37 | _img = Images.load(path) 38 | _img = itemdata(apply(tfm, Image(_img))) 39 | img = collect(channelview(float32.(RGB.(_img)))) 40 | img = permutedims((img .- mu) ./ sigma, (3, 2, 1)) 41 | name = replace(path, r"(.+)\\(.+)\\(.+_\d+)\.jpg" => s"\2") 42 | y = name_to_idx[name] 43 | return img, y 44 | end 45 | 46 | # define DataLoaders 47 | const batchsize = 16 48 | 49 | dtrain = Flux.DataLoader( 50 | ImageContainer(imgs[1:2700]); 51 | batchsize, 52 | collate = true, 53 | parallel = true, 54 | ) 55 | device == gpu ? dtrain = Flux.CuIterator(dtrain) : nothing 56 | 57 | deval = Flux.DataLoader( 58 | ImageContainer(imgs[2701:3000]); 59 | batchsize, 60 | collate = true, 61 | parallel = true, 62 | ) 63 | device == gpu ? 
deval = Flux.CuIterator(deval) : nothing 64 | 65 | # Fine-tune | 🐢 mode 66 | # Load a pre-trained model: 67 | m = Metalhead.ResNet(18, pretrain = true).layers 68 | m_tot = Chain(m[1], AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => 3)) |> device 69 | 70 | function eval_f(m, deval) 71 | good = 0 72 | count = 0 73 | for (x, y) in deval 74 | good += sum(Flux.onecold(m(x)) .== y) 75 | count += length(y) 76 | end 77 | acc = round(good / count, digits = 4) 78 | return acc 79 | end 80 | 81 | function train_epoch!(model; opt, dtrain) 82 | for (x, y) in dtrain 83 | grads = gradient(model) do m 84 | Flux.Losses.logitcrossentropy(m(x), Flux.onehotbatch(y, 1:3)) 85 | end 86 | update!(opt, model, grads[1]) 87 | end 88 | end 89 | 90 | opt = Flux.setup(Flux.Optimisers.Adam(1e-5), m_tot); 91 | 92 | for iter = 1:5 93 | @time train_epoch!(m_tot; opt, dtrain) 94 | metric_train = eval_f(m_tot, dtrain) 95 | metric_eval = eval_f(m_tot, deval) 96 | @info "train" metric = metric_train 97 | @info "eval" metric = metric_eval 98 | end 99 | 100 | # Fine-tune | 🐇 mode 101 | # define models 102 | m_infer = deepcopy(m[1]) |> device 103 | m_tune = Chain(AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => 3)) |> device 104 | 105 | function eval_f(m_infer, m_tune, deval) 106 | good = 0 107 | count = 0 108 | for (x, y) in deval 109 | good += sum(Flux.onecold(m_tune(m_infer(x))) .== y) 110 | count += length(y) 111 | end 112 | acc = round(good / count, digits = 4) 113 | return acc 114 | end 115 | 116 | function train_epoch!(m_infer, m_tune; opt, dtrain) 117 | for (x, y) in dtrain 118 | infer = m_infer(x) 119 | grads = gradient(m_tune) do m 120 | Flux.Losses.logitcrossentropy(m(infer), Flux.onehotbatch(y, 1:3)) 121 | end 122 | update!(opt, m_tune, grads[1]) 123 | end 124 | end 125 | 126 | opt = Flux.setup(Flux.Optimisers.Adam(1e-3), m_tune); 127 | 128 | # training loop 129 | for iter = 1:5 130 | @time train_epoch!(m_infer, m_tune; opt, dtrain) 131 | metric_train = eval_f(m_infer, m_tune, dtrain) 132 | metric_eval = eval_f(m_infer, m_tune, deval) 133 | @info "train" metric = metric_train 134 | @info "eval" metric = metric_eval 135 | end 136 | -------------------------------------------------------------------------------- /vision/cdcgan_mnist/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" 3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 4 | ImageMagick = "6218d12a-5da1-5696-b52f-db25d2ecc6d1" 5 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0" 6 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" 7 | Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" 8 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 9 | Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" 10 | 11 | [compat] 12 | CUDA = "2.4.0" 13 | Flux = "0.11.4" 14 | MLDatasets = "0.6" 15 | julia = "1.5" 16 | -------------------------------------------------------------------------------- /vision/cdcgan_mnist/README.md: -------------------------------------------------------------------------------- 1 | # Conditional DC-GAN 2 | 3 | 4 | 5 | [Source](https://arxiv.org/pdf/1411.1784.pdf) 6 | 7 | ## Model Info 8 | 9 | Generative Adversarial Networks have two models, a _Generator model G(z)_ and a _Discriminator model D(x)_, in competition with each other. G tries to estimate the distribution of the training data and D tries to estimate the probability that a data sample came from the original training data and not from G. 
During training, the Generator learns a mapping from a _prior distribution p(z)_ to the _data space G(z)_. The discriminator D(x) produces the probability that a given x came from the actual training data. 10 | This model can be modified to include additional inputs, y, on which the models can be conditioned. y can be any type of additional inputs, for example, class labels. _The conditioning can be achieved by simply feeding y to both the Generator — G(z|y) and the Discriminator — D(x|y)_. 11 | 12 | ## Training 13 | 14 | ```shell 15 | cd vision/cdcgan_mnist 16 | julia --project cGAN_mnist.jl 17 | ``` 18 | 19 | ## Results 20 | 21 | 1000 training steps 22 | 23 | ![1000 training steps](../cdcgan_mnist/output/cgan_steps_001000.png) 24 | 25 | 3000 training steps 26 | 27 | ![3000 training steps](../cdcgan_mnist/output/cgan_steps_003000.png) 28 | 29 | 5000 training steps 30 | 31 | ![5000 training steps](../cdcgan_mnist/output/cgan_steps_005000.png) 32 | 33 | 10000 training steps 34 | 35 | ![10000 training steps](../cdcgan_mnist/output/cgan_steps_010000.png) 36 | 37 | 11725 training steps 38 | 39 | ![11725 training steps](../cdcgan_mnist/output/cgan_steps_011725.png) 40 | 41 | ## References 42 | 43 | * [Mirza, M. and Osindero, S., “Conditional Generative Adversarial Nets”, arXiv e-prints, 2014.](https://arxiv.org/pdf/1411.1784.pdf) 44 | 45 | * [Training a Conditional DC-GAN on CIFAR-10](https://medium.com/@utk.is.here/training-a-conditional-dc-gan-on-cifar-10-fce88395d610) 46 | -------------------------------------------------------------------------------- /vision/cdcgan_mnist/output/cgan_steps_000000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_000000.png -------------------------------------------------------------------------------- /vision/cdcgan_mnist/output/cgan_steps_001000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_001000.png -------------------------------------------------------------------------------- /vision/cdcgan_mnist/output/cgan_steps_002000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_002000.png -------------------------------------------------------------------------------- /vision/cdcgan_mnist/output/cgan_steps_003000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_003000.png -------------------------------------------------------------------------------- /vision/cdcgan_mnist/output/cgan_steps_004000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_004000.png -------------------------------------------------------------------------------- /vision/cdcgan_mnist/output/cgan_steps_005000.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_005000.png -------------------------------------------------------------------------------- /vision/cdcgan_mnist/output/cgan_steps_006000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_006000.png -------------------------------------------------------------------------------- /vision/cdcgan_mnist/output/cgan_steps_007000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_007000.png -------------------------------------------------------------------------------- /vision/cdcgan_mnist/output/cgan_steps_008000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_008000.png -------------------------------------------------------------------------------- /vision/cdcgan_mnist/output/cgan_steps_009000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_009000.png -------------------------------------------------------------------------------- /vision/cdcgan_mnist/output/cgan_steps_010000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_010000.png -------------------------------------------------------------------------------- /vision/cdcgan_mnist/output/cgan_steps_011000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_011000.png -------------------------------------------------------------------------------- /vision/cdcgan_mnist/output/cgan_steps_011725.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_011725.png -------------------------------------------------------------------------------- /vision/cdcgan_mnist/output/img_for_readme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/img_for_readme.png -------------------------------------------------------------------------------- /vision/conv_mnist/.gitignore: -------------------------------------------------------------------------------- 1 | runs/ -------------------------------------------------------------------------------- /vision/conv_mnist/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" 3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 4 | JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" 5 | MLDatasets = 
"eb30cadb-4394-5ae3-aed4-317e484a6458" 6 | ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" 7 | cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" 8 | 9 | [compat] 10 | CUDA = "3, 4" 11 | Flux = "0.13.16, 0.14" 12 | JLD2 = "0.4.31" 13 | MLDatasets = "0.7" 14 | julia = "1.6" 15 | -------------------------------------------------------------------------------- /vision/conv_mnist/README.md: -------------------------------------------------------------------------------- 1 | # LeNet-5 2 | 3 | ![LeNet-5](../conv_mnist/docs/LeNet-5.png) 4 | 5 | [Source](https://d2l.ai/chapter_convolutional-neural-networks/lenet.html) 6 | 7 | ## Model Info 8 | 9 | At a high level LeNet (LeNet-5) consists of two parts: 10 | (i) _a convolutional encoder consisting of two convolutional layers_; 11 | (ii) _a dense block consisting of three fully-connected layers_ 12 | 13 | The basic units in each convolutional block are a convolutional layer, a sigmoid activation function, and a subsequent average pooling operation. Each convolutional layer uses a 5×5 kernel and a sigmoid activation function. These layers map spatially arranged inputs to a number of two-dimensional feature maps, typically increasing the number of channels. The first convolutional layer has 6 output channels, while the second has 16. Each 2×2 pooling operation (stride 2) reduces dimensionality by a factor of 4 via spatial downsampling. The convolutional block emits an output with shape given by (batch size, number of channel, height, width). 14 | 15 | >**Note:** The original architecture of Lenet5 used the sigmoind activation function. However, this is a a modernized version since it uses the RELU activation function instead. 16 | 17 | ## Training 18 | 19 | ```shell 20 | cd vision/conv_mnist 21 | julia --project conv_mnist.jl 22 | ``` 23 | 24 | ## References 25 | 26 | * [Y. Lecun, L. Bottou, Y. Bengio and P. Haffner, "Gradient-based learning applied to document recognition," in Proceedings of the IEEE, vol. 86, no. 11, pp. 2278-2324, Nov. 1998, doi: 10.1109/5.726791.](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf) 27 | 28 | * [Aston Zhang, Zachary C. Lipton, Mu Li and Alexander J. Smola, "Dive into Deep Learning", 2020](https://d2l.ai/chapter_convolutional-neural-networks/lenet.html) 29 | -------------------------------------------------------------------------------- /vision/conv_mnist/docs/LeNet-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/conv_mnist/docs/LeNet-5.png -------------------------------------------------------------------------------- /vision/convmixer_cifar10/README.md: -------------------------------------------------------------------------------- 1 | # ConvMixer 2 | 3 | ![](doc/convmixerarchi.png) 4 | 5 | From the preprint [Patches are all you need? 🤷](https://openreview.net/forum?id=TVHS5Y4dNvM). 6 | 7 | ## Architecture 8 | 9 | The first layer of the ConvMixer architecture is a patch-splitting encoded with a strided convolution. The rest is a stack of residual convolutional networks with alternationg channel-wise and space-wise convolutions. Each layer is followed by a classical Batch Normalization and all the activations are Gaussian Linear Units (GeLU). This architecture was not designed to be state-of-the-art competitive, but to reach very high accuracies without theoretical complexities, by exploiting patch segmentation of images. 
10 | 11 | The network is trained on CIFAR10 by minimizing the cross-entropy loss with the ADAM optimizer, with Weight Decay and Gradient Clipping. 12 | 13 | ## Training 14 | 15 | ``` 16 | cd vision/convmixer_cifar10 17 | julia --project convmixer.jl 18 | ``` 19 | 20 | The chosen parameters are the smallest for which the architecture reaches an acceptable accuracy. Training for ~100 epochs should result in a 74\% validation accuracy. With a depth of 32 and a width of 256, one can reach 85\% without data augmentation. 21 | 22 | ## Reference 23 | 24 | - https://openreview.net/forum?id=TVHS5Y4dNvM 25 | -------------------------------------------------------------------------------- /vision/convmixer_cifar10/convmixer.jl: -------------------------------------------------------------------------------- 1 | using Flux, MLDatasets 2 | using Flux: onehotbatch, onecold, DataLoader, Optimiser 3 | using BSON: @save, @load 4 | 5 | 6 | function ConvMixer(in_channels, kernel_size, patch_size, dim, depth, N_classes) 7 | f = Chain( 8 | Conv((patch_size, patch_size), in_channels=>dim, gelu; stride=patch_size), 9 | BatchNorm(dim), 10 | [ 11 | Chain( 12 | SkipConnection(Chain(Conv((kernel_size,kernel_size), dim=>dim, gelu; pad=SamePad(), groups=dim), BatchNorm(dim)), +), 13 | Chain(Conv((1,1), dim=>dim, gelu), BatchNorm(dim)) 14 | ) 15 | for i in 1:depth 16 | ]..., 17 | AdaptiveMeanPool((1,1)), 18 | flatten, 19 | Dense(dim,N_classes) 20 | ) 21 | return f 22 | end 23 | 24 | """ 25 | idxs=nothing gives the full dataset, otherwise (for testing purposes) only the 1:idxs elements of the train set are given. 26 | """ 27 | function get_data(batchsize; dataset = MLDatasets.CIFAR10, idxs = nothing) 28 | ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" 29 | 30 | # Loading Dataset 31 | if idxs===nothing 32 | xtrain, ytrain = dataset(:train)[:] 33 | xtest, ytest = dataset(:test)[:] 34 | else 35 | xtrain, ytrain = dataset(:train)[1:idxs] 36 | xtest, ytest = dataset(:test)[1:Int(idxs/10)] 37 | end 38 | 39 | # Reshape Data to comply with Julia's (width, height, channels, batch_size) convention in case there is only 1 channel (eg MNIST) 40 | if ndims(xtrain)==3 41 | w = size(xtrain)[1] 42 | xtrain = reshape(xtrain, (w,w,1,:)) 43 | xtest = reshape(xtest, (w,w,1,:)) 44 | end 45 | 46 | ytrain, ytest = onehotbatch(ytrain, 0:9), onehotbatch(ytest, 0:9) 47 | 48 | train_loader = DataLoader((xtrain, ytrain), batchsize=batchsize, shuffle=true) 49 | test_loader = DataLoader((xtest, ytest), batchsize=batchsize) 50 | 51 | return train_loader, test_loader 52 | end 53 | 54 | function create_loss_function(dataloader, device) 55 | 56 | function loss(model) 57 | n = 0 58 | l = 0.0f0 59 | acc = 0.0f0 60 | 61 | for (x,y) in dataloader 62 | x,y = x |> device, y |> device 63 | z = model(x) 64 | l += Flux.logitcrossentropy(z, y, agg=sum) 65 | acc += sum(onecold(z).==onecold(y)) 66 | n += size(x)[end] 67 | end 68 | l / n, acc / n 69 | end 70 | 71 | return loss 72 | 73 | end 74 | 75 | 76 | function train(n_epochs=100) 77 | 78 | # params: warning, the training can be long with these params 79 | train_loader, test_loader = get_data(128) 80 | η = 3e-4 81 | in_channel = 3 82 | patch_size = 2 83 | kernel_size = 7 84 | dim = 128 85 | dimPL = 2 86 | depth = 18 87 | use_cuda = true 88 | 89 | # logging the losses 90 | train_save = zeros(n_epochs, 2) 91 | test_save = zeros(n_epochs, 2) 92 | 93 | if use_cuda 94 | device = gpu 95 | @info "Training on GPU" 96 | else 97 | device = cpu 98 | @info "Training on CPU" 99 | end 100 | 101 | train_loss_fn =
create_loss_function(train_loader, device) 102 | test_loss_fn = create_loss_function(test_loader, device) 103 | 104 | model = ConvMixer(in_channel, kernel_size, patch_size, dim, depth, 10) |> device 105 | 106 | ps = params(model) 107 | opt = Optimiser( 108 | WeightDecay(1f-3), 109 | ClipNorm(1.0), 110 | ADAM(η) 111 | ) 112 | 113 | for epoch in 1:n_epochs 114 | for (x,y) in train_loader 115 | x,y = x|>device, y|>device 116 | gr = gradient(()->Flux.logitcrossentropy(model(x), y, agg=sum), ps) 117 | Flux.Optimise.update!(opt, ps, gr) 118 | end 119 | 120 | # logging 121 | train_loss, train_acc = train_loss_fn(model) |> cpu 122 | test_loss, test_acc = test_loss_fn(model) |> cpu 123 | train_save[epoch,:] = [train_loss, train_acc] 124 | test_save[epoch,:] = [test_loss, test_acc] 125 | 126 | if epoch % 5 == 0 127 | @info "Epoch $epoch : Train loss = $train_loss || Validation accuracy = $test_acc." 128 | end 129 | 130 | end 131 | 132 | model = model |> cpu 133 | @save "model.bson" model 134 | @save "losses.bson" train_save test_save 135 | end 136 | 137 | if abspath(PROGRAM_FILE) == @__FILE__ 138 | train() 139 | end -------------------------------------------------------------------------------- /vision/convmixer_cifar10/doc/convmixerarchi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/convmixer_cifar10/doc/convmixerarchi.png -------------------------------------------------------------------------------- /vision/dcgan_mnist/.gitignore: -------------------------------------------------------------------------------- 1 | *.png -------------------------------------------------------------------------------- /vision/dcgan_mnist/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" 3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 4 | ImageMagick = "6218d12a-5da1-5696-b52f-db25d2ecc6d1" 5 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0" 6 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" 7 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 8 | 9 | [compat] 10 | Flux = "0.13.9, 0.14" 11 | MLDatasets = "0.7" 12 | julia = "1.6" 13 | -------------------------------------------------------------------------------- /vision/dcgan_mnist/README.md: -------------------------------------------------------------------------------- 1 | # Deep Convolutional GAN (DC-GAN) 2 | 3 | ![dcgan_gen_disc](../dcgan_mnist/output/dcgan_generator_discriminator.png) 4 | [Source](https://gluon.mxnet.io/chapter14_generative-adversarial-networks/dcgan.html) 5 | 6 | ## Model Info 7 | 8 | A DC-GAN is a direct extension of the GAN, except that it explicitly uses convolutional and transposed convolutional layers in the discriminator and generator, respectively. The discriminator is made up of strided convolutional layers, batch norm layers, and LeakyReLU activations. The generator is composed of transposed convolutional layers, batch norm layers, and ReLU activations.
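For orientation, here is a small shape sanity check for the networks described above; it assumes the `Generator` and `Discriminator` constructors defined in `dcgan_mnist.jl` further down, and the batch size of 8 is arbitrary:

```julia
using Flux  # with Generator/Discriminator from dcgan_mnist.jl in scope

gen  = Generator(100)      # maps a 100-dimensional latent vector to an image
dscr = Discriminator()     # maps an image to a single real/fake logit

z    = randn(Float32, 100, 8)       # batch of 8 latent vectors
fake = gen(z)                       # 28×28×1×8 images with values in [-1, 1]
@assert size(fake) == (28, 28, 1, 8)
@assert size(dscr(fake)) == (1, 8)  # one logit per image in the batch
```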
9 | 10 | ## Training 11 | 12 | ```shell 13 | cd vision/dcgan_mnist 14 | julia --project dcgan_mnist.jl 15 | ``` 16 | 17 | ## Results 18 | 19 | 2000 training steps 20 | 21 | ![2000 training steps](../dcgan_mnist/output/dcgan_steps_002000.png) 22 | 23 | 5000 training steps 24 | 25 | ![5000 training steps](../dcgan_mnist/output/dcgan_steps_005000.png) 26 | 27 | 8000 training steps 28 | 29 | ![8000 training steps](../dcgan_mnist/output/dcgan_steps_008000.png) 30 | 31 | 9380 training steps 32 | 33 | ![9380 training steps](../dcgan_mnist/output/dcgan_steps_009380.png) 34 | 35 | ## References 36 | 37 | * [Radford, A. et al.: Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks, http://arxiv.org/abs/1511.06434, (2015).](https://arxiv.org/pdf/1511.06434v2.pdf) 38 | 39 | * [pytorch.org/tutorials/beginner/dcgan_faces_tutorial](https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html) 40 | -------------------------------------------------------------------------------- /vision/dcgan_mnist/dcgan_mnist.jl: -------------------------------------------------------------------------------- 1 | using Base.Iterators: partition 2 | using Flux 3 | using Flux.Optimise: update! 4 | using Flux.Losses: logitbinarycrossentropy 5 | using Images 6 | using MLDatasets 7 | using Statistics 8 | using Printf 9 | using Random 10 | using CUDA 11 | CUDA.allowscalar(false) 12 | 13 | Base.@kwdef struct HyperParams 14 | batch_size::Int = 128 15 | latent_dim::Int = 100 16 | epochs::Int = 20 17 | verbose_freq::Int = 1000 18 | output_x::Int = 6 19 | output_y::Int = 6 20 | lr_dscr::Float32 = 0.0002 21 | lr_gen::Float32 = 0.0002 22 | end 23 | 24 | function create_output_image(gen, fixed_noise, hparams) 25 | fake_images = @. cpu(gen(fixed_noise)) 26 | image_array = reduce(vcat, reduce.(hcat, partition(fake_images, hparams.output_y))) 27 | image_array = permutedims(dropdims(image_array; dims=(3, 4)), (2, 1)) 28 | image_array = @. Gray(image_array + 1f0) / 2f0 29 | return image_array 30 | end 31 | 32 | 33 | # weight initialization as given in the paper https://arxiv.org/abs/1511.06434 34 | dcgan_init(shape...) = randn(Float32, shape...)
* 0.02f0 35 | 36 | function Discriminator() 37 | return Chain( 38 | Conv((4, 4), 1 => 64; stride = 2, pad = 1, init = dcgan_init), 39 | x->leakyrelu.(x, 0.2f0), 40 | Dropout(0.25), 41 | Conv((4, 4), 64 => 128; stride = 2, pad = 1, init = dcgan_init), 42 | x->leakyrelu.(x, 0.2f0), 43 | Dropout(0.25), 44 | x->reshape(x, 7 * 7 * 128, :), 45 | Dense(7 * 7 * 128, 1)) 46 | end 47 | 48 | function Generator(latent_dim::Int) 49 | return Chain( 50 | Dense(latent_dim, 7 * 7 * 256), 51 | BatchNorm(7 * 7 * 256, relu), 52 | x->reshape(x, 7, 7, 256, :), 53 | ConvTranspose((5, 5), 256 => 128; stride = 1, pad = 2, init = dcgan_init), 54 | BatchNorm(128, relu), 55 | ConvTranspose((4, 4), 128 => 64; stride = 2, pad = 1, init = dcgan_init), 56 | BatchNorm(64, relu), 57 | ConvTranspose((4, 4), 64 => 1; stride = 2, pad = 1, init = dcgan_init), 58 | x -> tanh.(x) 59 | ) 60 | end 61 | 62 | # Loss functions 63 | function discriminator_loss(real_output, fake_output) 64 | real_loss = logitbinarycrossentropy(real_output, 1) 65 | fake_loss = logitbinarycrossentropy(fake_output, 0) 66 | return real_loss + fake_loss 67 | end 68 | 69 | generator_loss(fake_output) = logitbinarycrossentropy(fake_output, 1) 70 | 71 | function train_discriminator!(gen, dscr, x, opt_dscr, hparams) 72 | noise = randn!(similar(x, (hparams.latent_dim, hparams.batch_size))) 73 | fake_input = gen(noise) 74 | # Taking gradient 75 | loss, grads = Flux.withgradient(dscr) do dscr 76 | discriminator_loss(dscr(x), dscr(fake_input)) 77 | end 78 | update!(opt_dscr, dscr, grads[1]) 79 | return loss 80 | end 81 | 82 | function train_generator!(gen, dscr, x, opt_gen, hparams) 83 | noise = randn!(similar(x, (hparams.latent_dim, hparams.batch_size))) 84 | # Taking gradient 85 | loss, grads = Flux.withgradient(gen) do gen 86 | generator_loss(dscr(gen(noise))) 87 | end 88 | update!(opt_gen, gen, grads[1]) 89 | return loss 90 | end 91 | 92 | function train(; kws...) 93 | # Model Parameters 94 | hparams = HyperParams(; kws...) 95 | 96 | if CUDA.functional() 97 | @info "Training on GPU" 98 | else 99 | @warn "Training on CPU, this will be very slow!" 
# 20 mins/epoch 100 | end 101 | 102 | # Make sure the output directory exists before saving sample images 103 | mkpath("output") 104 | 105 | # Load MNIST dataset 106 | images = MLDatasets.MNIST(:train).features 107 | # Normalize to [-1, 1] 108 | image_tensor = reshape(@.(2f0 * images - 1f0), 28, 28, 1, :) 109 | # Partition into batches 110 | data = [image_tensor[:, :, :, r] |> gpu for r in partition(1:60000, hparams.batch_size)] 111 | 112 | fixed_noise = [randn(Float32, hparams.latent_dim, 1) |> gpu for _=1:hparams.output_x*hparams.output_y] 113 | 114 | # Discriminator 115 | dscr = Discriminator() |> gpu 116 | 117 | # Generator 118 | gen = Generator(hparams.latent_dim) |> gpu 119 | 120 | # Optimizers 121 | opt_dscr = Flux.setup(Adam(hparams.lr_dscr), dscr) 122 | opt_gen = Flux.setup(Adam(hparams.lr_gen), gen) 123 | 124 | # Training 125 | train_steps = 0 126 | for ep in 1:hparams.epochs 127 | @info "Epoch $ep" 128 | for x in data 129 | # Update discriminator and generator 130 | loss_dscr = train_discriminator!(gen, dscr, x, opt_dscr, hparams) 131 | loss_gen = train_generator!(gen, dscr, x, opt_gen, hparams) 132 | 133 | if train_steps % hparams.verbose_freq == 0 134 | @info("Train step $(train_steps), Discriminator loss = $(loss_dscr), Generator loss = $(loss_gen)") 135 | # Save generated fake image 136 | output_image = create_output_image(gen, fixed_noise, hparams) 137 | save(@sprintf("output/dcgan_steps_%06d.png", train_steps), output_image) 138 | end 139 | train_steps += 1 140 | end 141 | end 142 | 143 | output_image = create_output_image(gen, fixed_noise, hparams) 144 | save(@sprintf("output/dcgan_steps_%06d.png", train_steps), output_image) 145 | end 146 | 147 | if abspath(PROGRAM_FILE) == @__FILE__ 148 | train() 149 | end 150 | 151 | -------------------------------------------------------------------------------- /vision/dcgan_mnist/output/dcgan_generator_discriminator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_generator_discriminator.png -------------------------------------------------------------------------------- /vision/dcgan_mnist/output/dcgan_steps_000000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_000000.png -------------------------------------------------------------------------------- /vision/dcgan_mnist/output/dcgan_steps_001000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_001000.png -------------------------------------------------------------------------------- /vision/dcgan_mnist/output/dcgan_steps_002000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_002000.png -------------------------------------------------------------------------------- /vision/dcgan_mnist/output/dcgan_steps_003000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_003000.png --------------------------------------------------------------------------------
/vision/dcgan_mnist/output/dcgan_steps_004000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_004000.png -------------------------------------------------------------------------------- /vision/dcgan_mnist/output/dcgan_steps_005000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_005000.png -------------------------------------------------------------------------------- /vision/dcgan_mnist/output/dcgan_steps_006000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_006000.png -------------------------------------------------------------------------------- /vision/dcgan_mnist/output/dcgan_steps_007000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_007000.png -------------------------------------------------------------------------------- /vision/dcgan_mnist/output/dcgan_steps_008000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_008000.png -------------------------------------------------------------------------------- /vision/dcgan_mnist/output/dcgan_steps_009000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_009000.png -------------------------------------------------------------------------------- /vision/dcgan_mnist/output/dcgan_steps_009380.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_009380.png -------------------------------------------------------------------------------- /vision/diffusion_mnist/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/.DS_Store -------------------------------------------------------------------------------- /vision/diffusion_mnist/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" 3 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" 4 | DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1" 5 | FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" 6 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 7 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0" 8 | Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" 9 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" 10 | Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" 11 | ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" 12 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 13 | TensorBoardLogger = 
"899adc3e-224a-11e9-021f-63837185c80f" 14 | DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa" 15 | 16 | [compat] 17 | CUDA = "3" 18 | Flux = "0.13" 19 | julia = "1.6" 20 | FileIO = "1" 21 | BSON = "0.3" 22 | DrWatson = "2" 23 | Images = "0.25" 24 | MLDatasets = "0.6" 25 | Parameters = "0.12" 26 | ProgressMeter = "1" 27 | TensorBoardLogger = "0.1" -------------------------------------------------------------------------------- /vision/diffusion_mnist/README.md: -------------------------------------------------------------------------------- 1 | # Score-Based Generative Modeling (Diffusion Model) 2 | ![sde](../diffusion_mnist/docs/sde.png) 3 | [Source](https://yang-song.github.io/blog/2021/score/#score-based-generative-modeling-with-stochastic-differential-equations-sdes) 4 | 5 | ## Model Info 6 | *Score-Based Generative Modeling* is a framework to learn stochastic dynamics that transitions one distribution to another. In our case, we will be modeling the transition from the MNIST image distribution into random noise. The general idea is to learn the forward dynamics (*score function* or *gradients*) of the image's distribution being slowly evolved into random gaussian noise through a *diffusion process*. This is shown in the image above with the **Forward Stochastic Differential Equation (SDE)**. With estimates of how the forward dynamics works, we can then reverse the process allowing us to create realistic looking images from pure noise! This is shown with the **Reverse SDE** in the graphic above. 7 | 8 | In contrast to likelihood based models, *Score-Based Generative Modeling* depends only on the *score function*, $\nabla_x \log{p(x)}$ which is minimized through *score matching*. **Concretely, this tutorial will be using a UNet architecture and score matching loss function to learn this score function**. After this gradient is estimated, we can then draw samples from the MNIST dataset using **Langevin Dynamics** of the reverse SDE. 9 | 10 | ### More Model Info 11 | A much more in-depth walkthrough of the theory is available [here](https://yang-song.github.io/blog/2021/score/) from the original author, [Yang Song](https://yang-song.github.io/). I highly recommend this blog to become more familiar with the concepts before diving into the code! 12 | 13 | ### Pytorch Equivalent Code 14 | For those coming from Python, [here](https://colab.research.google.com/drive/120kYYBOVa1i0TD85RjlEkFjaWDxSFUx3?usp=sharing) is the equivalent Pytorch code that was used to create this Julia tutorial. 15 | 16 | ## Training 17 | ```shell 18 | cd vision/diffusion_mnist 19 | julia --project diffusion_mnist.jl 20 | ``` 21 | 22 | ## Visualization 23 | ```shell 24 | cd vision/diffusion_mnist 25 | julia --project diffusion_plot.jl 26 | ``` 27 | Visualizations are sampled with either the equations used in the [original PyTorch tutorial](https://colab.research.google.com/drive/120kYYBOVa1i0TD85RjlEkFjaWDxSFUx3?usp=sharing) or with the help of [`DifferentialEquations.jl`](https://diffeq.sciml.ai/stable/). 28 | |
| Sampled Noise | Euler-Maruyama (EM) Sampler | Predictor Corrector Sampler | 29 | | ----------- | ----------- | ----------- | 30 | | ![noise](../diffusion_mnist/docs/sampled_noise.jpeg) | ![em](../diffusion_mnist/docs/em_images.jpeg) | ![pc](../diffusion_mnist/docs/pc_images.jpeg) | 31 | 32 | | Euler-Maruyama (`DifferentialEquations.jl`) | Probability Flow ODE (`DifferentialEquations.jl`) | 33 | | ----------- | ----------- | 34 | | ![diff eq em](../diffusion_mnist/docs/diff_eq_em_images.jpeg) | ![diff eq ode](../diffusion_mnist/docs/diff_eq_ode_images.jpeg) | 35 | 36 | And since `DifferentialEquations.jl`'s `solve()` returns the entire sample path, it is easy to visualize the reverse-time SDE sampling process as an animation: 37 | 38 | | Euler-Maruyama | Probability Flow ODE | 39 | | ----------- | ----------- | 40 | | ![em animation](../diffusion_mnist/docs/diff_eq_em.gif) | ![ode animation](../diffusion_mnist/docs/diff_eq_ode.gif) | 41 | 42 | And finally, we can visualize the components of the image, `𝙭`, as a function of `t ∈ [1, ϵ]`. As noted by the authors, the Probability Flow ODE captures the same marginal probability density 𝒫ₜ(𝙭) as its stochastic counterpart. 43 | 44 | | Euler-Maruyama | Probability Flow ODE | 45 | | ----------- | ----------- | 46 | | ![em plot](../diffusion_mnist/docs/diff_eq_em_plot.png) | ![ode plot](../diffusion_mnist/docs/diff_eq_ode_plot.png) | 47 | 48 | The lines `x(t) = ± σᵗ` are shown for reference. 49 | 50 | ## References 51 | 52 | * [Yang Song. “Generative Modeling by Estimating Gradients of the Data Distribution.” Blog Post, 2021](https://yang-song.github.io/blog/2021/score/) 53 | 54 | * [Yang Song, Jascha Sohl-Dickstein, Diederik P. Kingma, Abhishek Kumar, Stefano Ermon, and Ben Poole. "Score-Based Generative Modeling Through 55 | Stochastic Differential Equations". ArXiv Preprint, 2021](https://arxiv.org/pdf/2011.13456.pdf) 56 | 57 | -------------------------------------------------------------------------------- /vision/diffusion_mnist/diffusion_test.jl: -------------------------------------------------------------------------------- 1 | include("diffusion_mnist.jl") 2 | 3 | function test() 4 | @info "Begin tests for diffusion_mnist.jl" 5 | 6 | gaussfourierproj_test = GaussianFourierProjection(32, 20.0f0) 7 | # GaussianFourierProjection(embed_dim, ⋅)(batch) => (embed_dim, batch) 8 | @assert gaussfourierproj_test(randn(Float32, 32)) |> size == (32, 32) 9 | # W is fixed wrt. repeated calls 10 | @assert gaussfourierproj_test(ones(Float32, 32)) == gaussfourierproj_test(ones(Float32, 32)) 11 | # W is not trainable 12 | @assert params(gaussfourierproj_test) == Flux.Params([]) 13 | 14 | @assert expand_dims(ones(Float32, 32), 3) |> size == (1, 1, 1, 32) 15 | 16 | unet_test = UNet() 17 | x_test = randn(Float32, (28, 28, 1, 32)) 18 | t_test = rand(Float32, 32) 19 | score_test = unet_test(x_test, t_test) 20 | @assert score_test |> size == (28, 28, 1, 32) 21 | @assert typeof(score_test) == Array{Float32,4} 22 | 23 | # Test gradient computation 24 | grad_test = gradient( 25 | () -> model_loss(unet_test, x_test), params(unet_test) 26 | ) 27 | @assert grad_test.params == params(unet_test) 28 | 29 | train(save_path="test", epochs=1, batch_size=4096, tblogger=false) 30 | 31 | @info "Tests complete for diffusion_mnist.jl" 32 | end 33 | 34 | if abspath(PROGRAM_FILE) == @__FILE__ 35 | test() 36 | end -------------------------------------------------------------------------------- /vision/diffusion_mnist/docs/diff_eq_em.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/diff_eq_em.gif -------------------------------------------------------------------------------- /vision/diffusion_mnist/docs/diff_eq_em_images.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/diff_eq_em_images.jpeg -------------------------------------------------------------------------------- /vision/diffusion_mnist/docs/diff_eq_em_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/diff_eq_em_plot.png -------------------------------------------------------------------------------- /vision/diffusion_mnist/docs/diff_eq_ode.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/diff_eq_ode.gif -------------------------------------------------------------------------------- /vision/diffusion_mnist/docs/diff_eq_ode_images.jpeg:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/diff_eq_ode_images.jpeg -------------------------------------------------------------------------------- /vision/diffusion_mnist/docs/diff_eq_ode_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/diff_eq_ode_plot.png -------------------------------------------------------------------------------- /vision/diffusion_mnist/docs/em_images.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/em_images.jpeg -------------------------------------------------------------------------------- /vision/diffusion_mnist/docs/loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/loss.png -------------------------------------------------------------------------------- /vision/diffusion_mnist/docs/pc_images.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/pc_images.jpeg -------------------------------------------------------------------------------- /vision/diffusion_mnist/docs/sampled_noise.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/sampled_noise.jpeg -------------------------------------------------------------------------------- /vision/diffusion_mnist/docs/sde.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/sde.png -------------------------------------------------------------------------------- /vision/diffusion_mnist/docs/unet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/unet.png -------------------------------------------------------------------------------- /vision/mlp_mnist/.gitignore: -------------------------------------------------------------------------------- 1 | *.bson 2 | *.png 3 | -------------------------------------------------------------------------------- /vision/mlp_mnist/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 3 | ImageCore = "a09fc81d-aa75-5fe9-8630-4744c3626534" 4 | ImageInTerminal = "d8c32880-2388-543b-8c61-d9f865259254" 5 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" 6 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" 7 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 8 | 9 | [compat] 10 | Flux = "0.13.9, 0.14" 11 | MLDatasets = "0.7" 12 | julia = "1.6" 13 | -------------------------------------------------------------------------------- /vision/mlp_mnist/README.md: 
-------------------------------------------------------------------------------- 1 | # Multilayer Perceptron (MLP) 2 | 3 | ![mlp](../mlp_mnist/docs/mlp.svg) 4 | 5 | [Source](http://d2l.ai/chapter_multilayer-perceptrons/mlp.html) 6 | 7 | ## Model Info 8 | 9 | A [multi-layer perceptron](https://en.wikipedia.org/wiki/Multilayer_perceptron) (MLP) consists of at least three sets of nodes: an input layer, one or more hidden layers, and an output layer. Each node except for the input nodes is a neuron that uses a nonlinear activation function. The multiple layers and non-linearities allow an MLP to distinguish data that is not linearly separable once trained. 10 | 11 | In this example, we create an MLP that classifies handwritten digits using the [MNIST dataset](http://yann.lecun.com/exdb/mnist/). 12 | Our model uses the simplest [Flux layers](http://fluxml.ai/Flux.jl/stable/models/layers/), namely `Dense` and `Chain`. 13 | It uses [softmax](https://en.wikipedia.org/wiki/Softmax_function) on its outputs, and [`crossentropy`](http://fluxml.ai/Flux.jl/stable/models/losses/#Flux.Losses.crossentropy) as the loss function. 14 | 15 | For simplicity, this model does not use a graphics card, since an ordinary CPU is fast enough. 16 | See for example the [LeNet convolutional network](https://github.com/FluxML/model-zoo/tree/master/vision/conv_mnist) for GPU usage. 17 | 18 | ## Training 19 | 20 | You can copy and paste the example into the Julia REPL to see what each part does. 21 | Or you can run it all at once from the terminal, like this: 22 | 23 | ```shell 24 | cd vision/mlp_mnist 25 | julia --project mlp_mnist.jl 26 | ``` 27 | 28 | ## Reference 29 | 30 | * [Aston Zhang, Zachary C. Lipton, Mu Li and Alexander J. Smola, "Dive into Deep Learning", 2020](http://d2l.ai/chapter_multilayer-perceptrons/mlp.html) 31 | * [3Blue1Brown Neural networks videos](https://www.youtube.com/watch?v=aircAruvnKk&list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi) 32 | * [Neural Networks and Deep Learning](http://neuralnetworksanddeeplearning.com/) 33 | 34 | -------------------------------------------------------------------------------- /vision/mlp_mnist/mlp_mnist.jl: -------------------------------------------------------------------------------- 1 | # Simple multi-layer perceptron, for the MNIST hand-written digits. 2 | # This example does not use a GPU, it's small enough not to need one. 3 | 4 | using Flux, MLDatasets, Statistics 5 | 6 | # Our model is very simple: Its one "hidden layer" has 32 "neurons" each connected to every input pixel. 7 | # Each has a sigmoid nonlinearity, and is connected to every "neuron" in the output layer. 8 | # Finally, softmax produces probabilities, i.e. positive numbers which add up to 1: 9 | 10 | model = Chain(Dense(28^2 => 32, sigmoid), Dense(32 => 10), softmax) 11 | 12 | p1 = model(rand(Float32, 28^2)) # run model on random data shaped like an image 13 | 14 | @show sum(p1) ≈ 1; 15 | 16 | p3 = model(rand(Float32, 28^2, 3)) # ...or on a batch of 3 fake, random "images" 17 | 18 | @show sum(p3; dims=1); # all approx 1. Last dim is batch dim. 19 | 20 | #===== DATA =====# 21 | 22 | # Calling MLDatasets.MNIST() will download the dataset if necessary, 23 | # and return a struct containing it. 24 | # It takes a few seconds to read from disk each time, so do this once: 25 | 26 | train_data = MLDatasets.MNIST() # i.e. split=:train 27 | test_data = MLDatasets.MNIST(split=:test) 28 | 29 | # train_data.features is a 28×28×60000 Array{Float32, 3} of the images. 30 | # We need a 2D array for our model.
Let's combine the reshape needed with 31 | # other pre-processing, in a function: 32 | 33 | function simple_loader(data::MNIST; batchsize::Int=64) 34 | x2dim = reshape(data.features, 28^2, :) 35 | yhot = Flux.onehotbatch(data.targets, 0:9) 36 | Flux.DataLoader((x2dim, yhot); batchsize, shuffle=true) 37 | end 38 | 39 | # train_data.targets is a 60000-element Vector{Int}, of labels from 0 to 9. 40 | # Flux.onehotbatch([0,1,9], 0:9) makes a matrix of 0 and 1. 41 | 42 | simple_loader(train_data) # returns a DataLoader, with first element a tuple like this: 43 | 44 | x1, y1 = first(simple_loader(train_data)); # (784×64 Matrix{Float32}, 10×64 OneHotMatrix) 45 | 46 | model(x1) # x1 is the right shape for our model 47 | 48 | y1 # y1 is the same shape as the model output. 49 | 50 | @show Flux.crossentropy(model(x1), y1); # This will be our loss function 51 | 52 | #===== ACCURACY =====# 53 | 54 | # We're going to log accuracy and loss during training. There's no advantage to 55 | # calculating these on minibatches, since MNIST is small enough to do it at once. 56 | 57 | function simple_accuracy(model, data::MNIST=test_data) 58 | (x, y) = only(simple_loader(data; batchsize=length(data))) # make one big batch 59 | y_hat = model(x) 60 | iscorrect = Flux.onecold(y_hat) .== Flux.onecold(y) # BitVector 61 | acc = round(100 * mean(iscorrect); digits=2) 62 | end 63 | 64 | @show simple_accuracy(model); # accuracy about 10%, on training data, before training! 65 | 66 | #===== TRAINING =====# 67 | 68 | # Make a dataloader using the desired batchsize: 69 | 70 | train_loader = simple_loader(train_data, batchsize = 256) 71 | 72 | # Initialise storage needed for the Adam optimiser, with our chosen learning rate: 73 | 74 | opt_state = Flux.setup(Adam(3e-4), model); 75 | 76 | # Then train for 30 epochs, printing out details as we go: 77 | 78 | for epoch in 1:30 79 | loss = 0.0 80 | for (x, y) in train_loader 81 | # Compute the loss and the gradients: 82 | l, gs = Flux.withgradient(m -> Flux.crossentropy(m(x), y), model) 83 | # Update the model parameters (and the Adam momenta): 84 | Flux.update!(opt_state, model, gs[1]) 85 | # Accumulate the mean loss, just for logging: 86 | loss += l / length(train_loader) 87 | end 88 | 89 | if mod(epoch, 2) == 1 90 | # Report on train and test, only every 2nd epoch: 91 | train_acc = simple_accuracy(model, train_data) 92 | test_acc = simple_accuracy(model, test_data) 93 | @info "After epoch = $epoch" loss train_acc test_acc 94 | end 95 | end 96 | 97 | # This should get to about 94% accuracy. 98 | # To do better, try using Dense(784 => 64, relu) instead. 99 | 100 | #===== INSPECTION =====# 101 | 102 | using ImageCore, ImageInTerminal 103 | 104 | xtest, ytest = only(simple_loader(test_data, batchsize=length(test_data))); 105 | 106 | # There are many ways to look at images, you won't need ImageInTerminal if working in a notebook. 107 | # ImageCore.Gray is a special type, which interprets numbers between 0.0 and 1.0 as shades: 108 | 109 | reshape(xtest[:,33], 28, 28) .|> Gray |> transpose 110 | 111 | @show Flux.onecold(ytest, 0:9)[33]; # true label, should match! 112 | 113 | # Now we can compare the model's probabilities, for the same input. 
114 | # This should be highest at the same number: 115 | 116 | p10 = (0:9) .=> model(xtest[:, 33]); 117 | display(p10) 118 | -------------------------------------------------------------------------------- /vision/spatial_transformer/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" 3 | DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1" 4 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 5 | ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" 6 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" 7 | NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" 8 | NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d" 9 | Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" 10 | ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" 11 | Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" 12 | ZygoteRules = "700de1a5-db45-46bc-99cf-38207098b444" 13 | 14 | [compat] 15 | DrWatson = "2.7.6" 16 | Flux = "0.13.9, 0.14" 17 | MLDatasets = "0.7.6" 18 | julia = "1.7.0" 19 | -------------------------------------------------------------------------------- /vision/spatial_transformer/README.md: -------------------------------------------------------------------------------- 1 | # Spatial Transformer Network 2 | 3 | In this tutorial we'll build a spatial transformer network that will transform MNIST 4 | digits for classification by a CNN: 5 | 6 | * [Spatial Transformer Networks](https://proceedings.neurips.cc/paper/2015/hash/33ceb07bf4eeb3da587e268d663aba1a-Abstract.html) 7 | 8 | Spatial transformer networks perform differentiable affine transformations on images, in this tutorial for the purpose of aiding classification of MNIST digits. 9 | 10 | ![spatial_transformer](https://pytorch.org/tutorials/_images/stn-arch.png) 11 | 12 | 13 | ![stn_visualization](images/stn_example.png) -------------------------------------------------------------------------------- /vision/spatial_transformer/images/stn_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/spatial_transformer/images/stn_example.png -------------------------------------------------------------------------------- /vision/vae_mnist/.gitignore: -------------------------------------------------------------------------------- 1 | *.jld2 2 | -------------------------------------------------------------------------------- /vision/vae_mnist/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" 3 | DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1" 4 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 5 | ImageIO = "82e4d734-157c-48bb-816b-45c225c6df19" 6 | ImageMagick = "6218d12a-5da1-5696-b52f-db25d2ecc6d1" 7 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0" 8 | JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" 9 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" 10 | MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" 11 | Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" 12 | Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" 13 | ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" 14 | TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f" 15 | cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" 16 | 17 | [compat] 18 | CUDA = "5" 19 | Flux = "0.14" 20 | MLDatasets = "0.7" 21 | julia = "1.9" 22 | 
-------------------------------------------------------------------------------- /vision/vae_mnist/README.md: -------------------------------------------------------------------------------- 1 | # Variational Autoencoder (VAE) 2 | 3 | 4 | 5 | [Source](https://learnopencv.com/variational-autoencoder-in-tensorflow/#:~:text=Variational) 6 | 7 | ## Model Info 8 | 9 | Variational Autoencoder (VAE) came into existence in 2013, when Kingma et al. published the paper [Auto-Encoding Variational Bayes](https://arxiv.org/pdf/1312.6114.pdf). This paper was an extension of the original idea of Auto-Encoder primarily to learn the distribution of the data. VAEs are devised within the variational inference framework and approximately model the data distribution after training, making it computationally cheap to generate new samples. 10 | 11 | In VAE the idea is to encode the input as a probability distribution rather than a point estimate as in vanilla auto-encoder. Then VAE uses a decoder to reconstruct the original input by using samples from that probability distribution. 12 | 13 | ## Training 14 | 15 | ```shell 16 | cd vision/vae_mnist 17 | julia --project vae_mnist.jl 18 | ``` 19 | 20 | Original image 21 | 22 | ![Original](docs/original.png) 23 | 24 | 5 epochs 25 | 26 | ![5 epochs](docs/epoch_5.png) 27 | 28 | 10 epochs 29 | 30 | ![10 epochs](docs/epoch_10.png) 31 | 32 | 20 epochs 33 | 34 | ![10 epochs](docs/epoch_20.png) 35 | 36 | ## Visualization 37 | 38 | ```shell 39 | julia --project vae_plot.jl 40 | ``` 41 | 42 | Latent space clustering 43 | 44 | ![Clustering](docs/clustering.png) 45 | 46 | 2D manifold 47 | 48 | ![Manifold](docs/manifold.png) 49 | 50 | ## References 51 | 52 | * [Kingma, Diederik P., and Max Welling. “Auto-Encoding Variational Bayes.” ArXiv Preprint ArXiv:1312.6114, 2013.](https://arxiv.org/pdf/1312.6114.pdf) 53 | 54 | * [Variational Autoencoder in TensorFlow](https://learnopencv.com/variational-autoencoder-in-tensorflow/#:~:text=Variational.) 
55 | -------------------------------------------------------------------------------- /vision/vae_mnist/docs/clustering.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/docs/clustering.png -------------------------------------------------------------------------------- /vision/vae_mnist/docs/epoch_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/docs/epoch_10.png -------------------------------------------------------------------------------- /vision/vae_mnist/docs/epoch_20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/docs/epoch_20.png -------------------------------------------------------------------------------- /vision/vae_mnist/docs/epoch_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/docs/epoch_5.png -------------------------------------------------------------------------------- /vision/vae_mnist/docs/manifold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/docs/manifold.png -------------------------------------------------------------------------------- /vision/vae_mnist/docs/original.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/docs/original.png -------------------------------------------------------------------------------- /vision/vae_mnist/docs/vae.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/docs/vae.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/clustering.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/clustering.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_1.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_10.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_11.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_11.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_12.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_13.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_14.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_15.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_16.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_17.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_18.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_19.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_2.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_20.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_3.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_4.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_5.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_6.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_7.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_8.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/epoch_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_9.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/manifold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/manifold.png -------------------------------------------------------------------------------- /vision/vae_mnist/output/original.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/original.png -------------------------------------------------------------------------------- /vision/vae_mnist/vae_mnist.jl: -------------------------------------------------------------------------------- 1 | # Variational Autoencoder(VAE) 2 | # 3 | # Auto-Encoding Variational Bayes 4 | # Diederik P Kingma, Max Welling 5 | # https://arxiv.org/abs/1312.6114 6 | 7 | using JLD2 8 | # using CUDA, cuDNN # uncomment one of these if you use GPU 9 | # using AMDGPU 10 | # using Metal 11 | using DrWatson: struct2dict 12 | using Flux 13 | using Optimisers: AdamW 14 | using MLUtils: randn_like, chunk, DataLoader 15 | using Flux: logitbinarycrossentropy 16 | using Images 17 | using Logging: with_logger 18 | using MLDatasets 19 | 
using ProgressMeter: Progress, next! 20 | using TensorBoardLogger: TBLogger, tb_overwrite 21 | using Random 22 | 23 | # load MNIST images and return loader 24 | function get_data(batch_size) 25 | xtrain, ytrain = MLDatasets.MNIST(split=:train)[:] 26 | xtrain = reshape(xtrain, 28^2, :) 27 | return DataLoader((xtrain, ytrain), batchsize=batch_size, shuffle=true) 28 | end 29 | 30 | struct Encoder 31 | linear 32 | μ 33 | logσ 34 | end 35 | 36 | Flux.@layer Encoder 37 | 38 | Encoder(input_dim::Int, latent_dim::Int, hidden_dim::Int) = Encoder( 39 | Dense(input_dim, hidden_dim, tanh), # linear 40 | Dense(hidden_dim, latent_dim), # μ 41 | Dense(hidden_dim, latent_dim), # logσ 42 | ) 43 | 44 | function (encoder::Encoder)(x) 45 | h = encoder.linear(x) 46 | encoder.μ(h), encoder.logσ(h) 47 | end 48 | 49 | Decoder(input_dim::Int, latent_dim::Int, hidden_dim::Int) = Chain( 50 | Dense(latent_dim, hidden_dim, tanh), 51 | Dense(hidden_dim, input_dim) 52 | ) 53 | 54 | function reconstuct(encoder, decoder, x) 55 | μ, logσ = encoder(x) 56 | z = μ + randn_like(logσ) .* exp.(logσ) 57 | return μ, logσ, decoder(z) 58 | end 59 | 60 | function model_loss(encoder, decoder, x) 61 | μ, logσ, decoder_z = reconstuct(encoder, decoder, x) 62 | batch_size = size(x)[end] 63 | # KL-divergence 64 | kl_q_p = 0.5f0 * sum(@. (exp(2*logσ) + μ^2 - 1 - 2*logσ)) / batch_size 65 | 66 | logp_x_z = -logitbinarycrossentropy(decoder_z, x, agg=sum) / batch_size 67 | 68 | return -logp_x_z + kl_q_p 69 | end 70 | 71 | function convert_to_image(x, y_size) 72 | Gray.(permutedims(vcat(reshape.(chunk(x |> cpu, y_size), 28, :)...), (2, 1))) 73 | end 74 | 75 | # arguments for the `train` function 76 | Base.@kwdef mutable struct Args 77 | η = 1e-3 # learning rate 78 | λ = 1e-4 # regularization parameter 79 | batch_size = 128 # batch size 80 | sample_size = 10 # sampling size for output 81 | epochs = 20 # number of epochs 82 | seed = 0 # random seed 83 | use_gpu = true # use GPU 84 | input_dim = 28^2 # image size 85 | latent_dim = 64 # latent dimension 86 | hidden_dim = 500 # hidden dimension 87 | verbose_freq = 10 # logging for every verbose_freq iterations 88 | tblogger = false # log training with tensorboard 89 | save_path = "output" # results path 90 | end 91 | 92 | function train(; kws...) 93 | # load hyperparameters 94 | args = Args(; kws...)
95 | args.seed > 0 && Random.seed!(args.seed) 96 | 97 | if args.use_gpu 98 | device = Flux.get_device() 99 | else 100 | device = Flux.get_device("CPU") 101 | end 102 | 103 | @info "Training on $device" 104 | 105 | # load MNIST images 106 | loader = get_data(args.batch_size) 107 | 108 | # initialize encoder and decoder 109 | encoder = Encoder(args.input_dim, args.latent_dim, args.hidden_dim) |> device 110 | decoder = Decoder(args.input_dim, args.latent_dim, args.hidden_dim) |> device 111 | 112 | # AdamW optimizer (Adam with decoupled weight decay) 113 | opt_enc = Flux.setup(AdamW(eta=args.η, lambda=args.λ), encoder) 114 | opt_dec = Flux.setup(AdamW(eta=args.η, lambda=args.λ), decoder) 115 | 116 | !ispath(args.save_path) && mkpath(args.save_path) 117 | 118 | # logging by TensorBoard.jl 119 | if args.tblogger 120 | tblogger = TBLogger(args.save_path, tb_overwrite) 121 | end 122 | 123 | # fixed input 124 | original, _ = first(get_data(args.sample_size^2)) 125 | original = original |> device 126 | image = convert_to_image(original, args.sample_size) 127 | image_path = joinpath(args.save_path, "original.png") 128 | save(image_path, image) 129 | 130 | # training 131 | train_steps = 0 132 | @info "Start Training, total $(args.epochs) epochs" 133 | for epoch = 1:args.epochs 134 | @info "Epoch $(epoch)" 135 | progress = Progress(length(loader)) 136 | 137 | for (x, _) in loader 138 | x_dev = x |> device 139 | loss, (grad_enc, grad_dec) = Flux.withgradient(encoder, decoder) do enc, dec 140 | model_loss(enc, dec, x_dev) 141 | end 142 | 143 | Flux.update!(opt_enc, encoder, grad_enc) 144 | Flux.update!(opt_dec, decoder, grad_dec) 145 | # progress meter 146 | next!(progress; showvalues=[(:loss, loss)]) 147 | 148 | # logging with TensorBoard 149 | if args.tblogger && train_steps % args.verbose_freq == 0 150 | with_logger(tblogger) do 151 | @info "train" loss=loss 152 | end 153 | end 154 | 155 | train_steps += 1 156 | end 157 | # save image 158 | _, _, rec_original = reconstuct(encoder, decoder, original) 159 | rec_original = sigmoid.(rec_original) 160 | image = convert_to_image(rec_original, args.sample_size) 161 | image_path = joinpath(args.save_path, "epoch_$(epoch).png") 162 | save(image_path, image) 163 | @info "Image saved: $(image_path)" 164 | end 165 | 166 | # save model 167 | let encoder = cpu(encoder), decoder = cpu(decoder), args=struct2dict(args) 168 | filepath = joinpath(args[:save_path], "checkpoint.jld2") 169 | JLD2.save(filepath, "encoder", Flux.state(encoder), 170 | "decoder", Flux.state(decoder), 171 | "args", args) 172 | @info "Model saved: $(filepath)" 173 | end 174 | end 175 | 176 | if abspath(PROGRAM_FILE) == @__FILE__ 177 | train() 178 | end 179 | 180 | -------------------------------------------------------------------------------- /vision/vae_mnist/vae_plot.jl: -------------------------------------------------------------------------------- 1 | include("vae_mnist.jl") 2 | 3 | using Plots 4 | 5 | function plot_result() 6 | checkpoint = JLD2.load("output/checkpoint.jld2") 7 | encoder_state = checkpoint["encoder"] 8 | decoder_state = checkpoint["decoder"] 9 | args = Args(; checkpoint["args"]...)
10 | encoder = Encoder(args.input_dim, args.latent_dim, args.hidden_dim) 11 | decoder = Decoder(args.input_dim, args.latent_dim, args.hidden_dim) 12 | Flux.loadmodel!(encoder, encoder_state) 13 | Flux.loadmodel!(decoder, decoder_state) 14 | loader = get_data(args.batch_size) 15 | 16 | # clustering in the latent space 17 | # visualize first two dims 18 | plt = scatter(palette=:rainbow) 19 | for (i, (x, y)) in enumerate(loader) 20 | i < 20 || break 21 | μ, logσ = encoder(x) 22 | @assert size(μ, 1) == 2 # Latent_dim has to be 2 for direct visualization, otherwise use PCA or t-SNE 23 | scatter!(μ[1, :], μ[2, :], 24 | markerstrokewidth=0, markeralpha=0.8, 25 | aspect_ratio=1, 26 | markercolor=y, label="") 27 | end 28 | savefig(plt, "output/clustering.png") 29 | 30 | z = range(-2.0, stop=2.0, length=11) 31 | len = Base.length(z) 32 | z1 = repeat(z, len) 33 | z2 = sort(z1) 34 | x = zeros(Float32, args.latent_dim, len^2) 35 | x[1, :] = z1 36 | x[2, :] = z2 37 | samples = decoder(x) 38 | samples = sigmoid.(samples) 39 | image = convert_to_image(samples, len) 40 | save("output/manifold.png", image) 41 | end 42 | 43 | if abspath(PROGRAM_FILE) == @__FILE__ 44 | plot_result() 45 | end 46 | -------------------------------------------------------------------------------- /vision/vgg_cifar10/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" 3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" 4 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0" 5 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" 6 | MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" 7 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 8 | 9 | [compat] 10 | CUDA = "3, 4" 11 | Flux = "0.13.9, 0.14" 12 | MLDatasets = "0.7" 13 | MLUtils = "0.3" 14 | julia = "1.6" 15 | -------------------------------------------------------------------------------- /vision/vgg_cifar10/README.md: -------------------------------------------------------------------------------- 1 | # Visual Geometry Group (VGG) 2 | 3 | ![vgg](../vgg_cifar10/docs/vgg.png) 4 | 5 | [Source](http://d2l.ai/chapter_convolutional-modern/vgg.html) 6 | 7 | ## Model Info 8 | 9 | The basic building block of classic CNNs is a sequence of the following: (i) a convolutional layer with padding to maintain the resolution, (ii) a nonlinearity such as a ReLU, (iii) a pooling layer such as a maximum pooling layer. _One VGG block consists of a sequence of convolutional layers, followed by a maximum pooling layer for spatial downsampling_. In the original VGG paper [Simonyan & Zisserman, 2015](https://arxiv.org/pdf/1409.1556v4.pdf), the authors employed convolutions with 3×3 kernels with padding of 1 (keeping height and width) and 2×2 maximum pooling with stride of 2 (halving the resolution after each block). 10 | 11 | ## Training 12 | 13 | ```shell 14 | cd vision/vgg_cifar10 15 | julia --project vgg_cifar10.jl 16 | ``` 17 | 18 | ## References 19 | 20 | * [Simonyan, K. and Zisserman, A., “Very Deep Convolutional Networks for Large-Scale Image Recognition”, arXiv e-prints, 2015. 21 | ](https://arxiv.org/pdf/1409.1556v4.pdf) 22 | 23 | * [Aston Zhang, Zachary C. Lipton, Mu Li and Alexander J. 
## Training

```shell
cd vision/vgg_cifar10
julia --project vgg_cifar10.jl
```

## References

* [Simonyan, K. and Zisserman, A., "Very Deep Convolutional Networks for Large-Scale Image Recognition", arXiv e-prints, 2015](https://arxiv.org/pdf/1409.1556v4.pdf)

* [Aston Zhang, Zachary C. Lipton, Mu Li and Alexander J. Smola, "Dive into Deep Learning", 2020](http://d2l.ai/chapter_convolutional-modern/vgg.html)

--------------------------------------------------------------------------------
/vision/vgg_cifar10/docs/vgg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vgg_cifar10/docs/vgg.png

--------------------------------------------------------------------------------
/vision/vgg_cifar10/vgg_cifar10.jl:
--------------------------------------------------------------------------------
using Flux
using Flux: onehotbatch, onecold, flatten
using Flux.Losses: logitcrossentropy
using Statistics: mean
using CUDA
using MLDatasets: CIFAR10
using MLUtils: splitobs, DataLoader

if CUDA.has_cuda()
    @info "CUDA is on"
    CUDA.allowscalar(false)
end

function get_processed_data(args)
    x, y = CIFAR10(:train)[:]

    (train_x, train_y), (val_x, val_y) = splitobs((x, y), at=1-args.valsplit)

    train_x = float(train_x)
    train_y = onehotbatch(train_y, 0:9)
    val_x = float(val_x)
    val_y = onehotbatch(val_y, 0:9)

    return (train_x, train_y), (val_x, val_y)
end

function get_test_data()
    test_x, test_y = CIFAR10(:test)[:]

    test_x = float(test_x)
    test_y = onehotbatch(test_y, 0:9)

    return test_x, test_y
end

# VGG16 model, with batch normalization after every convolution
function vgg16()
    Chain([
        Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)),
        BatchNorm(64),
        Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)),
        BatchNorm(64),
        MaxPool((2, 2)),
        Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)),
        BatchNorm(128),
        Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)),
        BatchNorm(128),
        MaxPool((2, 2)),
        Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)),
        BatchNorm(256),
        Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
        BatchNorm(256),
        Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
        BatchNorm(256),
        MaxPool((2, 2)),
        Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)),
        BatchNorm(512),
        Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
        BatchNorm(512),
        Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
        BatchNorm(512),
        MaxPool((2, 2)),
        Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
        BatchNorm(512),
        Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
        BatchNorm(512),
        Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
        BatchNorm(512),
        MaxPool((2, 2)),
        flatten,
        Dense(512, 4096, relu),
        Dropout(0.5),
        Dense(4096, 4096, relu),
        Dropout(0.5),
        Dense(4096, 10)
    ])
end
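# Editorial aside (not in the original file): a quick shape check explains the
# Dense(512, 4096) layer above. CIFAR-10 inputs are 32×32×3, and five rounds of
# 2×2 max pooling reduce 32 to 1, so `flatten` hands 512 features per image to
# the classifier head:
#
#   julia> m = vgg16();
#   julia> size(m(rand(Float32, 32, 32, 3, 1)))
#   (10, 1)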
Base.@kwdef mutable struct Args
    batchsize::Int = 128
    lr::Float32 = 3f-4
    epochs::Int = 50
    valsplit::Float64 = 0.1
end

function train(; kws...)
    # Initialize the hyperparameters
    args = Args(; kws...)

    # Load the training and validation data
    train_data, val_data = get_processed_data(args)

    train_loader = DataLoader(train_data, batchsize=args.batchsize, shuffle=true)
    val_loader = DataLoader(val_data, batchsize=args.batchsize)

    @info("Constructing model")
    m = vgg16() |> gpu

    loss(m, x, y) = logitcrossentropy(m(x), y)

    ## Training
    # Define the optimizer
    opt = Flux.setup(Adam(args.lr), m)

    @info("Training...")
    for epoch in 1:args.epochs
        @info "Epoch $epoch"

        for (x, y) in train_loader
            x, y = x |> gpu, y |> gpu
            gs = Flux.gradient(m -> loss(m, x, y), m)
            Flux.update!(opt, m, gs[1])
        end

        # report the mean validation loss after each epoch
        validation_loss = 0f0
        for (x, y) in val_loader
            x, y = x |> gpu, y |> gpu
            validation_loss += loss(m, x, y)
        end
        validation_loss /= length(val_loader)
        @show validation_loss
    end

    return m
end

function test(m; kws...)
    args = Args(; kws...)

    test_data = get_test_data()
    test_loader = DataLoader(test_data, batchsize=args.batchsize)

    correct, total = 0, 0
    for (x, y) in test_loader
        x, y = x |> gpu, y |> gpu
        correct += sum(onecold(cpu(m(x))) .== onecold(cpu(y)))
        total += size(y, 2)
    end
    test_accuracy = correct / total

    # Print the final accuracy
    @show test_accuracy
end

if abspath(PROGRAM_FILE) == @__FILE__
    m = train()
    test(m)
end

--------------------------------------------------------------------------------