├── .fluxbot
│   ├── Manifest.toml
│   └── Project.toml
├── .github
│   ├── FUNDING.yml
│   └── workflows
│       └── fluxbot.jl
├── .gitignore
├── .gitlab-ci.yml
├── .gitpod.Dockerfile
├── .gitpod.yml
├── LICENSE.md
├── README.md
├── contrib
│   ├── README.md
│   ├── audio
│   │   └── speech-blstm
│   │       ├── 00-data.jl
│   │       ├── 01-speech-blstm.jl
│   │       ├── Manifest.toml
│   │       ├── Project.toml
│   │       ├── README.md
│   │       ├── TIMIT
│   │       │   └── README.md
│   │       ├── test
│   │       │   └── README.md
│   │       └── train
│   │           └── README.md
│   ├── games
│   │   └── differentiable-programming
│   │       ├── cartpole
│   │       │   ├── DQN.jl
│   │       │   ├── DiffRL.jl
│   │       │   ├── Manifest.toml
│   │       │   ├── Project.toml
│   │       │   └── cuda
│   │       │       ├── Manifest.toml
│   │       │       └── Project.toml
│   │       ├── pendulum
│   │       │   ├── DDPG.jl
│   │       │   ├── DiffRL.jl
│   │       │   ├── Manifest.toml
│   │       │   ├── Project.toml
│   │       │   └── cuda
│   │       │       ├── Manifest.toml
│   │       │       └── Project.toml
│   │       └── trebuchet
│   │           ├── DDPG.jl
│   │           ├── DiffRL.jl
│   │           ├── Manifest.toml
│   │           ├── Project.toml
│   │           └── cuda
│   │               ├── Manifest.toml
│   │               └── Project.toml
│   └── meta-learning
│       ├── Manifest.toml
│       ├── MetaLearning.jl
│       ├── Project.toml
│       ├── fomaml_grad.png
│       ├── linear.jl
│       ├── reptile_grad.png
│       └── utils.jl
├── other
│   ├── autoregressive-process
│   │   ├── Manifest.toml
│   │   ├── Project.toml
│   │   ├── README.md
│   │   ├── loss.png
│   │   ├── model.jl
│   │   └── utils.jl
│   ├── bitstring-parity
│   │   ├── Manifest.toml
│   │   ├── Project.toml
│   │   ├── README.md
│   │   ├── data.jl
│   │   ├── xor1.jl
│   │   ├── xor2.jl
│   │   └── xor3.jl
│   ├── fizzbuzz
│   │   ├── Manifest.toml
│   │   ├── Project.toml
│   │   └── fizzbuzz.jl
│   ├── flux-next
│   │   ├── Manifest.toml
│   │   ├── Project.toml
│   │   └── intro.jl
│   ├── housing
│   │   ├── .gitignore
│   │   ├── Manifest.toml
│   │   ├── Project.toml
│   │   ├── README.md
│   │   ├── housing.jl
│   │   └── img
│   │       └── singleneuron.svg
│   └── iris
│       ├── Manifest.toml
│       ├── Project.toml
│       ├── README.md
│       └── iris.jl
├── script
│   ├── Manifest.toml
│   ├── Notebooks.toml
│   ├── Project.toml
│   ├── convert.jl
│   └── notebook.jl
├── text
│   ├── char-rnn
│   │   ├── .gitignore
│   │   ├── Manifest.toml
│   │   ├── Project.toml
│   │   ├── README.md
│   │   ├── char-rnn.jl
│   │   └── docs
│   │       └── rnn-train.png
│   ├── lang-detection
│   │   ├── .gitignore
│   │   ├── Manifest.toml
│   │   ├── Project.toml
│   │   ├── README.md
│   │   ├── model.jl
│   │   └── scrape.jl
│   ├── nanogpt
│   │   ├── Manifest.toml
│   │   ├── Project.toml
│   │   ├── README.md
│   │   ├── docs
│   │   │   └── Full_GPT_architecture.svg
│   │   └── gpt.jl
│   ├── phonemes
│   │   ├── 0-data.jl
│   │   ├── 1-model.jl
│   │   ├── Manifest.toml
│   │   └── Project.toml
│   └── treebank
│       ├── Manifest.toml
│       ├── Project.toml
│       ├── README.md
│       ├── data.jl
│       └── recursive.jl
├── tutorials
│   ├── 60-minute-blitz
│   │   ├── 60-minute-blitz.jl
│   │   ├── Manifest.toml
│   │   └── Project.toml
│   ├── dataloader
│   │   ├── Manifest.toml
│   │   ├── Project.toml
│   │   ├── README.md
│   │   └── dataloader-image-data.jl
│   └── transfer_learning
│       ├── .gitignore
│       ├── Manifest.toml
│       ├── Project.toml
│       ├── README.md
│       └── transfer_learning.jl
└── vision
    ├── cdcgan_mnist
    │   ├── Manifest.toml
    │   ├── Project.toml
    │   ├── README.md
    │   ├── cGAN_mnist.jl
    │   └── output
    │       ├── cgan_steps_000000.png
    │       ├── cgan_steps_001000.png
    │       ├── cgan_steps_002000.png
    │       ├── cgan_steps_003000.png
    │       ├── cgan_steps_004000.png
    │       ├── cgan_steps_005000.png
    │       ├── cgan_steps_006000.png
    │       ├── cgan_steps_007000.png
    │       ├── cgan_steps_008000.png
    │       ├── cgan_steps_009000.png
    │       ├── cgan_steps_010000.png
    │       ├── cgan_steps_011000.png
    │       ├── cgan_steps_011725.png
    │       └── img_for_readme.png
    ├── conv_mnist
    │   ├── .gitignore
    │   ├── Manifest.toml
    │   ├── Project.toml
    │   ├── README.md
    │   ├── conv_mnist.jl
    │   └── docs
    │       └── LeNet-5.png
    ├── convmixer_cifar10
    │   ├── README.md
    │   ├── convmixer.jl
    │   └── doc
    │       └── convmixerarchi.png
    ├── dcgan_mnist
    │   ├── .gitignore
    │   ├── Manifest.toml
    │   ├── Project.toml
    │   ├── README.md
    │   ├── dcgan_mnist.jl
    │   └── output
    │       ├── dcgan_generator_discriminator.png
    │       ├── dcgan_steps_000000.png
    │       ├── dcgan_steps_001000.png
    │       ├── dcgan_steps_002000.png
    │       ├── dcgan_steps_003000.png
    │       ├── dcgan_steps_004000.png
    │       ├── dcgan_steps_005000.png
    │       ├── dcgan_steps_006000.png
    │       ├── dcgan_steps_007000.png
    │       ├── dcgan_steps_008000.png
    │       ├── dcgan_steps_009000.png
    │       └── dcgan_steps_009380.png
    ├── diffusion_mnist
    │   ├── .DS_Store
    │   ├── Manifest.toml
    │   ├── Project.toml
    │   ├── README.md
    │   ├── diffusion_mnist.jl
    │   ├── diffusion_plot.jl
    │   ├── diffusion_test.jl
    │   └── docs
    │       ├── diff_eq_em.gif
    │       ├── diff_eq_em_images.jpeg
    │       ├── diff_eq_em_plot.png
    │       ├── diff_eq_ode.gif
    │       ├── diff_eq_ode_images.jpeg
    │       ├── diff_eq_ode_plot.png
    │       ├── em_images.jpeg
    │       ├── loss.png
    │       ├── pc_images.jpeg
    │       ├── sampled_noise.jpeg
    │       ├── sde.png
    │       └── unet.png
    ├── mlp_mnist
    │   ├── .gitignore
    │   ├── Manifest.toml
    │   ├── Project.toml
    │   ├── README.md
    │   ├── docs
    │   │   └── mlp.svg
    │   └── mlp_mnist.jl
    ├── spatial_transformer
    │   ├── Manifest.toml
    │   ├── Project.toml
    │   ├── README.md
    │   ├── images
    │   │   └── stn_example.png
    │   └── spatial_transformer.jl
    ├── vae_mnist
    │   ├── .gitignore
    │   ├── Manifest.toml
    │   ├── Project.toml
    │   ├── README.md
    │   ├── docs
    │   │   ├── clustering.png
    │   │   ├── epoch_10.png
    │   │   ├── epoch_20.png
    │   │   ├── epoch_5.png
    │   │   ├── manifold.png
    │   │   ├── original.png
    │   │   └── vae.png
    │   ├── output
    │   │   ├── clustering.png
    │   │   ├── epoch_1.png
    │   │   ├── epoch_10.png
    │   │   ├── epoch_11.png
    │   │   ├── epoch_12.png
    │   │   ├── epoch_13.png
    │   │   ├── epoch_14.png
    │   │   ├── epoch_15.png
    │   │   ├── epoch_16.png
    │   │   ├── epoch_17.png
    │   │   ├── epoch_18.png
    │   │   ├── epoch_19.png
    │   │   ├── epoch_2.png
    │   │   ├── epoch_20.png
    │   │   ├── epoch_3.png
    │   │   ├── epoch_4.png
    │   │   ├── epoch_5.png
    │   │   ├── epoch_6.png
    │   │   ├── epoch_7.png
    │   │   ├── epoch_8.png
    │   │   ├── epoch_9.png
    │   │   ├── manifold.png
    │   │   └── original.png
    │   ├── vae_mnist.jl
    │   └── vae_plot.jl
    └── vgg_cifar10
        ├── Manifest.toml
        ├── Project.toml
        ├── README.md
        ├── docs
        │   └── vgg.png
        └── vgg_cifar10.jl
/.fluxbot/Manifest.toml:
--------------------------------------------------------------------------------
1 | # This file is machine-generated - editing it directly is not advised
2 |
3 | [[Artifacts]]
4 | deps = ["Pkg"]
5 | git-tree-sha1 = "c30985d8821e0cd73870b17b0ed0ce6dc44cb744"
6 | uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
7 | version = "1.3.0"
8 |
9 | [[Base64]]
10 | uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
11 |
12 | [[Dates]]
13 | deps = ["Printf"]
14 | uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
15 |
16 | [[Distributed]]
17 | deps = ["Random", "Serialization", "Sockets"]
18 | uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
19 |
20 | [[FluxBot]]
21 | deps = ["GitHub", "Glob", "Pkg", "Sockets"]
22 | git-tree-sha1 = "7c5fedc22b3e9ca4d7e891c43f91df382bded7d5"
23 | repo-rev = "zoo"
24 | repo-url = "https://github.com/dhairyagandhi96/FluxBot.jl"
25 | uuid = "352bd040-0f98-11ea-1faf-6f930ca83554"
26 | version = "0.1.0"
27 |
28 | [[GitHub]]
29 | deps = ["Base64", "Dates", "HTTP", "JSON", "MbedTLS", "Sockets", "SodiumSeal"]
30 | git-tree-sha1 = "a4f61fc1b1724e6eec1d9333eac2d4b01d8fcc8f"
31 | uuid = "bc5e4493-9b4d-5f90-b8aa-2b2bcaad7a26"
32 | version = "5.4.0"
33 |
34 | [[Glob]]
35 | git-tree-sha1 = "4df9f7e06108728ebf00a0a11edee4b29a482bb2"
36 | uuid = "c27321d9-0574-5035-807b-f59d2c89b15c"
37 | version = "1.3.0"
38 |
39 | [[HTTP]]
40 | deps = ["Base64", "Dates", "IniFile", "MbedTLS", "Sockets", "URIs"]
41 | git-tree-sha1 = "63055ee44b5c2b95ec1921edcf856c60124ff0c3"
42 | uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3"
43 | version = "0.9.2"
44 |
45 | [[IniFile]]
46 | deps = ["Test"]
47 | git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8"
48 | uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f"
49 | version = "0.5.0"
50 |
51 | [[InteractiveUtils]]
52 | deps = ["Markdown"]
53 | uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
54 |
55 | [[JLLWrappers]]
56 | git-tree-sha1 = "a431f5f2ca3f4feef3bd7a5e94b8b8d4f2f647a0"
57 | uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
58 | version = "1.2.0"
59 |
60 | [[JSON]]
61 | deps = ["Dates", "Mmap", "Parsers", "Unicode"]
62 | git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4"
63 | uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
64 | version = "0.21.1"
65 |
66 | [[LibGit2]]
67 | deps = ["Printf"]
68 | uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
69 |
70 | [[Libdl]]
71 | uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
72 |
73 | [[Logging]]
74 | uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
75 |
76 | [[Markdown]]
77 | deps = ["Base64"]
78 | uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
79 |
80 | [[MbedTLS]]
81 | deps = ["Dates", "MbedTLS_jll", "Random", "Sockets"]
82 | git-tree-sha1 = "1c38e51c3d08ef2278062ebceade0e46cefc96fe"
83 | uuid = "739be429-bea8-5141-9913-cc70e7f3736d"
84 | version = "1.0.3"
85 |
86 | [[MbedTLS_jll]]
87 | deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
88 | git-tree-sha1 = "0eef589dd1c26a3ac9d753fe1a8bcad63f956fa6"
89 | uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
90 | version = "2.16.8+1"
91 |
92 | [[Mmap]]
93 | uuid = "a63ad114-7e13-5084-954f-fe012c677804"
94 |
95 | [[Parsers]]
96 | deps = ["Dates"]
97 | git-tree-sha1 = "50c9a9ed8c714945e01cd53a21007ed3865ed714"
98 | uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
99 | version = "1.0.15"
100 |
101 | [[Pkg]]
102 | deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
103 | uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
104 |
105 | [[Printf]]
106 | deps = ["Unicode"]
107 | uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
108 |
109 | [[REPL]]
110 | deps = ["InteractiveUtils", "Markdown", "Sockets"]
111 | uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
112 |
113 | [[Random]]
114 | deps = ["Serialization"]
115 | uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
116 |
117 | [[SHA]]
118 | uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
119 |
120 | [[Serialization]]
121 | uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
122 |
123 | [[Sockets]]
124 | uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
125 |
126 | [[SodiumSeal]]
127 | deps = ["Base64", "Libdl", "libsodium_jll"]
128 | git-tree-sha1 = "80cef67d2953e33935b41c6ab0a178b9987b1c99"
129 | uuid = "2133526b-2bfb-4018-ac12-889fb3908a75"
130 | version = "0.1.1"
131 |
132 | [[Test]]
133 | deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
134 | uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
135 |
136 | [[URIs]]
137 | git-tree-sha1 = "7855809b88d7b16e9b029afd17880930626f54a2"
138 | uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
139 | version = "1.2.0"
140 |
141 | [[UUIDs]]
142 | deps = ["Random", "SHA"]
143 | uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
144 |
145 | [[Unicode]]
146 | uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
147 |
148 | [[libsodium_jll]]
149 | deps = ["Libdl", "Pkg"]
150 | git-tree-sha1 = "7127f5f40332ccfa43ee07dcd0c4d81a27d9bb23"
151 | uuid = "a9144af2-ca23-56d9-984f-0d03f7b5ccf8"
152 | version = "1.0.18+1"
153 |
--------------------------------------------------------------------------------
/.fluxbot/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | FluxBot = "352bd040-0f98-11ea-1faf-6f930ca83554"
3 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [JuliaLang]
2 |
--------------------------------------------------------------------------------
/.github/workflows/fluxbot.jl:
--------------------------------------------------------------------------------
1 | name: FluxBot
2 |
3 | on:
4 | issue_comment:
5 | types: [created, edited]
6 |
7 | jobs:
8 | build:
9 | if: contains(github.event.comment.body, '@ModelZookeeper')
10 | runs-on: ${{ matrix.os }}
11 | strategy:
12 | matrix:
13 | julia-version: [1.5.0]
14 | julia-arch: [x86]
15 | os: [ubuntu-latest]
16 | steps:
17 | - uses: actions/checkout@af513c7a016048ae468971c52ed77d9562c7c819 # v1.0.0
18 | - uses: julia-actions/setup-julia@v1
19 | with:
20 | version: ${{ matrix.julia-version }}
21 | - name: Install dependencies
22 | run: julia --project=.fluxbot/ -e 'using Pkg; Pkg.instantiate()'
23 | - name: FluxBot.respond
24 | env:
25 | FLUXBOT_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
26 | BOT_SECRET: ${{ secrets.BOT_SECRET }}
27 | MODELZOO_TRIGGER_TOKEN: ${{ secrets.MODELZOO_TRIGGER_TOKEN }}
28 | run: julia --project=.fluxbot -e 'using FluxBot; FluxBot.trial()'
29 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | notebooks
2 | .vscode
--------------------------------------------------------------------------------
/.gitlab-ci.yml:
--------------------------------------------------------------------------------
1 | include:
2 | - 'https://raw.githubusercontent.com/JuliaGPU/gitlab-ci/master/templates/v6.yml'
3 |
4 | image: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
5 |
6 | .modelzoo:
7 | stage: deploy
8 |
9 | script:
10 | - julia --project=script -e 'using Pkg; Pkg.instantiate()'
11 | # if triggered by FluxBot
12 | - >
13 | if [ "$FLUXBOT" == "true" ]; then
14 | git clone https://github.com/FluxML/Flux.jl
15 | cd Flux.jl && git fetch origin pull/$PRID/head:test_$PRID
16 | git checkout test_$PRID
17 | julia --project -e 'using Pkg;
18 | Pkg.instantiate();'
19 | cd ..
20 | export FLUX="$PWD/Flux.jl"
21 | export JULIA_LOAD_PATH=".:$FLUX::"
22 | fi
23 | # end
24 |
25 | - julia --project -e 'using Pkg;
26 | Pkg.instantiate();
27 | get(ENV, "FLUXBOT", "") == "true" && Pkg.develop(PackageSpec(path = ENV["FLUX"]));
28 | Pkg.resolve();
29 | Pkg.API.precompile();'
30 |
31 | - cd script
32 | - julia --project convert.jl $TESTSUITE
33 |
34 | after_script:
35 | - apt-get -qq -o=Dpkg::Use-Pty=0 -y install unzip
36 | - julia --project=script -e 'using Pkg; Pkg.instantiate();'
37 | - julia --project=script -e 'using FluxBot; FluxBot.respond();'
38 |
39 | only:
40 | variables:
41 | - $PRID
42 | - $TESTSUITE
43 | - $FLUXBOT
44 | - $REPO_NAME
45 | artifacts:
46 | paths:
47 | - notebooks/*.ipynb
48 |
49 | zoo:1.0:
50 | extends:
51 | - .julia:1.0
52 | - .modelzoo
53 | tags:
54 | - nvidia
55 |
56 | zoo:1.1:
57 | extends:
58 | - .julia:1.1
59 | - .modelzoo
60 | tags:
61 | - nvidia
62 |
63 | zoo:1.2:
64 | extends:
65 | - .julia:1.2
66 | - .modelzoo
67 | tags:
68 | - nvidia
69 |
70 | zoo:1.3:
71 | extends:
72 | - .julia:1.3
73 | - .modelzoo
74 | tags:
75 | - nvidia
76 |
77 | zoo:nightly:
78 | extends:
79 | - .julia:nightly
80 | - .modelzoo
81 | tags:
82 | - nvidia
83 | allow_failure: true
84 |
85 |
--------------------------------------------------------------------------------
/.gitpod.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gitpod/workspace-full
2 |
3 | USER gitpod
4 |
5 | # Install Julia
6 | RUN sudo apt-get update \
7 | && sudo apt-get install -y \
8 | libatomic1 \
9 | gfortran \
10 | perl \
11 | wget \
12 | m4 \
13 | pkg-config \
14 | julia \
15 | && sudo rm -rf /var/lib/apt/lists/*
16 |
--------------------------------------------------------------------------------
/.gitpod.yml:
--------------------------------------------------------------------------------
1 | image:
2 | file: .gitpod.Dockerfile
3 |
4 | vscode:
5 | extensions:
6 | - julialang.language-julia@0.12.3:lgRyBd8rjwUpMGG0C5GAig==
7 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | These examples are licensed under the MIT "Expat" License:
2 |
3 | > Copyright (c) 2017 by Flux contributors;
4 | > https://github.com/FluxML/model-zoo/graphs/contributors
5 | >
6 | > Permission is hereby granted, free of charge, to any person obtaining a copy
7 | > of this software and associated documentation files (the "Software"), to deal
8 | > in the Software without restriction, including without limitation the rights
9 | > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | > copies of the Software, and to permit persons to whom the Software is
11 | > furnished to do so, subject to the following conditions:
12 | >
13 | > The above copyright notice and this permission notice shall be included in all
14 | > copies or substantial portions of the Software.
15 | >
16 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | > SOFTWARE.
23 | >
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Flux Model Zoo
4 |
5 | This repository contains various demonstrations of the [Flux](http://fluxml.github.io/) machine learning library. Any of these may freely be used as a starting point for your own models.
6 |
7 | The models are broadly categorised into the folders [vision](/vision) (e.g. large convolutional neural networks (CNNs)), [text](/text) (e.g. various recurrent neural networks (RNNs) and natural language processing (NLP) models), and [games](/contrib/games) (reinforcement learning, RL). See the README of each model for more information.
8 |
9 | ## Usage
10 |
11 | Each model comes with its own [Julia project](https://pkgdocs.julialang.org/v1/environments/#Using-someone-else's-project). To use this, open Julia in the project folder, and enter
12 |
13 | ```julia
14 | using Pkg; Pkg.activate("."); Pkg.instantiate()
15 | ```
16 |
17 | This will install all needed packages, at the exact versions recorded when the model was last updated. Then you can run the model code with `include("<model-name>.jl")` (substituting the script's actual file name), or by running the model script line by line.
18 |
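For example, from inside `vision/mlp_mnist` (one of the models listed below):

```julia
include("mlp_mnist.jl")   # runs the MNIST MLP example end-to-end
```
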
19 | Models may also be run with NVIDIA GPU support, if you have CUDA installed. Most models have this capability by default, indicated by calls to `gpu` in the model code.
20 |
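For illustration, a minimal sketch of this CPU/GPU-agnostic pattern (the model here is a stand-in, not one of the zoo's examples; on recent Flux versions the GPU path also needs `using CUDA`):

```julia
using Flux

# `gpu` moves models and arrays to the GPU when one is usable,
# and is a no-op otherwise, so the same script runs in both settings.
model = Chain(Dense(4 => 8, relu), Dense(8 => 2)) |> gpu
x = rand(Float32, 4, 16) |> gpu   # a dummy batch of 16 inputs
y = model(x)                      # runs on GPU if available, CPU otherwise
```
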
21 | ### Gitpod Online IDE
22 |
23 | Each model can be used in [Gitpod](https://www.gitpod.io/); just [open the repository in Gitpod](https://gitpod.io/#https://github.com/FluxML/model-zoo)
24 |
25 | * Based on [Gitpod's policies](https://www.gitpod.io/pricing/), free access is limited.
26 | * All of your work will be stored in Gitpod's cloud.
27 | * It isn't an officially maintained feature.
28 |
29 | ## Contributing
30 |
31 | We welcome contributions of new models and documentation.
32 |
33 | ### Share a new model
34 |
35 | If you want to share a new model, we suggest you follow these guidelines:
36 |
37 | * Models should be in a folder with a project and manifest file to pin all relevant packages (see the sketch after this list).
38 | * Models should include a README(.md) to explain what the model is about, how to run it, and what results it achieves (if applicable).
39 | * Models should ideally be CPU/GPU agnostic and not depend directly on GPU functionality.
40 | * Please keep the code short, clean, and self-explanatory, with as little boilerplate as possible.
41 |
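As a sketch of the first guideline above, creating a new model folder with its own project and manifest might look like this (the folder name and package list are only illustrative):

```julia
using Pkg
Pkg.activate("my_new_model")   # hypothetical folder for the new model
Pkg.add("Flux")                # records Flux in Project.toml
Pkg.add("MLDatasets")          # ...plus any other dependencies
# Pkg writes Project.toml and Manifest.toml, pinning exact versions
```
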
42 | ### Create or improve documentation
43 |
44 | You can contribute in one of the following ways:
45 |
46 | * Add or improve documentation to existing models: Write the following information:
47 | * Give a brief introduction to the model’s architecture and the goal it achieves.
48 | * Describe the Flux API that the model demonstrates (high-level API, AD, custom operations, custom layers, etc.).
49 | * Add literature background for the model. More specifically, add articles, blog posts, videos, and any other resource that is helpful to better understand the model.
50 | * Mention the technique that is being demonstrated: briefly describe the learning task (computer vision, regression, NLP, time series, etc.).
51 | * Write in-depth tutorials for a model: You can further extend the documentation of a model and create a tutorial explaining in more detail the architecture, the training routine, how to use your own data, and so forth. After you write a tutorial, create a PR with it for the [Tutorials](https://fluxml.ai/tutorials/) section on the [FluxML](https://fluxml.ai/) website.
52 |
53 | ### Update a model
54 |
55 | Each example lists the version of Flux for which it was most recently updated.
56 | Bringing them up to the latest is a great way to learn!
57 | Flux has a [NEWS page](https://github.com/FluxML/Flux.jl/blob/master/NEWS.md) listing important changes.
58 | (For other packages, see their releases pages: [MLUtils](https://github.com/JuliaML/MLUtils.jl/releases), [MLDatasets](https://github.com/JuliaML/MLDatasets.jl/releases), etc.)
59 |
60 | To run the old examples, Flux v0.11 can be installed and run on [Julia 1.6, the LTS version](https://julialang.org/downloads/#long_term_support_release).
61 | Flux v0.12 works on Julia 1.8.
62 | Flux v0.14 is the latest right now; this and v0.13 are marked with ☀️. Models upgraded to use
63 | explicit gradients (v0.13.9+ or v0.14) have a `+`.
64 |
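For instance, pinning an old Flux release in a scratch environment might look like this (a sketch; the environment name is arbitrary):

```julia
using Pkg
Pkg.activate("old-example-env")            # any throwaway environment
Pkg.add(name = "Flux", version = "0.11")   # old Flux, for Julia 1.6 (LTS)
```
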
65 | ## Examples in the Model Zoo
66 |
67 | **Vision**
68 | * MNIST
69 | * [Simple multi-layer perceptron](vision/mlp_mnist) ☀️ v0.13 +
70 | * [Simple ConvNet (LeNet)](vision/conv_mnist) ☀️ v0.14
71 | * [Variational Auto-Encoder](vision/vae_mnist) ☀️ v0.13 +
72 | * [Deep Convolutional Generative Adversarial Networks](vision/dcgan_mnist) ☀️ v0.13 +
73 | * [Conditional Deep Convolutional Generative Adversarial Networks](vision/cdcgan_mnist) ☀️ v0.13
74 | * [Score-Based Generative Modeling (Diffusion Model)](vision/diffusion_mnist) ☀️ v0.13
75 | * [Spatial Transformer](vision/spatial_transformer) ☀️ v0.13 +
76 | * CIFAR10
77 | * [VGG 16/19](vision/vgg_cifar10) ☀️ v0.13 +
78 | * [ConvMixer "Patches are all you need?"](vision/convmixer_cifar10/) ☀️ v0.13
79 |
80 | **Text**
81 | * [CharRNN](text/char-rnn) ☀️ v0.13 +
82 | * [NanoGPT](text/nanogpt) ☀️ v0.14
83 | * [Character-level language detection](text/lang-detection) ☀️ v0.13 +
84 | * [Seq2Seq phoneme detection on CMUDict](text/phonemes) ⛅️ v0.11
85 | * [Recursive net on IMDB sentiment treebank](text/treebank) ⛅️ v0.11
86 |
87 | **Other** & contributed models
88 | * [Logistic Regression Iris](other/iris/iris.jl) ☀️ v0.13 +
89 | * [Autoregressive Model](other/autoregressive-process/) ☀️ v0.13 +
90 | * [BitString Parity Challenge](other/bitstring-parity) ⛅️ v0.11
91 | * [MLP on housing data](other/housing/) (low level API) ⛅️ v0.11
92 | * [FizzBuzz](other/fizzbuzz/fizzbuzz.jl) ☀️ v0.13 +
93 | * [Meta-Learning](contrib/meta-learning/MetaLearning.jl) ❄️ v0.7
94 | * [Speech recognition](contrib/audio/speech-blstm) ❄️ v0.6
95 |
96 | **Tutorials**
97 | * [A 60 Minute Blitz](tutorials/60-minute-blitz/60-minute-blitz.jl) ⛅️ v0.11
98 | * [DataLoader example with image data](tutorials/dataloader) ⛅️ v0.11
99 | * [Transfer Learning](tutorials/transfer_learning/transfer_learning.jl) ☀️ v0.13 +
100 |
101 | ## Examples Elsewhere
102 |
103 | **MLJFlux** is a bridge to [MLJ.jl](https://github.com/alan-turing-institute/MLJ.jl), a package for mostly non-neural-network machine learning. They have some examples of interest which, like the model zoo's examples, each include a local Project & Manifest file:
104 |
105 | * [Iris](https://github.com/FluxML/MLJFlux.jl/tree/dev/examples/iris) ⛅️ v0.11
106 | * [Boston](https://github.com/FluxML/MLJFlux.jl/tree/dev/examples/boston) ⛅️ v0.11
107 | * [MNIST](https://github.com/FluxML/MLJFlux.jl/tree/dev/examples/mnist) ⛅️ v0.11
108 |
--------------------------------------------------------------------------------
/contrib/README.md:
--------------------------------------------------------------------------------
1 | # Model Contributions
2 |
3 | The models here have been contributed by community members to showcase models that can be applied in different fields.
4 |
5 | These models come with their own environments (the Project.toml and Manifest.toml files), so please make sure to use them when working with these models, as shown below.
6 |
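For example, a minimal sketch of activating a model's environment, assuming Julia was started inside that model's folder:

```julia
using Pkg
Pkg.activate(".")     # use this model's Project.toml / Manifest.toml
Pkg.instantiate()     # install the exact pinned versions
```
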
--------------------------------------------------------------------------------
/contrib/audio/speech-blstm/00-data.jl:
--------------------------------------------------------------------------------
1 | # 00-data.jl
2 | # Extracts audio features from TIMIT to be used in speech recognition
3 |
4 | using Flux: onehotbatch
5 | using WAV
6 | using BSON
7 |
8 | # This uses wookay's fork of MFCC, updated to work with Julia v0.7/1.0
9 | # https://github.com/wookay/MFCC.jl
10 | using MFCC
11 |
12 | # Define constants that will be used
13 | const TRAINING_DATA_DIR = "TIMIT/TRAIN"
14 | const TEST_DATA_DIR = "TIMIT/TEST"
15 |
16 | const TRAINING_OUT_DIR = "train"
17 | const TEST_OUT_DIR = "test"
18 |
19 | # Make dictionary to map from phones to class numbers
20 | const PHONES = split("h# q eh dx iy r ey ix tcl sh ow z s hh aw m t er l w aa hv ae dcl y axr d kcl k ux ng gcl g ao epi ih p ay v n f jh ax en oy dh pcl ah bcl el zh uw pau b uh th ax-h em ch nx eng")
21 | translations = Dict(phone=>i for (i, phone) in enumerate(PHONES))
22 | translations["sil"] = translations["h#"]
23 | const PHONE_TRANSLATIONS = translations
24 |
25 | # Make dictionary to perform class folding
26 | const FOLDINGS = Dict(
27 | "ao" => "aa",
28 | "ax" => "ah",
29 | "ax-h" => "ah",
30 | "axr" => "er",
31 | "hv" => "hh",
32 | "ix" => "ih",
33 | "el" => "l",
34 | "em" => "m",
35 | "en" => "n",
36 | "nx" => "n",
37 | "eng" => "ng",
38 | "zh" => "sh",
39 | "pcl" => "sil",
40 | "tcl" => "sil",
41 | "kcl" => "sil",
42 | "bcl" => "sil",
43 | "dcl" => "sil",
44 | "gcl" => "sil",
45 | "h#" => "sil",
46 | "pau" => "sil",
47 | "epi" => "sil",
48 | "ux" => "uw"
49 | )
50 |
51 | FRAME_LENGTH = 0.025 # s (25 ms)
52 | FRAME_INTERVAL = 0.010 # s (10 ms)
53 |
54 | """
55 | makeFeatures(phnFname, wavFname)
56 |
57 | Extracts Mel filterbanks and associated labels from `wavFname` and `phnFname`.
58 | """
59 | function makeFeatures(phnFname, wavFname)
60 | samps, sr = wavread(wavFname)
61 | samps = vec(samps)
62 |
63 | mfccs, _, _ = mfcc(samps, sr, :rasta; wintime=FRAME_LENGTH, steptime=FRAME_INTERVAL)
64 |
65 | local lines
66 | open(phnFname, "r") do f
67 | lines = readlines(f)
68 | end
69 |
70 | boundaries = Vector()
71 | labels = Vector()
72 |
73 | # first field in the file is the beginning sample number, which isn't
74 | # needed for calculating where the labels are
75 | for line in lines
76 | _, boundary, label = split(line)
77 | boundary = parse(Int64, boundary)
78 | push!(boundaries, boundary)
79 | push!(labels, label)
80 | end
81 |
82 | labelInfo = collect(zip(boundaries, labels))
83 | labelInfoIdx = 1
84 | boundary, label = labelInfo[labelInfoIdx]
85 | nSegments = length(labelInfo)
86 |
87 | frameLengthSamples = FRAME_LENGTH * sr
88 | frameIntervalSamples = FRAME_INTERVAL * sr
89 | halfFrameLength = frameLengthSamples / 2 # half a frame, in samples
90 |
91 | # Begin generating sequence labels by looping through the MFCC
92 | # frames
93 |
94 | labelSequence = Vector() # Holds the sequence of labels
95 |
96 | idxsToDelete = Vector() # To store indices for frames labeled as 'q'
97 | for i=1:size(mfccs, 1)
98 | win_end = frameLengthSamples + (i-1)*frameIntervalSamples
99 |
100 | # Move on to next label if current frame of samples is more than half
101 | # way into next labeled section and there are still more labels to
102 | # iterate through
103 | if labelInfoIdx < nSegments && win_end - boundary > halfFrameLength
104 |
105 | labelInfoIdx += 1
106 | boundary, label = labelInfo[labelInfoIdx]
107 | end
108 |
109 | if label == "q"
110 | push!(idxsToDelete, i)
111 | continue
112 | end
113 |
114 | push!(labelSequence, label)
115 | end
116 |
117 | # Remove the frames that were labeled as 'q'
118 | mfccs = mfccs[[i for i in 1:size(mfccs,1) if !(i in Set(idxsToDelete))],:]
119 |
120 | mfccDeltas = deltas(mfccs, 2)
121 | features = hcat(mfccs, mfccDeltas)
122 | return (features, labelSequence)
123 | end
124 |
125 | """
126 | createData(data_dir, out_dir)
127 |
128 | Extracts data from files in `data_dir` and saves results in `out_dir`.
129 | """
130 | function createData(data_dir, out_dir)
131 |
132 | ! isdir(out_dir) && mkdir(out_dir)
133 |
134 | for (root, dirs, files) in walkdir(data_dir)
135 |
136 | # Exclude the files that are part of the speaker accent readings
137 | files = [x for x in files if ! occursin("SA", x)]
138 |
139 | phnFnames = [x for x in files if occursin("PHN", x)]
140 | wavFnames = [x for x in files if occursin("WAV", x)]
141 |
142 | one_dir_up = basename(root)
143 | print("$(root)\r")
144 |
145 | for (wavFname, phnFname) in zip(wavFnames, phnFnames)
146 | phn_path = joinpath(root, phnFname)
147 | wav_path = joinpath(root, wavFname)
148 |
149 | x, y = makeFeatures(phn_path, wav_path)
150 |
151 | # Generate class nums; there are 61 total classes, but only 39 are
152 | # used after folding.
153 | y = [PHONE_TRANSLATIONS[x] for x in y]
154 | class_nums = [n for n in 1:61]
155 | y = onehotbatch(y, class_nums)
156 |
157 | base, _ = splitext(phnFname)
158 | dat_name = one_dir_up * base * ".bson"
159 | dat_path = joinpath(out_dir, dat_name)
160 | BSON.@save dat_path x y
161 | end
162 | end
163 | println()
164 | end
165 |
166 | createData(TRAINING_DATA_DIR, TRAINING_OUT_DIR)
167 | createData(TEST_DATA_DIR, TEST_OUT_DIR)
168 |
--------------------------------------------------------------------------------
/contrib/audio/speech-blstm/01-speech-blstm.jl:
--------------------------------------------------------------------------------
1 | # 01-speech-blstm.jl
2 | #
3 | # See Graves & Schmidhuber ([Graves, A., &
4 | # Schmidhuber, J. (2005). Framewise phoneme classification with
5 | # bidirectional LSTM and other neural network architectures. Neural
6 | # Networks, 18(5-6), 602-610.]).
7 |
8 | using Flux
9 | using Flux: crossentropy, softmax, flip, sigmoid, LSTM, @epochs
10 | using BSON
11 | using Random
12 |
13 | # Paths to the training and test data directories
14 | const TRAINDIR = "train"
15 | const TESTDIR = "test"
16 | const EPOCHS = 20
17 |
18 | # Component layers of the bidirectional LSTM layer
19 | forward = LSTM(26, 93)
20 | backward = LSTM(26, 93)
21 | output = Dense(186, 61)
22 |
23 | """
24 | BLSTM(x)
25 |
26 | BLSTM layer using above LSTM layers
27 |
28 | # Parameters
29 | * **x** A 2-tuple containing the forward and backward time samples;
30 | the first is from processing the sequence forward, and the second
31 | is from processing it backward
32 |
33 | # Returns
34 | * The concatenation of the forward and backward LSTM predictions
35 | """
36 | BLSTM(x) = vcat.(forward.(x), flip(backward, x))
37 |
38 | """
39 | model(x)
40 |
41 | The chain of functions representing the trained model.
42 |
43 | # Parameters
44 | * **x** The utterance that the model should process
45 |
46 | # Returns
47 | * The model's predictions for each time step in `x`
48 | """
49 | model(x) = softmax.(output.(BLSTM(x)))
50 |
51 | """
52 | loss(x, y)
53 |
54 | Calculates the categorical cross-entropy loss for an utterance
55 |
56 | # Parameters
57 | * **x** Iterable containing the frames to classify
58 | * **y** Iterable containing the labels corresponding to the frames
59 | in `x`
60 |
61 | # Returns
62 | * The calculated loss value
63 |
64 | # Side-effects
65 | * Resets the state in the BLSTM layer
66 | """
67 | function loss(x, y)
68 | l = sum(crossentropy.(model(x), y))
69 | Flux.reset!((forward, backward))
70 | return l
71 | end
72 |
73 | """
74 | readData(dataDir)
75 |
76 | Reads in the data contained in a specified directory
77 |
78 | # Parameters
79 | * **dataDir** String of the path to the directory containing the data
80 |
81 | # Return
82 | * **Xs** Vector where each element is a vector of the frames for
83 | one utterance
84 | * **Ys** A vector where each element is a vector of the labels for
85 | the frames for one utterance
86 | """
87 | function readData(dataDir)
88 | fnames = readdir(dataDir)
89 |
90 | Xs = Vector()
91 | Ys = Vector()
92 |
93 | for (i, fname) in enumerate(fnames)
94 | print(string(i) * "/" * string(length(fnames)) * "\r")
95 | BSON.@load joinpath(dataDir, fname) x y
96 | x = [x[i,:] for i in 1:size(x,1)]
97 | y = [y[:,i] for i in 1:size(y,2)]
98 | push!(Xs, x)
99 | push!(Ys, y)
100 | end
101 |
102 | return (Xs, Ys)
103 | end
104 |
105 | """
106 | evaluateAccuracy(data)
107 |
108 | Evaluates the accuracy of the model on a set of data; can be used
109 | either for validation or test accuracy
110 |
111 | # Parameters
112 | * **data** An iterable of paired values where the first element is
113 | all the frames for a single utterance, and the second is the
114 | associated frame labels to compare the model's predictions against
115 |
116 | # Returns
117 | * The predicted accuracy value as a proportion of the number of
118 | correct predictions over the total number of predictions made
119 | """
120 | function evaluateAccuracy(data)
121 | correct = Vector()
122 | for (x, y) in data
123 | y = argmax.(y)
124 | ŷ = argmax.(model(x))
125 | Flux.reset!((forward, backward))
126 | append!(correct, [ŷ_n == y_n for (ŷ_n, y_n) in zip(ŷ, y)])
127 | end
128 | sum(correct) / length(correct)
129 | end
130 |
131 | function main()
132 |
133 | println("Loading files")
134 | Xs, Ys = readData(TRAINDIR)
135 | data = collect(zip(Xs, Ys))
136 |
137 | valData = data[1:184]
138 | data = data[185:end]
139 |
140 | # Begin training
141 | println("Beginning training")
142 |
143 | opt = Momentum(params((forward, backward, output)), 10.0^-5; ρ=0.9)
144 |
145 | i = 0
146 |
147 | @epochs EPOCHS begin
148 |
149 | i += 1
150 |
151 | shuffle!(data)
152 | valData = valData[shuffle(1:length(valData))]
153 |
154 | Flux.train!(loss, data, opt)
155 |
156 | BSON.@save "model_epoch$(i).bson" forward backward output
157 |
158 | print("Validating\r")
159 | val_acc = evaluateAccuracy(valData)
160 | println("Val acc. " * string(val_acc))
161 | println()
162 | end
163 |
164 | # Clean up some memory
165 | valData = nothing
166 | data = nothing
167 | Xs = nothing
168 | Ys = nothing
169 | GC.gc()
170 |
171 | # Test model
172 | print("Testing\r")
173 | Xs_test, Ys_test = readData(TESTDIR)
174 | test_data = collect(zip(Xs_test, Ys_test))
175 | test_acc = evaluateAccuracy(test_data)
176 | println("Test acc. " * string(test_acc))
177 | println()
178 | end
179 |
180 | main()
181 |
--------------------------------------------------------------------------------
/contrib/audio/speech-blstm/Project.toml:
--------------------------------------------------------------------------------
1 | name = "FramewiseSpeechNetwork"
2 | authors = ["Matthew C. Kelley"]
3 |
4 | [deps]
5 | BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
6 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
7 | MFCC = "ca7b5df7-6146-5dcc-89ec-36256279a339"
8 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
9 | WAV = "8149f6b0-98f6-5db9-b78f-408fbbb8ef88"
10 |
--------------------------------------------------------------------------------
/contrib/audio/speech-blstm/README.md:
--------------------------------------------------------------------------------
1 | This model is an implementation of the neural network for speech recognition described in Graves & Schmidhuber (2005). It takes in frames of frequency information derived from the waveform, and it predicts which phone class the frame belongs to, among a reduced set of English phones. The training is run using the [TIMIT data set (Garofolo et al., 1993)](https://catalog.ldc.upenn.edu/LDC93S1).
2 |
3 | # How to use these scripts
4 |
5 | This implementation is broken down into two separate scripts. The first, `00-data.jl`, extracts the appropriate speech features from the data in TIMIT and saves them to file. It assumes that you have the TIMIT speech corpus extracted, [converted into RIFF WAV file format](https://web.archive.org/web/20180528013655/https://stackoverflow.com/questions/47370167/change-huge-amount-of-data-from-nist-to-riff-wav-file), and in the same directory as the script itself. It takes no arguments, and is run as
6 |
7 | ```bash
8 | julia 00-data.jl
9 | ```
10 |
11 | It will print out which directory it is working on as it goes so you can track the progress as it extracts the training and testing data.
12 |
13 | The second script, `01-speech-blstm.jl`, trains the network. It loads in the speech data extracted by `00-data.jl` and runs it through the network for 20 epochs, which is on average how long Graves & Schmidhuber needed to train the network. (The number of epochs can be changed by modifying the value of the `EPOCHS` variable in the script.) The script is run as
14 |
15 | ```bash
16 | julia 01-speech-blstm.jl
17 | ```
18 |
19 | At the end of each epoch, the script prints out the validation accuracy and saves a BSON file with the model's current weights. After running through all the epochs, the script prints out the testing accuracy on the default holdout test set.
20 |
21 | # Using a trained model
22 |
23 | It is simple to use the model once it's been trained. Load the model from the BSON file, and use the `model(x)` function from `01-speech-blstm.jl` on some data prepared using the same procedure as in `00-data.jl`. The phoneme class numbers can be determined by using `argmax`. The `Flux` and `BSON` packages will need to be loaded beforehand.
24 |
25 | ```julia
26 | using Flux, BSON
27 | using Flux: flip, softmax
28 | BSON.@load "model_epoch20.bson" forward backward output
29 | BLSTM(x) = vcat.(forward.(x), flip(backward, x))
30 | model(x) = softmax.(output.(BLSTM(x)))
31 | ŷ = model(x) # where x is the utterance you want transcribed
32 | phonemes = argmax.(ŷ)
33 | ```
34 |
35 | # References
36 |
37 | Garofolo, J. S., Lamel, L. F., Fisher, W. M., Fiscus, J. G., Pallett, D. S., & Dahlgren, N. L. (1993). The DARPA TIMIT acoustic-phonetic continuous speech corpus CD-ROM. Linguistic Data Consortium.
38 |
39 | Graves, A., & Schmidhuber, J. (2005). Framewise phoneme classification with bidirectional LSTM and other neural network architectures. *Neural Networks, 18*(5-6), 602-610.
40 |
--------------------------------------------------------------------------------
/contrib/audio/speech-blstm/TIMIT/README.md:
--------------------------------------------------------------------------------
1 | This is the folder where the TIMIT data should be placed after downloading it from the [Linguistic Data Consortium](https://www.ldc.upenn.edu/). It is not included in this repository for copyright and, secondarily, space restrictions.
2 |
--------------------------------------------------------------------------------
/contrib/audio/speech-blstm/test/README.md:
--------------------------------------------------------------------------------
1 | This is the folder where the TIMIT data should be placed after downloading it from the [Linguistic Data Consortium](https://www.ldc.upenn.edu/). It is not included in this repository for copyright and, secondarily, space restrictions.
2 |
--------------------------------------------------------------------------------
/contrib/audio/speech-blstm/train/README.md:
--------------------------------------------------------------------------------
1 | This is the folder where the TIMIT data should be placed after downloading it from the [Linguistic Data Consortium](https://www.ldc.upenn.edu/). It is not included in this repository for copyright and, secondarily, space restrictions.
2 |
--------------------------------------------------------------------------------
/contrib/games/differentiable-programming/cartpole/DQN.jl:
--------------------------------------------------------------------------------
1 | using Flux, Gym, Printf, Zygote
2 | using Statistics: mean
3 | using DataStructures: CircularBuffer
4 | using Distributions: sample
5 | #using CuArrays
6 |
7 | # Load game environment
8 | env = make("CartPole-v0")
9 | reset!(env)
10 |
11 | # ----------------------------- Parameters -------------------------------------
12 |
13 | STATE_SIZE = length(state(env)) # 4
14 | ACTION_SIZE = length(env._env.action_space) # 2
15 | MEM_SIZE = 100_000
16 | BATCH_SIZE = 64
17 | γ = 1f0 # discount rate
18 |
19 | # Exploration params
20 | ϵ = 1f0 # Initial exploration rate
21 | ϵ_MIN = 1f-2 # Final exploration rate
22 | ϵ_DECAY = 995f-3
23 |
24 | # Optimiser params
25 | η = 1f-2 # Learning rate
26 | η_decay = 1f-3
27 |
28 | memory = CircularBuffer{Any}(MEM_SIZE) # Used to remember past results
29 |
30 | # ------------------------------ Model Architecture ----------------------------
31 |
32 | model = Chain(Dense(STATE_SIZE, 24, tanh),
33 | Dense(24, 48, tanh),
34 | Dense(48, ACTION_SIZE)) |> gpu
35 |
36 | loss(x, y) = Flux.mse(model(x), y)
37 |
38 | opt = Flux.Optimiser(ADAM(η), InvDecay(η_decay))
39 |
40 | # ----------------------------- Helper Functions -------------------------------
41 |
42 | get_ϵ(e) = max(ϵ_MIN, min(ϵ, 1f0 - log10(e * ϵ_DECAY)))
43 |
44 | remember(state, action, reward, next_state, done) =
45 | push!(memory, (state, action, reward, next_state, done))
46 |
47 | function action(state, train=true)
48 | train && rand() ≤ get_ϵ(e) && (return Gym.sample(env._env.action_space)) # ϵ-greedy; e is the global episode counter
49 | act_values = model(state |> gpu)
50 | return Flux.onecold(act_values)
51 | end
52 |
53 | function replay()
54 | global ϵ
55 | batch_size = min(BATCH_SIZE, length(memory))
56 | minibatch = sample(memory, batch_size, replace = false)
57 |
58 | x = []
59 | y = []
60 | for (iter, (state, action, reward, next_state, done)) in enumerate(minibatch)
61 | target = reward
62 | if !done
63 | target += γ * maximum(model(next_state |> gpu))
64 | end
65 |
66 | target_f = model(state |> gpu)
67 | target_f[action] = target
68 |
69 | push!(x, state)
70 | push!(y, target_f)
71 | end
72 | x = hcat(x...) |> gpu
73 | y = hcat(y...) |> gpu
74 |
75 | grads = Zygote.gradient(()->loss(x, y), params(model))
76 | Flux.Optimise.update!(opt, params(model), grads)
77 |
78 | ϵ *= ϵ > ϵ_MIN ? ϵ_DECAY : 1.0f0
79 | end
80 |
81 | function episode!(env)
82 | reset!(env)
83 | while !game_over(env)
84 | #render(env)
85 | s = state(env)
86 | a = action(s, trainable(env))
87 | s′, r, done, _ = step!(env, a)
88 | trainable(env) && remember(s, a, r, s′, done)
89 | end
90 |
91 | env.total_reward
92 | end
93 |
94 | # -------------------------------- Testing -------------------------------------
95 |
96 | function test(env::EnvWrapper)
97 | score_mean = 0f0
98 | testmode!(env)
99 | for _=1:100
100 | total_reward = episode!(env)
101 | score_mean += total_reward / 100
102 | end
103 | testmode!(env, false)
104 | return score_mean
105 | end
106 |
107 | # ------------------------------ Training --------------------------------------
108 |
109 | e = 1
110 | while true
111 | global e
112 | total_reward = @sprintf "%6.2f" episode!(env)
113 | print("Episode: $e | Score: $total_reward | ")
114 | replay()
115 |
116 | score_mean = test(env)
117 | score_mean_str = @sprintf "%6.2f" score_mean
118 | print("Mean score over 100 test episodes: " * score_mean_str)
119 |
120 | println()
121 |
122 | if score_mean > env.reward_threshold
123 | println("CartPole-v0 solved!")
124 | break
125 | end
126 | e += 1
127 | end
128 |
--------------------------------------------------------------------------------
/contrib/games/differentiable-programming/cartpole/DiffRL.jl:
--------------------------------------------------------------------------------
1 | using Flux, Gym, Printf, Zygote
2 | using Zygote: @adjoint
3 | using Flux.Optimise: update!
4 | using Statistics: mean
5 | #using CuArrays
6 |
7 | import Base.sign
8 |
9 | #Load game environment
10 |
11 | env = make("CartPole-v0")
12 | reset!(env)
13 |
14 | #ctx = Ctx(env)
15 |
16 | #display(ctx.s)
17 | #using Blink# when not on Juno
18 | #body!(Blink.Window(), ctx.s)
19 |
20 | # ----------------------------- Parameters -------------------------------------
21 |
22 | STATE_SIZE = length(env._env.state)
23 | ACTION_SIZE = length(env._env.action_space)
24 | MAX_TRAIN_REWARD = env._env.x_threshold * env._env.θ_threshold_radians
25 | SEQ_LEN = 8
26 |
27 | # Optimiser params
28 | η = 3f-2
29 | # ------------------------------ Model Architecture ----------------------------
30 | sign(x) = Base.sign.(x) # broadcast sign; not differentiable on its own
31 | @adjoint sign(x) = sign(x), x̄ -> (x̄,) # straight-through gradient: pass x̄ through unchanged
32 |
33 | model = Chain(Dense(STATE_SIZE, 24, relu),
34 | Dense(24, 48, relu),
35 | Dense(48, 1, tanh), x->sign(x)) |> gpu
36 |
37 | opt = ADAM(η)
38 |
39 | action(state) = state |> model |> (model_output) -> (3 .+ model_output) / 2 # maps sign output {-1, 1} to env actions {1, 2}
40 |
41 | loss(rewards) = Flux.mse(rewards, MAX_TRAIN_REWARD)
42 |
43 | # ----------------------------- Helper Functions -------------------------------
44 |
45 | function train_reward(env::EnvWrapper)
46 | s = env._env.state
47 | x, ẋ, θ, θ̇ = s
48 | # Custom reward for training
49 | # Product of Triangular function over x-axis and θ-axis
50 | # Min reward = 0, Max reward = env.x_threshold * env.θ_threshold_radians
51 | x_upper = env._env.x_threshold - x
52 | x_lower = env._env.x_threshold + x
53 |
54 | r_x = max(0f0, min(x_upper, x_lower))
55 |
56 | θ_upper = env._env.θ_threshold_radians - θ
57 | θ_lower = env._env.θ_threshold_radians + θ
58 |
59 | r_θ = max(0f0, min(θ_upper, θ_lower))
60 |
61 | return r_x * r_θ
62 | end
63 |
64 | function μEpisode(env::EnvWrapper)
65 | l = 0
66 | for frames ∈ 1:SEQ_LEN
67 | #render(env, ctx)
68 | #sleep(0.01)
69 | a = action(env._env.state)
70 | s′, r, done, _ = step!(env, a)
71 |
72 | if trainable(env)
73 | l += loss(train_reward(env))
74 | end
75 | game_over(env) && break
76 | end
77 | return l
78 | end
79 |
80 | function episode!(env::EnvWrapper)
81 | reset!(env)
82 | while !game_over(env)
83 | if trainable(env)
84 | grads = gradient(()->μEpisode(env), params(model))
85 | update!(opt, params(model), grads)
86 | else
87 | μEpisode(env)
88 | end
89 | end
90 | env.total_reward
91 | end
92 |
93 | # -------------------------------- Testing -------------------------------------
94 |
95 | function test(env::EnvWrapper)
96 | score_mean = 0f0
97 | testmode!(env)
98 | for _=1:100
99 | total_reward = episode!(env)
100 | score_mean += total_reward / 100
101 | end
102 | testmode!(env, false)
103 | return score_mean
104 | end
105 |
106 | # ------------------------------ Training --------------------------------------
107 |
108 | e = 1
109 | while true
110 | global e
111 | total_reward = @sprintf "%6.2f" episode!(env)
112 | print("Episode: $e | Score: $total_reward | ")
113 |
114 | score_mean = test(env)
115 | score_mean_str = @sprintf "%6.2f" score_mean
116 | print("Mean score over 100 test episodes: " * score_mean_str)
117 |
118 | println()
119 |
120 | if score_mean > env.reward_threshold
121 | println("CartPole-v0 solved!")
122 | break
123 | end
124 | e += 1
125 | end
126 |
--------------------------------------------------------------------------------
/contrib/games/differentiable-programming/cartpole/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
3 | DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
4 | Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
5 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
6 | Gym = "56b9baea-2481-11e9-37ae-75904354ad8c"
7 | IRTools = "7869d1d1-7146-5819-86e3-90919afe41df"
8 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
9 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
10 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
11 | Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
12 |
--------------------------------------------------------------------------------
/contrib/games/differentiable-programming/cartpole/cuda/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
3 | CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae"
4 | DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
5 | Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
6 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
7 | Gym = "56b9baea-2481-11e9-37ae-75904354ad8c"
8 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
9 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
10 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
11 |
--------------------------------------------------------------------------------
/contrib/games/differentiable-programming/pendulum/DDPG.jl:
--------------------------------------------------------------------------------
1 | using Flux, Gym, Printf, Zygote
2 | using Flux.Tracker: data
3 | using Flux.Optimise: update!
4 | using Statistics: mean
5 | using DataStructures: CircularBuffer
6 | using Distributions: sample
7 |
8 | #using CuArrays
9 |
10 | #Load game environment
11 |
12 | env = make("Pendulum-v0")
13 | reset!(env)
14 |
15 | # ----------------------------- Parameters -------------------------------------
16 |
17 | STATE_SIZE = length(state(env))
18 | ACTION_SIZE = 1#length(env.actions)
19 | ACTION_BOUND = env._env.action_space.high[1]
20 | MAX_EP = 50_000
21 | MAX_EP_LENGTH = 200
22 |
23 | BATCH_SIZE = 64
24 | MEM_SIZE = 100_000
25 | MIN_EXP_SIZE = 50_000
26 |
27 | γ = 99f-2 # discount rate
28 |
29 | τ = 1f-3 # for running average while updating target networks
30 | η_act = 1f-4 # Learning rate
31 | η_crit = 1f-3
32 | L2_DECAY = 1f-2
33 |
34 | # Ornstein-Uhlenbeck Noise params
35 | μ = 0f0
36 | θ = 15f-2
37 | σ = 2f-1
38 |
39 | # --------------------------------- Memory ------------------------------------
40 |
41 | memory = CircularBuffer{Any}(MEM_SIZE)
42 |
43 | function getData(batch_size = BATCH_SIZE)
44 | # Getting data in shape
45 | minibatch = sample(memory, batch_size)
46 | x = hcat(minibatch...)
47 |
48 | s = hcat(x[1, :]...) |> gpu
49 | a = hcat(x[2, :]...) |> gpu
50 | r = hcat(x[3, :]...) |> gpu
51 | s′ = hcat(x[4, :]...) |> gpu
52 | s_mask = .!hcat(x[5, :]...) |> gpu
53 |
54 | return s, a, r, s′, s_mask
55 | end
56 |
57 | # -------------------------------- Action Noise --------------------------------
58 |
59 | struct OUNoise
60 | μ
61 | θ
62 | σ
63 | X
64 | end
65 |
66 | ou = OUNoise(μ, θ, σ, zeros(Float32, ACTION_SIZE) |> gpu)
67 |
68 | function sample_noise(ou::OUNoise)
69 | dx = ou.θ * (ou.μ .- ou.X)
70 | dx .+= ou.σ * randn(Float32, length(ou.X)) |> gpu
71 | ou.X .+= dx
72 | end
73 |
74 | # Noise scale
75 | τ_ = 25
76 | ϵ = exp(-1f0 / τ_)
77 | noise_scale = 1f0 / ACTION_BOUND
78 |
79 | # ----------------------------- Model Architecture -----------------------------
80 |
81 | w_init(dims...) = 6f-3rand(Float32, dims...) .- 3f-3
82 |
83 | actor = Chain(Dense(STATE_SIZE, 400, relu),
84 | Dense(400, 300, relu),
85 | Dense(300, ACTION_SIZE, tanh, initW=w_init),
86 | x -> x * ACTION_BOUND) |> gpu
87 | actor_target = deepcopy(actor)
88 |
89 | # Critic model
90 | struct crit
91 | state_crit
92 | act_crit
93 | sa_crit
94 | end
95 |
96 | Flux.@treelike crit
97 |
98 | function (c::crit)(state, action)
99 | s = c.state_crit(state)
100 | a = c.act_crit(action)
101 | c.sa_crit(relu.(s .+ a))
102 | end
103 |
104 | Base.deepcopy(c::crit) = crit(deepcopy(c.state_crit),
105 | deepcopy(c.act_crit),
106 | deepcopy(c.sa_crit))
107 |
108 | critic = crit(Chain(Dense(STATE_SIZE, 400, relu), Dense(400, 300)) |> gpu,
109 | Dense(ACTION_SIZE, 300) |> gpu,
110 | Dense(300, 1, initW=w_init) |> gpu)
111 | critic_target = deepcopy(critic)
112 |
113 | # ---------------------- Param Update Functions --------------------------------
114 |
115 | function update_target!(target, model; τ = 1f0)
116 | for (p_t, p_m) in zip(params(target), params(model))
117 | p_t.data .= (1f0 - τ) * p_t.data .+ τ * p_m.data
118 | end
119 | end
120 |
121 | function update_model!(model, opt, loss, inp...)
122 | grads = gradient(()->loss(inp...), params(model))
123 | update!(opt, params(model), grads)
124 | end
125 |
126 | # ---------------------------------- Training ----------------------------------
127 | ## Losses
128 | function L2_loss(model)
129 | l2_loss = sum(map(p->sum(p.^2), params(model)))
130 | return L2_DECAY * l2_loss
131 | end
132 |
133 | loss_crit(y, s, a) = Flux.mse(critic(s, a), y) #+ L2_loss(critic)
134 |
135 | function loss_act(s)
136 | actions = actor(s)
137 | crit_out = critic(s, actions)
138 | return -sum(crit_out)
139 | end
140 |
141 | ## Optimizers
142 | opt_crit = ADAM(η_crit)
143 | opt_act = ADAM(η_act)
144 |
145 |
146 | function replay()
147 | s, a, r, s′, s_mask = getData()
148 |
149 | a′ = actor_target(s′)
150 | v′ = critic_target(s′, a′)
151 | y = data(r .+ γ * v′ .* s_mask) # set v′ to 0 where s′ is a terminal state
152 |
153 |
154 | update_model!(critic, opt_crit, loss_crit, y, s, a)
155 | update_model!(actor, opt_act, loss_act, s)
156 |
157 | # Update Target models
158 | update_target!(actor_target, actor; τ = τ)
159 | update_target!(critic_target, critic; τ = τ)
160 | end
161 |
162 | # ---------------------------- Helper Functions --------------------------------
163 |
164 | # Stores tuple of state, action, reward, next_state, and done
165 | remember(state, action, reward, next_state, done) =
166 | push!(memory, [data.((state, action, reward, next_state))..., done])
167 |
168 | # Choose action according to policy PendulumPolicy
169 | function action(state, train=true)
170 | state = reshape(state, size(state)..., 1)
171 | act_pred = actor(state |> gpu)
172 | if train
173 | act_pred = act_pred .+ noise_scale * sample_noise(ou)
174 | end
175 | clamp.(act_pred, -ACTION_BOUND, ACTION_BOUND) # returns action
176 | end
177 |
178 | function episode!(env::EnvWrapper)
179 | reset!(env)
180 | for ep=1:MAX_EP_LENGTH
181 | s = state(env)
182 | a = action(s, trainable(env))
183 | s′, r, done, _ = step!(env, a)
184 | if trainable(env)
185 | remember(s, a, r, s′, done)
186 | replay()
187 | end
188 | end
189 | env.total_reward
190 | end
191 |
192 | # -------------------------------- Testing -------------------------------------
193 |
194 | # Returns average score over 100 episodes
195 |
196 | function test(env::EnvWrapper)
197 | score_mean = 0f0
198 | testmode!(env)
199 | for e=1:100
200 | total_reward = episode!(env)
201 | score_mean += total_reward / 100
202 | end
203 | testmode!(env, false)
204 | return score_mean
205 | end
206 |
207 | # ------------------------------ Training --------------------------------------
208 |
209 | # Populate memory with random actions
210 |
211 | s = reset!(env)
212 | for e=1:MIN_EXP_SIZE
213 | global s
214 | a = 2rand(Float32) * ACTION_BOUND - ACTION_BOUND
215 | s′, r, done, _ = step!(env, a)
216 | remember(s, a, r, s′, done)
217 | s = s′
218 | end
219 |
220 | for e=1:MAX_EP
221 | total_reward = episode!(env)
222 | total_reward = @sprintf "%9.3f" total_reward
223 | print("Episode: $e | Score: $total_reward | ")
224 | score_mean = test(env)
225 | score_mean = @sprintf "%9.3f" score_mean
226 | println("Mean score over 100 test episodes: $score_mean")
227 | end
228 |
--------------------------------------------------------------------------------
/contrib/games/differentiable-programming/pendulum/DiffRL.jl:
--------------------------------------------------------------------------------
1 | using Flux, Gym, Printf, Zygote
2 | using Flux.Optimise: update!
3 | using Statistics: mean
4 | #using CuArrays
5 |
6 | #Load game environment
7 |
8 | env = make("Pendulum-v0")
9 | reset!(env)
10 | # ----------------------------- Parameters -------------------------------------
11 |
12 |
13 | STATE_SIZE = length(state(env)) # returns state from obs space
14 | ACTION_SIZE = 1#length(env.actions)
15 | ACTION_BOUND = env._env.action_space.high[1]
16 | MAX_REWARD = 0f0 # Max reward in a timestep
17 | MAX_EP = 10
18 | MAX_EP_LENGTH = 1000
19 | SEQ_LEN = 4
20 |
21 | # ------------------------------ Model Architecture ----------------------------
22 |
23 | model = Chain(Dense(STATE_SIZE, 24, relu),
24 | Dense(24, 48, relu),
25 | Dense(48, ACTION_SIZE)) |> gpu
26 |
27 | η = 3f-2
28 |
29 | opt = ADAM(η)
30 |
31 | loss(r) = Flux.mse(r, MAX_REWARD)
32 |
33 | # ----------------------------- Helper Functions -------------------------------
34 |
35 | function μEpisode(env::EnvWrapper)
36 | l = 0
37 | for frames ∈ 1:SEQ_LEN
38 | #render(env, ctx)
39 | #sleep(0.01)
40 | a = model(state(env))
41 | s, r, done, _ = step!(env, a)
42 | if trainable(env)
43 | l += loss(r)
44 | end
45 |
46 | game_over(env) && break
47 | end
48 | return l
49 | end
50 |
51 |
52 | function episode!(env::EnvWrapper)
53 | reset!(env)
54 | while !game_over(env)
55 | if trainable(env)
56 | grads = gradient(()->μEpisode(env), params(model))
57 | update!(opt, params(model), grads)
58 | else
59 | μEpisode(env)
60 | end
61 | end
62 |
63 | env.total_reward
64 | end
65 |
66 | # -------------------------------- Testing -------------------------------------
67 |
68 | function test(env::EnvWrapper)
69 | score_mean = 0f0
70 | testmode!(env)
71 | for e=1:100
72 | total_reward = episode!(env)
73 | score_mean += total_reward / 100
74 | end
75 | testmode!(env, false)
76 | return score_mean
77 | end
78 |
79 | # ------------------------------ Training --------------------------------------
80 |
81 | for e=1:MAX_EP
82 | total_reward = episode!(env)
83 | total_reward = @sprintf "%9.3f" total_reward
84 | print("Episode: $e | Score: $total_reward | ")
85 | score_mean = test(env)
86 | score_mean = @sprintf "%9.3f" score_mean
87 | println("Mean score over 100 test episodes: $score_mean")
88 | end
89 |
--------------------------------------------------------------------------------
/contrib/games/differentiable-programming/pendulum/Project.toml:
--------------------------------------------------------------------------------
1 | name = "pendulum"
2 | uuid = "948af14c-2d17-11e9-24e0-0541b265729a"
3 | authors = ["Tejan Karmali"]
4 | version = "0.1.0"
5 |
6 | [deps]
7 | DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
8 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
9 | Gym = "56b9baea-2481-11e9-37ae-75904354ad8c"
10 | IRTools = "7869d1d1-7146-5819-86e3-90919afe41df"
11 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
12 | Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
13 |
--------------------------------------------------------------------------------
/contrib/games/differentiable-programming/pendulum/cuda/Project.toml:
--------------------------------------------------------------------------------
1 | name = "pendulum"
2 | uuid = "948af14c-2d17-11e9-24e0-0541b265729a"
3 | authors = ["Tejan Karmali"]
4 | version = "0.1.0"
5 |
6 | [deps]
7 | CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae"
8 | DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
9 | Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
10 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
11 | Gym = "56b9baea-2481-11e9-37ae-75904354ad8c"
12 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
13 |
--------------------------------------------------------------------------------
/contrib/games/differentiable-programming/trebuchet/DiffRL.jl:
--------------------------------------------------------------------------------
1 | using Flux, Trebuchet
2 | using Zygote: forwarddiff
3 | using Statistics: mean
4 | using Random
5 |
6 | lerp(x, lo, hi) = x*(hi-lo)+lo
7 |
8 | function shoot(wind, angle, weight)
9 | Trebuchet.shoot((wind, Trebuchet.deg2rad(angle), weight))[2]
10 | end
11 |
12 | shoot(ps) = forwarddiff(p -> shoot(p...), ps)
13 |
14 | Random.seed!(0)
15 |
16 | model = Chain(Dense(2, 16, σ),
17 | Dense(16, 64, σ),
18 | Dense(64, 16, σ),
19 | Dense(16, 2)) |> f64
20 |
21 | θ = params(model)
22 |
23 | function aim(wind, target)
24 | angle, weight = model([wind, target])
25 | angle = σ(angle)*90
26 | weight = weight + 200
27 | angle, weight
28 | end
29 |
30 | distance(wind, target) =
31 | shoot(collect([wind, aim(wind, target)...]))
32 |
33 | function loss(wind, target)
34 | (distance(wind, target) - target)^2
35 | end
36 |
37 | DIST = (20, 100) # Target distance range (min, max)
38 | SPEED = 5 # Maximum wind speed
39 |
40 | target() = (randn() * SPEED, lerp(rand(), DIST...))
41 |
42 | meanloss() = mean(sqrt(loss(target()...)) for i = 1:100)
43 |
44 | opt = ADAM()
45 |
46 | dataset = (target() for i = 1:100_000)
47 | cb = Flux.throttle(() -> @show(meanloss()), 10)
48 |
49 | Flux.train!(loss, θ, dataset, opt, cb = cb)
50 |
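51 | # After training, the aimer can be queried for a firing solution directly.
52 | # Illustrative values (wind 1.0 m/s, target 70 m), using `aim` and `shoot` above:
53 | #
54 | #   angle, weight = aim(1.0, 70)
55 | #   shoot(collect([1.0, angle, weight]))   # landing distance, ≈ 70 once trained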
--------------------------------------------------------------------------------
/contrib/games/differentiable-programming/trebuchet/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
3 | DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
4 | Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
5 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
6 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
7 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
8 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
9 | Trebuchet = "98b73d46-197d-11e9-11eb-69a6ff759d3a"
10 | WebIO = "0f1e0344-ec1d-5b48-a673-e5cf874b6c29"
11 | Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
12 |
--------------------------------------------------------------------------------
/contrib/games/differentiable-programming/trebuchet/cuda/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
3 | CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae"
4 | DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
5 | Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
6 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
7 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
8 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
9 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
10 | Trebuchet = "98b73d46-197d-11e9-11eb-69a6ff759d3a"
11 | WebIO = "0f1e0344-ec1d-5b48-a673-e5cf874b6c29"
12 |
--------------------------------------------------------------------------------
/contrib/meta-learning/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
4 | Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
5 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
6 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
7 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
8 |
--------------------------------------------------------------------------------
/contrib/meta-learning/fomaml_grad.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/contrib/meta-learning/fomaml_grad.png
--------------------------------------------------------------------------------
/contrib/meta-learning/linear.jl:
--------------------------------------------------------------------------------
1 | function xavier_uniform(dims...)
2 | bound = sqrt(1 / dims[2])
3 | return Float32.(rand(Uniform(-bound, bound), dims...))
4 | end
5 |
6 | struct Linear{F,S,T}
7 | W::S
8 | b::T
9 | σ::F
10 | end
11 |
12 | Linear(W, b) = Linear(W, b, identity)
13 |
14 | function Linear(in::Integer, out::Integer, σ = identity;
15 | initW = xavier_uniform, initb = nothing)
16 | if initb === nothing
17 | bias_bound = 1 / sqrt(in)
18 | initb = (out) -> Float32.(rand(Uniform(-bias_bound, bias_bound), out))
19 | end
20 | return Linear(param(initW(out, in)), param(initb(out)), σ)
21 | end
22 |
23 | Flux.@treelike Linear
24 |
25 | function (a::Linear)(x::AbstractArray)
26 | W, b, σ = a.W, a.b, a.σ
27 | σ.(W*x .+ b)
28 | end
29 |
30 | function Base.show(io::IO, l::Linear)
31 | print(io, "Linear(", size(l.W, 2), ", ", size(l.W, 1))
32 | l.σ == identity || print(io, ", ", l.σ)
33 | print(io, ")")
34 | end
35 |
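36 | # Example (illustrative): a 4-in, 3-out layer applied to a batch of 10 samples:
37 | #
38 | #   l = Linear(4, 3, relu)           # prints as Linear(4, 3, relu)
39 | #   size(l(rand(Float32, 4, 10)))    # (3, 10)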
--------------------------------------------------------------------------------
/contrib/meta-learning/reptile_grad.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/contrib/meta-learning/reptile_grad.png
--------------------------------------------------------------------------------
/contrib/meta-learning/utils.jl:
--------------------------------------------------------------------------------
1 | """
2 | ```
3 | eval_model(model, x::AbstractArray, testx::AbstractArray, task=SineWave();
4 | opt=Descent(1e-2), updates=32)
5 | ```
6 | Evaluates the `model` on a sine wave `task` training to sample `x` with `updates`
7 | amount of gradient steps using `opt`.
8 | Evaluation loss is calculated based on the mean squared error
9 | between model predictions and sine wave values on `testx`.
10 | """
11 | function eval_model(model, x::AbstractArray, testx::AbstractArray, task=SineWave();
12 | opt=Descent(0.02), updates=32)
13 | weights = params(model)
14 | prev_weights = deepcopy(Flux.data.(weights))
15 |
16 | y = task(x)
17 | testy = task(testx)
18 | init_preds = model(testx')
19 | test_loss = Flux.mse(init_preds, testy')
20 |
21 | test_losses = Float32[]
22 | push!(test_losses, Flux.data(test_loss))
23 |
24 | print(task, "\n")
25 | @printf("Before finetuning, Loss = %f\n", test_loss)
26 | for i in 1:updates
27 | l = Flux.mse(model(x'), y')
28 | Flux.back!(l)
29 | Flux.Optimise._update_params!(opt, weights)
30 | test_loss = Flux.mse(model(testx'), testy')
31 | push!(test_losses, Flux.data(test_loss))
32 | @printf("After %d fits, Loss = %f\n", i, test_loss)
33 | end
34 | final_preds = model(testx')
35 |
36 | # reset weights to state before finetune
37 | Flux.loadparams!(model, prev_weights)
38 |
39 | return (x=x, testx=testx, y=y, testy=testy,
40 | initial_predictions=Array(Flux.data(init_preds)'),
41 | final_predictions=Array(Flux.data(final_preds)'),
42 | test_losses=test_losses)
43 | end
44 |
45 | function plot_eval_data(data::NamedTuple, title="")
46 | return plot([data.x, data.testx, data.testx, data.testx],
47 | [data.y, data.testy, data.initial_predictions, data.final_predictions],
48 | line=[:scatter :path :path :path],
49 | label=["Sampled points", "Ground truth", "Before finetune", "After finetune"],
50 | foreground_color_legend=:white, background_color_legend=:transparent,
51 | title=title,
52 | xlim=(-5.5, 5.5))
53 | end
54 |
--------------------------------------------------------------------------------
/other/autoregressive-process/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
3 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
4 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
5 |
6 | [compat]
7 | Flux = "0.13.9, 0.14"
8 | julia = "1.6"
9 |
--------------------------------------------------------------------------------
/other/autoregressive-process/README.md:
--------------------------------------------------------------------------------
1 | # Autoregressive Model
2 |
3 | An [autoregressive (AR) process](https://en.wikipedia.org/wiki/Autoregressive_model) is a stochastic process with an autoregressive structure, i.e., past realizations influence its future realizations.
4 |
5 | This model-zoo example illustrates how to use Flux's recurrent layers to model an AR process.
6 |
7 | The example contains the following files:
8 | + [utils.jl](utils.jl):
9 | + `generate_process`: generates an AR process
10 | + `batch_timeseries`: transforms a vector into the format expected by Flux's recurrent layers and batches the time series as required (see the usage sketch below).
11 |
12 | + [model.jl](model.jl): creates and trains the recurrent model to predict the generated AR process.
13 |
14 | ## Example loss
15 |
16 | Running the model with the hyperparameters currently given in the example, we obtain the following train and test losses. We see that the model begins to overfit after around 30 epochs.
17 |
18 | 
--------------------------------------------------------------------------------
/other/autoregressive-process/loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/other/autoregressive-process/loss.png
--------------------------------------------------------------------------------
/other/autoregressive-process/model.jl:
--------------------------------------------------------------------------------
1 | using Flux
2 | using Random
3 | using Statistics
4 | include("utils.jl")
5 |
6 | # Hyperparameters and configuration of AR process
7 | Base.@kwdef mutable struct Args
8 | seed::Int = 72 # Random seed
9 | # AR process parameters
10 | ϕ::Vector{Float32} = [.3f0, .2f0, -.5f0] # AR coefficients (=> AR(3))
11 | proclen::Int = 750 # Process length
12 | # Recurrent net parameters
13 | dev = cpu # Device: cpu or gpu
14 | opt = Adam # Optimizer
15 | η::Float64 = 2e-3 # Learning rate
16 | hidden_nodes::Int = 64 # Number of hidden nodes
17 | hidden_layers::Int = 2 # Number of hidden layers
18 | layer = LSTM # Type of layer, should be one of LSTM, GRU, RNN
19 | epochs::Int = 100 # Number of epochs
20 | seqlen::Int = 10 # Sequence length to use as input
21 | seqshift::Int = 10 # Shift between sequences (see utils.jl)
22 | train_ratio::Float64 = .7 # Percentage of data in the train set
23 | verbose::Bool = true # Whether we log the results during training or not
24 | end
25 |
26 | # Creates a model according to the pre-defined hyperparameters `args`
27 | function build_model(args)
28 | Chain(
29 | args.layer(1, args.hidden_nodes),
30 | [args.layer(args.hidden_nodes, args.hidden_nodes) for _ ∈ 1:args.hidden_layers-1]...,
31 | Dense(args.hidden_nodes, 1, identity)
32 | ) |> args.dev
33 | end
34 |
35 | # Creates training and testing samples according to hyperparameters `args`
36 | function generate_train_test_data(args)
37 | # Generate full AR process
38 | data = generate_process(args.ϕ, args.proclen)
39 | # Create input X and output y (series shifted by 1)
40 | X, y = data[1:end-1], data[2:end]
41 | # Split data into training and testing sets
42 | idx = round(Int, args.train_ratio * length(X))
43 | Xtrain, Xtest = X[1:idx], X[idx+1:end]
44 | ytrain, ytest = y[1:idx], y[idx+1:end]
45 | # Transform data to time series batches and return
46 | map(x -> batch_timeseries(x, args.seqlen, args.seqshift) |> args.dev,
47 | (Xtrain, Xtest, ytrain, ytest))
48 | end
49 |
50 | function mse_loss(model, x, y)
51 | # Warm up recurrent model on first observation
52 | model(x[1])
53 | # Compute mean squared error loss on the rest of the sequence
54 | mean(Flux.Losses.mse.([model(xᵢ) for xᵢ ∈ x[2:end]], y[2:end]))
55 | end
56 |
57 | # Trains and outputs the model according to the chosen hyperparameters `args`
58 | function train_model(args)
59 | Random.seed!(args.seed)
60 | # Create recurrent model
61 | model = build_model(args)
62 | # Get data
63 | Xtrain, Xtest, ytrain, ytest = generate_train_test_data(args)
64 |
65 | opt = Flux.setup(args.opt(args.η), model)
66 | # Training loop
67 | for i ∈ 1:args.epochs
68 | Flux.reset!(model) # Reset hidden state of the recurrent model
69 | # Compute the gradients of the loss function
70 | (∇m,) = gradient(model) do m
71 | mse_loss(m, Xtrain, ytrain)
72 | end
73 | Flux.update!(opt, model, ∇m) # Update model parameters
74 | if args.verbose && i % 10 == 0 # Log results every 10 epochs
75 | # Compute loss on train and test set for logging (important: the model must be reset!)
76 | Flux.reset!(model)
77 | train_loss = mse_loss(model, Xtrain, ytrain)
78 | Flux.reset!(model)
79 | test_loss = mse_loss(model, Xtest, ytest)
80 | @info "Epoch $i / $(args.epochs), train loss: $(round(train_loss, digits=3)) | test loss: $(round(test_loss, digits=3))"
81 | end
82 | end
83 | return model
84 | end
85 |
86 | cd(@__DIR__)
87 |
88 | args = Args() # Set up hyperparameters
89 | m = train_model(args) # Train and output model
90 |
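91 | # Example (illustrative): one-step-ahead predictions on freshly generated data
92 | #
93 | #   Flux.reset!(m)
94 | #   Xnew = batch_timeseries(generate_process(args.ϕ, 100), args.seqlen, args.seqshift)
95 | #   ŷ = [m(xᵢ) for xᵢ ∈ Xnew]   # one 1×N prediction matrix per timestep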
--------------------------------------------------------------------------------
/other/autoregressive-process/utils.jl:
--------------------------------------------------------------------------------
1 | # Generates an AR(p) process with coefficients `ϕ`, i.e. the recursion
2 | #     X_t = ϕ_1*X_{t-1} + … + ϕ_p*X_{t-p} + ϵ_t,  with ϵ_t ~ N(0, 1).
3 | # The order p of the generated process equals `length(ϕ)`.
4 | # `s` indicates the total length of the series to be generated.
5 | function generate_process(ϕ::AbstractVector{Float32}, s::Int)
6 | s > 0 || error("s must be positive")
7 | # Generate white noise
8 | ϵ = randn(Float32, s)
9 | # Initialize time series
10 | X = zeros(Float32, s)
11 | p = length(ϕ)
12 | X[1] = ϵ[1]
13 | # Reverse the order of the coefficients for multiplication later on
14 | ϕ = reverse(ϕ)
15 | # Fill first p observations
16 | for t ∈ 1:p-1
17 | X[t+1] = X[1:t]'ϕ[end-t+1:end] + ϵ[t+1] # use the t lowest-lag coefficients
18 | end
19 | # Compute values iteratively
20 | for t ∈ p+1:s
21 | X[t] = X[t-p:t-1]'ϕ + ϵ[t]
22 | end
23 | X
24 | end
25 |
26 | # Create batches of a time series `X` by splitting the series into
27 | # sequences of length `s`. Each new sequence is shifted by `r` steps.
28 | # When s == r, the series is split into non-overlapping batches.
29 | function batch_timeseries(X, s::Int, r::Int)
30 | r > 0 || error("r must be positive")
31 | # If X is passed in format T×1, reshape it
32 | if isa(X, AbstractVector)
33 | X = permutedims(X)
34 | end
35 | T = size(X, 2)
36 | s ≤ T || error("s cannot be longer than the total series")
37 | # Ensure uniform sequence lengths by dropping observations at the start so that
38 | # the remaining length accommodates a whole number of length-`s`, shift-`r` sequences
39 | X = X[:, ((T - s) % r)+1:end]
40 | [X[:, t:r:end-s+t] for t ∈ 1:s] # Output
41 | end
42 |
43 |
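44 | # Worked example (illustrative): batch_timeseries(collect(1f0:10f0), 3, 3)
45 | # drops the first observation, since (10 - 3) % 3 == 1, and returns a
46 | # 3-element vector of 1×3 matrices; reading down the timesteps gives the
47 | # non-overlapping sequences (2,3,4), (5,6,7) and (8,9,10).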
--------------------------------------------------------------------------------
/other/bitstring-parity/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
3 | Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
4 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
5 |
6 | [compat]
7 | Flux = "0.11.4"
8 | julia = "1.5"
9 |
--------------------------------------------------------------------------------
/other/bitstring-parity/README.md:
--------------------------------------------------------------------------------
1 | From https://blog.openai.com/requests-for-research-2/.
2 |
3 | ⭐ Train an LSTM to solve the XOR problem: that is, given a sequence of bits, determine its parity. The LSTM should consume the sequence, one bit at a time, and then output the correct answer at the sequence’s end. Test the two approaches below:
4 |
5 | - Generate a dataset of 100,000 random binary strings of length 50. Train the LSTM; what performance do you get?
6 | - Generate a dataset of 100,000 random binary strings, where the length of each string is independently and randomly chosen between 1 and 50. Train the LSTM. Does it succeed? What explains the difference?
7 |
8 | ## Files
9 |
10 | * [2 bit strings](./xor1.jl)
11 | * [2000 1 to 10 length strings](./xor2.jl)
12 | * [100,000 1 to 50 length strings](./xor3.jl)
13 |
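14 | ## Data format
15 |
16 | Each example produced by [data.jl](./data.jl) pairs a one-hot encoded bit string with the one-hot encoded parity of that string. A small illustrative session:
17 |
18 | ```julia
19 | include("data.jl")
20 |
21 | data = gendata(3, 1:5)   # 3 strings, lengths drawn from 1:5
22 | x, y = data[1]           # x: 2×L one-hot matrix, y: 2-element one-hot vector
23 | ```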
--------------------------------------------------------------------------------
/other/bitstring-parity/data.jl:
--------------------------------------------------------------------------------
1 | using Flux: onehot, onehotbatch
2 | using Random
3 |
4 | const alphabet = [false, true] # 0, 1
5 |
6 | parity(x) = reduce(xor, x)
7 |
8 | gendata(n::Int, k::Int) = gendata(n, k:k)
9 |
10 | function gendata(n::Int, k::UnitRange{Int})
11 | X = bitrand.(rand(k, n))
12 | return [(onehotbatch(x, alphabet), onehot(y, alphabet)) for (x, y) in zip(X, parity.(X))]
13 | end
14 |
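15 | # e.g. parity([true, false, true]) == false   # two 1s → even parity → label 0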
--------------------------------------------------------------------------------
/other/bitstring-parity/xor1.jl:
--------------------------------------------------------------------------------
1 | include("data.jl")
2 | using Flux, Statistics
3 | using Flux: onehot, onehotbatch, throttle, logitcrossentropy, reset!, onecold
4 | using Parameters: @with_kw
5 |
6 | @with_kw mutable struct Args
7 | lr::Float64 = 1e-3 # Learning rate
8 | epochs::Int = 20 # Number of epochs for training
9 | train_len::Int = 100 # Length of training data to be generated
10 | val_len::Int = 10 # Length of Validation Data
11 | throttle::Int = 10 # Throttle timeout
12 | end
13 |
14 | function getdata(args)
15 | # Using gendata function defined in data.jl
16 | train = gendata(args.train_len, 2)
17 | val = gendata(args.val_len, 2)
18 | return train, val
19 | end
20 |
21 | function build_model()
22 | scanner = LSTM(length(alphabet), 20)
23 | encoder = Dense(20, length(alphabet))
24 | return scanner, encoder
25 | end
26 |
27 | function model(x, scanner, encoder)
28 | state = scanner.(x.data)[end]
29 | reset!(scanner)
30 | encoder(state)
31 | end
32 |
33 | function train(; kws...)
34 | # Initialize the parameters
35 | args = Args(; kws...)
36 |
37 | # Load Data
38 | train_data, val_data = getdata(args)
39 |
40 | @info("Constructing Model...")
41 | scanner,encoder = build_model()
42 |
43 | loss(x, y) = logitcrossentropy(model(x, scanner, encoder), y)
44 | batch_loss(data) = mean(loss(d...) for d in data)
45 |
46 | opt = ADAM(args.lr)
47 | ps = params(scanner, encoder)
48 | evalcb = () -> @show batch_loss(val_data)
49 |
50 | @info("Training...")
51 | for i=1:args.epochs
52 | Flux.train!(loss, ps, train_data, opt, cb=throttle(evalcb, args.throttle))
53 | end
54 | return scanner, encoder
55 | end
56 |
57 | function test(scanner, encoder)
58 | # sanity test
59 | tx = map(c -> onehotbatch(c, alphabet), [
60 | [false, true], # 01 -> 1
61 | [true, false], # 10 -> 1
62 | [false, false], # 00 -> 0
63 | [true, true]]) # 11 -> 0
64 | @info("Test...")
65 | out = [onecold(model(x, scanner, encoder)) - 1 for x in tx]
66 | input = [[0,1],[1,0],[0,0],[1,1]]
67 | for i in 1:length(tx)
68 | print(input[i]," => ",out[i],"\n")
69 | end
70 | end
71 |
72 | cd(@__DIR__)
73 | scanner, encoder = train()
74 | test(scanner, encoder)
75 |
--------------------------------------------------------------------------------
/other/bitstring-parity/xor2.jl:
--------------------------------------------------------------------------------
1 | include("data.jl")
2 | using Flux, Statistics
3 | using Flux: onehot, onehotbatch, throttle, logitcrossentropy, reset!, onecold
4 | using Parameters: @with_kw
5 |
6 | @with_kw mutable struct Args
7 | lr::Float64 = 1e-3 # Learning rate
8 | epochs::Int = 20 # Number of epochs for training
9 | train_len::Int = 2000 # Length of training data to be generated
10 | val_len::Int = 100 # Length of Validation Data
11 | throttle::Int = 10 # Throttle timeout
12 | end
13 |
14 | function getdata(args)
15 | # training data of bit strings of length 1 to 10
16 | train = gendata(args.train_len, 1:10)
17 | # validation data of bit strings of length 10
18 | val = gendata(args.val_len, 10)
19 | return train, val
20 | end
21 |
22 | function build_model()
23 | scanner = LSTM(length(alphabet), 20)
24 | encoder = Dense(20, length(alphabet))
25 | return scanner, encoder
26 | end
27 |
28 | function model(x, scanner, encoder)
29 | state = scanner.(x.data)[end]
30 | reset!(scanner)
31 | encoder(state)
32 | end
33 |
34 | function train(; kws...)
35 | # Initialize the parameters
36 | args = Args(; kws...)
37 |
38 | # Load Data
39 | train_data, val_data = getdata(args)
40 |
41 | @info("Constructing Model...")
42 | scanner,encoder = build_model()
43 |
44 | loss(x, y) = logitcrossentropy(model(x, scanner, encoder), y)
45 | batch_loss(data) = mean(loss(d...) for d in data)
46 |
47 | opt = ADAM(args.lr)
48 | ps = params(scanner, encoder)
49 | evalcb = () -> @show batch_loss(val_data)
50 |
51 | @info("Training...")
52 | for i=1:args.epochs
53 | Flux.train!(loss, ps, train_data, opt, cb=throttle(evalcb, args.throttle))
54 | end
55 |
56 | # Try running the model on strings of length 50.
57 | #
58 | # Even though the model has only been trained with
59 | # much shorter strings, it has learned the
60 | # parity function and remains accurate on longer strings.
61 | function t50()
62 | l = batch_loss(gendata(1000, 50))
63 | println("Batch_loss for length 50 string: ", l,"\n")
64 | end
65 | t50()
66 | return scanner, encoder
67 | end
68 |
69 | function test(scanner, encoder)
70 | # sanity test
71 | tx = map(c -> onehotbatch(c, alphabet), [
72 | [false, true], # 01 -> 1
73 | [true, false], # 10 -> 1
74 | [false, false], # 00 -> 0
75 | [true, true]]) # 11 -> 0
76 | @info("Test...")
77 | out = [onecold(model(x, scanner, encoder)) - 1 for x in tx]
78 | input = [[0,1],[1,0],[0,0],[1,1]]
79 | for i in 1:length(tx)
80 | print(input[i]," => ",out[i],"\n")
81 | end
82 | end
83 |
84 | cd(@__DIR__)
85 | scanner, encoder = train()
86 | test(scanner, encoder)
87 |
--------------------------------------------------------------------------------
/other/bitstring-parity/xor3.jl:
--------------------------------------------------------------------------------
1 | include("data.jl")
2 | using Flux, Statistics
3 | using Flux: onehot, onehotbatch, throttle, logitcrossentropy, reset!, onecold
4 | using Parameters: @with_kw
5 |
6 | @with_kw mutable struct Args
7 | lr::Float64 = 1e-3 # Learning rate
8 | epochs::Int = 20 # Number of epochs for training
9 | train_len::Int = 100000 # Length of training data to be generated
10 | val_len::Int = 1000 # Length of Validation Data
11 | throttle::Int = 10 # Throttle timeout
12 | end
13 |
14 | function getdata(args)
15 | # training data of bit strings of length 1 to 50
16 | train = gendata(args.train_len, 1:50)
17 | # validation data of bit strings of length 50
18 | val = gendata(args.val_len, 50)
19 | return train, val
20 | end
21 |
22 | function build_model()
23 | scanner = LSTM(length(alphabet), 20)
24 | encoder = Dense(20, length(alphabet))
25 | return scanner, encoder
26 | end
27 |
28 | function model(x, scanner, encoder)
29 | state = scanner.(x.data)[end]
30 | reset!(scanner)
31 | encoder(state)
32 | end
33 |
34 | function train(; kws...)
35 | # Initialize the parameters
36 | args = Args(; kws...)
37 |
38 | # Load Data
39 | train_data, val_data = getdata(args)
40 |
41 | @info("Constructing Model...")
42 | scanner,encoder = build_model()
43 |
44 | loss(x, y) = logitcrossentropy(model(x, scanner, encoder), y)
45 | batch_loss(data) = mean(loss(d...) for d in data)
46 |
47 | opt = ADAM(args.lr)
48 | ps = params(scanner, encoder)
49 | evalcb = () -> @show batch_loss(val_data)
50 |
51 | @info("Training...")
52 | for i=1:args.epochs
53 | Flux.train!(loss, ps, train_data, opt, cb=throttle(evalcb, args.throttle))
54 | end
55 |
56 | return scanner, encoder
57 | end
58 |
59 | function test(scanner, encoder)
60 | # sanity test
61 | tx = map(c -> onehotbatch(c, alphabet), [
62 | [false, true], # 01 -> 1
63 | [true, false], # 10 -> 1
64 | [false, false], # 00 -> 0
65 | [true, true]]) # 11 -> 0
66 | @info("Test...")
67 | out = [onecold(model(x, scanner, encoder)) - 1 for x in tx]
68 | input = [[0,1],[1,0],[0,0],[1,1]]
69 | for i in 1:length(tx)
70 | print(input[i]," => ",out[i],"\n")
71 | end
72 | end
73 |
74 | cd(@__DIR__)
75 | scanner, encoder = train()
76 | test(scanner, encoder)
77 |
--------------------------------------------------------------------------------
/other/fizzbuzz/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
3 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
4 |
5 | [compat]
6 | Flux = "0.13.9, 0.14"
7 | julia = "1.6"
8 |
--------------------------------------------------------------------------------
/other/fizzbuzz/fizzbuzz.jl:
--------------------------------------------------------------------------------
1 | # Inspired by "Fizz Buzz in Tensorflow" blog by Joel Grus
2 | # http://joelgrus.com/2016/05/23/fizz-buzz-in-tensorflow/
3 |
4 | using Flux
5 | using Flux: onehotbatch, train!, setup, logitcrossentropy
6 | using Test
7 |
8 | # Data preparation
9 | function fizzbuzz(x::Int)
10 | is_divisible_by_three = x % 3 == 0
11 | is_divisible_by_five = x % 5 == 0
12 |
13 | if is_divisible_by_three & is_divisible_by_five
14 | return "fizzbuzz"
15 | elseif is_divisible_by_three
16 | return "fizz"
17 | elseif is_divisible_by_five
18 | return "buzz"
19 | else
20 | return "else"
21 | end
22 | end
23 |
24 | const LABELS = ("fizz", "buzz", "fizzbuzz", "else");
25 |
26 | # Feature engineering
27 | features(x) = float.([x % 3, x % 5, x % 15])
28 | features(x::AbstractArray) = reduce(hcat, features.(x))
29 |
30 | function getdata()
31 |
32 | @test fizzbuzz.((3, 5, 15, 98)) == LABELS
33 |
34 | raw_x = 1:100;
35 | raw_y = fizzbuzz.(raw_x);
36 |
37 | X = features(raw_x);
38 | y = onehotbatch(raw_y, LABELS);
39 | return X, y
40 | end
41 |
42 | function train(; epochs::Int=500, dim::Int=20, eta::Real=0.001)
43 |
44 | # Get Data
45 | X, y = getdata()
46 |
47 | # Model
48 | m = Chain(Dense(3 => dim, relu), Dense(dim => 4))
49 | loss(m, x, y) = logitcrossentropy(m(x), y)
50 |
51 | # Helpers
52 | deepbuzz(x) = (a = argmax(m(features(x))); a == 4 ? x : LABELS[a])
53 |
54 | function monitor(e)
55 | print("epoch $(lpad(e, 4)): loss = $(round(loss(m,X,y); digits=4)) | ")
56 | @show deepbuzz.([3, 5, 15, 98])
57 | end
58 |
59 | # Training
60 | opt = setup(Adam(eta), m)
61 | for e in 0:epochs
62 | if e % 50 == 0
63 | monitor(e)
64 | end
65 | train!(loss, m, [(X, y)], opt)
66 | end
67 |
68 | return m
69 | end
70 |
71 | train()
72 |
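73 | # A quick sanity check of the feature engineering above (illustrative):
74 | #
75 | #   features(15) == [0.0, 0.0, 0.0]   # divisible by 3, 5 and 15
76 | #   features(98) == [2.0, 3.0, 8.0]   # divisible by none of them
77 | #
78 | # Exposing the remainders directly makes the rule easy for a small network to learn.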
--------------------------------------------------------------------------------
/other/flux-next/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
4 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
5 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
6 | Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
7 |
--------------------------------------------------------------------------------
/other/flux-next/intro.jl:
--------------------------------------------------------------------------------
1 | # Flux vNext
2 | # ==========
3 |
4 | # Optimisation Basics
5 | # -------------------
6 |
7 | using Flux
8 | using Flux: step!
9 |
10 | # Using Flux is very simple. You write a program, and we'll make tweaks to that
11 | # program so that it gets gradually better.
12 |
13 | # What does it mean to make a program "better"? That's up to you – your program
14 | # returns a score, called the "loss", which determines how well the program
15 | # is doing. Flux's job is to minimise that score. For example, let's take the
16 | # simplest possible program, one that simply returns a constant.
17 |
18 | w = 1
19 | loss = () -> w
20 |
21 | loss()
22 |
23 | # This program doesn't look very interesting, but we can still do something
24 | # interesting with it. The core function that optimises programs for us is
25 | # `step!`. We have to pass `step!` an *optimiser* called `Descent`; this basically
26 | # tells Flux how aggressive to be, but we'll talk more about that later.
27 |
28 | opt = Descent(1)
29 | step!(loss, opt)
30 |
31 | # `step!` returns the same loss as before, `1`. But something more interesting
32 | # has happened; try running `loss()` again.
33 |
34 | loss()
35 |
36 | # It went down! And if we keep calling `step!` in a loop, it'll keep going down.
37 |
38 | for i = 1:10
39 | @show step!(loss, opt)
40 | end
41 |
42 | loss()
43 |
44 | # Of course, this case is pretty easy: we can always improve the parameter by
45 | # making `w` smaller.
46 |
47 | w
48 |
49 | # Here's something harder; now our `loss` is always positive, so it can't keep
50 | # improving indefinitely. Things will stop improving once we hit the *minimum*
51 | # of this function (which we happen to know is at $w = 0$, where $loss = 0$.)
52 |
53 | w = 1
54 | loss = () -> w^2
55 |
56 | opt = Descent(0.2)
57 | for i = 1:10
58 | @show step!(loss, opt)
59 | end
60 |
61 | w
62 |
63 | # You can see that our loss gradually tends towards $0$, and so does $w$. Note,
64 | # however, that Flux will never say: "Ok, we're done here, here's the best value
65 | # for $w$." Though there are tools that can do this in simple cases, Flux is
66 | # designed to scale to extremely complex problems where this is no longer
67 | # possible. So Flux only makes tweaks, and it's up to you to decide when to stop.
68 |
69 | # Let's put these ideas towards something a little more interesting. Say we want
70 | # to solve $5x = 10$, to find an $x$ that makes this true. What's our program?
71 | # Well, to start with we want to take $f(x) = 5x$. Then our loss should be something like
72 | # $f(x) - 10$, so that it measures how far $f(x)$ is from where we want it to
73 | # be. This doesn't quite work, however, since the loss will be low (negative) if
74 | # $f(x)$ is `-Inf`! So we can use our squaring trick again here, to make
75 | # sure that $f(x) - 10$ tends to zero.
76 |
77 | x = 1 # Our initial guess
78 | f = x -> 5x
79 |
80 | opt = Descent(0.01)
81 |
82 | for i = 1:10
83 | l = step!(opt) do
84 | (f(x) - 10)^2
85 | end
86 | @show l
87 | end
88 |
89 | # Our loss ended up being pretty low. How's our function looking?
90 |
91 | 5x
92 |
93 | # That looks pretty good. So we're beginning to be able to use Flux to solve
94 | # problems where we know what the *output* should look like, but we're not
95 | # sure what the *input* should be to get there.
96 |
97 | # You now arguably understand everything you need to do productive ML. But let's
98 | # look over a few more examples to see how it looks in practice.
99 |
100 | # Optimising Colours
101 | # ------------------
102 |
103 | # Just like Julia more generally, Flux has good support for custom types.
104 | # This means we can carry out optimisation on things like colours!
105 |
106 | # This example uses the excellent Colors.jl. Colors contains, among other
107 | # things, a `colordiff` function which uses fancy colour theory algorithms to
108 | # estimate the *perceptual* difference between two colours. We can use this
109 | # directly in our loss function.
110 |
111 | using Colors
112 |
113 | target = RGB(1, 0, 0)
114 | colour = RGB(1, 1, 1)
115 | [target, colour]
116 | #-
117 | opt = Descent(0.01)
118 |
119 | for i = 1:10
120 | step!(opt, target) do y
121 | colordiff(colour, y)
122 | end
123 | end
124 |
125 | [colour, target]
126 |
127 | # `colour` started out white and is now red. That makes sense, as we've
128 | # minimised the distance between the two colours. But we can also *maximise*
129 | # with a simple minus sign.
130 |
131 | colour1 = RGB(1, 1, 1)
132 |
133 | for i = 1:10
134 | step!(opt, target) do y
135 | -colordiff(colour1, y)
136 | end
137 | end
138 |
139 | [colour1, target]
140 |
141 | # Now we have green, a colour that's arguably very different from red. However,
142 | # there's a subtlety here; notice what happens if we use a different colour as
143 | # our starting point.
144 |
145 | colour2 = RGB(0, 0, 1)
146 |
147 | for i = 1:10
148 | step!(opt, target) do y
149 | -colordiff(colour2, y)
150 | end
151 | end
152 |
153 | [colour2, target]
154 |
155 | # Now we have a dark blue! If we look directly at `colordiff` we'll see that
156 | # green is better.
157 |
158 | colordiff(target, colour1), colordiff(target, colour2)
159 |
160 | # So why do we get blue here? This is another case where it's important that
161 | # Flux optimises programs through a series of small tweaks. In this case, even
162 | # though green is better overall, making our colour slightly more green actually
163 | # makes our score worse temporarily.
164 |
165 | colordiff(target, RGB(0, 0, 0.4)), colordiff(target, RGB(0, 0.1, 0.4))
166 |
167 | # This is known as a *local optimum*. It's important to understand how Flux
168 | # optimises programs and what this means for you, so we'll cover this in more
169 | # detail in future.
170 |
171 | # Keras in 5 lines
172 | # -----------------
173 |
174 | # [Working on making this an MNIST demo, but here's the gist of it.]
175 |
176 | # Dummy data.
177 |
178 | x = rand(10)
179 | y = [1, 0]
180 |
181 | # Logistic regression.
182 |
183 | using Flux: crossentropy
184 |
185 | W = randn(2, 10)
186 | b = zeros(2)
187 |
188 | predict = x -> softmax(W * x .+ b)
189 |
190 | opt = Descent(0.1)
191 |
192 | loss = (x, y) -> crossentropy(predict(x), y)
193 |
194 | step!(loss, opt, x, y)
195 |
196 | # Multi-layer perceptron.
197 |
198 | function dense(in, out, σ = identity)
199 | W = randn(out, in)
200 | b = zeros(out)
201 | x -> σ.(W * x .+ b)
202 | end
203 |
204 | chain(fs...) = x -> foldl((x, m) -> m(x), fs, init = x)
205 |
206 | model = chain(dense(10, 5, relu), dense(5, 2), softmax)
207 |
208 | # Doesn't quite work yet.
209 |
210 | ## step!(opt, x, y) do x, y
211 | ## crossentropy(model(x), y)
212 | ## end
213 |
--------------------------------------------------------------------------------
/other/housing/.gitignore:
--------------------------------------------------------------------------------
1 | housing.data
2 |
--------------------------------------------------------------------------------
/other/housing/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
4 | Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
5 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
6 |
7 | [compat]
8 | Flux = "0.11.4"
9 | julia = "1.5"
10 |
--------------------------------------------------------------------------------
/other/housing/README.md:
--------------------------------------------------------------------------------
1 | # Housing data example
2 |
3 | 
4 |
5 | Source: [Dive into Deep Learning](http://d2l.ai/chapter_linear-networks/linear-regression.html#from-linear-regression-to-deep-networks)
6 |
7 | ## Model Info
8 |
9 | In this example, we create a linear regression model that predicts housing data. It replicates the housing data example from the [Knet.jl readme](https://github.com/denizyuret/Knet.jl). Although we could have reused more of Flux (see the MNIST example), the library's abstractions are very lightweight and don't force you into any particular strategy.
10 |
11 | A linear model can be created as a neural network with a single layer. The number of inputs is the same as the features that the data has. Each input is connected to a single output with no activation function. Then, the output of the model is a linear function that predicts unseen data.
12 |
13 | ## Training
14 | To run this example:
15 |
16 | ```script
17 | cd other/housing/
18 | julia --project housing.jl
19 | ```
20 |
21 | ## Reference
22 |
23 | * [Aston Zhang, Zachary C. Lipton, Mu Li and Alexander J. Smola, "Dive into Deep Learning", 2022](http://d2l.ai/chapter_linear-networks/linear-regression.html#from-linear-regression-to-deep-networks)
24 |
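25 | ## Model sketch
26 |
27 | For intuition, the single-layer model described in Model Info corresponds to one fully connected layer. A minimal sketch (illustrative; the script instead builds `W` and `b` by hand):
28 |
29 | ```julia
30 | using Flux
31 | model = Dense(13, 1)   # 13 features in, 1 predicted price out, identity activation
32 | ```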
--------------------------------------------------------------------------------
/other/housing/housing.jl:
--------------------------------------------------------------------------------
1 | # # Housing data
2 |
3 | # In this example, we create a linear regression model that predicts housing data.
4 | # It replicates the housing data example from the [Knet.jl readme](https://github.com/denizyuret/Knet.jl).
5 | # Although we could have reused more of Flux (see the MNIST example), the library's abstractions are very
6 | # lightweight and don't force you into any particular strategy.
7 |
8 | # A linear model can be created as a neural network with a single layer.
9 | # The number of inputs is the same as the features that the data has.
10 | # Each input is connected to a single output with no activation function.
11 | # Then, the output of the model is a linear function that predicts unseen data.
12 |
13 | # 
14 |
15 | # Source: [Dive into Deep Learning](http://d2l.ai/chapter_linear-networks/linear-regression.html#from-linear-regression-to-deep-networks)
16 |
17 | # To run this example, we need the following packages:
18 |
19 | using Flux
20 | using Flux: gradient
21 | using Flux.Optimise: update!
22 | using DelimitedFiles, Statistics
23 | using Parameters: @with_kw
24 |
25 |
26 | # We set default values for the learning rate (for the training routine) and the percentage of
27 | # the data that we use when testing the model:
28 |
29 | @with_kw mutable struct Hyperparams
30 | ## Learning rate
31 | lr::Float64 = 0.1
32 | ## Train/test split ratio: fraction of the data held out as the test set
33 | split_ratio::Float64 = 0.1
34 | end
35 |
36 |
37 | # ## Data
38 |
39 | # We create the function `get_processed_data` to load the housing data, normalize it,
40 | # and finally split it into train and test datasets:
41 |
42 |
43 | function get_processed_data(args)
44 | isfile("housing.data") ||
45 | download("https://raw.githubusercontent.com/MikeInnes/notebooks/master/housing.data",
46 | "housing.data")
47 |
48 | rawdata = readdlm("housing.data")'
49 |
50 | ## The last feature is our target -- the price of the house.
51 | split_ratio = args.split_ratio ## For the train test split
52 |
53 | x = rawdata[1:13,:]
54 | y = rawdata[14:14,:]
55 |
56 | ## Normalise the data
57 | x = (x .- mean(x, dims = 2)) ./ std(x, dims = 2)
58 |
59 | ## Split into train and test sets
60 | split_index = floor(Int, size(x,2) * (1 - split_ratio)) ## train size; the rest is the test set
61 | x_train = x[:,1:split_index]
62 | y_train = y[:,1:split_index]
63 | x_test = x[:,split_index+1:size(x,2)]
64 | y_test = y[:,split_index+1:size(x,2)]
65 |
66 | train_data = (x_train, y_train)
67 | test_data = (x_test, y_test)
68 |
69 | return train_data,test_data
70 | end
71 |
72 | # This function performs the following tasks:
73 |
74 | # 1. Downloads the housing data. The original size of the data is 506 rows and 14 columns.
75 | # 2. Loads the data as a 14x506 matrix. This is the shape that Flux expects.
76 | # 3. Splits the data into features and a target. Notice that the 14th row corresponds to the target for each example.
77 | # 4. Normalizes the data. For more information on normalizing data, see [How to Use StandardScaler and MinMaxScaler Transforms in Python](https://machinelearningmastery.com/standardscaler-and-minmaxscaler-transforms-in-python/).
78 | # 5. Splits the data into train and test datasets.
79 |
80 |
81 | # ## Model
82 | # We use a struct to define the model’s parameters.
83 | # It contains an array for holding the weights *W* and a vector for the bias term *b*:
84 |
85 | mutable struct model
86 | W::AbstractArray
87 | b::AbstractVector
88 | end
89 |
90 | # Also, we create the function `predict` to compute the model’s output:
91 |
92 | predict(x, m) = m.W*x .+ m.b
93 |
94 | # Notice that the function `predict` takes as an argument the model struct we defined above.
95 |
96 | # ## Loss function
97 |
98 | # The most commonly used loss function for Linear Regression is Mean Squared Error (MSE).
99 | # We define the MSE function as:
100 |
101 | meansquarederror(ŷ, y) = sum((ŷ .- y).^2)/size(y, 2)
102 |
103 | # **Note:** An implementation of the MSE function is also available in
104 | # [Flux](https://fluxml.ai/Flux.jl/stable/models/losses/#Flux.Losses.mse).
105 |
106 | # ## Train function
107 | # Finally, we define the `train` function so that the model learns the best parameters (*W* and *b*):
108 |
109 |
110 | function train(; kws...)
111 | ## Initialize the Hyperparamters
112 | args = Hyperparams(; kws...)
113 |
114 | ## Load the data
115 | (x_train,y_train),(x_test,y_test) = get_processed_data(args)
116 |
117 | ## The model
118 | m = model((randn(1,13)),[0.])
119 |
120 | loss(x, y) = meansquarederror(predict(x, m), y)
121 |
122 | ## Training
123 | η = args.lr
124 | θ = params(m.W, m.b)
125 |
126 | for i = 1:500
127 | g = gradient(() -> loss(x_train, y_train), θ)
128 | for x in θ
129 | update!(x, g[x]*η)
130 | end
131 | if i%100==0
132 | @show loss(x_train, y_train)
133 | end
134 | end
135 |
136 | ## Compute the mean squared error on the test set
137 | err = meansquarederror(predict(x_test, m),y_test)
138 | println(err)
139 | end
140 |
141 | # The function above initializes the model's weights *W* randomly and the bias *b* at zero.
142 | # Then, it sets the learning rate η and creates θ, a
143 | # [params object](https://fluxml.ai/Flux.jl/stable/training/training/#Flux.params)
144 | # pointing to W and b. Also, it runs a
145 | # [custom training loop](https://fluxml.ai/Flux.jl/stable/training/training/#Custom-Training-loops)
146 | # implementing the [Gradient descent algorithm](https://en.wikipedia.org/wiki/Gradient_descent).
147 | # Finally, it computes the MSE for the test set.
148 |
149 | # ## Run the example
150 | # We call the `train` function to run the Housing data example:
151 |
152 | cd(@__DIR__)
153 | train()
154 |
--------------------------------------------------------------------------------
/other/iris/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
4 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
5 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
6 |
7 | [compat]
8 | DataFrames = "1.4.3"
9 | Flux = "0.13.9, 0.14"
10 | MLDatasets = "0.7.6"
11 | julia = "1.6"
12 |
--------------------------------------------------------------------------------
/other/iris/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Use Flux to do Logistic Regression on the Iris dataset
4 |
5 | This is a very simple model, with a single layer that outputs to softmax.
6 |
7 | Logistic regression can basically be thought of as a [single layer neural network](https://sebastianraschka.com/faq/docs/logisticregr-neuralnet.html).
8 |
9 | ## Data Source
10 |
11 | The data source is Fisher's classic dataset, retrieved from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/iris).
12 |
13 | ## Usage
14 |
15 | `cd` into `model-zoo/other/iris`, start the Julia REPL and instantiate the environment:
16 |
17 | ```julia
18 |
19 | julia> using Pkg; Pkg.activate("."); Pkg.instantiate()
20 |
21 | ```
22 |
23 | Then train and evaluate the model:
24 |
25 | ```julia
26 |
27 | julia> include("iris.jl")
28 | Starting training.
29 |
30 | Accuracy: 0.94
31 |
32 | Confusion Matrix:
33 |
34 | 3×3 Array{Int64,2}:
35 | 16 0 0
36 | 0 16 1
37 | 0 2 15
38 |
39 | julia>
40 |
41 | ```
42 |
--------------------------------------------------------------------------------
/other/iris/iris.jl:
--------------------------------------------------------------------------------
1 | # # Iris data
2 |
3 | # In this example, we create a logistic regression model that classifies iris flowers.
4 | # It consists of a [single-layer neural network](https://sebastianraschka.com/faq/docs/logisticregr-neuralnet.html)
5 | # that outputs **three** probabilities (one for each species of iris flowers).
6 | # We use Fisher's classic dataset to train the model. This dataset is retrieved from
7 | # the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/iris).
8 |
9 | # In Machine Learning, a classification task can be performed by a logistic regression model.
10 | # However, we also can create a logistic regression model as a single-layer neural network.
11 | # This neural network has the following characteristics:
12 |
13 | # * Uses the [logitcrossentropy](https://fluxml.ai/Flux.jl/stable/models/losses/#Flux.Losses.logitcrossentropy) loss function.
14 | # * Expects the class labels of the iris flowers encoded using [One-Hot encoding](https://fluxml.ai/Flux.jl/stable/data/onehot/#One-Hot-Encoding).
15 | # * Outputs the index in the output vector with the highest value as the class label using
16 | # [onecold](https://fluxml.ai/Flux.jl/stable/data/onehot/#Flux.onecold) which is the inverse operation of One-Hot encoding.
17 |
18 | # To run this example, we need the following packages:
19 |
20 | # As suggested in the repository README, uncomment the lines below if the packages still need to be installed
21 | # import Pkg
22 | # Pkg.activate(".") # activate in the folder of iris
23 | # Pkg.instantiate() # installs required packages for the example
24 |
25 | using Flux, MLDatasets, DataFrames
26 | using Flux: logitcrossentropy, normalise, onecold, onehotbatch
27 | using Statistics: mean
28 |
29 | # We set default values for the learning rate *lr* (for the training routine) and the number of
30 | # times that we repeat the train data (more information below):
31 |
32 | Base.@kwdef mutable struct Args
33 | lr::Float64 = 0.5
34 | repeat::Int = 110
35 | end
36 |
37 | # ## Data
38 |
39 | # We create the function `get_processed_data` to load the iris data, preprocess
40 | # it (normalize and One-Hot encode the class labels), and split it into train and test datasets.
41 |
42 |
43 | function get_processed_data(args::Args)
44 |
45 | iris = Iris(as_df=false)
46 | labels = iris.targets |> vec
47 | features = iris.features
48 |
49 | ## Subtract the mean and divide by the standard deviation for a normed mean of 0 and std dev of 1.
50 | normed_features = normalise(features, dims=2)
51 |
52 | klasses = sort(unique(labels))
53 | onehot_labels = onehotbatch(labels, klasses)
54 |
55 | ## Split into training and test sets, 2/3 for training, 1/3 for test.
56 | train_indices = [1:3:150 ; 2:3:150]
57 |
58 | X_train = normed_features[:, train_indices]
59 | y_train = onehot_labels[:, train_indices]
60 |
61 | X_test = normed_features[:, 3:3:150]
62 | y_test = onehot_labels[:, 3:3:150]
63 |
64 | ## Repeat the data `args.repeat` times
65 | train_data = Iterators.repeated((X_train, y_train), args.repeat)
66 | test_data = (X_test,y_test)
67 |
68 | return train_data, test_data
69 | end
70 |
71 | # The iris feature data is a 4×150 matrix: it holds 150 examples,
72 | # each with four features, plus a class label per example.
73 | # After normalizing and encoding the data, the `get_processed_data` function divides it into train and test data.
74 | # Also, it repeats the train data `args.repeat` times, which has the same effect as training for multiple epochs.
75 |
76 |
77 | # ## Metrics
78 |
79 | # We use two functions to assess the output of the model: `accuracy` and `confusion matrix`.
80 | # The [accuracy function](https://developers.google.com/machine-learning/crash-course/classification/accuracy)
81 | # measures the percentage of the labels that the model classified correctly.
82 | # On the other hand, the [confusion matrix](https://machinelearningmastery.com/confusion-matrix-machine-learning/)
83 | # is a table that summarises the model's predictions against the actual class labels.
84 |
85 |
86 | accuracy(model, x, y) = mean(onecold(model(x)) .== onecold(y))
87 |
88 |
89 | function confusion_matrix(model, X, y)
90 | ŷ = onehotbatch(onecold(model(X)), 1:3)
91 | y * transpose(ŷ)
92 | end
93 |
94 | # ## Train function
95 |
96 | # We define the `train` function that defines the model and trains it:
97 |
98 | function train(; kws...)
99 | ## Initialize hyperparameter arguments
100 | args = Args(; kws...)
101 |
102 | ## Load processed data
103 | train_data, test_data = get_processed_data(args)
104 |
105 | ## Declare the model, taking 4 features as input and outputting 3 probabilities,
106 | ## one for each species of iris.
107 | model = Chain(Dense(4, 3))
108 |
109 | ## Define loss function to be used in training
110 | ## For numerical stability, we use here logitcrossentropy
111 | loss(m, x, y) = logitcrossentropy(m(x), y)
112 |
113 | ## Training
114 | ## Gradient descent optimiser with learning rate `args.lr`
115 | optimiser = Descent(args.lr)
116 | ## For any other optimiser, we would need e.g.
117 | ## opt_state = Flux.setup(Momentum(args.lr), model)
118 |
119 | println("Starting training.")
120 | Flux.train!(loss, model, train_data, optimiser)
121 |
122 | return model, test_data
123 | end
124 |
125 | # The function above loads the train and test data.
126 | # Then, it creates the model as a single-layer network that expects as an input
127 | # a four-element vector (features) and outputs a three-element vector
128 | # (the number of classes of species of iris flowers).
129 | # Also, it sets [logitcrossentropy](https://fluxml.ai/Flux.jl/stable/models/losses/#Flux.Losses.logitcrossentropy)
130 | # as the loss function and the Gradient descent optimiser
131 | # [Descent](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.Descent).
132 | # Finally, it runs a training step with the
133 | # [train! function](https://fluxml.ai/Flux.jl/stable/training/training/#Flux.Optimise.train!).
134 |
135 | # ## Test function
136 |
137 | # After training the model, we define the `test` function that
138 | # computes the model performance on the test data.
139 | # It calls the `accuracy` function and displays the confusion matrix.
140 | # This function also asserts that the model achieves an accuracy score of at least 0.8.
141 |
142 |
143 | function test(model, test)
144 | ## Testing model performance on test data
145 | X_test, y_test = test
146 | accuracy_score = accuracy(model, X_test, y_test)
147 |
148 | println("\nAccuracy: $accuracy_score")
149 |
150 | ## Sanity check.
151 | @assert accuracy_score > 0.8
152 |
153 | ## To avoid confusion, here is the definition of a
154 | ## Confusion Matrix: https://en.wikipedia.org/wiki/Confusion_matrix
155 | println("\nConfusion Matrix:\n")
156 | display(confusion_matrix(model, X_test, y_test))
157 | end
158 |
159 | # ## Run the example
160 |
161 | # We call the `train` function to run the iris data example and compute the model performance:
162 |
163 | cd(@__DIR__)
164 | model, test_data = train()
165 | test(model, test_data)
166 |
--------------------------------------------------------------------------------
/script/Manifest.toml:
--------------------------------------------------------------------------------
1 | # This file is machine-generated - editing it directly is not advised
2 |
3 | [[Base64]]
4 | uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
5 |
6 | [[BinaryProvider]]
7 | deps = ["Libdl", "SHA"]
8 | git-tree-sha1 = "5b08ed6036d9d3f0ee6369410b830f8873d4024c"
9 | uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
10 | version = "0.5.8"
11 |
12 | [[Compat]]
13 | deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
14 | git-tree-sha1 = "ed2c4abadf84c53d9e58510b5fc48912c2336fbb"
15 | uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
16 | version = "2.2.0"
17 |
18 | [[Dates]]
19 | deps = ["Printf"]
20 | uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
21 |
22 | [[DelimitedFiles]]
23 | deps = ["Mmap"]
24 | uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
25 |
26 | [[Distributed]]
27 | deps = ["Random", "Serialization", "Sockets"]
28 | uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
29 |
30 | [[FluxBot]]
31 | deps = ["GitHub", "Glob", "Pkg", "Sockets"]
32 | git-tree-sha1 = "74ace680acd73d83cea4f98fe4be087a48e9a278"
33 | repo-rev = "master"
34 | repo-url = "https://github.com/dhairyagandhi96/FluxBot.jl.git"
35 | uuid = "352bd040-0f98-11ea-1faf-6f930ca83554"
36 | version = "0.1.0"
37 |
38 | [[GitHub]]
39 | deps = ["Base64", "Dates", "HTTP", "JSON", "MbedTLS", "Sockets"]
40 | git-tree-sha1 = "f8f9c05004861b6680c1bd363e7e2fcff602a283"
41 | uuid = "bc5e4493-9b4d-5f90-b8aa-2b2bcaad7a26"
42 | version = "5.1.4"
43 |
44 | [[Glob]]
45 | deps = ["Compat", "Test"]
46 | git-tree-sha1 = "c72f1fcb7d17426de1e8af2e948dfb3de1116eed"
47 | uuid = "c27321d9-0574-5035-807b-f59d2c89b15c"
48 | version = "1.2.0"
49 |
50 | [[HTTP]]
51 | deps = ["Base64", "Dates", "IniFile", "MbedTLS", "Sockets"]
52 | git-tree-sha1 = "5c49dab19938b119fe204fd7d7e8e174f4e9c68b"
53 | uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3"
54 | version = "0.8.8"
55 |
56 | [[IniFile]]
57 | deps = ["Test"]
58 | git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8"
59 | uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f"
60 | version = "0.5.0"
61 |
62 | [[InteractiveUtils]]
63 | deps = ["Markdown"]
64 | uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
65 |
66 | [[JSON]]
67 | deps = ["Dates", "Mmap", "Parsers", "Unicode"]
68 | git-tree-sha1 = "b34d7cef7b337321e97d22242c3c2b91f476748e"
69 | uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
70 | version = "0.21.0"
71 |
72 | [[LibGit2]]
73 | uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
74 |
75 | [[Libdl]]
76 | uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
77 |
78 | [[LinearAlgebra]]
79 | deps = ["Libdl"]
80 | uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
81 |
82 | [[Literate]]
83 | deps = ["Base64", "JSON", "REPL", "Test"]
84 | git-tree-sha1 = "71cdca07ffe1731c0ccbef801913e4ab8f9aac2b"
85 | pinned = true
86 | uuid = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
87 | version = "1.0.2"
88 |
89 | [[Logging]]
90 | uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
91 |
92 | [[Markdown]]
93 | deps = ["Base64"]
94 | uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
95 |
96 | [[MbedTLS]]
97 | deps = ["BinaryProvider", "Dates", "Distributed", "Libdl", "Random", "Sockets", "Test"]
98 | git-tree-sha1 = "2d94286a9c2f52c63a16146bb86fd6cdfbf677c6"
99 | uuid = "739be429-bea8-5141-9913-cc70e7f3736d"
100 | version = "0.6.8"
101 |
102 | [[Mmap]]
103 | uuid = "a63ad114-7e13-5084-954f-fe012c677804"
104 |
105 | [[Parsers]]
106 | deps = ["Dates", "Test"]
107 | git-tree-sha1 = "d112c19ccca00924d5d3a38b11ae2b4b268dda39"
108 | uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
109 | version = "0.3.11"
110 |
111 | [[Pkg]]
112 | deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
113 | uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
114 |
115 | [[Printf]]
116 | deps = ["Unicode"]
117 | uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
118 |
119 | [[REPL]]
120 | deps = ["InteractiveUtils", "Markdown", "Sockets"]
121 | uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
122 |
123 | [[Random]]
124 | deps = ["Serialization"]
125 | uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
126 |
127 | [[SHA]]
128 | uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
129 |
130 | [[Serialization]]
131 | uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
132 |
133 | [[SharedArrays]]
134 | deps = ["Distributed", "Mmap", "Random", "Serialization"]
135 | uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
136 |
137 | [[Sockets]]
138 | uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
139 |
140 | [[SparseArrays]]
141 | deps = ["LinearAlgebra", "Random"]
142 | uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
143 |
144 | [[Statistics]]
145 | deps = ["LinearAlgebra", "SparseArrays"]
146 | uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
147 |
148 | [[Test]]
149 | deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
150 | uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
151 |
152 | [[UUIDs]]
153 | deps = ["Random", "SHA"]
154 | uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
155 |
156 | [[Unicode]]
157 | uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
158 |
--------------------------------------------------------------------------------
/script/Notebooks.toml:
--------------------------------------------------------------------------------
1 | [MNIST]
2 | path = "vision/mnist"
3 | notebook = ["mlp.jl", "autoencoder.jl", "vae.jl"]
4 |
5 | [CIFAR10]
6 | path = "vision/cifar10"
7 | notebook = "cifar10.jl"
8 |
9 | [char-rnn]
10 | path = "text/char-rnn"
11 | notebook = "char-rnn.jl"
12 |
13 | [lang-detection]
14 | path = "text/lang-detection"
15 | notebook = "model.jl"
16 | deps = ["scrape.jl"]
17 |
18 | [phonemes]
19 | path = "text/phonemes"
20 | notebook = "1-model.jl"
21 | deps = ["0-data.jl"]
22 |
23 | [treebank]
24 | path = "text/treebank"
25 | notebook = "recursive.jl"
26 | deps = ["data.jl"]
27 |
28 | [diffeq]
29 | path = "other/diffeq"
30 | notebook = "diffeq.jl"
31 |
32 | #[bitstring-parity]
33 | #path = "other/bitstring-parity"
34 | #notebook = ["xor1.jl", "xor2.jl", "xor3.jl"]
35 | #deps = "data.jl"
36 |
37 | [fizzbuzz]
38 | path = "other/fizzbuzz"
39 | notebook = "fizzbuzz.jl"
40 |
41 | [housing]
42 | path = "other/housing"
43 | notebook = "housing.jl"
44 |
45 | [tutorials]
46 | path = "tutorials/60-minute-blitz"
47 | notebook = "60-minute-blitz.jl"
48 |
--------------------------------------------------------------------------------
/script/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | FluxBot = "352bd040-0f98-11ea-1faf-6f930ca83554"
3 | Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
4 |
--------------------------------------------------------------------------------
/script/convert.jl:
--------------------------------------------------------------------------------
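1 | # Convert the example scripts listed in Notebooks.toml into Jupyter notebooks with Literate.jl (invoked as: julia convert.jl <section-name>).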
1 | using Pkg, Pkg.TOML
2 |
3 | root = joinpath(@__DIR__, "..")
4 |
5 | meta = TOML.parsefile(joinpath(@__DIR__, "Notebooks.toml"))
6 | meta = meta[ARGS[1]]
7 |
8 | path = meta["path"]
9 | deps = get(meta, "deps", [])
10 | deps = deps isa String ? [deps] : deps
11 |
12 | for d in ["Project.toml", "Manifest.toml", ".gitignore"]
13 | isfile(joinpath(root, path, d)) && push!(deps, d)
14 | end
15 |
16 | mkpath(joinpath(root, "notebooks", path))
17 | for dep in deps
18 | cp(joinpath(root, path, dep), joinpath(root, "notebooks", path, dep), force = true)
19 | end
20 |
21 | pushfirst!(LOAD_PATH, @__DIR__)
22 | Pkg.activate(joinpath(root, "notebooks", path))
23 |
24 | using Literate
25 |
26 | function postprocess_nb(content)
27 | content = replace(content, r"\s*using CUDA" => "## using CUDA")
28 | return content
29 | end
30 |
31 | function preprocess_nb(content)
32 | content = replace(content, r"#\s*using CUDA" => "using CUDA")
33 | content = "using Pkg; Pkg.activate(\".\"); Pkg.instantiate();\n\n" * content
34 | return content
35 | end
36 |
37 | function init_nb(content)
38 | content = "using Pkg; Pkg.activate(\"$root\"); Pkg.status();\n\n" * content
39 | return content
40 | end
41 |
42 | scripts = meta["notebook"]
43 | scripts isa String && (scripts = [scripts])
44 |
45 | for script in scripts
46 | Literate.notebook(joinpath(root, path, script),
47 | joinpath(root, "notebooks", path),
48 | credit = false, preprocess = preprocess_nb,
49 | postprocess = postprocess_nb)
50 | end
51 |
52 | scripts = map(x -> x[1:end - 3] * ".ipynb", scripts)
53 | nbs = filter(x -> endswith(x, ".ipynb"), readdir(joinpath(root, path)))
54 | keep = union(deps, scripts, nbs)
55 | files = readdir(joinpath(root, "notebooks", path))
56 |
57 | for r in files
58 | r in keep || rm(joinpath(root, "notebooks", path, r), force = true)
59 | end
60 |
--------------------------------------------------------------------------------
/script/notebook.jl:
--------------------------------------------------------------------------------
1 | using Pkg
2 | Pkg.activate(@__DIR__)
3 | Pkg.instantiate()
4 |
5 | using Pkg.TOML
6 | meta = length(ARGS) > 0 ? ARGS :
7 | keys(TOML.parsefile(joinpath(@__DIR__, "Notebooks.toml")))
8 |
9 | convertjl = joinpath(@__DIR__, "convert.jl")
10 |
11 | for proj in meta
12 | run(`$(Base.julia_cmd()) $convertjl $proj`)
13 | end
14 |
--------------------------------------------------------------------------------
/text/char-rnn/.gitignore:
--------------------------------------------------------------------------------
1 | input.txt
2 |
--------------------------------------------------------------------------------
/text/char-rnn/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
3 | OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f"
4 | StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
5 |
6 | [compat]
7 | Flux = "0.13.9, 0.14"
--------------------------------------------------------------------------------
/text/char-rnn/README.md:
--------------------------------------------------------------------------------
1 | # Character-Level RNN
2 |
3 | 
4 |
5 | [Source](https://d2l.ai/chapter_recurrent-neural-networks/rnn.html#rnn-based-character-level-language-models)
6 |
7 | ## Model Information
8 |
9 | A recurrent neural network (RNN) outputs a prediction and a hidden state at each step of the computation. The hidden state captures historical information of a sequence (i.e., the neural network has memory) and the output is the final prediction of the model. We use this type of neural network to model sequences such as text or time series.
10 |
11 |
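12 | As a rough illustration of this idea (a minimal sketch, not the exact layers used in `char-rnn.jl`; the alphabet size below is purely illustrative), such a model can be written in Flux as a recurrent layer over one-hot characters followed by a dense layer that produces logits for the next character:
13 |
14 | ```julia
15 | using Flux
16 |
17 | alphabet_len = 68                  # illustrative alphabet size
18 | model = Chain(
19 |     LSTM(alphabet_len => 128),     # the hidden state carries the sequence history
20 |     Dense(128 => alphabet_len),    # logits for the next character
21 | )
22 | ```
23 |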
12 | ## Training
13 |
14 | ```shell
15 | cd text/char-rnn
16 | julia --project char-rnn.jl
17 | ```
18 |
19 | ## References
20 |
21 | * [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/)
22 | * [Understanding LSTM Networks](https://colah.github.io/posts/2015-08-Understanding-LSTMs/)
23 | * [Aston Zhang, Zachary C. Lipton, Mu Li and Alexander J. Smola, "Dive into Deep Learning", 2020](https://d2l.ai/chapter_recurrent-neural-networks/rnn.html#rnn-based-character-level-language-models)
24 |
25 |
--------------------------------------------------------------------------------
/text/char-rnn/docs/rnn-train.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/text/char-rnn/docs/rnn-train.png
--------------------------------------------------------------------------------
/text/lang-detection/.gitignore:
--------------------------------------------------------------------------------
1 | corpus
2 |
--------------------------------------------------------------------------------
/text/lang-detection/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | Cascadia = "54eefc05-d75b-58de-a785-1a3403f0919f"
3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
4 | Gumbo = "708ec375-b3d6-5a57-a7ce-8257bf98657a"
5 | HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
6 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
7 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
8 | Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
9 |
10 | [compat]
11 | Cascadia = "1"
12 | Flux = "0.13.9, 0.14"
13 | Gumbo = "0.8"
14 | HTTP = "1.7"
15 | julia = "1.6"
16 |
--------------------------------------------------------------------------------
/text/lang-detection/README.md:
--------------------------------------------------------------------------------
1 | # Language detection model
2 |
3 | This model uses an LSTM for character-level language detection. Given a sentence of text, each character is fed into the model and the final output determines which of five languages the sentence was written in.
4 |
5 | First run `scrape.jl` to download a Wikipedia data set. `model.jl` contains the actual model and training code.
6 |
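7 | As a minimal sketch of the idea (the real layers live in `model.jl`), the encoder reads one-hot characters through a dense and an LSTM layer, and a classifier maps the final hidden state to per-language scores:
8 |
9 | ```julia
10 | using Flux
11 |
12 | alphabet_len, langs_len = 40, 5                                # illustrative sizes
13 | encoder = Chain(Dense(alphabet_len => 15, σ), LSTM(15 => 15))  # reads one character per step
14 | classifier = Dense(15 => langs_len)                            # scores for the five languages
15 | ```
16 |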
7 | ## Training
8 |
9 | ```shell
10 | cd text/lang-detection
11 | julia --project scrape.jl
12 | julia --project model.jl
13 | ```
14 |
15 | ## References
16 |
17 | * [Aston Zhang, Zachary C. Lipton, Mu Li and Alexander J. Smola, "Dive into Deep Learning", 2020](https://d2l.ai/chapter_recurrent-modern/lstm.html)
18 |
--------------------------------------------------------------------------------
/text/lang-detection/model.jl:
--------------------------------------------------------------------------------
1 | # # Language detection (character-level)
2 |
3 | # In this example, we create a character-level language detection model. Given a sentence (text), each character is fed into an [LSTM](https://d2l.ai/chapter_recurrent-modern/lstm.html) and then the final output determines in which language the text is written.
4 |
5 | # This example illustrates the preprocessing of text data before feeding it into the model as well as the use of a scanner and an encoder for a language model.
6 |
7 | # If you need more information about how LSTMs work and related technical concepts,
8 | # check out the following resources:
9 |
10 | # * [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/)
11 | # * [Understanding LSTM Networks](https://colah.github.io/posts/2015-08-Understanding-LSTMs/)
12 | # * [Illustrated Guide to Recurrent Neural Networks: Understanding the Intuition](https://www.youtube.com/watch?v=LHXXI4-IEns)
13 |
14 | # To run this example, we need the following packages:
15 |
16 | using Flux
17 | using Flux: onehot, onecold, onehotbatch, logitcrossentropy, reset!
18 | using Statistics: mean
19 | using Random
20 | using Unicode
21 |
22 | # We set default values for hyperparameters:
23 |
24 | Base.@kwdef mutable struct Args
25 | lr::Float64 = 1e-3 ## Learning rate
26 | N::Int = 15 ## Number of units in the hidden layer
27 | epochs::Int = 3 ## Number of epochs
28 | test_len::Int = 100 ## Length of the test data
29 | langs_len::Int = 0 ## Number of different languages in the corpora
30 | alphabet_len::Int = 0 ## Total number of possible characters in the corpora
31 | throttle::Int = 10 ## Throttle timeout
32 | end
33 |
34 | # ## Load dataset
35 |
36 | # Before running this example, you need to obtain the data by running the script `scrape.jl`.
37 | # It downloads articles from Wikipedia in five different languages (English, Italian, French, Spanish, and Danish).
38 | # Also, it creates the folder `corpus` that contains five text files (one per language).
39 |
40 | # The function `get_processed_data` reads the text files and creates the data set for training the model.
41 | # First, it loads the raw text into a dictionary.
42 | # Then, it defines the alphabet and the characters that will be represented as unknown.
43 | # Finally, it one-hot encodes the text and its corresponding labels (the language in which it is written)
44 | # before splitting the data into train and test data sets.
45 |
46 |
47 | function get_processed_data(args)
48 | corpora = Dict()
49 |
50 | for file in readdir("corpus")
51 | lang = Symbol(match(r"(.*)\.txt", file).captures[1])
52 | corpus = split(String(read("corpus/$file")), ".")
53 | corpus = strip.(Unicode.normalize.(corpus, casefold=true, stripmark=true))
54 | corpus = filter(!isempty, corpus)
55 | corpora[lang] = corpus
56 | end
57 |
58 | langs = collect(keys(corpora))
59 | args.langs_len = length(langs)
60 | alphabet = ['a':'z'; '0':'9'; ' '; '\n'; '_']
61 | args.alphabet_len = length(alphabet)
62 |
63 | ## See which chars will be represented as "unknown"
64 | unk_chars = unique(filter(∉(alphabet), join(vcat(values(corpora)...))))
65 | dataset = [(onehotbatch(s, alphabet, '_'), onehot(l, langs)) for l in langs for s in corpora[l]] |> shuffle
66 |
67 | train, test = dataset[1:end-args.test_len], dataset[end-args.test_len+1:end]
68 | testX, testY = first.(test), last.(test)
69 | return train, testX, testY, langs
70 | end
71 |
72 | # ## Create the model
73 |
74 | # The model consists of an **encoder** and a **classifier**. The **encoder** reads the sentence one character
75 | # at a time using one [dense](https://fluxml.ai/Flux.jl/stable/models/layers/#Flux.Dense)
76 | # and one [LSTM](https://fluxml.ai/Flux.jl/stable/models/layers/#Flux.LSTM) layer, and encodes it through
77 | # the state of its last character.
78 | # The **classifier** inputs this encoding and outputs the predicted language for the sentence.
79 | # The model is defined as a [custom model](https://fluxml.ai/Flux.jl/stable/models/advanced/).
80 |
81 | struct EncoderClassifier{E, C}
82 | encoder::E
83 | classifier::C
84 | end
85 |
86 | function build_model(args)
87 | encoder = Chain(Dense(args.alphabet_len, args.N, σ), LSTM(args.N, args.N))
88 | classifier = Dense(args.N, args.langs_len)
89 | return EncoderClassifier(encoder, classifier)
90 | end
91 |
92 | # Notice that we use the function [reset!](https://fluxml.ai/Flux.jl/stable/models/layers/#Flux.reset!)
93 | # when computing the model's prediction to reset the hidden state of an LSTM layer back to its original value.
94 |
95 | function (m::EncoderClassifier)(x)
96 | state = m.encoder(x)[:, end]
97 | Flux.reset!(m.encoder)
98 | m.classifier(state)
99 | end
100 |
101 | Flux.@functor EncoderClassifier
102 |
103 | # ## Train the model
104 |
105 | # The function `train` executes one training step for the model
106 | # using Flux’s [train!](https://fluxml.ai/Flux.jl/stable/training/training/#Flux.Optimise.train!).
107 | # It uses the loss function
108 | # [logitcrossentropy](https://fluxml.ai/Flux.jl/stable/models/losses/#Flux.Losses.logitcrossentropy)
109 | # and the [ADAM](https://fluxml.ai/Flux.jl/stable/training/optimisers/#Flux.Optimise.ADAM) optimizer.
110 |
111 | function train(; kws...)
112 | ## Initialize Hyperparameters
113 | args = Args(; kws...)
114 |
115 | ## Load Data
116 | train_data, test_X, test_Y, langs = get_processed_data(args)
117 |
118 | @info("Constructing Model...")
119 | model = build_model(args)
120 | loss(model, x, y) = logitcrossentropy(model(x), y)
121 | opt = Flux.setup(ADAM(args.lr), model)
122 |
123 | @info("Training...")
124 | for epoch in 1:args.epochs
125 | Flux.train!(loss, model, train_data, opt)
126 | test_loss = mean(loss(model, x, y) for (x, y) in zip(test_X, test_Y))
127 | @show epoch, test_loss
128 | end
129 |
130 | test_predictions = [onecold(model(x), langs) for x in test_X]
131 | accuracy = mean(test_predictions .== [onecold(y, langs) for y in test_Y])
132 | @show accuracy
133 | end
134 |
135 | cd(@__DIR__)
136 | train()
137 |
--------------------------------------------------------------------------------
/text/lang-detection/scrape.jl:
--------------------------------------------------------------------------------
1 | using Cascadia, Gumbo, HTTP
2 |
3 | pages = Dict(
4 | :en => ["Wikipedia", "Osama_bin_Laden_(elephant)", "List_of_lists_of_lists", "Josephine_Butler", "Canadian_football", "Judaism"],
5 | :it => ["Wikipedia", "Ludovico_Einaudi", "Filosofia_della_scienza", "Pizza", "Effie_Gray", "Galeazzo_Maria_Sforza", "Ebraismo"],
6 | :fr => ["Wikipedia", "Philosophie_des_sciences", "Seconde_Guerre_mondiale", "Eric_Hakonsson"],
7 | :es => ["Wikipedia", "Chorizo", "Historia_de_Barcelona", "Espania", "Las_Vegas_Strip", "Judaismo"],
8 | :da => ["Wikipedia", "H.C._Andersen", "L.A._Ring", "Jiangxi", "NATO", "Thomas_Edison", "Bangladesh"])
9 |
10 | rawpage(url) = parsehtml(String(HTTP.get(url).body)).root
11 |
12 | content(url) = join((collect(nodeText(m) for m in eachmatch(sel".mw-parser-output > p", rawpage(url)))), "\n")
13 |
14 | cd(@__DIR__)
15 | mkpath("corpus")
16 |
17 | for (lang, ps) in pages
18 | open("corpus/$lang.txt", "w") do io
19 | for p in ps
20 | write(io, content("https://$lang.wikipedia.org/wiki/$p"))
21 | end
22 | end
23 | end
24 |
--------------------------------------------------------------------------------
/text/nanogpt/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
4 | JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
5 | MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
6 | ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
7 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
8 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
9 | StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
10 | cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
11 |
--------------------------------------------------------------------------------
/text/nanogpt/README.md:
--------------------------------------------------------------------------------
1 | # Generative pre-trained transformer
2 |
3 | 
4 |
5 | [Source](https://en.wikipedia.org/wiki/Generative_pre-trained_transformer)
6 |
7 | ## Model Information
8 |
9 | GPT is built on a multi-head attention architecture. We offer here a very small instance based on Andrej Karpathy's [nanoGPT](https://github.com/karpathy/nanoGPT). The default parameters give a model much smaller than nanoGPT, tuned for fastest convergence on a very small data set (Shakespeare).
10 |
11 | This model takes as input a sequence of existing text (context) and produces as output the predicted next character. Actually, it produces the predicted next character for each initial sub-sequence of the input, in effect giving an extra degree of parallelism for the purposes of training.
12 |
13 | For the attention mechanism, we use [Flux.MultiHeadAttention](https://fluxml.ai/Flux.jl/stable/reference/models/layers/#MultiHeadAttention).
14 |
15 |
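16 | To give a flavour of the layer's interface, here is a minimal, self-contained sketch (the sizes are illustrative; `gpt.jl` embeds the layer in full transformer blocks):
17 |
18 | ```julia
19 | using Flux
20 |
21 | mha = MultiHeadAttention(64; nheads = 4)  # 64 embedding dims, 4 heads (illustrative)
22 | x = rand(Float32, 64, 50, 8)              # (features, sequence length, batch)
23 | y, attn = mha(x)                          # self-attention; y is again 64×50×8
24 | ```
25 |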
16 | ## Training
17 |
18 | ```shell
19 | cd text/nanogpt
20 | julia --project gpt.jl
21 | ```
22 |
23 | ## Example output
24 |
25 | After one epoch:
26 |
27 | generate(model, "_", 50) = "_me, but plept fairs, And heards, verchean my word"
28 | generate(model, "_", 50) = "_ows know yought, This alce! totether him. weliest"
29 | generate(model, "The", 50) = "These prurd passtion? CINCESSIT: He eloucy I must"
30 | generate(model, "The", 50) = "The bitherse dresic in to so shall with a his the "
31 |
32 | After 20 epochs:
33 |
34 | generate(model, "_", 50) = "_ething a calling do me diseases Of, on he's to th"
35 | generate(model, "_", 50) = "_ ragg Thou flatters all in wators the selfsarut o"
36 | generate(model, "The", 50) = "The Mirtouggake Go: For my mischance lords his sea"
37 | generate(model, "The", 50) = "The oll-gakemoremo his dead: All this man make gen"
38 |
39 | ## References
40 |
41 | * [Attention is all you need](https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf)
42 | * [Youtube (3blue1brown): Attention in transformers, visually explained](https://www.youtube.com/watch?v=eMlx5fFNoYc)
43 | * [Youtube (Karpathy): Let's build GPT: from scratch, in code, spelled out](https://www.youtube.com/watch?v=kCc8FmEb1nY)
44 | * [nanoGPT](https://github.com/karpathy/nanoGPT)
45 |
--------------------------------------------------------------------------------
/text/phonemes/0-data.jl:
--------------------------------------------------------------------------------
1 | using Flux, Flux.Data.CMUDict
2 | using Flux: onehot, batchseq
3 | using Base.Iterators: partition
4 |
5 | tokenise(s, α) = [onehot(c, α) for c in s]
6 |
7 | function getdata(args)
8 | dict = cmudict()
9 | alphabet = [:end, CMUDict.alphabet()...]
10 | args.Nin = length(alphabet)
11 |
12 | phones = [:start, :end, CMUDict.symbols()...]
13 | args.phones_len = length(phones)
14 |
15 | # Turn a word into a sequence of vectors
16 | tokenise("PHYLOGENY", alphabet)
17 | # Same for phoneme lists
18 | tokenise(dict["PHYLOGENY"], phones)
19 |
20 | words = sort(collect(keys(dict)), by = length)
21 |
22 | # Finally, create iterators for our inputs and outputs.
23 | batches(xs, p) = [batchseq(b, p) for b in partition(xs, 50)]
24 |
25 | Xs = batches([tokenise(word, alphabet) for word in words],
26 | onehot(:end, alphabet))
27 |
28 | Ys = batches([tokenise([dict[word]..., :end], phones) for word in words],
29 | onehot(:end, phones))
30 |
31 | Yo = batches([tokenise([:start, dict[word]...], phones) for word in words],
32 | onehot(:end, phones))
33 |
34 | data = collect(zip(Xs, Yo, Ys))
35 | return data, alphabet, phones
36 | end
37 |
--------------------------------------------------------------------------------
/text/phonemes/1-model.jl:
--------------------------------------------------------------------------------
1 | # Based on https://arxiv.org/abs/1409.0473
2 | include("0-data.jl")
3 | using Flux: flip, logitcrossentropy, reset!, throttle
4 | using Parameters: @with_kw
5 | using StatsBase: wsample
6 |
7 | @with_kw mutable struct Args
8 | lr::Float64 = 1e-3 # learning rate
9 | Nin::Int = 0 # size of input layer, will be assigned as length(alphabet)
10 | Nh::Int = 30 # size of hidden layer
11 | phones_len::Int = 0 # length of phonemes
12 | throttle::Int = 30 # throttle timeout
13 | end
14 |
15 | function build_model(args)
16 | # A recurrent model which takes a token and returns a context-dependent
17 | # annotation.
18 | forward = LSTM(args.Nin, args.Nh÷2)
19 | backward = LSTM(args.Nin, args.Nh÷2)
20 | encode(tokens) = vcat.(forward.(tokens), flip(backward, tokens))
21 |
22 | alignnet = Dense(2*args.Nh, 1)
23 |
24 | # A recurrent model which takes a sequence of annotations, attends, and returns
25 | # a predicted output token.
26 | recur = LSTM(args.Nh+args.phones_len, args.Nh)
27 | toalpha = Dense(args.Nh, args.phones_len)
28 | return (forward, backward, alignnet, recur, toalpha), encode
29 | end
30 |
31 | align(s, t, alignnet) = alignnet(vcat(t, s .* Int.(ones(1, size(t, 2)))))
32 |
33 | function asoftmax(xs)
34 | xs = [exp.(x) for x in xs]
35 | s = sum(xs)
36 | return [x ./ s for x in xs]
37 | end
38 |
39 | function decode1(tokens, phone, state)
40 | # Unpack models
41 | forward, backward, alignnet, recur, toalpha = state
42 | weights = asoftmax([align(recur.state[2], t, alignnet) for t in tokens])
43 | context = sum(map((a, b) -> a .* b, weights, tokens))
44 | y = recur(vcat(Float32.(phone), context))
45 | return toalpha(y)
46 | end
47 |
48 | decode(tokens, phones, state) = [decode1(tokens, phone, state) for phone in phones]
49 |
50 | function model(x, y, state, encode)
51 | # Unpack models
52 | forward, backward, alignnet, recur, toalpha = state
53 | ŷ = decode(encode(x), y, state)
54 | reset!(state)
55 | return ŷ
56 | end
57 |
58 | function predict(s, state, encode, alphabet, phones)
59 | ts = encode(tokenise(s, alphabet))
60 | ps = Any[:start]
61 | for i = 1:50
62 | dist = softmax(decode1(ts, onehot(ps[end], phones), state))
63 | next = wsample(phones, vec(dist))
64 | next == :end && break
65 | push!(ps, next)
66 | end
67 | reset!(state)
68 | return ps[2:end]
69 | end
70 |
71 | function train(; kws...)
72 | # Initialize Hyperparameters
73 | args = Args(; kws...)
74 | @info("Loading Data...")
75 | data, alphabet, phones = getdata(args)
76 |
77 | # The full model
78 | # state = (forward, backward, alignnet, recur, toalpha)
79 | @info("Constructing Model...")
80 | state, encode = build_model(args)
81 |
82 | loss(x, yo, y) = sum(logitcrossentropy.(model(x, yo, state, encode), y))
83 | evalcb = () -> @show loss(data[500]...)
84 | opt = ADAM(args.lr)
85 | @info("Training...")
86 | Flux.train!(loss, params(state), data, opt, cb = throttle(evalcb, args.throttle))
87 | return state, encode, alphabet, phones
88 | end
89 |
90 | cd(@__DIR__)
91 | state, encode, alphabet, phones = train()
92 | @info("Testing...")
93 | predict("PHYLOGENY", state, encode, alphabet, phones)
94 |
--------------------------------------------------------------------------------
/text/phonemes/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
3 | Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
4 | StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
5 |
6 | [compat]
7 | Flux = "0.11.4"
8 | julia = "1.5"
9 |
--------------------------------------------------------------------------------
/text/treebank/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
3 | Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
4 |
5 | [compat]
6 | Flux = "0.11.4"
7 | julia = "1.5"
8 |
--------------------------------------------------------------------------------
/text/treebank/README.md:
--------------------------------------------------------------------------------
1 | # Recursive net on IMDB sentiment treebank
2 |
3 | 
4 |
5 | [Source](https://nlp.stanford.edu/~socherr/EMNLP2013_RNTN.pdf)
6 |
7 | ## Model information
8 |
9 | A recursive neural network can be used for learning tree-like structures (directed acyclic graphs). It computes compositional vector representations for phrases of variable length, which are used as features for performing classification.
10 |
11 | This example uses the [Stanford Sentiment Treebank dataset (SST)](https://nlp.stanford.edu/sentiment/index.html), which is often used as one of the benchmark datasets to test new language models. It has five different classes (very negative to very positive) and the goal is to perform sentiment analysis.
12 |
13 |
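14 | The compositional step at the heart of the model is small enough to sketch here (illustrative sizes; see `recursive.jl` for the full training code): two child phrase vectors are merged into a parent phrase vector by a single dense layer.
15 |
16 | ```julia
17 | using Flux
18 |
19 | N = 300                          # size of the phrase vectors
20 | W = Dense(2N => N, tanh)         # merges two children into one parent
21 | combine(a, b) = W(vcat(a, b))
22 | ```
23 |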
14 | ## Training
15 |
16 | ```shell
17 | cd text/treebank
18 | julia --project recursive.jl
19 | ```
20 |
21 | ## References
22 |
23 | * [Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank](https://nlp.stanford.edu/~socherr/EMNLP2013_RNTN.pdf)
24 |
--------------------------------------------------------------------------------
/text/treebank/data.jl:
--------------------------------------------------------------------------------
1 | using Flux
2 | using Flux: onehot
3 | using Flux.Data.Sentiment
4 | using Flux.Data: Tree, leaves
5 |
6 | function getdata()
7 | traintrees = Sentiment.train()
8 |
9 | ## Get the raw labels and phrases as separate trees.
10 | labels = map.(x -> x[1], traintrees)
11 | phrases = map.(x -> x[2], traintrees)
12 |
13 | ## All tokens in the training set.
14 | tokens = vcat(map(leaves, phrases)...)
15 |
16 | ## Count how many times each token appears.
17 | freqs = Dict{String,Int}()
18 | for t in tokens
19 | freqs[t] = get(freqs, t, 0) + 1
20 | end
21 |
22 | ## Replace singleton tokens with an "unknown" marker.
23 | ## This roughly cuts our "alphabet" of tokens in half.
24 | phrases = map.(t -> get(freqs, t, 0) == 1 ? "UNK" : t, phrases)
25 |
26 | ## Our alphabet of tokens.
27 | alphabet = unique(vcat(map(leaves, phrases)...))
28 |
29 | ## One-hot-encode our training data with respect to the alphabet.
30 | phrases_e = map.(t -> t === nothing ? t : onehot(t, alphabet), phrases)
31 | labels_e = map.(t -> onehot(t, 0:4), labels)
32 |
33 | train = map.(tuple, phrases_e, labels_e)
34 | return train, alphabet
35 | end
36 |
--------------------------------------------------------------------------------
/text/treebank/recursive.jl:
--------------------------------------------------------------------------------
1 | # # Recursive net on IMDB sentiment treebank
2 |
3 | # In this example, we create a recursive neural network to perform sentiment analysis using
4 | # IMDB data.
5 | # This type of model can be used
6 | # for learning tree-like structures (directed acyclic graphs).
7 | # It computes compositional vector representations for phrases of variable length
8 | # which are used as features for performing classification.
9 |
10 | # 
11 |
12 | # [Source](https://nlp.stanford.edu/~socherr/EMNLP2013_RNTN.pdf)
13 |
14 | # This example uses the [Stanford Sentiment Treebank dataset
15 | # (SST)](https://nlp.stanford.edu/sentiment/index.html) which is often used
16 | # as one of the benchmark datasets to test new language models.
17 | # It has five different classes (very negative to very positive) and the
18 | # goal is to perform sentiment analysis.
19 |
20 | # To run this example, we need the following packages:
21 |
22 | using Flux
23 | using Flux: logitcrossentropy, throttle
24 | using Flux.Data: Tree, children, isleaf
25 | using Parameters: @with_kw
26 |
27 | # The script `data.jl` contains the function `getdata` that obtains
28 | # and processes the SST dataset.
29 |
30 | include("data.jl")
31 |
32 | # We set default values for the hyperparameters:
33 |
34 | @with_kw mutable struct Args
35 | lr::Float64 = 1e-3 ## Learning rate
36 | N::Int = 300 ## Size of the phrase embedding vectors
37 | throttle::Int = 10 ## Throttle timeout
38 | end
39 |
40 | # ## Build the model
41 |
42 | # The function `train` loads the data, builds and trains the model.
43 | # For more information on how the recursive neural network works, see
44 | # section 4 of [Recursive Deep Models for Semantic Compositionality
45 | # Over a Sentiment Treebank](https://nlp.stanford.edu/~socherr/EMNLP2013_RNTN.pdf).
46 |
47 | function train(; kws...)
48 | ## Initialize HyperParameters
49 | args = Args(; kws...)
50 |
51 | ## Load data
52 | @info("Loading Data...")
53 | train_data, alphabet = getdata()
54 |
55 | @info("Constructing model....")
56 | embedding = randn(Float32, args.N, length(alphabet))
57 |
58 | @info "Size of the embedding" size(embedding)
59 |
60 | W = Dense(2*args.N, args.N, tanh)
61 | combine(a, b) = W([a; b])
62 |
63 | sentiment = Chain(Dense(args.N, 5))
64 |
65 | function forward(tree)
66 | if isleaf(tree)
67 | token, sent = tree.value
68 | phrase = embedding * token
69 | phrase, logitcrossentropy(sentiment(phrase), sent)
70 | else
71 | _, sent = tree.value
72 | c1, l1 = forward(tree[1])
73 | c2, l2 = forward(tree[2])
74 | phrase = combine(c1, c2)
75 | phrase, l1 + l2 + logitcrossentropy(sentiment(phrase), sent)
76 | end
77 | end
78 |
79 | loss(tree) = forward(tree)[2]
80 |
81 | opt = ADAM(args.lr)
82 | ps = params(embedding, W, sentiment)
83 | evalcb = () -> @show loss(train_data[1])
84 | @info("Training Model...")
85 | Flux.train!(loss, ps, zip(train_data), opt, cb = throttle(evalcb, args.throttle))
86 | end
87 |
88 | # ## Train the model
89 |
90 | cd(@__DIR__)
91 | train()
92 |
--------------------------------------------------------------------------------
/tutorials/60-minute-blitz/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
3 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
4 | Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc"
5 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
6 |
7 | [compat]
8 | Flux = "0.11.5"
9 | julia = "1.5"
10 |
--------------------------------------------------------------------------------
/tutorials/dataloader/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
3 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
4 | Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
5 |
6 | [compat]
7 | Flux = "0.13"
8 | MLDatasets = "0.6"
9 |
--------------------------------------------------------------------------------
/tutorials/dataloader/README.md:
--------------------------------------------------------------------------------
1 | # Using Flux DataLoader with image data
2 |
3 | In this example, we show how to load image data in Flux DataLoader and process it in mini-batches. We use the [DataLoader](https://fluxml.ai/Flux.jl/stable/data/dataloader/#Flux.Data.DataLoader) type to handle iteration over mini-batches of data. For this example, we load the [MNIST dataset](https://juliaml.github.io/MLDatasets.jl/latest/datasets/MNIST/) using the [MLDatasets](https://juliaml.github.io/MLDatasets.jl/latest/) package.
4 |
5 | Before we start, make sure you have installed the following packages:
6 |
7 | * [Flux](https://github.com/FluxML/Flux.jl)
8 | * [MLDatasets](https://juliaml.github.io/MLDatasets.jl/latest/)
9 |
10 | To install these packages, run the following in the REPL:
11 |
12 | ```julia
13 | using Pkg
14 | Pkg.add("Flux")
15 | Pkg.add("MLDatasets")
15 | ```
16 |
17 |
18 |
19 | Load the packages we'll need:
20 |
21 | ```julia
22 | using MLDatasets: MNIST
23 | using Flux.Data: DataLoader
24 | using Flux: onehotbatch
25 | ```
26 |
27 | ## Step 1: Loading the MNIST data set
28 |
29 | We load the MNIST train and test data from MLDatasets:
30 |
31 | ```julia-repl
32 | julia> train_data = MNIST(:train)
33 | dataset MNIST:
34 | metadata => Dict{String, Any} with 3 entries
35 | split => :train
36 | features => 28×28×60000 Array{Float32, 3}
37 | targets => 60000-element Vector{Int64}
38 |
39 | julia> train_x, train_y = train_data[:];
40 |
41 | julia> test_x, test_y = MNIST(:test)[:];
42 | ```
43 |
44 |
45 | This code loads the MNIST train and test images as Float32, together with their labels. The data set `train_x` is a 28×28×60000 multi-dimensional array: it holds 60000 images, each stored as a 28×28 array that represents a grayscale image of a handwritten digit, and each entry of those arrays is a pixel intensity. Likewise, `train_y` is a 60000-element vector in which each element is the label, i.e. the actual digit (0 to 9), of the corresponding image.
46 |
47 | ## Step 2: Loading the dataset onto DataLoader
48 |
49 | Before we load the data onto a DataLoader, we need to reshape it so that it has the correct shape for Flux. For this example, the MNIST train data must be of the same dimension as our model's input and output layers.
50 |
51 | For example, if our model's input layer expects a 28x28x1 multi-dimensional array, we need to reshape the train and test data as follows:
52 |
53 | ```julia
54 | train_x = reshape(train_x, 28, 28, 1, :)
55 | test_x = reshape(test_x, 28, 28, 1, :)
56 | ```
57 |
58 |
59 | Also, the MNIST labels must be encoded as a vector with the same dimension as the number of categories (unique handwritten digits) in the data set. To encode the labels, we use the [Flux's onehotbatch](https://fluxml.ai/Flux.jl/stable/data/onehot/#Batches-1) function:
60 |
61 | ```julia
62 | train_y, test_y = onehotbatch(train_y, 0:9), onehotbatch(test_y, 0:9)
63 | ```
64 |
65 |
66 | >**Note:** For more information on other encoding methods, see [Handling Data in Flux](https://fluxml.ai/Flux.jl/stable/data/onehot/).
67 |
68 | Now, we load the train images and their labels onto a DataLoader object:
69 |
70 | ```julia
71 | data_loader = DataLoader((train_x, train_y), batchsize=128, shuffle=true)
72 | ```
73 |
74 |
75 | Notice that we set the DataLoader `batchsize` to 128. This will enable us to iterate over the data in batches of size 128. Also, by setting `shuffle=true` the DataLoader will shuffle the observations each time that iterations are re-started.
76 |
77 | ## Step 3: Iterating over the data
78 |
79 | Finally, we can iterate over the 60000 MNIST training images in mini-batches (most of them of size 128) using the DataLoader that we created in the previous step. Each element of the DataLoader is a tuple `(x, y)` in which `x` is a 28×28×1×batchsize array of images and `y` is a 10×batchsize matrix holding the one-hot encoded labels.
80 |
81 | ```julia
82 | for (x, y) in data_loader
83 | @assert size(x) == (28, 28, 1, 128) || size(x) == (28, 28, 1, 96)
84 | @assert size(y) == (10, 128) || size(y) == (10, 96)
85 | ...
86 | end
87 | ```
88 |
89 |
90 |
91 |
92 | Now, we can create a model and train it using the `data_loader` we just created. For more information on building models in Flux, see [Model-Building Basics](https://fluxml.ai/Flux.jl/stable/models/basics/#Model-Building-Basics-1).
93 |
--------------------------------------------------------------------------------
/tutorials/dataloader/dataloader-image-data.jl:
--------------------------------------------------------------------------------
1 | # # An example of DataLoader using image data
2 |
3 |
4 | # In this example, we show how to load image data in Flux DataLoader and process it in mini-batches.
5 | # We use the [DataLoader](https://fluxml.ai/Flux.jl/stable/data/dataloader/#Flux.Data.DataLoader) type
6 | # to handle iteration over mini-batches of data.
7 | # Moreover, we load the [MNIST dataset](https://juliaml.github.io/MLDatasets.jl/stable/datasets/MNIST/)
8 | # using the [MLDatasets](https://juliaml.github.io/MLDatasets.jl/stable/) package.
9 |
10 |
11 | # Load the packages we need:
12 |
13 | using MLDatasets: MNIST
14 | using Flux.Data: DataLoader
15 | using Flux: onehotbatch
16 | using Parameters: @with_kw
17 |
18 | # We set a default value for the size of the mini-batches:
19 |
20 | @with_kw mutable struct Args
21 | minibatch_size::Int = 128 ## Size of the mini-batches
22 | end
23 |
24 | # ## Data
25 |
26 | # We create the function `get_data` to get, prepare, and load the data onto a DataLoader object.
27 |
28 | function get_data(args)
29 |
30 | ## Load the MNIST train and test data from MLDatasets
31 | train_x, train_y = MNIST(:train)[:]
32 | test_x, test_y = MNIST(:test)[:]
33 |
34 | ## Reshape data to 28x28x1 multi-dimensional array
35 | train_x = reshape(train_x, 28, 28, 1, :)
36 | test_x = reshape(test_x, 28, 28, 1, :)
37 |
38 | ## Labels must be encoded as a vector with the same dimension
39 | ## as the number of categories (unique handwritten digits) in the data set
40 | train_y, test_y = onehotbatch(train_y, 0:9), onehotbatch(test_y, 0:9)
41 |
42 | ## Now, we load the train and test images and their labels onto DataLoader objects
43 | data_loader_train = DataLoader((train_x, train_y), batchsize=args.minibatch_size, shuffle=true)
44 | data_loader_test = DataLoader((test_x, test_y), batchsize=args.minibatch_size)
45 |
46 | return data_loader_train, data_loader_test
47 |
48 | end
49 |
50 | # This function performs the following tasks:
51 | # * Loads the MNIST train and test images as Float32 as well as their labels. The dataset `train_x` is a 28×28×60000 multi-dimensional array:
52 | # it holds 60000 images, each stored as a 28×28 array that represents a grayscale image of a handwritten digit, and each entry of those arrays is a pixel intensity.
53 | # Likewise, `train_y` is a 60000-element vector in which each element is the label (0 to 9) of the corresponding image.
54 | # * Reshapes the train and test data to a 28x28x1 multi-dimensional array.
55 | # * One-hot encodes the train and test labels. It creates a batch of one-hot vectors so we can pass the labels of the data as arguments for the loss function.
56 | # * Creates two DataLoader objects that handle data mini-batches of the size defined above.
57 |
58 | # ## Iterate over data
59 |
60 | # Now, we can iterate over the train data during the training routine we want to define.
61 |
62 | function train(; kws...)
63 | args = Args(; kws...)
64 |
65 | @info("Loading data...")
66 | data_loader_train, data_loader_test = get_data(args)
67 |
68 | ## Iterating over train data
69 | for (x, y) in data_loader_train
70 | @assert size(x) == (28, 28, 1, 128) || size(x) == (28, 28, 1, 96)
71 | @assert size(y) == (10, 128) || size(y) == (10, 96)
72 | end
73 | end
74 |
75 | # ## Run the example
76 |
77 | # We call the `train` function:
78 |
79 | cd(@__DIR__)
80 | train()
81 |
--------------------------------------------------------------------------------
/tutorials/transfer_learning/.gitignore:
--------------------------------------------------------------------------------
1 | data/
--------------------------------------------------------------------------------
/tutorials/transfer_learning/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | DataAugmentation = "88a5189c-e7ff-4f85-ac6b-e6158070f02e"
3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
4 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
5 | Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc"
6 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
7 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
8 |
9 | [compat]
10 | Flux = "0.13, 0.14"
11 | julia = "1.6"
12 |
--------------------------------------------------------------------------------
/tutorials/transfer_learning/transfer_learning.jl:
--------------------------------------------------------------------------------
1 | # load packages
2 | using Random: shuffle!
3 | import Base: length, getindex
4 | using Images
5 | using Flux, CUDA
6 | using Flux: update!
7 | using DataAugmentation
8 | using Metalhead
9 |
10 | device = CUDA.functional() ? gpu : cpu
11 | # device = cpu
12 |
13 | ## Custom DataLoader
14 | const CATS = readdir(abspath(joinpath("data", "animals", "cats")), join = true)
15 | const DOGS = readdir(abspath(joinpath("data", "animals", "dogs")), join = true)
16 | const PANDA = readdir(abspath(joinpath("data", "animals", "panda")), join = true)
17 |
18 | struct ImageContainer{T<:Vector}
19 | img::T
20 | end
21 |
22 | imgs = [CATS..., DOGS..., PANDA...]
23 | shuffle!(imgs)
24 | data = ImageContainer(imgs)
25 |
26 | length(data::ImageContainer) = length(data.img)
27 |
28 | const im_size = (224, 224)
29 | tfm = DataAugmentation.compose(ScaleKeepAspect(im_size), CenterCrop(im_size))
30 | name_to_idx = Dict{String,Int32}("cats" => 1, "dogs" => 2, "panda" => 3)
31 |
32 | const mu = [0.485f0, 0.456f0, 0.406f0]
33 | const sigma = [0.229f0, 0.224f0, 0.225f0]
34 |
35 | function getindex(data::ImageContainer, idx::Int)
36 | path = data.img[idx]
37 | _img = Images.load(path)
38 | _img = itemdata(apply(tfm, Image(_img)))
39 | img = collect(channelview(float32.(RGB.(_img))))
40 | img = permutedims((img .- mu) ./ sigma, (3, 2, 1))
41 | name = basename(dirname(path)) ## the parent folder ("cats", "dogs" or "panda") is the label
42 | y = name_to_idx[name]
43 | return img, y
44 | end
45 |
46 | # define DataLoaders
47 | const batchsize = 16
48 |
49 | dtrain = Flux.DataLoader(
50 | ImageContainer(imgs[1:2700]);
51 | batchsize,
52 | collate = true,
53 | parallel = true,
54 | )
55 | device == gpu && (dtrain = Flux.CuIterator(dtrain))
56 |
57 | deval = Flux.DataLoader(
58 | ImageContainer(imgs[2701:3000]);
59 | batchsize,
60 | collate = true,
61 | parallel = true,
62 | )
63 | device == gpu && (deval = Flux.CuIterator(deval))
64 |
65 | # Fine-tune | 🐢 mode
66 | # Load a pre-trained model:
67 | m = Metalhead.ResNet(18, pretrain = true).layers
68 | m_tot = Chain(m[1], AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => 3)) |> device
69 |
70 | function eval_f(m, deval)
71 | good = 0
72 | count = 0
73 | for (x, y) in deval
74 | good += sum(Flux.onecold(m(x)) .== y)
75 | count += length(y)
76 | end
77 | acc = round(good / count, digits = 4)
78 | return acc
79 | end
80 |
81 | function train_epoch!(model; opt, dtrain)
82 | for (x, y) in dtrain
83 | grads = gradient(model) do m
84 | Flux.Losses.logitcrossentropy(m(x), Flux.onehotbatch(y, 1:3))
85 | end
86 | update!(opt, model, grads[1])
87 | end
88 | end
89 |
90 | opt = Flux.setup(Flux.Optimisers.Adam(1e-5), m_tot);
91 |
92 | for iter = 1:5
93 | @time train_epoch!(m_tot; opt, dtrain)
94 | metric_train = eval_f(m_tot, dtrain)
95 | metric_eval = eval_f(m_tot, deval)
96 | @info "train" metric = metric_train
97 | @info "eval" metric = metric_eval
98 | end
99 |
100 | # Fine-tune | 🐇 mode
101 | # define models
102 | m_infer = deepcopy(m[1]) |> device
103 | m_tune = Chain(AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => 3)) |> device
104 |
105 | function eval_f(m_infer, m_tune, deval)
106 | good = 0
107 | count = 0
108 | for (x, y) in deval
109 | good += sum(Flux.onecold(m_tune(m_infer(x))) .== y)
110 | count += length(y)
111 | end
112 | acc = round(good / count, digits = 4)
113 | return acc
114 | end
115 |
116 | function train_epoch!(m_infer, m_tune; opt, dtrain)
117 | for (x, y) in dtrain
118 | infer = m_infer(x)
119 | grads = gradient(m_tune) do m
120 | Flux.Losses.logitcrossentropy(m(infer), Flux.onehotbatch(y, 1:3))
121 | end
122 | update!(opt, m_tune, grads[1])
123 | end
124 | end
125 |
126 | opt = Flux.setup(Flux.Optimisers.Adam(1e-3), m_tune);
127 |
128 | # training loop
129 | for iter = 1:5
130 | @time train_epoch!(m_infer, m_tune; opt, dtrain)
131 | metric_train = eval_f(m_infer, m_tune, dtrain)
132 | metric_eval = eval_f(m_infer, m_tune, deval)
133 | @info "train" metric = metric_train
134 | @info "eval" metric = metric_eval
135 | end
136 |
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
4 | ImageMagick = "6218d12a-5da1-5696-b52f-db25d2ecc6d1"
5 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
6 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
7 | Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
8 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
9 | Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
10 |
11 | [compat]
12 | CUDA = "2.4.0"
13 | Flux = "0.11.4"
14 | MLDatasets = "0.6"
15 | julia = "1.5"
16 |
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/README.md:
--------------------------------------------------------------------------------
1 | # Conditional DC-GAN
2 |
3 |
4 |
5 | [Source](https://arxiv.org/pdf/1411.1784.pdf)
6 |
7 | ## Model Info
8 |
9 | Generative Adversarial Networks have two models, a _Generator model G(z)_ and a _Discriminator model D(x)_, in competition with each other. G tries to estimate the distribution of the training data and D tries to estimate the probability that a data sample came from the original training data and not from G. During training, the Generator learns a mapping from a _prior distribution p(z)_ to the _data space G(z)_. The discriminator D(x) produces a probability value of a given x coming from the actual training data.
10 | This model can be modified to include additional inputs, y, on which the models can be conditioned. y can be any kind of additional input, for example, class labels. _The conditioning can be achieved by simply feeding y to both the Generator — G(z|y) and the Discriminator — D(x|y)_.
11 |
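12 | A minimal sketch of the conditioning step (illustrative shapes, not the exact code in `cGAN_mnist.jl`): the label y is one-hot encoded and concatenated with the noise vector before it enters the Generator.
13 |
14 | ```julia
15 | using Flux
16 |
17 | z = randn(Float32, 100)      # sample from the prior p(z)
18 | y = Flux.onehot(3, 0:9)      # class label, e.g. the digit 3
19 | g_input = vcat(z, y)         # G(z|y) sees the noise and the label together
20 | ```
21 |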
12 | ## Training
13 |
14 | ```shell
15 | cd vision/cdcgan_mnist
16 | julia --project cGAN_mnist.jl
17 | ```
18 |
19 | ## Results
20 |
21 | 1000 training steps
22 |
23 | 
24 |
25 | 3000 training steps
26 |
27 | 
28 |
29 | 5000 training steps
30 |
31 | 
32 |
33 | 10000 training steps
34 |
35 | 
36 |
37 | 11725 training steps
38 |
39 | 
40 |
41 | ## References
42 |
43 | * [Mirza, M. and Osindero, S., “Conditional Generative Adversarial Nets”, arXiv e-prints, 2014.](https://arxiv.org/pdf/1411.1784.pdf)
44 |
45 | * [Training a Conditional DC-GAN on CIFAR-10](https://medium.com/@utk.is.here/training-a-conditional-dc-gan-on-cifar-10-fce88395d610)
46 |
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/output/cgan_steps_000000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_000000.png
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/output/cgan_steps_001000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_001000.png
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/output/cgan_steps_002000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_002000.png
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/output/cgan_steps_003000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_003000.png
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/output/cgan_steps_004000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_004000.png
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/output/cgan_steps_005000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_005000.png
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/output/cgan_steps_006000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_006000.png
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/output/cgan_steps_007000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_007000.png
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/output/cgan_steps_008000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_008000.png
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/output/cgan_steps_009000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_009000.png
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/output/cgan_steps_010000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_010000.png
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/output/cgan_steps_011000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_011000.png
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/output/cgan_steps_011725.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/cgan_steps_011725.png
--------------------------------------------------------------------------------
/vision/cdcgan_mnist/output/img_for_readme.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/cdcgan_mnist/output/img_for_readme.png
--------------------------------------------------------------------------------
/vision/conv_mnist/.gitignore:
--------------------------------------------------------------------------------
1 | runs/
--------------------------------------------------------------------------------
/vision/conv_mnist/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
4 | JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
5 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
6 | ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
7 | cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
8 |
9 | [compat]
10 | CUDA = "3, 4"
11 | Flux = "0.13.16, 0.14"
12 | JLD2 = "0.4.31"
13 | MLDatasets = "0.7"
14 | julia = "1.6"
15 |
--------------------------------------------------------------------------------
/vision/conv_mnist/README.md:
--------------------------------------------------------------------------------
1 | # LeNet-5
2 |
3 | 
4 |
5 | [Source](https://d2l.ai/chapter_convolutional-neural-networks/lenet.html)
6 |
7 | ## Model Info
8 |
9 | At a high level, LeNet (LeNet-5) consists of two parts:
10 | (i) _a convolutional encoder consisting of two convolutional layers_;
11 | (ii) _a dense block consisting of three fully-connected layers_.
12 |
13 | The basic units in each convolutional block are a convolutional layer, a sigmoid activation function, and a subsequent average pooling operation. Each convolutional layer uses a 5×5 kernel and a sigmoid activation function. These layers map spatially arranged inputs to a number of two-dimensional feature maps, typically increasing the number of channels. The first convolutional layer has 6 output channels, while the second has 16. Each 2×2 pooling operation (stride 2) reduces dimensionality by a factor of 4 via spatial downsampling. The convolutional block emits an output with shape given by (batch size, number of channels, height, width).
14 |
15 | >**Note:** The original LeNet-5 architecture used the sigmoid activation function. However, this is a modernized version, since it uses the ReLU activation function instead.
16 |
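17 | In Flux, the layout described above can be sketched as follows (a minimal sketch of the ReLU variant; `conv_mnist.jl` may differ in details such as padding):
18 |
19 | ```julia
20 | using Flux
21 |
22 | lenet = Chain(
23 |     Conv((5, 5), 1 => 6, relu),   # 28×28×1 -> 24×24×6
24 |     MeanPool((2, 2)),             # -> 12×12×6
25 |     Conv((5, 5), 6 => 16, relu),  # -> 8×8×16
26 |     MeanPool((2, 2)),             # -> 4×4×16
27 |     Flux.flatten,                 # -> 256
28 |     Dense(256 => 120, relu),
29 |     Dense(120 => 84, relu),
30 |     Dense(84 => 10),              # logits for the ten digits
31 | )
32 | ```
33 |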
17 | ## Training
18 |
19 | ```shell
20 | cd vision/conv_mnist
21 | julia --project conv_mnist.jl
22 | ```
23 |
24 | ## References
25 |
26 | * [Y. Lecun, L. Bottou, Y. Bengio and P. Haffner, "Gradient-based learning applied to document recognition," in Proceedings of the IEEE, vol. 86, no. 11, pp. 2278-2324, Nov. 1998, doi: 10.1109/5.726791.](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf)
27 |
28 | * [Aston Zhang, Zachary C. Lipton, Mu Li and Alexander J. Smola, "Dive into Deep Learning", 2020](https://d2l.ai/chapter_convolutional-neural-networks/lenet.html)
29 |
--------------------------------------------------------------------------------
/vision/conv_mnist/docs/LeNet-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/conv_mnist/docs/LeNet-5.png
--------------------------------------------------------------------------------
/vision/convmixer_cifar10/README.md:
--------------------------------------------------------------------------------
1 | # ConvMixer
2 |
3 | 
4 |
5 | From the preprint [Patches are all you need? 🤷](https://openreview.net/forum?id=TVHS5Y4dNvM).
6 |
7 | ## Architecture
8 |
9 | The first layer of the ConvMixer architecture splits the image into patches, implemented as a strided convolution. The rest is a stack of residual convolutional blocks that alternate channel-wise and space-wise convolutions. Each convolution is followed by a classical batch normalization, and all the activations are Gaussian Error Linear Units (GELU). This architecture was not designed to be state-of-the-art competitive, but to show that high accuracies can be reached by a simple architecture that exploits the patch segmentation of images.
10 |
11 | The network is trained on CIFAR10 by minimizing the cross-entropy loss with the ADAM optimizer, with Weight Decay and Gradient Clipping.
12 |
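13 | A single mixer block, as described above, can be sketched like this (see `convmixer.jl` for the full model):
14 |
15 | ```julia
16 | using Flux
17 |
18 | mixer_block(dim, k) = Chain(
19 |     SkipConnection(                # residual connection around the space-wise step
20 |         Chain(Conv((k, k), dim => dim, gelu; pad = SamePad(), groups = dim),  # space-wise (depthwise)
21 |               BatchNorm(dim)),
22 |         +),
23 |     Conv((1, 1), dim => dim, gelu),  # channel-wise (pointwise)
24 |     BatchNorm(dim),
25 | )
26 | ```
27 |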
13 | ## Training
14 |
15 | ```shell
16 | cd vision/convmixer_cifar10
17 | julia --project convmixer.jl
18 | ```
19 |
20 | The chosen parameters are the smallest for which the architecture reaches an acceptable accuracy. Training for ~100 epochs should result in a 74% validation accuracy. With a depth of 32 and a width of 256, one can reach 85% without data augmentation.
21 |
22 | ## Reference
23 |
24 | - https://openreview.net/forum?id=TVHS5Y4dNvM
25 |
--------------------------------------------------------------------------------
/vision/convmixer_cifar10/convmixer.jl:
--------------------------------------------------------------------------------
1 | using Flux, MLDatasets
2 | using Flux: onehotbatch, onecold, DataLoader, Optimiser
3 | using BSON: @save, @load
4 |
5 |
6 | function ConvMixer(in_channels, kernel_size, patch_size, dim, depth, N_classes)
7 | f = Chain(
8 | Conv((patch_size, patch_size), in_channels=>dim, gelu; stride=patch_size),
9 | BatchNorm(dim),
10 | [
11 | Chain(
12 | SkipConnection(Chain(Conv((kernel_size,kernel_size), dim=>dim, gelu; pad=SamePad(), groups=dim), BatchNorm(dim)), +),
13 | Chain(Conv((1,1), dim=>dim, gelu), BatchNorm(dim))
14 | )
15 | for i in 1:depth
16 | ]...,
17 | AdaptiveMeanPool((1,1)),
18 | flatten,
19 | Dense(dim,N_classes)
20 | )
21 | return f
22 | end
23 |
24 | # `idxs = nothing` gives the full dataset; otherwise (for testing purposes)
25 | # only the elements `1:idxs` of the train set are used.
26 | function get_data(batchsize; dataset = MLDatasets.CIFAR10, idxs = nothing)
27 | 
28 | ENV["DATADEPS_ALWAYS_ACCEPT"] = "true"
29 |
30 | # Loading Dataset
31 | if idxs===nothing
32 | xtrain, ytrain = dataset(:train)[:]
33 | xtest, ytest = dataset(:test)[:]
34 | else
35 | xtrain, ytrain = dataset(:train)[1:idxs]
36 | xtest, ytest = dataset(:test)[1:div(idxs, 10)]
37 | end
38 |
39 | # Reshape data to comply with Julia's (width, height, channels, batch_size) convention when there is only one channel (e.g. MNIST)
40 | if ndims(xtrain)==3
41 | w = size(xtrain)[1]
42 | xtrain = reshape(xtrain, (w,w,1,:))
43 | xtest = reshape(xtest, (w,w,1,:))
44 | end
45 |
46 | ytrain, ytest = onehotbatch(ytrain, 0:9), onehotbatch(ytest, 0:9)
47 |
48 | train_loader = DataLoader((xtrain, ytrain), batchsize=batchsize, shuffle=true)
49 | test_loader = DataLoader((xtest, ytest), batchsize=batchsize)
50 |
51 | return train_loader, test_loader
52 | end
53 |
54 | function create_loss_function(dataloader, device)
55 |
56 | function loss(model)
57 | n = 0
58 | l = 0.0f0
59 | acc = 0.0f0
60 |
61 | for (x,y) in dataloader
62 | x,y = x |> device, y |> device
63 | z = model(x)
64 | l += Flux.logitcrossentropy(z, y, agg=sum)
65 | acc += sum(onecold(z).==onecold(y))
66 | n += size(x)[end]
67 | end
68 | l / n, acc / n
69 | end
70 |
71 | return loss
72 |
73 | end
74 |
75 |
76 | function train(n_epochs=100)
77 |
78 | # hyperparameters — warning: training can be long with these settings
79 | train_loader, test_loader = get_data(128)
80 | η = 3e-4
81 | in_channel = 3
82 | patch_size = 2
83 | kernel_size = 7
84 | dim = 128
85 | dimPL = 2
86 | depth = 18
87 | use_cuda = true
88 |
89 | #logging the losses
90 | train_save = zeros(n_epochs, 2)
91 | test_save = zeros(n_epochs, 2)
92 |
93 | if use_cuda
94 | device = gpu
95 | @info "Training on GPU"
96 | else
97 | device = cpu
98 | @info "Training on CPU"
99 | end
100 |
101 | train_loss_fn = create_loss_function(train_loader, device)
102 | test_loss_fn = create_loss_function(test_loader, device)
103 |
104 | model = ConvMixer(in_channel, kernel_size, patch_size, dim, depth, 10) |> device
105 |
106 | ps = params(model)
107 | opt = Optimiser(
108 | WeightDecay(1f-3),
109 | ClipNorm(1.0),
110 | ADAM(η)
111 | )
112 |
113 | for epoch in 1:n_epochs
114 | for (x,y) in train_loader
115 | x,y = x|>device, y|>device
116 | gr = gradient(()->Flux.logitcrossentropy(model(x), y, agg=sum), ps)
117 | Flux.Optimise.update!(opt, ps, gr)
118 | end
119 |
120 | #logging
121 | train_loss, train_acc = train_loss_fn(model) |> cpu
122 | test_loss, test_acc = test_loss_fn(model) |> cpu
123 | train_save[epoch,:] = [train_loss, train_acc]
124 | test_save[epoch,:] = [test_loss, test_acc]
125 |
126 | if epoch%5==0
127 | @info "Epoch $epoch : Train loss = $train_loss || Validation accuracy = $test_acc."
128 | end
129 |
130 | end
131 |
132 | model = model |> cpu
133 | @save "model.bson" model
134 | @save "losses.bson" train_save test_save
135 | end
136 |
137 | if abspath(PROGRAM_FILE) == @__FILE__
138 | train()
139 | end
--------------------------------------------------------------------------------
/vision/convmixer_cifar10/doc/convmixerarchi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/convmixer_cifar10/doc/convmixerarchi.png
--------------------------------------------------------------------------------
/vision/dcgan_mnist/.gitignore:
--------------------------------------------------------------------------------
1 | *.png
--------------------------------------------------------------------------------
/vision/dcgan_mnist/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
4 | ImageMagick = "6218d12a-5da1-5696-b52f-db25d2ecc6d1"
5 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
6 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
7 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
8 |
9 | [compat]
10 | Flux = "0.13.9, 0.14"
11 | MLDatasets = "0.7"
12 | julia = "1.6"
13 |
--------------------------------------------------------------------------------
/vision/dcgan_mnist/README.md:
--------------------------------------------------------------------------------
1 | # Deep Convolutional GAN (DC-GAN)
2 |
3 | 
4 | [Source](https://gluon.mxnet.io/chapter14_generative-adversarial-networks/dcgan.html)
5 |
6 | ## Model Info
7 |
8 | A DC-GAN is a direct extension of the GAN, except that it explicitly uses convolutional and transposed convolutional layers in the discriminator and generator, respectively. The discriminator is made up of strided convolutional layers, batch norm layers, and LeakyReLU activations. The generator is comprised of transposed convolutional layers, batch norm layers, and ReLU activations.
9 |
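Schematically (the actual definitions live in `dcgan_mnist.jl`), the discriminator described above looks like this:

```julia
using Flux

# Schematic DC-GAN discriminator for 28×28×1 MNIST images: strided
# convolutions halve the resolution, LeakyReLU activations throughout,
# and a final dense layer produces one real/fake logit.
discriminator = Chain(
    Conv((4, 4), 1 => 64; stride = 2, pad = 1),    # -> 14×14×64
    x -> leakyrelu.(x, 0.2f0),
    Conv((4, 4), 64 => 128; stride = 2, pad = 1),  # -> 7×7×128
    x -> leakyrelu.(x, 0.2f0),
    Flux.flatten,
    Dense(7 * 7 * 128 => 1),                       # real/fake logit
)
```

The generator mirrors this with `ConvTranspose` layers, batch norm, and ReLU activations, ending in `tanh`.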
10 | ## Training
11 |
12 | ```shell
13 | cd vision/dcgan_mnist
14 | julia --project dcgan_mnist.jl
15 | ```
16 |
17 | ## Results
18 |
19 | 2000 training steps
20 |
21 | 
22 |
23 | 5000 training steps
24 |
25 | 
26 |
27 | 8000 training steps
28 |
29 | 
30 |
31 | 9380 training steps
32 |
33 | 
34 |
35 | ## References
36 |
37 | * [Radford, A. et al.: Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks, http://arxiv.org/abs/1511.06434, (2015).](https://arxiv.org/pdf/1511.06434v2.pdf)
38 |
39 | * [pytorch.org/tutorials/beginner/dcgan_faces_tutorial](https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html)
40 |
--------------------------------------------------------------------------------
/vision/dcgan_mnist/dcgan_mnist.jl:
--------------------------------------------------------------------------------
1 | using Base.Iterators: partition
2 | using Flux
3 | using Flux.Optimise: update!
4 | using Flux.Losses: logitbinarycrossentropy
5 | using Images
6 | using MLDatasets
7 | using Statistics
8 | using Printf
9 | using Random
10 | using CUDA
11 | CUDA.allowscalar(false)
12 |
13 | Base.@kwdef struct HyperParams
14 | batch_size::Int = 128
15 | latent_dim::Int = 100
16 | epochs::Int = 20
17 | verbose_freq::Int = 1000
18 | output_x::Int = 6
19 | output_y::Int = 6
20 | lr_dscr::Float32 = 0.0002
21 | lr_gen::Float32 = 0.0002
22 | end
23 |
24 | function create_output_image(gen, fixed_noise, hparams)
25 | fake_images = @. cpu(gen(fixed_noise))
26 | image_array = reduce(vcat, reduce.(hcat, partition(fake_images, hparams.output_y)))
27 | image_array = permutedims(dropdims(image_array; dims=(3, 4)), (2, 1))
28 | image_array = @. Gray(image_array + 1f0) / 2f0
29 | return image_array
30 | end
31 |
32 |
33 | # weight initialization as given in the paper https://arxiv.org/abs/1511.06434
34 | dcgan_init(shape...) = randn(Float32, shape...) * 0.02f0
35 |
36 | function Discriminator()
37 | return Chain(
38 | Conv((4, 4), 1 => 64; stride = 2, pad = 1, init = dcgan_init),
39 | x->leakyrelu.(x, 0.2f0),
40 | Dropout(0.25),
41 | Conv((4, 4), 64 => 128; stride = 2, pad = 1, init = dcgan_init),
42 | x->leakyrelu.(x, 0.2f0),
43 | Dropout(0.25),
44 | x->reshape(x, 7 * 7 * 128, :),
45 | Dense(7 * 7 * 128, 1))
46 | end
47 |
48 | function Generator(latent_dim::Int)
49 | return Chain(
50 | Dense(latent_dim, 7 * 7 * 256),
51 | BatchNorm(7 * 7 * 256, relu),
52 | x->reshape(x, 7, 7, 256, :),
53 | ConvTranspose((5, 5), 256 => 128; stride = 1, pad = 2, init = dcgan_init),
54 | BatchNorm(128, relu),
55 | ConvTranspose((4, 4), 128 => 64; stride = 2, pad = 1, init = dcgan_init),
56 | BatchNorm(64, relu),
57 | ConvTranspose((4, 4), 64 => 1; stride = 2, pad = 1, init = dcgan_init),
58 | x -> tanh.(x)
59 | )
60 | end
61 |
62 | # Loss functions
63 | function discriminator_loss(real_output, fake_output)
64 | real_loss = logitbinarycrossentropy(real_output, 1)
65 | fake_loss = logitbinarycrossentropy(fake_output, 0)
66 | return real_loss + fake_loss
67 | end
68 |
69 | generator_loss(fake_output) = logitbinarycrossentropy(fake_output, 1)
70 |
71 | function train_discriminator!(gen, dscr, x, opt_dscr, hparams)
72 | noise = randn!(similar(x, (hparams.latent_dim, hparams.batch_size)))
73 | fake_input = gen(noise)
74 | # Taking gradient
75 | loss, grads = Flux.withgradient(dscr) do dscr
76 | discriminator_loss(dscr(x), dscr(fake_input))
77 | end
78 | update!(opt_dscr, dscr, grads[1])
79 | return loss
80 | end
81 |
82 | function train_generator!(gen, dscr, x, opt_gen, hparams)
83 | noise = randn!(similar(x, (hparams.latent_dim, hparams.batch_size)))
84 | # Taking gradient
85 | loss, grads = Flux.withgradient(gen) do gen
86 | generator_loss(dscr(gen(noise)))
87 | end
88 | update!(opt_gen, gen, grads[1])
89 | return loss
90 | end
91 |
92 | function train(; kws...)
93 | # Model Parameters
94 | hparams = HyperParams(; kws...)
95 |
96 | if CUDA.functional()
97 | @info "Training on GPU"
98 | else
99 | @warn "Training on CPU, this will be very slow!" # 20 mins/epoch
100 | end
101 |
102 | # Load MNIST dataset
103 | images = MLDatasets.MNIST(:train).features
104 | # Normalize to [-1, 1]
105 | image_tensor = reshape(@.(2f0 * images - 1f0), 28, 28, 1, :)
106 | # Partition into batches
107 | data = [image_tensor[:, :, :, r] |> gpu for r in partition(1:60000, hparams.batch_size)]
108 |
109 | fixed_noise = [randn(Float32, hparams.latent_dim, 1) |> gpu for _=1:hparams.output_x*hparams.output_y]
110 |
111 | # Discriminator
112 | dscr = Discriminator() |> gpu
113 |
114 | # Generator
115 | gen = Generator(hparams.latent_dim) |> gpu
116 |
117 | # Optimizers
118 | opt_dscr = Flux.setup(Adam(hparams.lr_dscr), dscr)
119 | opt_gen = Flux.setup(Adam(hparams.lr_gen), gen)
120 |
121 | # Training
122 | train_steps = 0
123 | for ep in 1:hparams.epochs
124 | @info "Epoch $ep"
125 | for x in data
126 | # Update discriminator and generator
127 | loss_dscr = train_discriminator!(gen, dscr, x, opt_dscr, hparams)
128 | loss_gen = train_generator!(gen, dscr, x, opt_gen, hparams)
129 |
130 | if train_steps % hparams.verbose_freq == 0
131 | @info("Train step $(train_steps), Discriminator loss = $(loss_dscr), Generator loss = $(loss_gen)")
132 | # Save generated fake image
133 | output_image = create_output_image(gen, fixed_noise, hparams)
134 | save(@sprintf("output/dcgan_steps_%06d.png", train_steps), output_image)
135 | end
136 | train_steps += 1
137 | end
138 | end
139 |
140 | output_image = create_output_image(gen, fixed_noise, hparams)
141 | save(@sprintf("output/dcgan_steps_%06d.png", train_steps), output_image)
142 | end
143 |
144 | if abspath(PROGRAM_FILE) == @__FILE__
145 | train()
146 | end
147 |
148 |
--------------------------------------------------------------------------------
/vision/dcgan_mnist/output/dcgan_generator_discriminator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_generator_discriminator.png
--------------------------------------------------------------------------------
/vision/dcgan_mnist/output/dcgan_steps_000000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_000000.png
--------------------------------------------------------------------------------
/vision/dcgan_mnist/output/dcgan_steps_001000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_001000.png
--------------------------------------------------------------------------------
/vision/dcgan_mnist/output/dcgan_steps_002000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_002000.png
--------------------------------------------------------------------------------
/vision/dcgan_mnist/output/dcgan_steps_003000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_003000.png
--------------------------------------------------------------------------------
/vision/dcgan_mnist/output/dcgan_steps_004000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_004000.png
--------------------------------------------------------------------------------
/vision/dcgan_mnist/output/dcgan_steps_005000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_005000.png
--------------------------------------------------------------------------------
/vision/dcgan_mnist/output/dcgan_steps_006000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_006000.png
--------------------------------------------------------------------------------
/vision/dcgan_mnist/output/dcgan_steps_007000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_007000.png
--------------------------------------------------------------------------------
/vision/dcgan_mnist/output/dcgan_steps_008000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_008000.png
--------------------------------------------------------------------------------
/vision/dcgan_mnist/output/dcgan_steps_009000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_009000.png
--------------------------------------------------------------------------------
/vision/dcgan_mnist/output/dcgan_steps_009380.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/dcgan_mnist/output/dcgan_steps_009380.png
--------------------------------------------------------------------------------
/vision/diffusion_mnist/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/.DS_Store
--------------------------------------------------------------------------------
/vision/diffusion_mnist/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
3 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
4 | DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
5 | FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
6 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
7 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
8 | Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
9 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
10 | Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
11 | ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
12 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
13 | TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f"
14 | DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
15 |
16 | [compat]
17 | CUDA = "3"
18 | Flux = "0.13"
19 | julia = "1.6"
20 | FileIO = "1"
21 | BSON = "0.3"
22 | DrWatson = "2"
23 | Images = "0.25"
24 | MLDatasets = "0.6"
25 | Parameters = "0.12"
26 | ProgressMeter = "1"
27 | TensorBoardLogger = "0.1"
--------------------------------------------------------------------------------
/vision/diffusion_mnist/README.md:
--------------------------------------------------------------------------------
1 | # Score-Based Generative Modeling (Diffusion Model)
2 | 
3 | [Source](https://yang-song.github.io/blog/2021/score/#score-based-generative-modeling-with-stochastic-differential-equations-sdes)
4 |
5 | ## Model Info
6 | *Score-Based Generative Modeling* is a framework for learning stochastic dynamics that transition one distribution into another. In our case, we will be modeling the transition from the MNIST image distribution into random noise. The general idea is to learn the forward dynamics (*score function* or *gradients*) of the image distribution as it is slowly evolved into random Gaussian noise through a *diffusion process*. This is shown in the image above with the **Forward Stochastic Differential Equation (SDE)**. With an estimate of the forward dynamics, we can then reverse the process, allowing us to create realistic-looking images from pure noise! This is shown with the **Reverse SDE** in the graphic above.
7 |
8 | In contrast to likelihood-based models, *Score-Based Generative Modeling* depends only on the *score function*, $\nabla_x \log{p(x)}$, which is learned through *score matching*. **Concretely, this tutorial uses a UNet architecture and a score matching loss function to learn this score function**. Once this gradient is estimated, we can then draw samples from the MNIST distribution using **Langevin dynamics** on the reverse SDE.
9 |
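As a rough sketch of that objective (hypothetical code, assuming the variance-exploding SDE from the paper with marginal standard deviation σᵗ; the tutorial's actual loss is `model_loss` in `diffusion_mnist.jl`):

```julia
# Denoising score matching sketch: perturb x with noise of scale σ(t) and
# train the network s(x̃, t) so that σ(t)·s(x̃, t) ≈ -z, i.e. s estimates
# the score ∇ log pₜ at the perturbed point.
function score_matching_loss(score_model, x; σ = 25.0f0, ϵ = 1.0f-5)
    batch = size(x)[end]
    t = rand(Float32, batch) .* (1.0f0 - ϵ) .+ ϵ       # t ~ U(ϵ, 1)
    σt = reshape(σ .^ t, 1, 1, 1, batch)               # marginal std at t
    z = randn(Float32, size(x))                        # Gaussian noise
    x̃ = x .+ σt .* z                                   # perturbed images
    s = score_model(x̃, t)                              # predicted score
    sum(abs2, σt .* s .+ z) / batch                    # weighted DSM loss
end
```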
10 | ### More Model Info
11 | A much more in-depth walkthrough of the theory is available [here](https://yang-song.github.io/blog/2021/score/) from the original author, [Yang Song](https://yang-song.github.io/). I highly recommend this blog to become more familiar with the concepts before diving into the code!
12 |
13 | ### Pytorch Equivalent Code
14 | For those coming from Python, [here](https://colab.research.google.com/drive/120kYYBOVa1i0TD85RjlEkFjaWDxSFUx3?usp=sharing) is the equivalent Pytorch code that was used to create this Julia tutorial.
15 |
16 | ## Training
17 | ```shell
18 | cd vision/diffusion_mnist
19 | julia --project diffusion_mnist.jl
20 | ```
21 |
22 | ## Visualization
23 | ```shell
24 | cd vision/diffusion_mnist
25 | julia --project diffusion_plot.jl
26 | ```
27 | Visualizations are generated either with the sampling equations used in the [original PyTorch tutorial](https://colab.research.google.com/drive/120kYYBOVa1i0TD85RjlEkFjaWDxSFUx3?usp=sharing) or with the help of [`DifferentialEquations.jl`](https://diffeq.sciml.ai/stable/).
28 | | Sampled Noise | Euler-Maruyama (EM) Sampler | Predictor Corrector Sampler |
29 | | ----------- | ----------- | ----------- |
30 | |  |  |  |
31 |
32 | | Euler-Maruyama (`DifferentialEquations.jl`) | Probability Flow ODE (`DifferentialEquations.jl`) |
33 | | ----------- | ----------- |
34 | |  |  |
35 |
36 | And since `DifferentialEquations.jl`'s `solve()` returns the entire sample path, it is easy to visualize the reverse-time SDE sampling process as an animation:
37 |
38 |
39 | | Euler-Maruyama | Probability Flow ODE |
40 | | ----------- | ----------- |
41 | |  |  |
42 |
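For instance, here is a hypothetical sketch of how the probability flow ODE can be set up with `DifferentialEquations.jl`; a placeholder score function stands in for the trained UNet:

```julia
using DifferentialEquations

# Probability flow ODE for the VE SDE: dx = -½ g(t)² s(x, t) dt, integrated
# backwards from t = 1 to t = ϵ. Here `score` is a placeholder.
σ = 25.0f0
g(t) = σ^t * sqrt(2 * log(σ))             # diffusion coefficient of the VE SDE
score(x, t) = -x ./ σ^(2t)                # placeholder: score of N(0, σ^{2t})
f!(dx, x, p, t) = (dx .= -0.5f0 .* g(t)^2 .* score(x, t))
prob = ODEProblem(f!, randn(Float32, 4) .* σ, (1.0f0, 1.0f-3))
sol = solve(prob, Tsit5())                # sol.u holds the whole sample path
```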
43 | And finally, we can visualize the components of the image, `𝙭`, as a function of `t ∈ [1, ϵ]`. As noted by the authors, the Probability Flow ODE captures the same
44 | marginal probability density 𝒫ₜ(𝙭) as its stochastic counterpart.
45 | | | |
46 | | ----------- | ----------- |
47 | |  |  |
48 |
49 | The lines `x(t) = ± σᵗ` are shown for reference.
50 |
51 | ## References
52 |
53 | * [Yang Song. “Generative Modeling by Estimating Gradients of the Data Distribution.” Blog Post, 2021](https://yang-song.github.io/blog/2021/score/)
54 |
55 | * [Yang Song, Jascha Sohl-Dickstein, Diederik P. Kingma, Abhishek Kumar, Stefano Ermon, and Ben Poole. "Score-Based Generative Modeling Through
56 | Stochastic Differential Equations". ArXiv Preprint, 2021](https://arxiv.org/pdf/2011.13456.pdf)
57 |
58 |
--------------------------------------------------------------------------------
/vision/diffusion_mnist/diffusion_test.jl:
--------------------------------------------------------------------------------
1 | include("diffusion_mnist.jl")
2 |
3 | function test()
4 | @info "Begin tests for diffusion_mnist.jl"
5 |
6 | gaussfourierproj_test = GaussianFourierProjection(32, 20.0f0)
7 | # GaussianFourierProjection(embed_dim, ⋅)(batch) => (embed_dim, batch)
8 | @assert gaussfourierproj_test(randn(Float32, 32)) |> size == (32, 32)
9 | # W is fixed wrt. repeated calls
10 | @assert gaussfourierproj_test(ones(Float32, 32)) ==
11 |         gaussfourierproj_test(ones(Float32, 32))
14 | # W is not trainable
15 | @assert params(gaussfourierproj_test) == Flux.Params([])
16 |
17 | @assert expand_dims(ones(Float32, 32), 3) |> size == (1, 1, 1, 32)
18 |
19 | unet_test = UNet()
20 | x_test = randn(Float32, (28, 28, 1, 32))
21 | t_test = rand(Float32, 32)
22 | score_test = unet_test(x_test, t_test)
23 | @assert score_test |> size == (28, 28, 1, 32)
24 | @assert typeof(score_test) == Array{Float32,4}
25 |
26 | # Test gradient computation
27 | grad_test = gradient(
28 | () -> model_loss(unet_test, x_test), params(unet_test)
29 | )
30 | @assert grad_test.params == params(unet_test)
31 |
32 | train(save_path="test", epochs=1, batch_size=4096, tblogger=false)
33 |
34 | @info "Tests complete for diffusion_mnist.jl"
35 | end
36 |
37 | if abspath(PROGRAM_FILE) == @__FILE__
38 | test()
39 | end
--------------------------------------------------------------------------------
/vision/diffusion_mnist/docs/diff_eq_em.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/diff_eq_em.gif
--------------------------------------------------------------------------------
/vision/diffusion_mnist/docs/diff_eq_em_images.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/diff_eq_em_images.jpeg
--------------------------------------------------------------------------------
/vision/diffusion_mnist/docs/diff_eq_em_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/diff_eq_em_plot.png
--------------------------------------------------------------------------------
/vision/diffusion_mnist/docs/diff_eq_ode.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/diff_eq_ode.gif
--------------------------------------------------------------------------------
/vision/diffusion_mnist/docs/diff_eq_ode_images.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/diff_eq_ode_images.jpeg
--------------------------------------------------------------------------------
/vision/diffusion_mnist/docs/diff_eq_ode_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/diff_eq_ode_plot.png
--------------------------------------------------------------------------------
/vision/diffusion_mnist/docs/em_images.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/em_images.jpeg
--------------------------------------------------------------------------------
/vision/diffusion_mnist/docs/loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/loss.png
--------------------------------------------------------------------------------
/vision/diffusion_mnist/docs/pc_images.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/pc_images.jpeg
--------------------------------------------------------------------------------
/vision/diffusion_mnist/docs/sampled_noise.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/sampled_noise.jpeg
--------------------------------------------------------------------------------
/vision/diffusion_mnist/docs/sde.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/sde.png
--------------------------------------------------------------------------------
/vision/diffusion_mnist/docs/unet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/diffusion_mnist/docs/unet.png
--------------------------------------------------------------------------------
/vision/mlp_mnist/.gitignore:
--------------------------------------------------------------------------------
1 | *.bson
2 | *.png
3 |
--------------------------------------------------------------------------------
/vision/mlp_mnist/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
3 | ImageCore = "a09fc81d-aa75-5fe9-8630-4744c3626534"
4 | ImageInTerminal = "d8c32880-2388-543b-8c61-d9f865259254"
5 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
6 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
7 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
8 |
9 | [compat]
10 | Flux = "0.13.9, 0.14"
11 | MLDatasets = "0.7"
12 | julia = "1.6"
13 |
--------------------------------------------------------------------------------
/vision/mlp_mnist/README.md:
--------------------------------------------------------------------------------
1 | # Multilayer Perceptron (MLP)
2 |
3 | 
4 |
5 | [Source](http://d2l.ai/chapter_multilayer-perceptrons/mlp.html)
6 |
7 | ## Model Info
8 |
9 | A [multi-layer perceptron](https://en.wikipedia.org/wiki/Multilayer_perceptron) (MLP) consists of at least three sets of nodes: an input layer, one or more hidden layers, and an output layer. Every node except the input nodes is a neuron with a nonlinear activation function. The multiple layers and nonlinearities allow a trained MLP to distinguish data that is not linearly separable.
10 |
11 | In this example, we create an MLP that classifies handwritten digits using the [MNIST dataset](http://yann.lecun.com/exdb/mnist/).
12 | Our model uses the simplest [Flux layers](http://fluxml.ai/Flux.jl/stable/models/layers/), namely `Dense` and `Chain`.
13 | It uses [softmax](https://en.wikipedia.org/wiki/Softmax_function) on its outputs, and [`crossentropy`](http://fluxml.ai/Flux.jl/stable/models/losses/#Flux.Losses.crossentropy) as the loss function.
14 |
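In fact, the entire network is a three-layer `Chain`; this is the same definition that appears in `mlp_mnist.jl`:

```julia
using Flux

# 784 input pixels -> 32 hidden sigmoid neurons -> 10 class probabilities
model = Chain(Dense(28^2 => 32, sigmoid), Dense(32 => 10), softmax)
```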
15 | For simplicity this model does not use a graphics card, since an ordinary CPU is fast enough.
16 | See for example the [LeNet convolutional network](https://github.com/FluxML/model-zoo/tree/master/vision/conv_mnist) for GPU usage.
17 |
18 | ## Training
19 |
20 | You can copy and paste the example into the Julia REPL to see what each part does.
21 | Or you can run it all at once from the terminal, like this:
22 |
23 | ```shell
24 | cd vision/mlp_mnist
25 | julia --project mlp_mnist.jl
26 | ```
27 |
28 | ## Reference
29 |
30 | * [Aston Zhang, Zachary C. Lipton, Mu Li and Alexander J. Smola, "Dive into Deep Learning", 2020](http://d2l.ai/chapter_multilayer-perceptrons/mlp.html)
31 | * [3Blue1Brown Neural networks videos](https://www.youtube.com/watch?v=aircAruvnKk&list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi)
32 | * [Neural Networks and Deep Learning](http://neuralnetworksanddeeplearning.com/)
33 |
34 |
--------------------------------------------------------------------------------
/vision/mlp_mnist/mlp_mnist.jl:
--------------------------------------------------------------------------------
1 | # Simple multi-layer perceptron, for the MNIST hand-written digits.
2 | # This example does not use a GPU, it's small enough not to need one.
3 |
4 | using Flux, MLDatasets, Statistics
5 |
6 | # Our model is very simple: Its one "hidden layer" has 32 "neurons" each connected to every input pixel.
7 | # Each has a sigmoid nonlinearity, and is connected to every "neuron" in the output layer.
8 | # Finally, softmax produces probabilities, i.e. positive numbers which add up to 1:
9 |
10 | model = Chain(Dense(28^2 => 32, sigmoid), Dense(32 => 10), softmax)
11 |
12 | p1 = model(rand(Float32, 28^2)) # run model on random data shaped like an image
13 |
14 | @show sum(p1) ≈ 1;
15 |
16 | p3 = model(rand(Float32, 28^2, 3)) # ...or on a batch of 3 fake, random "images"
17 |
18 | @show sum(p3; dims=1); # all approx 1. Last dim is batch dim.
19 |
20 | #===== DATA =====#
21 |
22 | # Calling MLDatasets.MNIST() will download the dataset if necessary,
23 | # and return a struct containing it.
24 | # It takes a few seconds to read from disk each time, so do this once:
25 |
26 | train_data = MLDatasets.MNIST() # i.e. split=:train
27 | test_data = MLDatasets.MNIST(split=:test)
28 |
29 | # train_data.features is a 28×28×60000 Array{Float32, 3} of the images.
30 | # We need a 2D array for our model. Let's combine the reshape needed with
31 | # other pre-processing, in a function:
32 |
33 | function simple_loader(data::MNIST; batchsize::Int=64)
34 | x2dim = reshape(data.features, 28^2, :)
35 | yhot = Flux.onehotbatch(data.targets, 0:9)
36 | Flux.DataLoader((x2dim, yhot); batchsize, shuffle=true)
37 | end
38 |
39 | # train_data.targets is a 60000-element Vector{Int}, of labels from 0 to 9.
40 | # Flux.onehotbatch([0,1,9], 0:9) makes a matrix of 0 and 1.
41 |
42 | simple_loader(train_data) # returns a DataLoader, with first element a tuple like this:
43 |
44 | x1, y1 = first(simple_loader(train_data)); # (784×64 Matrix{Float32}, 10×64 OneHotMatrix)
45 |
46 | model(x1) # x1 is the right shape for our model
47 |
48 | y1 # y1 is the same shape as the model output.
49 |
50 | @show Flux.crossentropy(model(x1), y1); # This will be our loss function
51 |
52 | #===== ACCURACY =====#
53 |
54 | # We're going to log accuracy and loss during training. There's no advantage to
55 | # calculating these on minibatches, since MNIST is small enough to do it at once.
56 |
57 | function simple_accuracy(model, data::MNIST=test_data)
58 | (x, y) = only(simple_loader(data; batchsize=length(data))) # make one big batch
59 | y_hat = model(x)
60 | iscorrect = Flux.onecold(y_hat) .== Flux.onecold(y) # BitVector
61 | acc = round(100 * mean(iscorrect); digits=2)
62 | end
63 |
64 | @show simple_accuracy(model); # accuracy about 10%, on training data, before training!
65 |
66 | #===== TRAINING =====#
67 |
68 | # Make a dataloader using the desired batchsize:
69 |
70 | train_loader = simple_loader(train_data, batchsize = 256)
71 |
72 | # Initialise storage needed for the Adam optimiser, with our chosen learning rate:
73 |
74 | opt_state = Flux.setup(Adam(3e-4), model);
75 |
76 | # Then train for 30 epochs, printing out details as we go:
77 |
78 | for epoch in 1:30
79 | loss = 0.0
80 | for (x, y) in train_loader
81 | # Compute the loss and the gradients:
82 | l, gs = Flux.withgradient(m -> Flux.crossentropy(m(x), y), model)
83 | # Update the model parameters (and the Adam momenta):
84 | Flux.update!(opt_state, model, gs[1])
85 | # Accumulate the mean loss, just for logging:
86 | loss += l / length(train_loader)
87 | end
88 |
89 | if mod(epoch, 2) == 1
90 | # Report on train and test, only every 2nd epoch:
91 | train_acc = simple_accuracy(model, train_data)
92 | test_acc = simple_accuracy(model, test_data)
93 | @info "After epoch = $epoch" loss train_acc test_acc
94 | end
95 | end
96 |
97 | # This should get to about 94% accuracy.
98 | # To do better, try using Dense(784 => 64, relu) instead.
99 |
100 | #===== INSPECTION =====#
101 |
102 | using ImageCore, ImageInTerminal
103 |
104 | xtest, ytest = only(simple_loader(test_data, batchsize=length(test_data)));
105 |
106 | # There are many ways to look at images, you won't need ImageInTerminal if working in a notebook.
107 | # ImageCore.Gray is a special type, which interprets numbers between 0.0 and 1.0 as shades:
108 |
109 | reshape(xtest[:,33], 28, 28) .|> Gray |> transpose
110 |
111 | @show Flux.onecold(ytest, 0:9)[33]; # true label, should match!
112 |
113 | # Now we can compare the model's probabilities, for the same input.
114 | # This should be highest at the same number:
115 |
116 | p10 = (0:9) .=> model(xtest[:, 33]);
117 | display(p10)
118 |
--------------------------------------------------------------------------------
/vision/spatial_transformer/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
3 | DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
4 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
5 | ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
6 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
7 | NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
8 | NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"
9 | Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
10 | ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
11 | Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
12 | ZygoteRules = "700de1a5-db45-46bc-99cf-38207098b444"
13 |
14 | [compat]
15 | DrWatson = "2.7.6"
16 | Flux = "0.13.9, 0.14"
17 | MLDatasets = "0.7.6"
18 | julia = "1.7.0"
19 |
--------------------------------------------------------------------------------
/vision/spatial_transformer/README.md:
--------------------------------------------------------------------------------
1 | # Spatial Transformer Network
2 |
3 | In this tutorial we'll build a spatial transformer network that will transform MNIST
4 | digits for classification by a CNN:
5 |
6 | * [Spatial Transformer Networks](https://proceedings.neurips.cc/paper/2015/hash/33ceb07bf4eeb3da587e268d663aba1a-Abstract.html)
7 |
8 | Spatial transformer networks perform differentiable affine transformations on images, in this tutorial for the purpose of aiding classification of MNIST digits.
9 |
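Schematically (a hypothetical toy example, not the tutorial's code), the affine warp an STN applies maps each output pixel's normalized coordinates through parameters predicted by a localization network:

```julia
# For each output pixel at normalized coordinates (x, y), the sampling
# location in the input image is A*[x, y] + b, where the 2×3 matrix θ = [A b]
# is predicted per-image by a small localization network.
θ = [1.0 0.2 0.0;      # hypothetical affine parameters: identity plus shear
     0.0 1.0 0.0]
sample_at(x, y) = θ * [x, y, 1.0]
sample_at(0.5, -0.5)   # where to sample the input for output pixel (0.5, -0.5)
```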
10 | 
11 |
12 |
13 | 
--------------------------------------------------------------------------------
/vision/spatial_transformer/images/stn_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/spatial_transformer/images/stn_example.png
--------------------------------------------------------------------------------
/vision/vae_mnist/.gitignore:
--------------------------------------------------------------------------------
1 | *.jld2
2 |
--------------------------------------------------------------------------------
/vision/vae_mnist/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
3 | DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
4 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
5 | ImageIO = "82e4d734-157c-48bb-816b-45c225c6df19"
6 | ImageMagick = "6218d12a-5da1-5696-b52f-db25d2ecc6d1"
7 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
8 | JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
9 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
10 | MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
11 | Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
12 | Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
13 | ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
14 | TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f"
15 | cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
16 |
17 | [compat]
18 | CUDA = "5"
19 | Flux = "0.14"
20 | MLDatasets = "0.7"
21 | julia = "1.9"
22 |
--------------------------------------------------------------------------------
/vision/vae_mnist/README.md:
--------------------------------------------------------------------------------
1 | # Variational Autoencoder (VAE)
2 |
3 |
4 |
5 | [Source](https://learnopencv.com/variational-autoencoder-in-tensorflow/#:~:text=Variational)
6 |
7 | ## Model Info
8 |
9 | The Variational Autoencoder (VAE) came into existence in 2013, when Kingma and Welling published the paper [Auto-Encoding Variational Bayes](https://arxiv.org/pdf/1312.6114.pdf). It extends the original autoencoder idea, primarily to learn the distribution of the data. VAEs are devised within the variational inference framework and approximately model the data distribution after training, making it computationally cheap to generate new samples.
10 |
11 | In a VAE, the idea is to encode the input as a probability distribution rather than as a point estimate, as in a vanilla autoencoder. The VAE then uses a decoder to reconstruct the original input from samples drawn from that distribution.
12 |
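The key ingredient is the reparameterization trick: the encoder outputs a mean μ and a log standard deviation logσ, and a latent sample is drawn as z = μ + exp(logσ) ⊙ ε with ε ~ N(0, I), which keeps sampling differentiable with respect to the encoder's outputs. A minimal sketch (toy dimensions chosen here for illustration; the real model is in `vae_mnist.jl`):

```julia
using Flux

# Toy encoder/decoder with the same structure as the tutorial's model.
enc_hidden = Dense(784 => 500, tanh)
enc_μ, enc_logσ = Dense(500 => 64), Dense(500 => 64)
decoder = Chain(Dense(64 => 500, tanh), Dense(500 => 784))

x = rand(Float32, 784, 8)                        # a fake batch of 8 images
h = enc_hidden(x)
μ, logσ = enc_μ(h), enc_logσ(h)
z = μ .+ exp.(logσ) .* randn(Float32, size(μ))   # reparameterized sample
x̂ = decoder(z)                                   # logits; sigmoid for pixels
```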
13 | ## Training
14 |
15 | ```shell
16 | cd vision/vae_mnist
17 | julia --project vae_mnist.jl
18 | ```
19 |
20 | Original image
21 |
22 | 
23 |
24 | 5 epochs
25 |
26 | 
27 |
28 | 10 epochs
29 |
30 | 
31 |
32 | 20 epochs
33 |
34 | 
35 |
36 | ## Visualization
37 |
38 | ```shell
39 | julia --project vae_plot.jl
40 | ```
41 |
42 | Latent space clustering
43 |
44 | 
45 |
46 | 2D manifold
47 |
48 | 
49 |
50 | ## References
51 |
52 | * [Kingma, Diederik P., and Max Welling. “Auto-Encoding Variational Bayes.” ArXiv Preprint ArXiv:1312.6114, 2013.](https://arxiv.org/pdf/1312.6114.pdf)
53 |
54 | * [Variational Autoencoder in TensorFlow](https://learnopencv.com/variational-autoencoder-in-tensorflow/#:~:text=Variational.)
55 |
--------------------------------------------------------------------------------
/vision/vae_mnist/docs/clustering.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/docs/clustering.png
--------------------------------------------------------------------------------
/vision/vae_mnist/docs/epoch_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/docs/epoch_10.png
--------------------------------------------------------------------------------
/vision/vae_mnist/docs/epoch_20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/docs/epoch_20.png
--------------------------------------------------------------------------------
/vision/vae_mnist/docs/epoch_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/docs/epoch_5.png
--------------------------------------------------------------------------------
/vision/vae_mnist/docs/manifold.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/docs/manifold.png
--------------------------------------------------------------------------------
/vision/vae_mnist/docs/original.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/docs/original.png
--------------------------------------------------------------------------------
/vision/vae_mnist/docs/vae.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/docs/vae.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/clustering.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/clustering.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_1.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_10.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_11.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_12.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_13.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_14.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_15.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_16.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_17.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_18.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_19.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_2.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_20.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_3.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_4.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_5.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_6.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_7.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_8.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/epoch_9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/epoch_9.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/manifold.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/manifold.png
--------------------------------------------------------------------------------
/vision/vae_mnist/output/original.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vae_mnist/output/original.png
--------------------------------------------------------------------------------
/vision/vae_mnist/vae_mnist.jl:
--------------------------------------------------------------------------------
1 | # Variational Autoencoder(VAE)
2 | #
3 | # Auto-Encoding Variational Bayes
4 | # Diederik P Kingma, Max Welling
5 | # https://arxiv.org/abs/1312.6114
6 |
7 | using JLD2
8 | # using CUDA, cuDNN # uncomment one of these if you use GPU
9 | # using AMDGPU
10 | # using Metal
11 | using DrWatson: struct2dict
12 | using Flux
13 | using Optimisers: AdamW
14 | using MLUtils: randn_like, chunk, DataLoader
15 | using Flux: logitbinarycrossentropy
16 | using Images
17 | using Logging: with_logger
18 | using MLDatasets
19 | using ProgressMeter: Progress, next!
20 | using TensorBoardLogger: TBLogger, tb_overwrite
21 | using Random
22 |
23 | # load MNIST images and return loader
24 | function get_data(batch_size)
25 | xtrain, ytrain = MLDatasets.MNIST(split=:train)[:]
26 | xtrain = reshape(xtrain, 28^2, :)
27 | return DataLoader((xtrain, ytrain), batchsize=batch_size, shuffle=true)
28 | end
29 |
30 | struct Encoder
31 | linear
32 | μ
33 | logσ
34 | end
35 |
36 | Flux.@layer Encoder
37 |
38 | Encoder(input_dim::Int, latent_dim::Int, hidden_dim::Int) = Encoder(
39 | Dense(input_dim, hidden_dim, tanh), # linear
40 | Dense(hidden_dim, latent_dim), # μ
41 | Dense(hidden_dim, latent_dim), # logσ
42 | )
43 |
44 | function (encoder::Encoder)(x)
45 | h = encoder.linear(x)
46 | encoder.μ(h), encoder.logσ(h)
47 | end
48 |
49 | Decoder(input_dim::Int, latent_dim::Int, hidden_dim::Int) = Chain(
50 | Dense(latent_dim, hidden_dim, tanh),
51 | Dense(hidden_dim, input_dim)
52 | )
53 |
54 | function reconstruct(encoder, decoder, x)
55 | μ, logσ = encoder(x)
56 | z = μ + randn_like(logσ) .* exp.(logσ)
57 | return μ, logσ, decoder(z)
58 | end
59 |
60 | function model_loss(encoder, decoder, x)
61 | μ, logσ, decoder_z = reconstruct(encoder, decoder, x)
62 | batch_size = size(x)[end]
63 | # KL-divergence
64 | kl_q_p = 0.5f0 * sum(@. (exp(2*logσ) + μ^2 - 1 - 2*logσ)) / batch_size
65 |
66 | logp_x_z = -logitbinarycrossentropy(decoder_z, x, agg=sum) / batch_size
67 |
68 | return -logp_x_z + kl_q_p
69 | end
70 |
71 | function convert_to_image(x, y_size)
72 | Gray.(permutedims(vcat(reshape.(chunk(x |> cpu, y_size), 28, :)...), (2, 1)))
73 | end
74 |
75 | # arguments for the `train` function
76 | Base.@kwdef mutable struct Args
77 | η = 1e-3 # learning rate
78 | λ = 1e-4 # regularization parameter
79 | batch_size = 128 # batch size
80 | sample_size = 10 # sampling size for output
81 | epochs = 20 # number of epochs
82 | seed = 0 # random seed
83 | use_gpu = true # use GPU
84 | input_dim = 28^2 # image size
85 | latent_dim = 64 # latent dimension
86 | hidden_dim = 500 # hidden dimension
87 | verbose_freq = 10 # logging for every verbose_freq iterations
88 | tblogger = false # log training with tensorboard
89 | save_path = "output" # results path
90 | end
91 |
92 | function train(; kws...)
93 | # load hyperparameters
94 | args = Args(; kws...)
95 | args.seed > 0 && Random.seed!(args.seed)
96 |
97 | if args.use_gpu
98 | device = Flux.get_device()
99 | else
100 | device = Flux.get_device("CPU")
101 | end
102 |
103 | @info "Training on $device"
104 |
105 | # load MNIST images
106 | loader = get_data(args.batch_size)
107 |
108 | # initialize encoder and decoder
109 | encoder = Encoder(args.input_dim, args.latent_dim, args.hidden_dim) |> device
110 | decoder = Decoder(args.input_dim, args.latent_dim, args.hidden_dim) |> device
111 |
112 | # AdamW optimiser
113 | opt_enc = Flux.setup(AdamW(eta=args.η, lambda=args.λ), encoder)
114 | opt_dec = Flux.setup(AdamW(eta=args.η, lambda=args.λ), decoder)
115 |
116 | !ispath(args.save_path) && mkpath(args.save_path)
117 |
118 | # logging by TensorBoard.jl
119 | if args.tblogger
120 | tblogger = TBLogger(args.save_path, tb_overwrite)
121 | end
122 |
123 | # fixed input
124 | original, _ = first(get_data(args.sample_size^2))
125 | original = original |> device
126 | image = convert_to_image(original, args.sample_size)
127 | image_path = joinpath(args.save_path, "original.png")
128 | save(image_path, image)
129 |
130 | # training
131 | train_steps = 0
132 | @info "Start Training, total $(args.epochs) epochs"
133 | for epoch = 1:args.epochs
134 | @info "Epoch $(epoch)"
135 | progress = Progress(length(loader))
136 |
137 | for (x, _) in loader
138 | x_dev = x |> device
139 | loss, (grad_enc, grad_dec) = Flux.withgradient(encoder, decoder) do enc, dec
140 | model_loss(enc, dec, x_dev)
141 | end
142 |
143 | Flux.update!(opt_enc, encoder, grad_enc)
144 | Flux.update!(opt_dec, decoder, grad_dec)
145 | # progress meter
146 | next!(progress; showvalues=[(:loss, loss)])
147 |
148 | # logging with TensorBoard
149 | if args.tblogger && train_steps % args.verbose_freq == 0
150 | with_logger(tblogger) do
151 | @info "train" loss=loss
152 | end
153 | end
154 |
155 | train_steps += 1
156 | end
157 | # save image
158 | _, _, rec_original = reconstruct(encoder, decoder, original)
159 | rec_original = sigmoid.(rec_original)
160 | image = convert_to_image(rec_original, args.sample_size)
161 | image_path = joinpath(args.save_path, "epoch_$(epoch).png")
162 | save(image_path, image)
163 | @info "Image saved: $(image_path)"
164 | end
165 |
166 | # save model
167 | let encoder = cpu(encoder), decoder = cpu(decoder), args=struct2dict(args)
168 | filepath = joinpath(args[:save_path], "checkpoint.jld2")
169 | JLD2.save(filepath, "encoder", Flux.state(encoder),
170 | "decoder", Flux.state(decoder),
171 | "args", args)
172 | @info "Model saved: $(filepath)"
173 | end
174 | end
175 |
176 | if abspath(PROGRAM_FILE) == @__FILE__
177 | train()
178 | end
179 |
180 |
--------------------------------------------------------------------------------
/vision/vae_mnist/vae_plot.jl:
--------------------------------------------------------------------------------
1 | include("vae_mnist.jl")
2 |
3 | using Plots
4 |
5 | function plot_result()
6 | checkpoint = JLD2.load("output/checkpoint.jld2")
7 | encoder_state = checkpoint["encoder"]
8 | decoder_state = checkpoint["decoder"]
9 | args = Args(; checkpoint["args"]...)
10 | encoder = Encoder(args.input_dim, args.latent_dim, args.hidden_dim)
11 | decoder = Decoder(args.input_dim, args.latent_dim, args.hidden_dim)
12 | Flux.loadmodel!(encoder, encoder_state)
13 | Flux.loadmodel!(decoder, decoder_state)
14 | loader = get_data(args.batch_size)
15 |
16 | # clustering in the latent space
17 | # visualize first two dims
18 | plt = scatter(palette=:rainbow)
19 | for (i, (x, y)) in enumerate(loader)
20 | i < 20 || break
21 | μ, logσ = encoder(x)
22 | @assert size(μ, 1) == 2 # Latent_dim has to be 2 for direct visualization, otherwise use PCA or t-SNE
23 | scatter!(μ[1, :], μ[2, :],
24 | markerstrokewidth=0, markeralpha=0.8,
25 | aspect_ratio=1,
26 | markercolor=y, label="")
27 | end
28 | savefig(plt, "output/clustering.png")
29 |
30 | z = range(-2.0, stop=2.0, length=11)
31 | len = Base.length(z)
32 | z1 = repeat(z, len)
33 | z2 = sort(z1)
34 | x = zeros(Float32, args.latent_dim, len^2)
35 | x[1, :] = z1
36 | x[2, :] = z2
37 | samples = decoder(x)
38 | samples = sigmoid.(samples)
39 | image = convert_to_image(samples, len)
40 | save("output/manifold.png", image)
41 | end
42 |
43 | if abspath(PROGRAM_FILE) == @__FILE__
44 | plot_result()
45 | end
46 |
--------------------------------------------------------------------------------
/vision/vgg_cifar10/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
3 | Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
4 | Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
5 | MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
6 | MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
7 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
8 |
9 | [compat]
10 | CUDA = "3, 4"
11 | Flux = "0.13.9, 0.14"
12 | MLDatasets = "0.7"
13 | MLUtils = "0.3"
14 | julia = "1.6"
15 |
--------------------------------------------------------------------------------
/vision/vgg_cifar10/README.md:
--------------------------------------------------------------------------------
1 | # Visual Geometry Group (VGG)
2 |
3 | 
4 |
5 | [Source](http://d2l.ai/chapter_convolutional-modern/vgg.html)
6 |
7 | ## Model Info
8 |
9 | The basic building block of classic CNNs is a sequence of the following: (i) a convolutional layer with padding to maintain the resolution, (ii) a nonlinearity such as a ReLU, and (iii) a pooling layer such as max pooling. _One VGG block consists of a sequence of convolutional layers followed by a max-pooling layer for spatial downsampling_. In the original VGG paper [Simonyan & Zisserman, 2015](https://arxiv.org/pdf/1409.1556v4.pdf), the authors employed convolutions with 3×3 kernels and padding of 1 (preserving height and width) and 2×2 max pooling with a stride of 2 (halving the resolution after each block).
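
A minimal sketch of that block pattern in Flux (the `vgg_block` helper here is illustrative, not part of this folder's script, which writes the layers out explicitly and additionally inserts batch normalization):

```julia
using Flux

# One VGG block: `n` 3×3 convolutions (padding 1 preserves height and width)
# followed by a 2×2 max pool with stride 2, which halves the resolution.
function vgg_block(n, in_ch, out_ch)
    convs = [Conv((3, 3), (i == 1 ? in_ch : out_ch) => out_ch, relu, pad=1)
             for i in 1:n]
    Chain(convs..., MaxPool((2, 2)))
end

vgg_block(2, 3, 64)  # e.g. the first VGG-16 block: 3 input channels -> 64
```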
10 |
11 | ## Training
12 |
13 | ```shell
14 | cd vision/vgg_cifar10
15 | julia --project vgg_cifar10.jl
16 | ```
17 |
18 | ## References
19 |
20 | * [Simonyan, K. and Zisserman, A., “Very Deep Convolutional Networks for Large-Scale Image Recognition”, arXiv e-prints, 2015](https://arxiv.org/pdf/1409.1556v4.pdf)
22 |
23 | * [Aston Zhang, Zachary C. Lipton, Mu Li and Alexander J. Smola, "Dive into Deep Learning", 2020](http://d2l.ai/chapter_convolutional-modern/vgg.html)
24 |
--------------------------------------------------------------------------------
/vision/vgg_cifar10/docs/vgg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FluxML/model-zoo/9309c30eb095ee56370b73b8a10c1427675ee604/vision/vgg_cifar10/docs/vgg.png
--------------------------------------------------------------------------------
/vision/vgg_cifar10/vgg_cifar10.jl:
--------------------------------------------------------------------------------
1 | using Flux
2 | using Flux: onehotbatch, onecold, flatten
3 | using Flux.Losses: logitcrossentropy
4 | using Statistics: mean
5 | using CUDA
6 | using MLDatasets: CIFAR10
7 | using MLUtils: splitobs, DataLoader
8 |
9 | if CUDA.has_cuda()
10 | @info "CUDA is on"
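    # make scalar indexing of GPU arrays an error instead of a silent, slow fallback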
11 | CUDA.allowscalar(false)
12 | end
13 |
14 | function get_processed_data(args)
15 | x, y = CIFAR10(:train)[:]
16 |
17 | (train_x, train_y), (val_x, val_y) = splitobs((x, y), at=1-args.valsplit)
18 |
19 | train_x = float(train_x)
20 | train_y = onehotbatch(train_y, 0:9)
21 | val_x = float(val_x)
22 | val_y = onehotbatch(val_y, 0:9)
23 |
24 | return (train_x, train_y), (val_x, val_y)
25 | end
26 |
27 | function get_test_data()
28 | test_x, test_y = CIFAR10(:test)[:]
29 |
30 | test_x = float(test_x)
31 | test_y = onehotbatch(test_y, 0:9)
32 |
33 | return test_x, test_y
34 | end
35 |
36 | # VGG16 model with batch normalization
37 | function vgg16()
38 | Chain([
39 | Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)),
40 | BatchNorm(64),
41 | Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)),
42 | BatchNorm(64),
43 | MaxPool((2,2)),
44 | Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)),
45 | BatchNorm(128),
46 | Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)),
47 | BatchNorm(128),
48 | MaxPool((2,2)),
49 | Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)),
50 | BatchNorm(256),
51 | Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
52 | BatchNorm(256),
53 | Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
54 | BatchNorm(256),
55 | MaxPool((2,2)),
56 | Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)),
57 | BatchNorm(512),
58 | Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
59 | BatchNorm(512),
60 | Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
61 | BatchNorm(512),
62 | MaxPool((2,2)),
63 | Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
64 | BatchNorm(512),
65 | Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
66 | BatchNorm(512),
67 | Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
68 | BatchNorm(512),
69 | MaxPool((2,2)),
70 | flatten,
71 | Dense(512, 4096, relu),
72 | Dropout(0.5),
73 | Dense(4096, 4096, relu),
74 | Dropout(0.5),
75 | Dense(4096, 10)
76 | ])
77 | end
78 |
79 | Base.@kwdef mutable struct Args
80 | batchsize::Int = 128
81 | lr::Float32 = 3f-4
82 | epochs::Int = 50
83 | valsplit::Float64 = 0.1
84 | end
85 |
86 | function train(; kws...)
87 | # Initialize the hyperparameters
88 | args = Args(; kws...)
89 |
90 |     # Load the training and validation data
91 | train_data, val_data = get_processed_data(args)
92 |
93 | train_loader = DataLoader(train_data, batchsize=args.batchsize, shuffle=true)
94 | val_loader = DataLoader(val_data, batchsize=args.batchsize)
95 |
96 | @info("Constructing Model")
97 | m = vgg16() |> gpu
98 |
99 | loss(m, x, y) = logitcrossentropy(m(x), y)
100 |
101 | ## Training
102 | # Defining the optimizer
103 | opt = Flux.setup(Adam(args.lr), m)
104 |
105 |     @info("Training...")
106 |     # Train the model
107 | for epoch in 1:args.epochs
108 | @info "Epoch $epoch"
109 |
110 | for (x, y) in train_loader
111 | x, y = x |> gpu, y |> gpu
112 | gs = Flux.gradient(m -> loss(m, x, y), m)
113 | Flux.update!(opt, m, gs[1])
114 | end
115 |
116 | validation_loss = 0f0
117 | for (x, y) in val_loader
118 | x, y = x |> gpu, y |> gpu
119 | validation_loss += loss(m, x, y)
120 | end
121 | validation_loss /= length(val_loader)
122 | @show validation_loss
123 | end
124 |
125 | return m
126 | end
127 |
128 | function test(m; kws...)
129 |     args = Args(; kws...)
130 |
131 | test_data = get_test_data()
132 | test_loader = DataLoader(test_data, batchsize=args.batchsize)
133 |
134 | correct, total = 0, 0
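    # onecold maps each logits column (and each one-hot label column) back to
    # a class index, so comparing them counts correct predictions per batch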
135 | for (x, y) in test_loader
136 | x, y = x |> gpu, y |> gpu
137 | correct += sum(onecold(cpu(m(x))) .== onecold(cpu(y)))
138 | total += size(y, 2)
139 | end
140 | test_accuracy = correct / total
141 |
142 | # Print the final accuracy
143 | @show test_accuracy
144 | end
145 |
146 | if abspath(PROGRAM_FILE) == @__FILE__
147 | m = train()
148 | test(m)
149 | end
150 |
151 |
--------------------------------------------------------------------------------