├── LICENSE
├── README.md
├── __init__.py
├── captions.txt
├── docs
├── Makefile
├── build
│ ├── doctrees
│ │ ├── environment.pickle
│ │ ├── index.doctree
│ │ └── minimagen.doctree
│ └── html
│ │ ├── .buildinfo
│ │ ├── _images
│ │ ├── posterior_mean.png
│ │ ├── posterior_variance.png
│ │ ├── q_posterior.png
│ │ └── q_sample.png
│ │ ├── _sources
│ │ ├── index.rst.txt
│ │ └── minimagen.rst.txt
│ │ ├── _static
│ │ ├── _sphinx_javascript_frameworks_compat.js
│ │ ├── basic.css
│ │ ├── clf_free_guidance.png
│ │ ├── css
│ │ │ ├── badge_only.css
│ │ │ ├── fonts
│ │ │ │ ├── Roboto-Slab-Bold.woff
│ │ │ │ ├── Roboto-Slab-Bold.woff2
│ │ │ │ ├── Roboto-Slab-Regular.woff
│ │ │ │ ├── Roboto-Slab-Regular.woff2
│ │ │ │ ├── fontawesome-webfont.eot
│ │ │ │ ├── fontawesome-webfont.svg
│ │ │ │ ├── fontawesome-webfont.ttf
│ │ │ │ ├── fontawesome-webfont.woff
│ │ │ │ ├── fontawesome-webfont.woff2
│ │ │ │ ├── lato-bold-italic.woff
│ │ │ │ ├── lato-bold-italic.woff2
│ │ │ │ ├── lato-bold.woff
│ │ │ │ ├── lato-bold.woff2
│ │ │ │ ├── lato-normal-italic.woff
│ │ │ │ ├── lato-normal-italic.woff2
│ │ │ │ ├── lato-normal.woff
│ │ │ │ └── lato-normal.woff2
│ │ │ └── theme.css
│ │ ├── doctools.js
│ │ ├── documentation_options.js
│ │ ├── file.png
│ │ ├── jquery-3.6.0.js
│ │ ├── jquery.js
│ │ ├── js
│ │ │ ├── badge_only.js
│ │ │ ├── html5shiv-printshiv.min.js
│ │ │ ├── html5shiv.min.js
│ │ │ └── theme.js
│ │ ├── language_data.js
│ │ ├── minus.png
│ │ ├── plus.png
│ │ ├── posterior_mean.png
│ │ ├── posterior_mean_coeffs.png
│ │ ├── posterior_variance.png
│ │ ├── posterior_variance_box.png
│ │ ├── pygments.css
│ │ ├── q_posterior.png
│ │ ├── q_sample.png
│ │ ├── q_sample_reparam.png
│ │ ├── searchtools.js
│ │ ├── underscore-1.13.1.js
│ │ ├── underscore.js
│ │ └── x_tm1.png
│ │ ├── genindex.html
│ │ ├── index.html
│ │ ├── minimagen.html
│ │ ├── objects.inv
│ │ ├── py-modindex.html
│ │ ├── search.html
│ │ └── searchindex.js
├── make.bat
├── requirements.txt
└── source
│ ├── _static
│ ├── clf_free_guidance.png
│ ├── file.png
│ ├── minus.png
│ ├── plus.png
│ ├── posterior_mean.png
│ ├── posterior_mean_coeffs.png
│ ├── posterior_variance.png
│ ├── posterior_variance_box.png
│ ├── q_posterior.png
│ ├── q_sample.png
│ ├── q_sample_reparam.png
│ └── x_tm1.png
│ ├── conf.py
│ ├── index.rst
│ └── minimagen.rst
├── images
├── clf_free_guidance.png
├── conditioning_diagram.png
├── dynamic_threshold.mp4
├── model_structure.png
├── posterior_mean.png
├── posterior_mean_coeffs.png
├── posterior_variance.png
├── posterior_variance_box.png
├── q_posterior.png
├── q_sample.png
├── q_sample_reparam.png
├── transformer_full.png
└── x_tm1.png
├── inference.py
├── main.py
├── minimagen
├── Imagen.py
├── Unet.py
├── __init__.py
├── diffusion_model.py
├── generate.py
├── helpers.py
├── layers.py
├── t5.py
└── training.py
├── parameters
├── imagen_params_20220816_165729.json
├── training_parameters_20220816_165729.txt
├── unet_0_params_20220816_165729.json
└── unet_1_params_20220816_165729.json
├── requirements.txt
├── setup.py
└── train.py
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2022 AssemblyAI
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MinImagen
2 | ### A Minimal implementation of the [Imagen](https://imagen.research.google/) text-to-image model.
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | ### See [Build Your Own Imagen Text-to-Image Model](https://www.assemblyai.com/blog/build-your-own-imagen-text-to-image-model/) for a tutorial on how to build MinImagen.
11 |
12 | ### See [How Imagen Actually Works](https://www.assemblyai.com/blog/how-imagen-actually-works/) for a detailed explanation of Imagen's operating principles.
13 |
14 |
15 |
16 | Given a caption of an image, the text-to-image model **Imagen** will generate an image that reflects the scene described by the caption. The model is a [cascading diffusion model](https://arxiv.org/abs/2106.15282), using a [T5 text encoder](https://arxiv.org/abs/1910.10683) to generate a caption encoding which conditions a base image generator and then a sequence of super-resolution models through which the output of the base image generator is passed.
17 |
18 | In particular, two notable contributions are the developments of:
19 | 1. [**Noise Conditioning Augmentation**](https://www.assemblyai.com/blog/how-imagen-actually-works/#robust-cascaded-diffusion-models), which noises low-resolution conditioning images in the super-resolution models, and
20 | 2. [**Dynamic Thresholding**](https://www.assemblyai.com/blog/how-imagen-actually-works/#dynamic-thresholding) which helps prevent image saturation at high [classifier-free guidance](https://www.assemblyai.com/blog/how-imagen-actually-works/#classifier-free-guidance) weights.
21 |
22 |
23 |
24 | **N.B. - This project is intended only for educational purposes to demonstrate how Diffusion Models are implemented and incorporated into text-to-image models. Many components of the network that are not essential for these educational purposes have been stripped off for simplicity. For a full-fledged implementation, check out Phil Wang's repo (see attribution note below)**
25 |
26 |
27 |
28 | ## Table of Contents
29 | - [Attribution Note](#attribution-note)
30 | - [Installation](#installation)
31 | - [Documentation](#documentation)
32 | - [Usage - Command Line](#usage---command-line)
33 | - [`main.py`](#mainpy) - training and image generation in sequence
34 | - [`train.py`](#trainpy) - training a MinImagen instance
35 | - [`inference.py`](#inferencepy) - generating images using a MinImagen instance
36 | - [Usage - Package](#usage---package)
37 | - [Training](#training)
38 | - [Image Generation](#image-generation)
39 | - [Modifying the Source Code](#modifying-the-source-code)
40 | - [Additional Resources](#additional-resources)
41 | - [Socials](#socials)
42 |
43 |
44 |
45 | ## Attribution Note
46 | This implementation is largely based on Phil Wang's [Imagen implementation](https://github.com/lucidrains/imagen-pytorch).
47 |
48 |
49 |
50 | ## Installation
51 | To install MinImagen, run the following command in the terminal:
52 | ```bash
53 | $ pip install minimagen
54 | ```
55 | **Note that MinImagen requires Python 3.9 or higher**
56 |
57 |
58 |
59 | ## Documentation
60 | See the [MinImagen Documentation](https://assemblyai-examples.github.io/MinImagen/) to learn more about the package.
61 |
62 |
63 |
64 | ## Usage - Command Line
65 | If you have cloned this repo (as opposed to just installing the `minimagen` package), you can use the provided scripts to get started with MinImagen. This repo can be cloned by running the following command in the terminal:
66 |
67 | ```bash
68 | $ git clone https://github.com/AssemblyAI-Examples/MinImagen.git
69 | ```
70 |
71 |
72 |
73 | ### `main.py`
74 | For the most basic usage, simply enter the MinImagen directory and run the following in the terminal:
75 | ```bash
76 | $ python main.py
77 | ```
78 | This will create a small MinImagen instance and train it on a tiny amount of data, and then use this MinImagen instance to generate an image.
79 |
80 | After running the script, you will see a directory called `training_<TIMESTAMP>`.
81 | 1. This directory is called a *Training Directory* and is generated when training a MinImagen instance.
82 | 2. It contains information about the configuration (`parameters` subdirectory), and contains the model checkpoints (`state_dicts` and `tmp` directories).
83 | 3. It also contains a `training_progress.txt` file that records training progress.
84 |
85 | You will also see a directory called `generated_images_<TIMESTAMP>`.
86 | 1. This directory contains a folder of images generated by the model (`generated_images`).
87 | 2. It also contains a `captions.txt` file, which documents the captions that were input to get the images (where the line index of a given caption corresponds to the image number in the `generated_images` folder).
88 | 3. Finally, this directory also contains `imagen_training_directory.txt`, which specifies the name of the Training Directory used to load the MinImagen instance / generate images.
89 |
90 |
91 |
92 | ### `train.py`
93 |
94 | `main.py` simply runs `train.py` and `inference.py` in series, the former to train the model and the latter to generate the image.
95 |
96 | To train a model, simply run `train.py` and specify relevant command line arguments. The [possible arguments](https://github.com/AssemblyAI-Examples/MinImagen/blob/d7de8350db17713fb630e127c010020820953872/minimagen/training.py#L178) are:
97 |
98 | - `--PARAMETERS` or `-p`, which specifies a directory that specifies the MinImagen configuration to use. It should be structured like a `parameters` subdirectory within a Training Directory (example in [`parameters`](https://github.com/AssemblyAI-Examples/MinImagen/tree/main/parameters)).
99 | - `--NUM_WORKERS` or `-n`, which specifies the number of workers to use for the DataLoaders.
100 | - `--BATCH_SIZE` or `-b`, which specifies the batch size to use during training.
101 | - `--MAX_NUM_WORDS` or `-mw`, which specifies the maximum number of words allowed in a caption.
102 | - `--IMG_SIDE_LEN` or `-s`, which specifies the final side length of the square images the MinImagen will output.
103 | - `--EPOCHS` or `-e`, which specifies the number of training epochs.
104 | - `--T5_NAME` or `-t5`, which specifies the name of the T5 encoder to use.
105 | - `--TRAIN_VALID_FRAC` or `-f`, which specifies the fraction of dataset to use for training (vs. validation).
106 | - `--TIMESTEPS` or `-t`, which specifies the number of timesteps in Diffusion Process.
107 | - `--OPTIM_LR` or `-lr`, which specifies the learning rate for Adam optimizer.
108 | - `--ACCUM_ITER` or `-ai`, which specifies the number of batches to accumulate for gradient accumulation.
109 | - `--CHCKPT_NUM` or `-cn`, which specifies the interval of batches to create a temporary model checkpoint at during training.
110 | - `--VALID_NUM` or `-vn`, which specifies the number of validation images to use. If None, uses full amount from train/valid split. The reason for including this is that, even with an e.g. 0.99 `--TRAIN_VALID_FRAC`, a prohibitively large number of images could still be left for validation for very large datasets.
111 | - `--RESTART_DIRECTORY` or `-rd`, which specifies the training directory to load the MinImagen instance from if resuming training. A new Training Directory will be created for the training, leaving the previous Training Directory from which the checkpoint is loaded unperturbed.
112 | - `--TESTING` or `-test`, which is used to run the script with a small MinImagen instance and small dataset for testing.
113 |
114 | For example, to run a small training using the provided example [`parameters`](https://github.com/AssemblyAI-Examples/MinImagen/tree/main/parameters) folder, run the following in the terminal:
115 |
116 | ```bash
117 | python train.py --PARAMETERS ./parameters --BATCH_SIZE 2 --TIMESTEPS 25 --TESTING
118 | ```
119 | After execution, you will see a new `training_<TIMESTAMP>` [Training Directory](#training-directory) that contains the files as [listed above](#training-directory) from the training.
120 |
121 |
122 |
123 | ### `inference.py`
124 |
125 | To generate images using a model from a [Training Directory](#training-directory), we can use `inference.py`. Simply run `inference.py` and specify relevant command line arguments. The possible arguments are:
126 |
127 | - `--TRAINING_DIRECTORY` or `-d`, which specifies the training directory from which to load the MinImagen instance for inference.
128 | - `--CAPTIONS` or `-c`, which specifies either (a) a single caption to generate an image for, or (b) a filepath to a `.txt` file that contains a list of captions to generate images for, where each caption is on a new line.
129 |
130 | For example, to generate images for the example captions provided in [`captions.txt`](https://github.com/AssemblyAI-Examples/MinImagen/blob/main/captions.txt) using the model generated from the above training line, simply run
131 |
132 | ```bash
133 | python inference.py --CAPTIONS captions.txt --TRAINING_DIRECTORY training_<TIMESTAMP>
134 | ```
135 |
136 | where `TIMESTAMP` is replaced with the appropriate value from your training.
137 |
138 |
139 |
140 | ## Usage - Package
141 |
142 | ### Training
143 |
144 | A minimal training script using the `minimagen` package is shown below. See [`train.py`](https://github.com/AssemblyAI-Examples/MinImagen/blob/main/train.py) for a more built-up version of the below code.
145 |
146 | ```python
147 | import os
148 | from datetime import datetime
149 |
150 | import torch.utils.data
151 | from torch import optim
152 |
153 | from minimagen.Imagen import Imagen
154 | from minimagen.Unet import Unet, Base, Super, BaseTest, SuperTest
155 | from minimagen.generate import load_minimagen, load_params
156 | from minimagen.t5 import get_encoded_dim
157 | from minimagen.training import get_minimagen_parser, ConceptualCaptions, get_minimagen_dl_opts, \
158 | create_directory, get_model_size, save_training_info, get_default_args, MinimagenTrain, \
159 | load_testing_parameters
160 |
161 | # Get device
162 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
163 |
164 | # Command line argument parser
165 | parser = get_minimagen_parser()
166 | args = parser.parse_args()
167 |
168 | # Create training directory
169 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
170 | dir_path = f"./training_{timestamp}"
171 | training_dir = create_directory(dir_path)
172 |
173 | # Replace some cmd line args to lower computational load.
174 | args = load_testing_parameters(args)
175 |
176 | # Load subset of Conceptual Captions dataset.
177 | train_dataset, valid_dataset = ConceptualCaptions(args, smalldata=True)
178 |
179 | # Create dataloaders
180 | dl_opts = {**get_minimagen_dl_opts(device), 'batch_size': args.BATCH_SIZE, 'num_workers': args.NUM_WORKERS}
181 | train_dataloader = torch.utils.data.DataLoader(train_dataset, **dl_opts)
182 | valid_dataloader = torch.utils.data.DataLoader(valid_dataset, **dl_opts)
183 |
184 | # Use small U-Nets to lower computational load.
185 | unets_params = [get_default_args(BaseTest), get_default_args(SuperTest)]
186 | unets = [Unet(**unet_params).to(device) for unet_params in unets_params]
187 |
188 | # Specify MinImagen parameters
189 | imagen_params = dict(
190 | image_sizes=(int(args.IMG_SIDE_LEN / 2), args.IMG_SIDE_LEN),
191 | timesteps=args.TIMESTEPS,
192 | cond_drop_prob=0.15,
193 | text_encoder_name=args.T5_NAME
194 | )
195 |
196 | # Create MinImagen from UNets with specified imagen parameters
197 | imagen = Imagen(unets=unets, **imagen_params).to(device)
198 |
199 | # Fill in unspecified arguments with defaults to record complete config (parameters) file
200 | unets_params = [{**get_default_args(Unet), **i} for i in unets_params]
201 | imagen_params = {**get_default_args(Imagen), **imagen_params}
202 |
203 | # Get the size of the Imagen model in megabytes
204 | model_size_MB = get_model_size(imagen)
205 |
206 | # Save all training info (config files, model size, etc.)
207 | save_training_info(args, timestamp, unets_params, imagen_params, model_size_MB, training_dir)
208 |
209 | # Create optimizer
210 | optimizer = optim.Adam(imagen.parameters(), lr=args.OPTIM_LR)
211 |
212 | # Train the MinImagen instance
213 | MinimagenTrain(timestamp, args, unets, imagen, train_dataloader, valid_dataloader, training_dir, optimizer, timeout=30)
214 | ```
215 |
216 | ### Image Generation
217 |
218 | A minimal inference script using the `minimagen` package is shown below. See [`inference.py`](https://github.com/AssemblyAI-Examples/MinImagen/blob/main/inference.py) for a more built-up version of the below code.
219 |
220 | ```python
221 | from argparse import ArgumentParser
222 | from minimagen.generate import load_minimagen, sample_and_save
223 |
224 | # Command line argument parser
225 | parser = ArgumentParser()
226 | parser.add_argument("-d", "--TRAINING_DIRECTORY", dest="TRAINING_DIRECTORY", help="Training directory to use for inference", type=str)
227 | args = parser.parse_args()
228 |
229 | # Specify the caption(s) to generate images for
230 | captions = ['a happy dog']
231 |
232 | # Use `sample_and_save` to generate and save the images
233 | sample_and_save(captions, training_directory=args.TRAINING_DIRECTORY)
234 |
235 |
236 |
237 | # Alternatively, rather than specifying a Training Directory, you can input just a MinImagen instance to use for image generation.
238 | # In this case, information about the MinImagen instance used to generate the images will not be saved.
239 | minimagen = load_minimagen(args.TRAINING_DIRECTORY)
240 | sample_and_save(captions, minimagen=minimagen)
241 | ```
242 |
243 | To see more of what MinImagen has to offer, or to get additional details on the scripts above, check out the [MinImagen Documentation](https://assemblyai-examples.github.io/MinImagen/)
244 |
245 |
246 |
247 | ## Modifying the Source Code
248 | If you want to make modifications to the source code (rather than use the `minimagen` package), first clone this repository and navigate into it:
249 |
250 | ```bash
251 | $ git clone https://github.com/AssemblyAI-Examples/MinImagen.git
252 | $ cd MinImagen
253 | ```
254 |
255 | After that, create a virtual environment:
256 | ```bash
257 | $ pip install virtualenv
258 | $ virtualenv venv
259 | ```
260 |
261 | Then activate the virtual environment and install all dependencies:
262 | ```bash
263 | $ .\venv\Scripts\activate.bat # Windows
264 | $ source venv/bin/activate # MacOS/Linux
265 | $ pip install -r requirements.txt
266 | ```
267 |
268 | Now you can modify the source code and the changes will be reflected when running any of the [included scripts](#usage---command-line) (as long as the virtual environment created above is active).
269 |
270 |
271 |
272 |
273 | ## Additional Resources
274 |
275 | - For a step-by-step guide on how to build the version of Imagen in this repository, see [Build Your Own Imagen Text-to-Image Model](https://www.assemblyai.com/blog/build-your-own-imagen-text-to-image-model/).
276 | - For a deep-dive into how Imagen works, see [How Imagen Actually Works](https://www.assemblyai.com/blog/how-imagen-actually-works/).
277 | - For a deep-dive into Diffusion Models, see our [Introduction to Diffusion Models for Machine Learning](https://www.assemblyai.com/blog/diffusion-models-for-machine-learning-introduction/) guide.
278 | - For additional learning resources on Machine Learning and Deep Learning, check out our [Blog](https://www.assemblyai.com/blog/) and [YouTube channel](https://www.youtube.com/c/AssemblyAI).
279 | - Read the original Imagen paper [here](https://arxiv.org/abs/2205.11487).
280 |
281 | ## Socials
282 | - Follow us on [Twitter](https://twitter.com/AssemblyAI) for more Deep Learning content.
283 | - [Follow our newsletter](https://assemblyai.us17.list-manage.com/subscribe?u=cb9db7b18b274c2d402a56c5f&id=2116bf7c68) to stay up to date on our recent content.
284 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | from .minimagen import Imagen
2 | from .minimagen import t5
3 | from .minimagen import Unet
4 | from .minimagen import diffusion_model
--------------------------------------------------------------------------------
/captions.txt:
--------------------------------------------------------------------------------
1 | a happy dog
2 | a big red house
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/build/doctrees/environment.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/doctrees/environment.pickle
--------------------------------------------------------------------------------
/docs/build/doctrees/index.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/doctrees/index.doctree
--------------------------------------------------------------------------------
/docs/build/doctrees/minimagen.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/doctrees/minimagen.doctree
--------------------------------------------------------------------------------
/docs/build/html/.buildinfo:
--------------------------------------------------------------------------------
1 | # Sphinx build info version 1
2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3 | config: 7fd9cf5bdbe1c20ffb2db5da0c8aa23e
4 | tags: 645f666f9bcd5a90fca523b33c5a78b7
5 |
--------------------------------------------------------------------------------
/docs/build/html/_images/posterior_mean.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_images/posterior_mean.png
--------------------------------------------------------------------------------
/docs/build/html/_images/posterior_variance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_images/posterior_variance.png
--------------------------------------------------------------------------------
/docs/build/html/_images/q_posterior.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_images/q_posterior.png
--------------------------------------------------------------------------------
/docs/build/html/_images/q_sample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_images/q_sample.png
--------------------------------------------------------------------------------
/docs/build/html/_sources/index.rst.txt:
--------------------------------------------------------------------------------
1 | .. MinImagen documentation master file, created by
2 | sphinx-quickstart on Mon Aug 15 18:23:24 2022.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to MinImagen's documentation!
7 | =====================================
8 |
9 | .. toctree::
10 | :maxdepth: 2
11 | :caption: Contents:
12 |
13 | minimagen
14 |
15 |
16 |
17 | Indices and tables
18 | ==================
19 |
20 | * :ref:`genindex`
21 | * :ref:`modindex`
22 | * :ref:`search`
23 |
--------------------------------------------------------------------------------
/docs/build/html/_sources/minimagen.rst.txt:
--------------------------------------------------------------------------------
1 | minimagen
2 | ==================
3 |
4 | Imagen
5 | ------------------------
6 |
7 | .. automodule:: minimagen.Imagen
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | Unet
13 | ----------------------
14 |
15 | .. automodule:: minimagen.Unet
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
20 | Diffusion Model
21 | ----------------------------------
22 |
23 | .. automodule:: minimagen.diffusion_model
24 | :members:
25 | :undoc-members:
26 | :show-inheritance:
27 |
28 | T5
29 | --------------------
30 |
31 | .. automodule:: minimagen.t5
32 | :members:
33 | :undoc-members:
34 | :show-inheritance:
35 |
36 | Training
37 | --------------------
38 |
39 | .. automodule:: minimagen.training
40 | :members:
41 | :undoc-members:
42 | :show-inheritance:
43 |
44 | Generate
45 | --------------------
46 |
47 | .. automodule:: minimagen.generate
48 | :members:
49 | :undoc-members:
50 | :show-inheritance:
--------------------------------------------------------------------------------
/docs/build/html/_static/_sphinx_javascript_frameworks_compat.js:
--------------------------------------------------------------------------------
1 | /*
2 | * _sphinx_javascript_frameworks_compat.js
3 | * ~~~~~~~~~~
4 | *
5 | * Compatibility shim for jQuery and underscore.js.
6 | *
7 | * WILL BE REMOVED IN Sphinx 6.0
8 | * xref RemovedInSphinx60Warning
9 | *
10 | */
11 |
12 | /**
13 | * Select a different prefix for underscore: the value returned by `_.noConflict()` is stored in `$u` (assigned without `var`, so it is not a local binding).
14 | */
15 | $u = _.noConflict();
16 |
17 |
18 | /**
19 | * Small helper function to URL-decode a string. Falsy input (undefined, null, '') is returned unchanged; otherwise every '+' is replaced by a space and the result is passed through decodeURIComponent().
20 | *
21 | * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL
22 | */
23 | jQuery.urldecode = function(x) {
24 | if (!x) {
25 | return x
26 | }
27 | return decodeURIComponent(x.replace(/\+/g, ' '));
28 | };
29 |
30 | /**
31 | * small helper function to urlencode strings
32 | */
33 | jQuery.urlencode = encodeURIComponent;
34 |
35 | /**
36 | * Parse the query string `s` (defaults to document.location.search when `s` is undefined) and return an object
37 | * that maps each URL-decoded key to an array of its URL-decoded values, so multiple values per key are supported.
38 | * Everything up to and including the first '?' is skipped. A part without '=' yields an undefined value for its key.
39 | */
40 | jQuery.getQueryParameters = function(s) {
41 | if (typeof s === 'undefined')
42 | s = document.location.search;
43 | var parts = s.substr(s.indexOf('?') + 1).split('&'); // indexOf gives -1 when there is no '?', so the whole string is split
44 | var result = {};
45 | for (var i = 0; i < parts.length; i++) {
46 | var tmp = parts[i].split('=', 2); // limit 2: at most [key, value]; text after a second '=' is dropped
47 | var key = jQuery.urldecode(tmp[0]);
48 | var value = jQuery.urldecode(tmp[1]); // tmp[1] is undefined when the part has no '='; urldecode returns it unchanged
49 | if (key in result)
50 | result[key].push(value);
51 | else
52 | result[key] = [value];
53 | }
54 | return result;
55 | };
56 |
57 | /**
58 | * Highlight a given string on a jquery object by wrapping the first match inside each visited text node in a
59 | * span element with the given class name (inside SVG: an unclassed tspan plus a classed rect sized to the parent's bounding box). Returns the value of this.each().
60 | */
61 | jQuery.fn.highlightText = function(text, className) {
62 | function highlight(node, addItems) {
63 | if (node.nodeType === 3) { // 3 is the DOM nodeType of a text node
64 | var val = node.nodeValue;
65 | var pos = val.toLowerCase().indexOf(text); // only the node value is lowercased; presumably callers pass `text` already lowercased
66 | if (pos >= 0 &&
67 | !jQuery(node.parentNode).hasClass(className) &&
68 | !jQuery(node.parentNode).hasClass("nohighlight")) {
69 | var span;
70 | var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg"); // true when the closest body/svg/foreignObject match is an svg
71 | if (isInSVG) {
72 | span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
73 | } else {
74 | span = document.createElement("span");
75 | span.className = className;
76 | }
77 | span.appendChild(document.createTextNode(val.substr(pos, text.length)));
78 | node.parentNode.insertBefore(span, node.parentNode.insertBefore(
79 | document.createTextNode(val.substr(pos + text.length)),
80 | node.nextSibling));
81 | node.nodeValue = val.substr(0, pos); // original node keeps only the text before the match; the match and the remainder now follow it
82 | if (isInSVG) { // the tspan gets no class, so queue a rect carrying className instead
83 | var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect");
84 | var bbox = node.parentElement.getBBox();
85 | rect.x.baseVal.value = bbox.x;
86 | rect.y.baseVal.value = bbox.y;
87 | rect.width.baseVal.value = bbox.width;
88 | rect.height.baseVal.value = bbox.height;
89 | rect.setAttribute('class', className);
90 | addItems.push({
91 | "parent": node.parentNode,
92 | "target": rect});
93 | }
94 | }
95 | }
96 | else if (!jQuery(node).is("button, select, textarea")) { // do not descend into form controls
97 | jQuery.each(node.childNodes, function() {
98 | highlight(this, addItems);
99 | });
100 | }
101 | }
102 | var addItems = [];
103 | var result = this.each(function() {
104 | highlight(this, addItems);
105 | });
106 | for (var i = 0; i < addItems.length; ++i) { // queued SVG rects are inserted only after the traversal has finished
107 | jQuery(addItems[i].parent).before(addItems[i].target);
108 | }
109 | return result;
110 | };
111 |
112 | /*
113 | * Backward compatibility for jQuery.browser: only when jQuery.browser is missing, define jQuery.uaMatch(ua) and set jQuery.browser[<detected name>] = true for navigator.userAgent.
114 | * This will be supported until firefox bug is fixed.
115 | */
116 | if (!jQuery.browser) {
117 | jQuery.uaMatch = function(ua) {
118 | ua = ua.toLowerCase();
119 |
120 | var match = /(chrome)[ \/]([\w.]+)/.exec(ua) || // patterns are tried in this order; the first match wins
121 | /(webkit)[ \/]([\w.]+)/.exec(ua) ||
122 | /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) ||
123 | /(msie) ([\w.]+)/.exec(ua) ||
124 | ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) || // mozilla is only tested when the UA lacks "compatible"
125 | [];
126 |
127 | return {
128 | browser: match[ 1 ] || "", // empty name when nothing matched
129 | version: match[ 2 ] || "0" // "0" when no version was captured
130 | };
131 | };
132 | jQuery.browser = {};
133 | jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true; // key is "" when no pattern matched
134 | }
135 |
--------------------------------------------------------------------------------
/docs/build/html/_static/clf_free_guidance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/clf_free_guidance.png
--------------------------------------------------------------------------------
/docs/build/html/_static/css/badge_only.css:
--------------------------------------------------------------------------------
1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions 
.rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff2
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff2
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/fontawesome-webfont.eot
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/fontawesome-webfont.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/fontawesome-webfont.woff
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/fontawesome-webfont.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/fontawesome-webfont.woff2
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-bold-italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/lato-bold-italic.woff
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-bold-italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/lato-bold-italic.woff2
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/lato-bold.woff
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/lato-bold.woff2
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-normal-italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/lato-normal-italic.woff
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-normal-italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/lato-normal-italic.woff2
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-normal.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/lato-normal.woff
--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-normal.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AssemblyAI-Community/MinImagen/502c6962fa55285a871995de716cdb0ed3e3d81e/docs/build/html/_static/css/fonts/lato-normal.woff2
--------------------------------------------------------------------------------
/docs/build/html/_static/doctools.js:
--------------------------------------------------------------------------------
1 | /*
2 | * doctools.js
3 | * ~~~~~~~~~~~
4 | *
5 | * Base JavaScript utilities for all Sphinx HTML documentation.
6 | *
7 | * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
8 | * :license: BSD, see LICENSE for details.
9 | *
10 | */
11 | "use strict";
12 |
13 | const _ready = (callback) => { // Run `callback` now if the document is no longer loading, otherwise on DOMContentLoaded.
14 | if (document.readyState !== "loading") { // any state other than "loading": invoke right away
15 | callback();
16 | } else {
17 | document.addEventListener("DOMContentLoaded", callback); // still loading: defer until the event fires
18 | }
19 | };
20 |
21 | /**
22 | * Highlight a given string on a node: the matched text is wrapped in a span carrying the given class name
23 | * (a tspan inside SVG, where a rect with that class is queued in addItems instead of being inserted here).
24 | */
25 | const _highlight = (node, addItems, text, className) => {
26 | if (node.nodeType === Node.TEXT_NODE) {
27 | const val = node.nodeValue;
28 | const parent = node.parentNode;
29 | const pos = val.toLowerCase().indexOf(text); // node text is lowercased; `text` is used as given (presumably already lowercase - confirm at call sites)
30 | if (
31 | pos >= 0 && // there is a match in this text node
32 | !parent.classList.contains(className) && // parent is not already a highlight wrapper
33 | !parent.classList.contains("nohighlight") // parent has not opted out
34 | ) {
35 | let span;
36 |
37 | const closestNode = parent.closest("body, svg, foreignObject"); // nearest of these ancestors (or parent itself)
38 | const isInSVG = closestNode && closestNode.matches("svg"); // truthy only when that nearest match is an svg element
39 | if (isInSVG) {
40 | span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); // no class is added to the tspan; the rect below carries it
41 | } else {
42 | span = document.createElement("span");
43 | span.classList.add(className);
44 | }
45 |
46 | span.appendChild(document.createTextNode(val.substr(pos, text.length))); // wrapper holds the matched slice in its original casing
47 | parent.insertBefore( // resulting order: node (text before match), span (match), new text node (text after match)
48 | span,
49 | parent.insertBefore(
50 | document.createTextNode(val.substr(pos + text.length)), // remainder after the match
51 | node.nextSibling
52 | )
53 | );
54 | node.nodeValue = val.substr(0, pos); // original node keeps only the text before the match
55 |
56 | if (isInSVG) {
57 | const rect = document.createElementNS(
58 | "http://www.w3.org/2000/svg",
59 | "rect"
60 | );
61 | const bbox = parent.getBBox();
62 | rect.x.baseVal.value = bbox.x; // rect copies the position and size of the parent's bounding box
63 | rect.y.baseVal.value = bbox.y;
64 | rect.width.baseVal.value = bbox.width;
65 | rect.height.baseVal.value = bbox.height;
66 | rect.setAttribute("class", className);
67 | addItems.push({ parent: parent, target: rect }); // queued only; this function does not insert the rect
68 | }
69 | }
70 | } else if (node.matches && !node.matches("button, select, textarea")) { // recurse unless the node lacks matches() or is a button/select/textarea
71 | node.childNodes.forEach((el) => _highlight(el, addItems, text, className));
72 | }
73 | };
74 | const _highlightText = (thisNode, text, className) => { // Highlight `text` below `thisNode`, then insert the items queued during the walk.
75 | let addItems = []; // filled by _highlight with { parent, target } entries
76 | _highlight(thisNode, addItems, text, className);
77 | addItems.forEach((obj) =>
78 | obj.parent.insertAdjacentElement("beforebegin", obj.target) // each target is placed immediately before its parent
79 | );
80 | };
81 |
82 | /**
83 | * Small JavaScript module for the documentation.
84 | */
85 | const Documentation = {
86 | init: () => {
87 | Documentation.highlightSearchWords();
88 | Documentation.initDomainIndexTable();
89 | Documentation.initOnKeyListeners();
90 | },
91 |
92 | /**
93 | * i18n support
94 | */
95 | TRANSLATIONS: {},
96 | PLURAL_EXPR: (n) => (n === 1 ? 0 : 1),
97 | LOCALE: "unknown",
98 |
99 | // gettext and ngettext don't access `this` so that the functions
100 | // can safely be bound to a different name (_ = Documentation.gettext)
101 | gettext: (string) => {
102 | const translated = Documentation.TRANSLATIONS[string];
103 | switch (typeof translated) {
104 | case "undefined":
105 | return string; // no translation
106 | case "string":
107 | return translated; // translation exists
108 | default:
109 | return translated[0]; // (singular, plural) translation tuple exists
110 | }
111 | },
112 |
113 | ngettext: (singular, plural, n) => {
114 | const translated = Documentation.TRANSLATIONS[singular];
115 | if (typeof translated !== "undefined")
116 | return translated[Documentation.PLURAL_EXPR(n)];
117 | return n === 1 ? singular : plural;
118 | },
119 |
120 | addTranslations: (catalog) => {
121 | Object.assign(Documentation.TRANSLATIONS, catalog.messages);
122 | Documentation.PLURAL_EXPR = new Function(
123 | "n",
124 | `return (${catalog.plural_expr})`
125 | );
126 | Documentation.LOCALE = catalog.locale;
127 | },
128 |
129 | /**
130 | * highlight the search words provided in the url in the text
131 | */
132 | highlightSearchWords: () => {
133 | const highlight =
134 | new URLSearchParams(window.location.search).get("highlight") || "";
135 | const terms = highlight.toLowerCase().split(/\s+/).filter(x => x);
136 | if (terms.length === 0) return; // nothing to do
137 |
138 | // There should never be more than one element matching "div.body"
139 | const divBody = document.querySelectorAll("div.body");
140 | const body = divBody.length ? divBody[0] : document.querySelector("body");
141 | window.setTimeout(() => {
142 | terms.forEach((term) => _highlightText(body, term, "highlighted"));
143 | }, 10);
144 |
145 | const searchBox = document.getElementById("searchbox");
146 | if (searchBox === null) return;
147 | searchBox.appendChild(
148 | document
149 | .createRange()
150 | .createContextualFragment(
151 | '