├── image-1.jpeg ├── image-2.jpeg ├── image-3.jpeg ├── environment.yml ├── README.md ├── .gitignore ├── LICENSE └── inpaint.ipynb /image-1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machinelearnear/openai-glide-text2im/main/image-1.jpeg -------------------------------------------------------------------------------- /image-2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machinelearnear/openai-glide-text2im/main/image-2.jpeg -------------------------------------------------------------------------------- /image-3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machinelearnear/openai-glide-text2im/main/image-3.jpeg -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | # To use: 2 | # $ conda env create -f environment.yml 3 | # $ conda activate machinelearnear-glide-text2im 4 | name: machinelearnear-glide-text2im 5 | dependencies: 6 | - python=3.7 7 | - pip 8 | - nb_conda_kernels 9 | - ipykernel 10 | - ipywidgets 11 | - gh 12 | - pip: 13 | - git+https://github.com/openai/glide-text2im 14 | - numpy 15 | - PyYAML 16 | # - -r file:requirements.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GLIDE 2 | 3 | This is a sample repo reproducing the official codebase for running the small, filtered-data GLIDE model from [GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models](https://arxiv.org/abs/2112.10741). 4 | 5 | For more information, please see the original repo here: https://github.com/openai/glide-text2im/. 
6 | 7 | ![Figure-1](image-1.jpeg) 8 | ![Figure-2](image-2.jpeg) 9 | ![Figure-3](image-3.jpeg) 10 | 11 | ## Watch YouTube Explainer Video 12 | [![GLIDE: Generá y editá imágenes en segundos en base a lo que escribis (+ Repo)](https://img.youtube.com/vi/WG20CnktPbk/0.jpg)](https://www.youtube.com/watch?v=WG20CnktPbk) 13 | 14 | ## How to run 15 | 16 | * The [text2im](notebooks/text2im.ipynb) [![Open In Studio Lab](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/machinelearnear/openai-glide-text2im/blob/main/text2im.ipynb) notebook shows how to use GLIDE (filtered) with classifier-free guidance to produce images conditioned on text prompts. 17 | 18 | * The [inpaint](inpaint.ipynb) [![Open In Studio Lab](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/machinelearnear/openai-glide-text2im/blob/main/inpaint.ipynb) notebook shows how to use GLIDE (filtered) to fill in a masked region of an image, conditioned on a text prompt. 19 | 20 | * The [clip_guided](notebooks/clip_guided.ipynb) [![Open In Studio Lab](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/machinelearnear/openai-glide-text2im/blob/main/clip-guided.ipynb) notebook shows how to use GLIDE (filtered) + a filtered noise-aware CLIP model to produce images conditioned on text prompts. 
21 | 22 | ## References 23 | ``` 24 | @misc{nichol2021glide, 25 | title={GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models}, 26 | author={Alex Nichol and Prafulla Dhariwal and Aditya Ramesh and Pranav Shyam and Pamela Mishkin and Bob McGrew and Ilya Sutskever and Mark Chen}, 27 | year={2021}, 28 | eprint={2112.10741}, 29 | archivePrefix={arXiv}, 30 | primaryClass={cs.CV} 31 | } 32 | ``` 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /inpaint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "[![Open In Studio Lab](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/machinelearnear/openai-glide-text2im/blob/main/inpaint.ipynb)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models\n", 15 | "- https://arxiv.org/abs/2112.10741" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Install dependencies & setup notebook" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# Run this line in Colab to install the 
package if it is\n", 32 | "# not already installed.\n", 33 | "# !pip install git+https://github.com/openai/glide-text2im" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "from typing import Tuple\n", 43 | "\n", 44 | "from IPython.display import display\n", 45 | "from PIL import Image\n", 46 | "import numpy as np\n", 47 | "import torch as th\n", 48 | "import torch.nn.functional as F\n", 49 | "\n", 50 | "from glide_text2im.download import load_checkpoint\n", 51 | "from glide_text2im.model_creation import (\n", 52 | " create_model_and_diffusion,\n", 53 | " model_and_diffusion_defaults,\n", 54 | " model_and_diffusion_defaults_upsampler\n", 55 | ")" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "# This notebook supports both CPU and GPU.\n", 65 | "# On CPU, generating one sample may take on the order of 20 minutes.\n", 66 | "# On a GPU, it should be under a minute.\n", 67 | "\n", 68 | "has_cuda = th.cuda.is_available()\n", 69 | "device = th.device('cpu' if not has_cuda else 'cuda')" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "application/vnd.jupyter.widget-view+json": { 80 | "model_id": "83f22a4d88e7456cb800b2a62c37571b", 81 | "version_major": 2, 82 | "version_minor": 0 83 | }, 84 | "text/plain": [ 85 | " 0%| | 0.00/1.54G [00:00 Tuple[th.Tensor, th.Tensor]:\n", 169 | " pil_img = Image.open(path).convert('RGB')\n", 170 | " pil_img = pil_img.resize((size, size), resample=Image.BICUBIC)\n", 171 | " img = np.array(pil_img)\n", 172 | " return th.from_numpy(img)[None].permute(0, 3, 1, 2).float() / 127.5 - 1" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "## Sampling parameters" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | 
"execution_count": 7, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "image/png": "\n", 190 | "text/plain": [ 191 | "" 192 | ] 193 | }, 194 | "metadata": {}, 195 | "output_type": "display_data" 196 | } 197 | ], 198 | "source": [ 199 | "# Sampling parameters\n", 200 | "prompt = \"a corgi in a field\"\n", 201 | "batch_size = 1\n", 202 | "guidance_scale = 5.0\n", 203 | "\n", 204 | "# Tune this parameter to control the sharpness of 256x256 images.\n", 205 | "# A value of 1.0 is sharper, but sometimes results in grainy artifacts.\n", 206 | "upsample_temp = 0.997\n", 207 | "\n", 208 | "# Source image we are inpainting\n", 209 | "source_image_256 = read_image('grass.png', size=256)\n", 210 | "source_image_64 = read_image('grass.png', size=64)\n", 211 | "\n", 212 | "# The mask should always be a boolean 64x64 mask, and then we\n", 213 | "# can upsample it for the second stage.\n", 214 | "source_mask_64 = th.ones_like(source_image_64)[:, :1]\n", 215 | "source_mask_64[:, :, 20:] = 0\n", 216 | "source_mask_256 = F.interpolate(source_mask_64, (256, 256), mode='nearest')\n", 217 | "\n", 218 | "# Visualize the image we are inpainting\n", 219 | "show_images(source_image_256 * source_mask_256)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "## Sample from the base model" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 8, 232 | "metadata": {}, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "application/vnd.jupyter.widget-view+json": { 237 | "model_id": "a9f058a7883048d6982f52a5c95ee3dc", 238 | "version_major": 2, 239 | "version_minor": 0 240 | }, 241 | "text/plain": [ 242 | " 0%| | 0/100 [00:00" 253 | ] 254 | }, 255 | "metadata": {}, 256 | "output_type": "display_data" 257 | } 258 | ], 259 | "source": [ 260 | "# Create the text tokens to feed to the model.\n", 261 | "tokens = model.tokenizer.encode(prompt)\n", 262 | "tokens, mask = 
model.tokenizer.padded_tokens_and_mask(\n", 263 | " tokens, options['text_ctx']\n", 264 | ")\n", 265 | "\n", 266 | "# Create the classifier-free guidance tokens (empty)\n", 267 | "full_batch_size = batch_size * 2\n", 268 | "uncond_tokens, uncond_mask = model.tokenizer.padded_tokens_and_mask(\n", 269 | " [], options['text_ctx']\n", 270 | ")\n", 271 | "\n", 272 | "# Pack the tokens together into model kwargs.\n", 273 | "model_kwargs = dict(\n", 274 | " tokens=th.tensor(\n", 275 | " [tokens] * batch_size + [uncond_tokens] * batch_size, device=device\n", 276 | " ),\n", 277 | " mask=th.tensor(\n", 278 | " [mask] * batch_size + [uncond_mask] * batch_size,\n", 279 | " dtype=th.bool,\n", 280 | " device=device,\n", 281 | " ),\n", 282 | "\n", 283 | " # Masked inpainting image\n", 284 | " inpaint_image=(source_image_64 * source_mask_64).repeat(full_batch_size, 1, 1, 1).to(device),\n", 285 | " inpaint_mask=source_mask_64.repeat(full_batch_size, 1, 1, 1).to(device),\n", 286 | ")\n", 287 | "\n", 288 | "# Create a classifier-free guidance sampling function\n", 289 | "def model_fn(x_t, ts, **kwargs):\n", 290 | " half = x_t[: len(x_t) // 2]\n", 291 | " combined = th.cat([half, half], dim=0)\n", 292 | " model_out = model(combined, ts, **kwargs)\n", 293 | " eps, rest = model_out[:, :3], model_out[:, 3:]\n", 294 | " cond_eps, uncond_eps = th.split(eps, len(eps) // 2, dim=0)\n", 295 | " half_eps = uncond_eps + guidance_scale * (cond_eps - uncond_eps)\n", 296 | " eps = th.cat([half_eps, half_eps], dim=0)\n", 297 | " return th.cat([eps, rest], dim=1)\n", 298 | "\n", 299 | "def denoised_fn(x_start):\n", 300 | " # Force the model to have the exact right x_start predictions\n", 301 | " # for the part of the image which is known.\n", 302 | " return (\n", 303 | " x_start * (1 - model_kwargs['inpaint_mask'])\n", 304 | " + model_kwargs['inpaint_image'] * model_kwargs['inpaint_mask']\n", 305 | " )\n", 306 | "\n", 307 | "# Sample from the base model.\n", 308 | "model.del_cache()\n", 309 | 
"samples = diffusion.p_sample_loop(\n", 310 | " model_fn,\n", 311 | " (full_batch_size, 3, options[\"image_size\"], options[\"image_size\"]),\n", 312 | " device=device,\n", 313 | " clip_denoised=True,\n", 314 | " progress=True,\n", 315 | " model_kwargs=model_kwargs,\n", 316 | " cond_fn=None,\n", 317 | " denoised_fn=denoised_fn,\n", 318 | ")[:batch_size]\n", 319 | "model.del_cache()\n", 320 | "\n", 321 | "# Show the output\n", 322 | "show_images(samples)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "## Upsample the 64x64 samples" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 9, 335 | "metadata": {}, 336 | "outputs": [ 337 | { 338 | "data": { 339 | "application/vnd.jupyter.widget-view+json": { 340 | "model_id": "2b276c29d8054f41a2e2a6913c50cb35", 341 | "version_major": 2, 342 | "version_minor": 0 343 | }, 344 | "text/plain": [ 345 | " 0%| | 0/27 [00:00" 356 | ] 357 | }, 358 | "metadata": {}, 359 | "output_type": "display_data" 360 | } 361 | ], 362 | "source": [ 363 | "tokens = model_up.tokenizer.encode(prompt)\n", 364 | "tokens, mask = model_up.tokenizer.padded_tokens_and_mask(\n", 365 | " tokens, options_up['text_ctx']\n", 366 | ")\n", 367 | "\n", 368 | "# Create the model conditioning dict.\n", 369 | "model_kwargs = dict(\n", 370 | " # Low-res image to upsample.\n", 371 | " low_res=((samples+1)*127.5).round()/127.5 - 1,\n", 372 | "\n", 373 | " # Text tokens\n", 374 | " tokens=th.tensor(\n", 375 | " [tokens] * batch_size, device=device\n", 376 | " ),\n", 377 | " mask=th.tensor(\n", 378 | " [mask] * batch_size,\n", 379 | " dtype=th.bool,\n", 380 | " device=device,\n", 381 | " ),\n", 382 | "\n", 383 | " # Masked inpainting image.\n", 384 | " inpaint_image=(source_image_256 * source_mask_256).repeat(batch_size, 1, 1, 1).to(device),\n", 385 | " inpaint_mask=source_mask_256.repeat(batch_size, 1, 1, 1).to(device),\n", 386 | ")\n", 387 | "\n", 388 | "def denoised_fn(x_start):\n", 389 | 
" # Force the model to have the exact right x_start predictions\n", 390 | " # for the part of the image which is known.\n", 391 | " return (\n", 392 | " x_start * (1 - model_kwargs['inpaint_mask'])\n", 393 | " + model_kwargs['inpaint_image'] * model_kwargs['inpaint_mask']\n", 394 | " )\n", 395 | "\n", 396 | "# Sample from the upsampler model.\n", 397 | "model_up.del_cache()\n", 398 | "up_shape = (batch_size, 3, options_up[\"image_size\"], options_up[\"image_size\"])\n", 399 | "up_samples = diffusion_up.p_sample_loop(\n", 400 | " model_up,\n", 401 | " up_shape,\n", 402 | " noise=th.randn(up_shape, device=device) * upsample_temp,\n", 403 | " device=device,\n", 404 | " clip_denoised=True,\n", 405 | " progress=True,\n", 406 | " model_kwargs=model_kwargs,\n", 407 | " cond_fn=None,\n", 408 | " denoised_fn=denoised_fn,\n", 409 | ")[:batch_size]\n", 410 | "model_up.del_cache()\n", 411 | "\n", 412 | "# Show the output\n", 413 | "show_images(up_samples)" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": null, 419 | "metadata": {}, 420 | "outputs": [], 421 | "source": [ 422 | "from glide_text2im.clip.model_creation import create_clip_model" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "metadata": {}, 429 | "outputs": [], 430 | "source": [ 431 | "## Create CLIP model\n", 432 | "clip_model = create_clip_model(device=device)\n", 433 | "clip_model.image_encoder.load_state_dict(load_checkpoint('clip/image-enc', device))\n", 434 | "clip_model.text_encoder.load_state_dict(load_checkpoint('clip/text-enc', device))" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": null, 440 | "metadata": {}, 441 | "outputs": [], 442 | "source": [ 443 | "##############################\n", 444 | "# Sample from the base model #\n", 445 | "##############################\n", 446 | "\n", 447 | "# Create the text tokens to feed to the model.\n", 448 | "tokens = model.tokenizer.encode(prompt)\n", 449 | "tokens, mask 
= model.tokenizer.padded_tokens_and_mask(\n", 450 | " tokens, options['text_ctx']\n", 451 | ")\n", 452 | "\n", 453 | "# Pack the tokens together into model kwargs.\n", 454 | "model_kwargs = dict(\n", 455 | " tokens=th.tensor([tokens] * batch_size, device=device),\n", 456 | " mask=th.tensor([mask] * batch_size, dtype=th.bool, device=device),\n", 457 | ")\n", 458 | "\n", 459 | "# Setup guidance function for CLIP model.\n", 460 | "cond_fn = clip_model.cond_fn([prompt] * batch_size, guidance_scale)\n", 461 | "\n", 462 | "# Sample from the base model.\n", 463 | "model.del_cache()\n", 464 | "samples = diffusion.p_sample_loop(\n", 465 | " model,\n", 466 | " (batch_size, 3, options[\"image_size\"], options[\"image_size\"]),\n", 467 | " device=device,\n", 468 | " clip_denoised=True,\n", 469 | " progress=True,\n", 470 | " model_kwargs=model_kwargs,\n", 471 | " cond_fn=cond_fn,\n", 472 | ")\n", 473 | "model.del_cache()\n", 474 | "\n", 475 | "# Show the output\n", 476 | "show_images(samples)\n", 477 | "\n", 478 | "##############################\n", 479 | "# Upsample the 64x64 samples #\n", 480 | "##############################\n", 481 | "\n", 482 | "tokens = model_up.tokenizer.encode(prompt)\n", 483 | "tokens, mask = model_up.tokenizer.padded_tokens_and_mask(\n", 484 | " tokens, options_up['text_ctx']\n", 485 | ")\n", 486 | "\n", 487 | "# Create the model conditioning dict.\n", 488 | "model_kwargs = dict(\n", 489 | " # Low-res image to upsample.\n", 490 | " low_res=((samples+1)*127.5).round()/127.5 - 1,\n", 491 | "\n", 492 | " # Text tokens\n", 493 | " tokens=th.tensor(\n", 494 | " [tokens] * batch_size, device=device\n", 495 | " ),\n", 496 | " mask=th.tensor(\n", 497 | " [mask] * batch_size,\n", 498 | " dtype=th.bool,\n", 499 | " device=device,\n", 500 | " ),\n", 501 | ")\n", 502 | "\n", 503 | "# Sample from the upsampler model.\n", 504 | "model_up.del_cache()\n", 505 | "up_shape = (batch_size, 3, options_up[\"image_size\"], options_up[\"image_size\"])\n", 506 | "up_samples = 
diffusion_up.ddim_sample_loop(\n", 507 | " model_up,\n", 508 | " up_shape,\n", 509 | " noise=th.randn(up_shape, device=device) * upsample_temp,\n", 510 | " device=device,\n", 511 | " clip_denoised=True,\n", 512 | " progress=True,\n", 513 | " model_kwargs=model_kwargs,\n", 514 | " cond_fn=None,\n", 515 | ")[:batch_size]\n", 516 | "model_up.del_cache()\n", 517 | "\n", 518 | "# Show the output\n", 519 | "show_images(up_samples)" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": null, 525 | "metadata": {}, 526 | "outputs": [], 527 | "source": [ 528 | "##############################\n", 529 | "# Sample from the base model #\n", 530 | "##############################\n", 531 | "\n", 532 | "# Create the text tokens to feed to the model.\n", 533 | "tokens = model.tokenizer.encode(prompt)\n", 534 | "tokens, mask = model.tokenizer.padded_tokens_and_mask(\n", 535 | " tokens, options['text_ctx']\n", 536 | ")\n", 537 | "\n", 538 | "# Create the classifier-free guidance tokens (empty)\n", 539 | "full_batch_size = batch_size * 2\n", 540 | "uncond_tokens, uncond_mask = model.tokenizer.padded_tokens_and_mask(\n", 541 | " [], options['text_ctx']\n", 542 | ")\n", 543 | "\n", 544 | "# Pack the tokens together into model kwargs.\n", 545 | "model_kwargs = dict(\n", 546 | " tokens=th.tensor(\n", 547 | " [tokens] * batch_size + [uncond_tokens] * batch_size, device=device\n", 548 | " ),\n", 549 | " mask=th.tensor(\n", 550 | " [mask] * batch_size + [uncond_mask] * batch_size,\n", 551 | " dtype=th.bool,\n", 552 | " device=device,\n", 553 | " ),\n", 554 | "\n", 555 | " # Masked inpainting image\n", 556 | " inpaint_image=(source_image_64 * source_mask_64).repeat(full_batch_size, 1, 1, 1).to(device),\n", 557 | " inpaint_mask=source_mask_64.repeat(full_batch_size, 1, 1, 1).to(device),\n", 558 | ")\n", 559 | "\n", 560 | "# Create a classifier-free guidance sampling function\n", 561 | "def model_fn(x_t, ts, **kwargs):\n", 562 | " half = x_t[: len(x_t) // 2]\n", 563 | " 
combined = th.cat([half, half], dim=0)\n", 564 | " model_out = model(combined, ts, **kwargs)\n", 565 | " eps, rest = model_out[:, :3], model_out[:, 3:]\n", 566 | " cond_eps, uncond_eps = th.split(eps, len(eps) // 2, dim=0)\n", 567 | " half_eps = uncond_eps + guidance_scale * (cond_eps - uncond_eps)\n", 568 | " eps = th.cat([half_eps, half_eps], dim=0)\n", 569 | " return th.cat([eps, rest], dim=1)\n", 570 | "\n", 571 | "def denoised_fn(x_start):\n", 572 | " # Force the model to have the exact right x_start predictions\n", 573 | " # for the part of the image which is known.\n", 574 | " return (\n", 575 | " x_start * (1 - model_kwargs['inpaint_mask'])\n", 576 | " + model_kwargs['inpaint_image'] * model_kwargs['inpaint_mask']\n", 577 | " )\n", 578 | "\n", 579 | "# Sample from the base model.\n", 580 | "model.del_cache()\n", 581 | "samples = diffusion.p_sample_loop(\n", 582 | " model_fn,\n", 583 | " (full_batch_size, 3, options[\"image_size\"], options[\"image_size\"]),\n", 584 | " device=device,\n", 585 | " clip_denoised=True,\n", 586 | " progress=True,\n", 587 | " model_kwargs=model_kwargs,\n", 588 | " cond_fn=None,\n", 589 | " denoised_fn=denoised_fn,\n", 590 | ")[:batch_size]\n", 591 | "model.del_cache()\n", 592 | "\n", 593 | "# Show the output\n", 594 | "show_images(samples)" 595 | ] 596 | } 597 | ], 598 | "metadata": { 599 | "accelerator": "GPU", 600 | "interpreter": { 601 | "hash": "e7d6e62d90e7e85f9a0faa7f0b1d576302d7ae6108e9fe361594f8e1c8b05781" 602 | }, 603 | "kernelspec": { 604 | "display_name": "default:Python", 605 | "language": "python", 606 | "name": "conda-env-default-py" 607 | }, 608 | "language_info": { 609 | "codemirror_mode": { 610 | "name": "ipython", 611 | "version": 3 612 | }, 613 | "file_extension": ".py", 614 | "mimetype": "text/x-python", 615 | "name": "python", 616 | "nbconvert_exporter": "python", 617 | "pygments_lexer": "ipython3", 618 | "version": "3.9.7" 619 | } 620 | }, 621 | "nbformat": 4, 622 | "nbformat_minor": 4 623 | } 624 | 
--------------------------------------------------------------------------------