├── .github └── FUNDING.yml ├── DeepFloyd-IF-I-M-v1.0-8bit.ipynb ├── DeepFloyd-IF-I-M-v1.0-Image-Variation.ipynb ├── DeepFloyd-IF-I-M-v1.0-Image.ipynb ├── DeepFloyd-IF-I-M-v1.0-Inpainting.ipynb ├── DeepFloyd-IF-I-M-v1.0-T5Embedder.ipynb ├── DeepFloyd-IF-I-M-v1.0-core.ipynb ├── DeepFloyd-IF-I-M-v1.0-final.ipynb ├── DeepFloyd-IF-I-M-v1.0.ipynb ├── LICENSE └── README.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: camenduru 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: camenduru 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /DeepFloyd-IF-I-M-v1.0-8bit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github" 7 | }, 8 | "source": [ 9 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-8bit.ipynb)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "id": "DA4ASNpIvTzd" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "# 
https://huggingface.co/spaces/DeepFloyd/IF/blob/main/app.py modified\n", 21 | "\n", 22 | "!pip install -q huggingface_hub\n", 23 | "from huggingface_hub import login\n", 24 | "login()" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "id": "69qUFyBkwKs0" 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "!git clone -b 8bit https://github.com/camenduru/DeepFloyd-IF-hf\n", 36 | "%cd /content/DeepFloyd-IF-hf\n", 37 | "!pip install -r requirements.txt\n", 38 | "!python app.py" 39 | ] 40 | } 41 | ], 42 | "metadata": { 43 | "accelerator": "GPU", 44 | "colab": { 45 | "provenance": [] 46 | }, 47 | "gpuClass": "standard", 48 | "kernelspec": { 49 | "display_name": "Python 3", 50 | "name": "python3" 51 | }, 52 | "language_info": { 53 | "name": "python" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 0 58 | } 59 | -------------------------------------------------------------------------------- /DeepFloyd-IF-I-M-v1.0-Image-Variation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github" 7 | }, 8 | "source": [ 9 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-Image-Variation.ipynb)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "id": "DA4ASNpIvTzd" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "# https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/deepfloyd_if_free_tier_google_colab.ipynb modified\n", 21 | "\n", 22 | "!pip install -q -U diffusers~=0.16 transformers~=4.28 safetensors~=0.3 sentencepiece~=0.1 accelerate~=0.18 bitsandbytes~=0.38 torch~=2.0 huggingface_hub" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 
29 | "id": "69qUFyBkwKs0" 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "from huggingface_hub import login\n", 34 | "login()" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "import gc\n", 44 | "import torch\n", 45 | "\n", 46 | "def flush():\n", 47 | " gc.collect()\n", 48 | " torch.cuda.empty_cache()\n", 49 | "\n", 50 | "import requests\n", 51 | "\n", 52 | "url = \"https://i.kym-cdn.com/entries/icons/original/000/026/561/car.jpg\"\n", 53 | "response = requests.get(url)\n", 54 | "\n", 55 | "from PIL import Image\n", 56 | "from io import BytesIO\n", 57 | "\n", 58 | "original_image = Image.open(BytesIO(response.content)).convert(\"RGB\")\n", 59 | "original_image = original_image.resize((768, 512))\n", 60 | "\n", 61 | "from transformers import T5EncoderModel\n", 62 | "\n", 63 | "text_encoder = T5EncoderModel.from_pretrained(\n", 64 | " \"DeepFloyd/IF-I-XL-v1.0\",\n", 65 | " subfolder=\"text_encoder\", \n", 66 | " device_map=\"auto\", \n", 67 | " load_in_8bit=True, \n", 68 | " variant=\"8bit\"\n", 69 | ")\n", 70 | "\n", 71 | "from diffusers import IFImg2ImgPipeline\n", 72 | "\n", 73 | "pipe = IFImg2ImgPipeline.from_pretrained(\n", 74 | " \"DeepFloyd/IF-I-XL-v1.0\", \n", 75 | " text_encoder=text_encoder, \n", 76 | " unet=None, \n", 77 | " device_map=\"auto\"\n", 78 | ")\n", 79 | "\n", 80 | "prompt = \"anime style\"\n", 81 | "\n", 82 | "prompt_embeds, negative_embeds = pipe.encode_prompt(prompt)\n", 83 | "\n", 84 | "del text_encoder\n", 85 | "del pipe\n", 86 | "flush()\n", 87 | "\n", 88 | "pipe = IFImg2ImgPipeline.from_pretrained(\n", 89 | " \"DeepFloyd/IF-I-XL-v1.0\", \n", 90 | " text_encoder=None, \n", 91 | " variant=\"fp16\", \n", 92 | " torch_dtype=torch.float16, \n", 93 | " device_map=\"auto\"\n", 94 | ")\n", 95 | "\n", 96 | "generator = torch.Generator().manual_seed(0)\n", 97 | "image = pipe(\n", 98 | " image=original_image,\n", 99 | " prompt_embeds=prompt_embeds,\n", 
100 | " negative_prompt_embeds=negative_embeds, \n", 101 | " output_type=\"pt\",\n", 102 | " generator=generator,\n", 103 | ").images\n", 104 | "from diffusers.utils import pt_to_pil\n", 105 | "pil_image = pt_to_pil(image)\n", 106 | "pil_image[0]" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "del pipe\n", 116 | "flush()\n", 117 | "\n", 118 | "from diffusers import IFImg2ImgSuperResolutionPipeline\n", 119 | "\n", 120 | "pipe = IFImg2ImgSuperResolutionPipeline.from_pretrained(\n", 121 | " \"DeepFloyd/IF-II-L-v1.0\", \n", 122 | " text_encoder=None, \n", 123 | " variant=\"fp16\", \n", 124 | " torch_dtype=torch.float16, \n", 125 | " device_map=\"auto\"\n", 126 | ")\n", 127 | "\n", 128 | "image = pipe(\n", 129 | " image=image,\n", 130 | " original_image=original_image,\n", 131 | " prompt_embeds=prompt_embeds,\n", 132 | " negative_prompt_embeds=negative_embeds, \n", 133 | " generator=generator,\n", 134 | ").images[0]\n", 135 | "image" 136 | ] 137 | } 138 | ], 139 | "metadata": { 140 | "accelerator": "GPU", 141 | "colab": { 142 | "provenance": [] 143 | }, 144 | "gpuClass": "standard", 145 | "kernelspec": { 146 | "display_name": "Python 3", 147 | "name": "python3" 148 | }, 149 | "language_info": { 150 | "name": "python" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 0 155 | } 156 | -------------------------------------------------------------------------------- /DeepFloyd-IF-I-M-v1.0-Image.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github" 7 | }, 8 | "source": [ 9 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-Image.ipynb)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | 
"id": "DA4ASNpIvTzd" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "# https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/deepfloyd_if_free_tier_google_colab.ipynb modified\n", 21 | "\n", 22 | "!pip install -q -U diffusers~=0.16 transformers~=4.28 safetensors~=0.3 sentencepiece~=0.1 accelerate~=0.18 bitsandbytes~=0.38 torch~=2.0 huggingface_hub" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "id": "69qUFyBkwKs0" 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "from huggingface_hub import login\n", 34 | "login()" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "from transformers import T5EncoderModel\n", 44 | "\n", 45 | "text_encoder = T5EncoderModel.from_pretrained(\n", 46 | " \"DeepFloyd/IF-I-XL-v1.0\",\n", 47 | " subfolder=\"text_encoder\", \n", 48 | " device_map=\"auto\", \n", 49 | " load_in_8bit=True, \n", 50 | " variant=\"8bit\"\n", 51 | ")\n", 52 | "\n", 53 | "from diffusers import DiffusionPipeline\n", 54 | "\n", 55 | "pipe = DiffusionPipeline.from_pretrained(\n", 56 | " \"DeepFloyd/IF-I-XL-v1.0\", \n", 57 | " text_encoder=text_encoder, # pass the previously instantiated 8bit text encoder\n", 58 | " unet=None, \n", 59 | " device_map=\"auto\",\n", 60 | " safety_checker=None\n", 61 | ")\n", 62 | "\n", 63 | "prompt = 'a photograph of an astronaut riding a horse holding a sign that says \"Pixel\\'s in space\"'\n", 64 | "\n", 65 | "prompt_embeds, negative_embeds = pipe.encode_prompt(prompt)\n", 66 | "\n", 67 | "del text_encoder\n", 68 | "del pipe\n", 69 | "\n", 70 | "import gc\n", 71 | "import torch\n", 72 | "\n", 73 | "def flush():\n", 74 | " gc.collect()\n", 75 | " torch.cuda.empty_cache()" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "flush()\n", 85 | "\n", 86 | "pipe = 
DiffusionPipeline.from_pretrained(\n", 87 | " \"DeepFloyd/IF-I-XL-v1.0\", \n", 88 | " text_encoder=None, \n", 89 | " variant=\"fp16\", \n", 90 | " torch_dtype=torch.float16, \n", 91 | " device_map=\"auto\",\n", 92 | " safety_checker=None\n", 93 | ")\n", 94 | "\n", 95 | "generator = torch.Generator().manual_seed(1)\n", 96 | "\n", 97 | "image = pipe(\n", 98 | " prompt_embeds=prompt_embeds,\n", 99 | " negative_prompt_embeds=negative_embeds, \n", 100 | " output_type=\"pt\",\n", 101 | " generator=generator,\n", 102 | ").images\n", 103 | "\n", 104 | "from diffusers.utils import pt_to_pil\n", 105 | "\n", 106 | "pil_image = pt_to_pil(image)\n", 107 | "\n", 108 | "pil_image[0]" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "del pipe\n", 118 | "flush()\n", 119 | "\n", 120 | "pipe = DiffusionPipeline.from_pretrained(\n", 121 | " \"DeepFloyd/IF-II-L-v1.0\", \n", 122 | " text_encoder=None, # no use of text encoder => memory savings!\n", 123 | " variant=\"fp16\", \n", 124 | " torch_dtype=torch.float16, \n", 125 | " device_map=\"auto\",\n", 126 | " safety_checker=None\n", 127 | ")\n", 128 | "\n", 129 | "image = pipe(\n", 130 | " image=image, \n", 131 | " prompt_embeds=prompt_embeds, \n", 132 | " negative_prompt_embeds=negative_embeds, \n", 133 | " output_type=\"pt\",\n", 134 | " generator=generator,\n", 135 | ").images\n", 136 | "\n", 137 | "pil_image = pt_to_pil(image)\n", 138 | "\n", 139 | "pil_image[0]" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "del pipe\n", 149 | "flush()\n", 150 | "\n", 151 | "pipe = DiffusionPipeline.from_pretrained(\n", 152 | " \"stabilityai/stable-diffusion-x4-upscaler\", \n", 153 | " torch_dtype=torch.float16, \n", 154 | " device_map=\"auto\",\n", 155 | " safety_checker=None\n", 156 | ")\n", 157 | "\n", 158 | "pil_image = pipe(prompt, 
generator=generator, image=image).images\n", 159 | "\n", 160 | "pil_image[0]" 161 | ] 162 | } 163 | ], 164 | "metadata": { 165 | "accelerator": "GPU", 166 | "colab": { 167 | "provenance": [] 168 | }, 169 | "gpuClass": "standard", 170 | "kernelspec": { 171 | "display_name": "Python 3", 172 | "name": "python3" 173 | }, 174 | "language_info": { 175 | "name": "python" 176 | } 177 | }, 178 | "nbformat": 4, 179 | "nbformat_minor": 0 180 | } 181 | -------------------------------------------------------------------------------- /DeepFloyd-IF-I-M-v1.0-Inpainting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github" 7 | }, 8 | "source": [ 9 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-Inpainting.ipynb)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "id": "DA4ASNpIvTzd" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "# https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/deepfloyd_if_free_tier_google_colab.ipynb modified\n", 21 | "\n", 22 | "!pip install -q -U diffusers~=0.16 transformers~=4.28 safetensors~=0.3 sentencepiece~=0.1 accelerate~=0.18 bitsandbytes~=0.38 torch~=2.0 huggingface_hub" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "id": "69qUFyBkwKs0" 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "from huggingface_hub import login\n", 34 | "login()" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "import gc\n", 44 | "import torch\n", 45 | "\n", 46 | "def flush():\n", 47 | " gc.collect()\n", 48 | " torch.cuda.empty_cache()\n", 49 | "\n", 50 | "import requests\n", 51 
| "\n", 52 | "url = \"https://i.imgflip.com/5j6x75.jpg\"\n", 53 | "response = requests.get(url)\n", 54 | "\n", 55 | "from PIL import Image\n", 56 | "from io import BytesIO\n", 57 | "\n", 58 | "original_image = Image.open(BytesIO(response.content)).convert(\"RGB\")\n", 59 | "original_image = original_image.resize((512, 768))\n", 60 | "\n", 61 | "from huggingface_hub import hf_hub_download\n", 62 | "\n", 63 | "mask_image = hf_hub_download(\"diffusers/docs-images\", repo_type=\"dataset\", filename=\"if/sign_man_mask.png\")\n", 64 | "mask_image = Image.open(mask_image)\n", 65 | "\n", 66 | "from PIL import Image\n", 67 | "import numpy as np\n", 68 | "\n", 69 | "height = 64\n", 70 | "width = 64\n", 71 | "\n", 72 | "example_mask = np.zeros((height, width), dtype=np.uint8)\n", 73 | "\n", 74 | "# Set masked pixels to 255\n", 75 | "example_mask[20:30, 30:40] = 255\n", 76 | "\n", 77 | "# Make sure to create the image in mode 'L'\n", 78 | "# meaning single channel grayscale\n", 79 | "example_mask = Image.fromarray(example_mask, mode='L')\n", 80 | "\n", 81 | "\n", 82 | "from transformers import T5EncoderModel\n", 83 | "\n", 84 | "text_encoder = T5EncoderModel.from_pretrained(\n", 85 | " \"DeepFloyd/IF-I-XL-v1.0\",\n", 86 | " subfolder=\"text_encoder\", \n", 87 | " device_map=\"auto\", \n", 88 | " load_in_8bit=True, \n", 89 | " variant=\"8bit\"\n", 90 | ")\n", 91 | "\n", 92 | "from diffusers import IFInpaintingPipeline\n", 93 | "\n", 94 | "pipe = IFInpaintingPipeline.from_pretrained(\n", 95 | " \"DeepFloyd/IF-I-XL-v1.0\", \n", 96 | " text_encoder=text_encoder, \n", 97 | " unet=None, \n", 98 | " device_map=\"auto\"\n", 99 | ")\n", 100 | "\n", 101 | "prompt = 'the text, \"just stack more layers\"'\n", 102 | "\n", 103 | "prompt_embeds, negative_embeds = pipe.encode_prompt(prompt)\n", 104 | "\n", 105 | "del text_encoder\n", 106 | "del pipe\n", 107 | "flush()\n", 108 | "\n", 109 | "pipe = IFInpaintingPipeline.from_pretrained(\n", 110 | " \"DeepFloyd/IF-I-XL-v1.0\", \n", 111 | "
text_encoder=None, \n", 112 | " variant=\"fp16\", \n", 113 | " torch_dtype=torch.float16, \n", 114 | " device_map=\"auto\"\n", 115 | ")\n", 116 | "generator = torch.Generator().manual_seed(0)\n", 117 | "image = pipe(\n", 118 | " image=original_image,\n", 119 | " mask_image=mask_image,\n", 120 | " prompt_embeds=prompt_embeds,\n", 121 | " negative_prompt_embeds=negative_embeds, \n", 122 | " output_type=\"pt\",\n", 123 | " generator=generator,\n", 124 | ").images\n", 125 | "from diffusers.utils import pt_to_pil\n", 126 | "pil_image = pt_to_pil(image)\n", 127 | "pipe.watermarker.apply_watermark(pil_image, pipe.unet.config.sample_size)\n", 128 | "\n", 129 | "pil_image[0]" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "del pipe\n", 139 | "flush()\n", 140 | "\n", 141 | "from diffusers import IFInpaintingSuperResolutionPipeline\n", 142 | "\n", 143 | "pipe = IFInpaintingSuperResolutionPipeline.from_pretrained(\n", 144 | " \"DeepFloyd/IF-II-L-v1.0\", \n", 145 | " text_encoder=None, \n", 146 | " variant=\"fp16\", \n", 147 | " torch_dtype=torch.float16, \n", 148 | " device_map=\"auto\"\n", 149 | ")\n", 150 | "\n", 151 | "image = pipe(\n", 152 | " image=image,\n", 153 | " original_image=original_image,\n", 154 | " mask_image=mask_image,\n", 155 | " prompt_embeds=prompt_embeds,\n", 156 | " negative_prompt_embeds=negative_embeds, \n", 157 | " generator=generator,\n", 158 | ").images[0]\n", 159 | "image" 160 | ] 161 | } 162 | ], 163 | "metadata": { 164 | "accelerator": "GPU", 165 | "colab": { 166 | "provenance": [] 167 | }, 168 | "gpuClass": "standard", 169 | "kernelspec": { 170 | "display_name": "Python 3", 171 | "name": "python3" 172 | }, 173 | "language_info": { 174 | "name": "python" 175 | } 176 | }, 177 | "nbformat": 4, 178 | "nbformat_minor": 0 179 | } 180 | -------------------------------------------------------------------------------- /DeepFloyd-IF-I-M-v1.0-T5Embedder.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github" 7 | }, 8 | "source": [ 9 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-T5Embedder.ipynb)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "id": "DA4ASNpIvTzd" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "# https://www.kaggle.com/code/shonenkov/deepfloyd-if-4-3b-generator-of-pictures modified\n", 21 | "\n", 22 | "!pip install -q torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 torchtext==0.14.1 torchdata==0.5.1 --extra-index-url https://download.pytorch.org/whl/cu116 -U\n", 23 | "!pip install -q -U diffusers~=0.16 transformers~=4.28 safetensors~=0.3 sentencepiece~=0.1 accelerate~=0.18 bitsandbytes~=0.38 huggingface_hub" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "from transformers import T5EncoderModel\n", 33 | "from getpass import getpass\n", 34 | "hf_token = getpass(\"Hugging Face access token: \")\n", 35 | "\n", 36 | "text_encoder = T5EncoderModel.from_pretrained(\n", 37 | " \"DeepFloyd/IF-I-L-v1.0\",\n", 38 | " subfolder=\"text_encoder\", \n", 39 | " device_map=\"auto\", \n", 40 | " load_in_8bit=True, \n", 41 | " variant=\"8bit\",\n", 42 | " use_auth_token=hf_token\n", 43 | ")\n", 44 | "\n", 45 | "from diffusers import DiffusionPipeline\n", 46 | "\n", 47 | "pipe = DiffusionPipeline.from_pretrained(\n", 48 | " \"DeepFloyd/IF-I-L-v1.0\", \n", 49 | " text_encoder=text_encoder,\n", 50 | " unet=None, \n", 51 | " device_map=\"auto\",\n", 52 | " safety_checker=None,\n", 53 | " use_auth_token=hf_token\n", 54 | ")\n", 55 | "\n", 56 | "prompt = 'a photograph of an astronaut riding a horse holding a sign that says 
\"Pixel\\'s in space\"'\n", 57 | "prompt_embeds, negative_embeds = pipe.encode_prompt(prompt)\n", 58 | "\n", 59 | "import numpy as np\n", 60 | "prompt_embeds = prompt_embeds.cpu()\n", 61 | "negative_embeds = negative_embeds.cpu()\n", 62 | "np.save('prompt_embeds.npy', prompt_embeds)\n", 63 | "np.save('negative_embeds.npy', negative_embeds)" 64 | ] 65 | } 66 | ], 67 | "metadata": { 68 | "accelerator": "GPU", 69 | "colab": { 70 | "provenance": [] 71 | }, 72 | "gpuClass": "standard", 73 | "kernelspec": { 74 | "display_name": "Python 3", 75 | "name": "python3" 76 | }, 77 | "language_info": { 78 | "name": "python" 79 | } 80 | }, 81 | "nbformat": 4, 82 | "nbformat_minor": 0 83 | } 84 | -------------------------------------------------------------------------------- /DeepFloyd-IF-I-M-v1.0-core.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github" 7 | }, 8 | "source": [ 9 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-core.ipynb)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "id": "DA4ASNpIvTzd" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "# https://www.kaggle.com/code/shonenkov/deepfloyd-if-4-3b-generator-of-pictures modified\n", 21 | "\n", 22 | "!pip install -q torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 torchtext==0.14.1 torchdata==0.5.1 --extra-index-url https://download.pytorch.org/whl/cu116 -U\n", 23 | "!pip install -q xformers==0.0.16 triton==2.0.0 -U\n", 24 | "!pip install -q deepfloyd-if==1.0.1 \n", 25 | "!pip install -q git+https://github.com/openai/CLIP.git --no-deps\n", 26 | "!git clone https://huggingface.co/bakedpotat/prompts\n", 27 | "\n", 28 | "get_ipython().kernel.do_shutdown(True)" 29 | ] 30 | }, 31 | { 32 | 
"cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "id": "69qUFyBkwKs0" 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "import os\n", 40 | "os.environ['FORCE_MEM_EFFICIENT_ATTN'] = \"1\"\n", 41 | "import sys\n", 42 | "import random\n", 43 | "\n", 44 | "import torch\n", 45 | "import numpy as np\n", 46 | "\n", 47 | "from deepfloyd_if.modules import IFStageI, IFStageII, StableStageIII\n", 48 | "from getpass import getpass\n", 49 | "hf_token = getpass(\"Hugging Face access token: \")\n", 50 | "device = 'cuda:0'\n", 51 | "if_I = IFStageI('IF-I-L-v1.0', device=device, hf_token=hf_token)\n", 52 | "if_II = IFStageII('IF-II-L-v1.0', device=device, hf_token=hf_token)\n", 53 | "if_III = StableStageIII('stable-diffusion-x4-upscaler', device=device)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "prompts, t5_embs = [], []\n", 63 | "for prompt_idx in [1, 2, 3, 4]:\n", 64 | " prompt = open(f'/content/prompts/{str(prompt_idx).zfill(4)}.txt').read().strip()\n", 65 | " t5_numpy = np.load(f'/content/prompts/{str(prompt_idx).zfill(4)}.npy')\n", 66 | " t5_embs.append(torch.from_numpy(t5_numpy).unsqueeze(0))\n", 67 | " prompts.append(prompt)\n", 68 | "\n", 69 | "t5_embs = torch.cat(t5_embs).to(device)\n", 70 | "t5_embs.shape\n", 71 | "\n", 72 | "# Stage-I: 64px\n", 73 | "\n", 74 | "seed = 42\n", 75 | "\n", 76 | "stageI_generations, _meta = if_I.embeddings_to_image(\n", 77 | " t5_embs, seed=seed, batch_repeat=1,\n", 78 | " dynamic_thresholding_p=0.95,\n", 79 | " dynamic_thresholding_c=1.5,\n", 80 | " guidance_scale=7.0,\n", 81 | " sample_loop='ddpm',\n", 82 | " sample_timestep_respacing='smart50',\n", 83 | " image_size=64,\n", 84 | " aspect_ratio=\"1:1\",\n", 85 | " progress=True,\n", 86 | " disable_watermark=True,\n", 87 | ")\n", 88 | "pil_images_I = if_I.to_images(stageI_generations, disable_watermark=True)\n", 89 | "if_I.show(pil_images_I)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 
| "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# Stage-II: 64px --> 256 px\n", 99 | "\n", 100 | "stageII_generations, _meta = if_II.embeddings_to_image(\n", 101 | " stageI_generations,\n", 102 | " t5_embs, seed=seed, batch_repeat=1,\n", 103 | " dynamic_thresholding_p=0.95,\n", 104 | " dynamic_thresholding_c=1.0,\n", 105 | " aug_level=0.25,\n", 106 | " guidance_scale=4.0,\n", 107 | " image_scale=4.0,\n", 108 | " sample_loop='ddpm',\n", 109 | " sample_timestep_respacing='50',\n", 110 | " progress=True,\n", 111 | ")\n", 112 | "pil_images_II = if_II.to_images(stageII_generations, disable_watermark=True)\n", 113 | "if_II.show(pil_images_II)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "# Stage-III: 256px --> 1024px\n", 123 | "\n", 124 | "stageIII_generations = []\n", 125 | "for idx in range(len(stageII_generations)):\n", 126 | " if_III_kwargs = {}\n", 127 | " if_III_kwargs['prompt'] = prompts[idx:idx+1]\n", 128 | " if_III_kwargs['low_res'] = stageII_generations[idx:idx+1]\n", 129 | " if_III_kwargs['seed'] = seed\n", 130 | " if_III_kwargs['t5_embs'] = t5_embs[idx:idx+1]\n", 131 | " _stageIII_generations, _meta = if_III.embeddings_to_image(**if_III_kwargs)\n", 132 | " stageIII_generations.append(_stageIII_generations)\n", 133 | "\n", 134 | "stageIII_generations = torch.cat(stageIII_generations, 0)\n", 135 | "pil_images_III = if_III.to_images(stageIII_generations, disable_watermark=True)\n", 136 | "\n", 137 | "for idx in range(len(prompts)):\n", 138 | " pil_img, prompt = pil_images_III[idx], prompts[idx]\n", 139 | " pil_img.save(f'{idx}.png')\n", 140 | " if_I.show([pil_img],size=14)\n", 141 | " print(prompt, '\\n'*3)" 142 | ] 143 | } 144 | ], 145 | "metadata": { 146 | "accelerator": "GPU", 147 | "colab": { 148 | "provenance": [] 149 | }, 150 | "gpuClass": "standard", 151 | "kernelspec": { 152 | "display_name": "Python 3", 153 
| "name": "python3" 154 | }, 155 | "language_info": { 156 | "name": "python" 157 | } 158 | }, 159 | "nbformat": 4, 160 | "nbformat_minor": 0 161 | } 162 | -------------------------------------------------------------------------------- /DeepFloyd-IF-I-M-v1.0-final.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github" 7 | }, 8 | "source": [ 9 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-final.ipynb)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "id": "DA4ASNpIvTzd" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "# https://www.kaggle.com/code/shonenkov/deepfloyd-if-4-3b-generator-of-pictures modified\n", 21 | "\n", 22 | "!pip install -q torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 torchtext==0.14.1 torchdata==0.5.1 --extra-index-url https://download.pytorch.org/whl/cu116 -U\n", 23 | "!pip install -q xformers==0.0.16 triton==2.0.0 -U\n", 24 | "!pip install -q deepfloyd-if==1.0.1 \n", 25 | "!pip install -q git+https://github.com/openai/CLIP.git --no-deps\n", 26 | "# !git clone https://huggingface.co/bakedpotat/prompts\n", 27 | "!pip install -q -U diffusers~=0.16 transformers~=4.28 safetensors~=0.3 sentencepiece~=0.1 accelerate~=0.18 bitsandbytes~=0.38 huggingface_hub" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "get_ipython().kernel.do_shutdown(True)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "from transformers import T5EncoderModel\n", 46 | "from getpass import getpass\n", 47 | "hf_token = getpass(\"Hugging Face access token: \")\n", 48 | "\n", 49 | 
"text_encoder = T5EncoderModel.from_pretrained(\n", 50 | " \"DeepFloyd/IF-I-L-v1.0\",\n", 51 | " load_in_8bit=True,\n", 52 | " subfolder=\"text_encoder\",\n", 53 | " device_map=\"auto\",\n", 54 | " variant=\"8bit\",\n", 55 | " use_auth_token=hf_token\n", 56 | ")\n", 57 | "\n", 58 | "from diffusers import DiffusionPipeline\n", 59 | "\n", 60 | "pipe = DiffusionPipeline.from_pretrained(\n", 61 | " \"DeepFloyd/IF-I-L-v1.0\", \n", 62 | " text_encoder=text_encoder,\n", 63 | " unet=None, \n", 64 | " device_map=\"auto\",\n", 65 | " safety_checker=None,\n", 66 | " use_auth_token=hf_token\n", 67 | ")\n", 68 | "\n", 69 | "prompt = 'a photograph of an astronaut riding a horse holding a sign that says \"Pixel\\'s in space\"'\n", 70 | "prompt_embeds, negative_embeds = pipe.encode_prompt(prompt)\n", 71 | "\n", 72 | "import numpy as np\n", 73 | "prompt_embeds = prompt_embeds.cpu()\n", 74 | "negative_embeds = negative_embeds.cpu()\n", 75 | "np.save('prompt.npy', prompt_embeds)\n", 76 | "np.save('negative.npy', negative_embeds)\n", 77 | "\n", 78 | "get_ipython().kernel.do_shutdown(True)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "id": "69qUFyBkwKs0" 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "import os\n", 90 | "os.environ['FORCE_MEM_EFFICIENT_ATTN'] = \"1\"\n", 91 | "import sys\n", 92 | "import random\n", 93 | "\n", 94 | "import torch\n", 95 | "import numpy as np\n", 96 | "\n", 97 | "from deepfloyd_if.modules import IFStageI, IFStageII, StableStageIII\n", 98 | "from getpass import getpass\n", 99 | "hf_token = getpass(\"Hugging Face access token: \")\n", 100 | "device = 'cuda:0'\n", 101 | "if_I = IFStageI('IF-I-L-v1.0', device=device, hf_token=hf_token)\n", 102 | "if_II = IFStageII('IF-II-L-v1.0', device=device, hf_token=hf_token)\n", 103 | "if_III = StableStageIII('stable-diffusion-x4-upscaler', device=device)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | 
"source": [ 112 | "prompts, t5_embs = [], []\n", 113 | "prompt = 'a photograph of an astronaut riding a horse holding a sign that says \"Pixel\\'s in space\"'\n", 114 | "t5_numpy = np.load(f'/content/prompt.npy')\n", 115 | "t5_numpy = t5_numpy.reshape(77, 4096)\n", 116 | "t5_embs.append(torch.from_numpy(t5_numpy).unsqueeze(0))\n", 117 | "prompts.append(prompt)\n", 118 | "\n", 119 | "t5_embs = torch.cat(t5_embs).to(device)\n", 120 | "t5_embs.shape\n", 121 | "\n", 122 | "# Stage-I: 64px\n", 123 | "\n", 124 | "seed = 42\n", 125 | "\n", 126 | "stageI_generations, _meta = if_I.embeddings_to_image(\n", 127 | " t5_embs, seed=seed, batch_repeat=1,\n", 128 | " dynamic_thresholding_p=0.95,\n", 129 | " dynamic_thresholding_c=1.5,\n", 130 | " guidance_scale=7.0,\n", 131 | " sample_loop='ddpm',\n", 132 | " sample_timestep_respacing='smart50',\n", 133 | " image_size=64,\n", 134 | " aspect_ratio=\"1:1\",\n", 135 | " progress=True,\n", 136 | " disable_watermark=True,\n", 137 | ")\n", 138 | "pil_images_I = if_I.to_images(stageI_generations, disable_watermark=True)\n", 139 | "if_I.show(pil_images_I)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "# Stage-II: 64px --> 256 px\n", 149 | "\n", 150 | "stageII_generations, _meta = if_II.embeddings_to_image(\n", 151 | " stageI_generations,\n", 152 | " t5_embs, seed=seed, batch_repeat=1,\n", 153 | " dynamic_thresholding_p=0.95,\n", 154 | " dynamic_thresholding_c=1.0,\n", 155 | " aug_level=0.25,\n", 156 | " guidance_scale=4.0,\n", 157 | " image_scale=4.0,\n", 158 | " sample_loop='ddpm',\n", 159 | " sample_timestep_respacing='50',\n", 160 | " progress=True,\n", 161 | ")\n", 162 | "pil_images_II = if_II.to_images(stageII_generations, disable_watermark=True)\n", 163 | "if_II.show(pil_images_II)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | 
"# Stage-III: 256px --> 1024px\n", 173 | "\n", 174 | "stageIII_generations = []\n", 175 | "for idx in range(len(stageII_generations)):\n", 176 | " if_III_kwargs = {}\n", 177 | " if_III_kwargs['prompt'] = prompts[idx:idx+1]\n", 178 | " if_III_kwargs['low_res'] = stageII_generations[idx:idx+1]\n", 179 | " if_III_kwargs['seed'] = seed\n", 180 | " if_III_kwargs['t5_embs'] = t5_embs[idx:idx+1]\n", 181 | " _stageIII_generations, _meta = if_III.embeddings_to_image(**if_III_kwargs)\n", 182 | " stageIII_generations.append(_stageIII_generations)\n", 183 | "\n", 184 | "stageIII_generations = torch.cat(stageIII_generations, 0)\n", 185 | "pil_images_III = if_III.to_images(stageIII_generations, disable_watermark=True)\n", 186 | "\n", 187 | "for idx in range(len(prompts)):\n", 188 | " pil_img, prompt = pil_images_III[idx], prompts[idx]\n", 189 | " pil_img.save(f'{idx}.png')\n", 190 | " if_I.show([pil_img],size=14)\n", 191 | " print(prompt, '\\n'*3)" 192 | ] 193 | } 194 | ], 195 | "metadata": { 196 | "accelerator": "GPU", 197 | "colab": { 198 | "provenance": [] 199 | }, 200 | "gpuClass": "standard", 201 | "kernelspec": { 202 | "display_name": "Python 3", 203 | "name": "python3" 204 | }, 205 | "language_info": { 206 | "name": "python" 207 | } 208 | }, 209 | "nbformat": 4, 210 | "nbformat_minor": 0 211 | } 212 | -------------------------------------------------------------------------------- /DeepFloyd-IF-I-M-v1.0.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github" 7 | }, 8 | "source": [ 9 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0.ipynb)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "id": "DA4ASNpIvTzd" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "# 
https://huggingface.co/spaces/DeepFloyd/IF/blob/main/app.py modified\n", 21 | "\n", 22 | "!pip install -q huggingface_hub\n", 23 | "from huggingface_hub import login\n", 24 | "login()" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "id": "69qUFyBkwKs0" 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "!git clone -b dev https://github.com/camenduru/DeepFloyd-IF-hf\n", 36 | "%cd /content/DeepFloyd-IF-hf\n", 37 | "!pip install -r requirements.txt\n", 38 | "!python app.py" 39 | ] 40 | } 41 | ], 42 | "metadata": { 43 | "accelerator": "GPU", 44 | "colab": { 45 | "provenance": [] 46 | }, 47 | "gpuClass": "standard", 48 | "kernelspec": { 49 | "display_name": "Python 3", 50 | "name": "python3" 51 | }, 52 | "language_info": { 53 | "name": "python" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 0 58 | } 59 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to <https://unlicense.org> 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 🐣 Please follow me for new updates https://twitter.com/camenduru
2 | 🔥 Please join our Discord server https://discord.gg/k5BwmmvJJU
3 | 🥳 Please join my Patreon community https://patreon.com/camenduru
4 | 5 | # 🚦 WIP 🚦 6 | 7 | ## 🦒 Colab 8 | 9 | | Colab | Version 10 | | --- | --- | 11 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-final.ipynb) | Free Colab T4 Image Gen (Core Lib) (Recommended) 12 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0.ipynb) | Pro Colab A100 (Diffusers Lib) (Gradio) 13 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-Image.ipynb) | Free Colab T4 Image Gen (Diffusers Lib) 14 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-Image-Variation.ipynb) | Free Colab T4 Image Variation Gen (Diffusers Lib) 15 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-Inpainting.ipynb) | Free Colab T4 Inpainting (Diffusers Lib) 16 | 17 | 18 | ## 🦆 Kaggle 19 | 20 | | Kaggle | Version 21 | | --- | --- | 22 | [![Open In Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://www.kaggle.com/code/camenduru/deep-floyd-if-kaggle-ipynb) | Free Colab T4 Image Gen (Core Lib) 23 | 24 | ## Tutorial 25 | 26 | ### Free Colab (T4) 27 | https://www.youtube.com/watch?v=4zglOVF0jSk 28 | 29 | ### Pro Colab (A100) 30 | https://www.youtube.com/watch?v=KRcXonSwa50 31 | 32 | ## Main Repo 33 | https://github.com/deep-floyd/IF 34 | 35 | ## Paper 36 | https://arxiv.org/abs/2205.11487 37 | 38 | ## Code License 39 | https://github.com/deep-floyd/IF/blob/main/LICENSE 40 | 41 | ## Model License 42 | 
https://github.com/deep-floyd/IF/blob/main/LICENSE-MODEL 43 | 44 | 45 | ## Output 46 | ### Stage-I: 64px 47 | ![stage1](https://user-images.githubusercontent.com/54370274/235267686-bb20a748-077e-4d6d-9612-9dcb706d7f6f.png) 48 | 49 | ### Stage-II: 64px --> 256px 50 | ![stage2](https://user-images.githubusercontent.com/54370274/235267689-1131d701-6719-4d16-85c0-70f2859d7e58.png) 51 | 52 | ### Stage-III: 256px --> 1024px 53 | a teddy bear looking curiously in the mirror, seeing a cat. 54 | ![stage3-1](https://user-images.githubusercontent.com/54370274/235267830-44158274-804e-4717-9528-7c024f3644e4.png) 55 | 56 | modern digital portrait of antique statue of venus in bikini only in style of cyberpunk glitchcore synthwave art, award prize winning best art masterpiece, reddit top art of all time, trending on artstation, minimalism, neon lady woman, noir glitch 57 | ![stage3-2](https://user-images.githubusercontent.com/54370274/235267832-62e940e2-3381-451f-923f-9f525b434cc7.png) 58 | 59 | photo of dark temple, golden treasure, high detail, smoke, sharp, fog 60 | ![stage3-3](https://user-images.githubusercontent.com/54370274/235267835-fd76c0c2-16fb-4f2a-a094-b8271907bbd5.png) 61 | 62 | glowing mushrooms in a natural environment with smoke in the frame 63 | ![stage3-4](https://user-images.githubusercontent.com/54370274/235267837-dd5239b1-7685-424c-be9b-35b29ee2b327.png) 64 | --------------------------------------------------------------------------------