├── .github
    └── FUNDING.yml
├── DeepFloyd-IF-I-M-v1.0-8bit.ipynb
├── DeepFloyd-IF-I-M-v1.0-Image-Variation.ipynb
├── DeepFloyd-IF-I-M-v1.0-Image.ipynb
├── DeepFloyd-IF-I-M-v1.0-Inpainting.ipynb
├── DeepFloyd-IF-I-M-v1.0-T5Embedder.ipynb
├── DeepFloyd-IF-I-M-v1.0-core.ipynb
├── DeepFloyd-IF-I-M-v1.0-final.ipynb
├── DeepFloyd-IF-I-M-v1.0.ipynb
├── LICENSE
└── README.md


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
 1 | # These are supported funding model platforms
 2 | 
 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
 4 | patreon: camenduru
 5 | open_collective: # Replace with a single Open Collective username
 6 | ko_fi: camenduru
 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 | 


--------------------------------------------------------------------------------
/DeepFloyd-IF-I-M-v1.0-8bit.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |   "cells": [
 3 |     {
 4 |       "cell_type": "markdown",
 5 |       "metadata": {
 6 |         "id": "view-in-github"
 7 |       },
 8 |       "source": [
 9 |         "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-8bit.ipynb)"
10 |       ]
11 |     },
12 |     {
13 |       "cell_type": "code",
14 |       "execution_count": null,
15 |       "metadata": {
16 |         "id": "DA4ASNpIvTzd"
17 |       },
18 |       "outputs": [],
19 |       "source": [
20 |         "# https://huggingface.co/spaces/DeepFloyd/IF/blob/main/app.py modified\n",
21 |         "\n",
22 |         "!pip install -q huggingface_hub\n",
23 |         "from huggingface_hub import login\n",
24 |         "login()"
25 |       ]
26 |     },
27 |     {
28 |       "cell_type": "code",
29 |       "execution_count": null,
30 |       "metadata": {
31 |         "id": "69qUFyBkwKs0"
32 |       },
33 |       "outputs": [],
34 |       "source": [
35 |         "!git clone -b 8bit https://github.com/camenduru/DeepFloyd-IF-hf\n",
36 |         "%cd /content/DeepFloyd-IF-hf\n",
37 |         "!pip install -r requirements.txt\n",
38 |         "!python app.py"
39 |       ]
40 |     }
41 |   ],
42 |   "metadata": {
43 |     "accelerator": "GPU",
44 |     "colab": {
45 |       "provenance": []
46 |     },
47 |     "gpuClass": "standard",
48 |     "kernelspec": {
49 |       "display_name": "Python 3",
50 |       "name": "python3"
51 |     },
52 |     "language_info": {
53 |       "name": "python"
54 |     }
55 |   },
56 |   "nbformat": 4,
57 |   "nbformat_minor": 0
58 | }
59 | 


--------------------------------------------------------------------------------
/DeepFloyd-IF-I-M-v1.0-Image-Variation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "id": "view-in-github"
  7 |       },
  8 |       "source": [
  9 |         "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-Image-Variation.ipynb)"
 10 |       ]
 11 |     },
 12 |     {
 13 |       "cell_type": "code",
 14 |       "execution_count": null,
 15 |       "metadata": {
 16 |         "id": "DA4ASNpIvTzd"
 17 |       },
 18 |       "outputs": [],
 19 |       "source": [
 20 |         "# https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/deepfloyd_if_free_tier_google_colab.ipynb modified\n",
 21 |         "\n",
 22 |         "!pip install -q -U diffusers~=0.16 transformers~=4.28 safetensors~=0.3 sentencepiece~=0.1 accelerate~=0.18 bitsandbytes~=0.38 torch~=2.0 huggingface_hub"
 23 |       ]
 24 |     },
 25 |     {
 26 |       "cell_type": "code",
 27 |       "execution_count": null,
 28 |       "metadata": {
 29 |         "id": "69qUFyBkwKs0"
 30 |       },
 31 |       "outputs": [],
 32 |       "source": [
 33 |         "from huggingface_hub import login\n",
 34 |         "login()"
 35 |       ]
 36 |     },
 37 |     {
 38 |       "cell_type": "code",
 39 |       "execution_count": null,
 40 |       "metadata": {},
 41 |       "outputs": [],
 42 |       "source": [
 43 |         "import gc\n",
 44 |         "import torch\n",
 45 |         "\n",
 46 |         "def flush():\n",
 47 |         "  gc.collect()\n",
 48 |         "  torch.cuda.empty_cache()\n",
 49 |         "\n",
 50 |         "import requests\n",
 51 |         "\n",
 52 |         "url = \"https://i.kym-cdn.com/entries/icons/original/000/026/561/car.jpg\"\n",
 53 |         "response = requests.get(url)\n",
 54 |         "\n",
 55 |         "from PIL import Image\n",
 56 |         "from io import BytesIO\n",
 57 |         "\n",
 58 |         "original_image = Image.open(BytesIO(response.content)).convert(\"RGB\")\n",
 59 |         "original_image = original_image.resize((768, 512))\n",
 60 |         "\n",
 61 |         "from transformers import T5EncoderModel\n",
 62 |         "\n",
 63 |         "text_encoder = T5EncoderModel.from_pretrained(\n",
 64 |         "    \"DeepFloyd/IF-I-XL-v1.0\",\n",
 65 |         "    subfolder=\"text_encoder\", \n",
 66 |         "    device_map=\"auto\", \n",
 67 |         "    load_in_8bit=True, \n",
 68 |         "    variant=\"8bit\"\n",
 69 |         ")\n",
 70 |         "\n",
 71 |         "from diffusers import IFImg2ImgPipeline\n",
 72 |         "from diffusers.utils import pt_to_pil  # required by the pt_to_pil(image) call at the end of this cell\n",
 73 |         "pipe = IFImg2ImgPipeline.from_pretrained(\n",
 74 |         "    \"DeepFloyd/IF-I-XL-v1.0\", \n",
 75 |         "    text_encoder=text_encoder, \n",
 76 |         "    unet=None, \n",
 77 |         "    device_map=\"auto\"\n",
 78 |         ")\n",
 79 |         "\n",
 80 |         "prompt = \"anime style\"\n",
 81 |         "\n",
 82 |         "prompt_embeds, negative_embeds = pipe.encode_prompt(prompt)\n",
 83 |         "\n",
 84 |         "del text_encoder\n",
 85 |         "del pipe\n",
 86 |         "flush()\n",
 87 |         "\n",
 88 |         "pipe = IFImg2ImgPipeline.from_pretrained(\n",
 89 |         "    \"DeepFloyd/IF-I-XL-v1.0\", \n",
 90 |         "    text_encoder=None, \n",
 91 |         "    variant=\"fp16\", \n",
 92 |         "    torch_dtype=torch.float16, \n",
 93 |         "    device_map=\"auto\"\n",
 94 |         ")\n",
 95 |         "\n",
 96 |         "generator = torch.Generator().manual_seed(0)\n",
 97 |         "image = pipe(\n",
 98 |         "    image=original_image,\n",
 99 |         "    prompt_embeds=prompt_embeds,\n",
100 |         "    negative_prompt_embeds=negative_embeds, \n",
101 |         "    output_type=\"pt\",\n",
102 |         "    generator=generator,\n",
103 |         ").images\n",
104 |         "\n",
105 |         "pil_image = pt_to_pil(image)\n",
106 |         "pil_image[0]"
107 |       ]
108 |     },
109 |     {
110 |       "cell_type": "code",
111 |       "execution_count": null,
112 |       "metadata": {},
113 |       "outputs": [],
114 |       "source": [
115 |         "del pipe\n",
116 |         "flush()\n",
117 |         "\n",
118 |         "from diffusers import IFImg2ImgSuperResolutionPipeline\n",
119 |         "\n",
120 |         "pipe = IFImg2ImgSuperResolutionPipeline.from_pretrained(\n",
121 |         "    \"DeepFloyd/IF-II-L-v1.0\", \n",
122 |         "    text_encoder=None, \n",
123 |         "    variant=\"fp16\", \n",
124 |         "    torch_dtype=torch.float16, \n",
125 |         "    device_map=\"auto\"\n",
126 |         ")\n",
127 |         "\n",
128 |         "image = pipe(\n",
129 |         "    image=image,\n",
130 |         "    original_image=original_image,\n",
131 |         "    prompt_embeds=prompt_embeds,\n",
132 |         "    negative_prompt_embeds=negative_embeds, \n",
133 |         "    generator=generator,\n",
134 |         ").images[0]\n",
135 |         "image"
136 |       ]
137 |     }
138 |   ],
139 |   "metadata": {
140 |     "accelerator": "GPU",
141 |     "colab": {
142 |       "provenance": []
143 |     },
144 |     "gpuClass": "standard",
145 |     "kernelspec": {
146 |       "display_name": "Python 3",
147 |       "name": "python3"
148 |     },
149 |     "language_info": {
150 |       "name": "python"
151 |     }
152 |   },
153 |   "nbformat": 4,
154 |   "nbformat_minor": 0
155 | }
156 | 


--------------------------------------------------------------------------------
/DeepFloyd-IF-I-M-v1.0-Image.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "id": "view-in-github"
  7 |       },
  8 |       "source": [
  9 |         "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-Image.ipynb)"
 10 |       ]
 11 |     },
 12 |     {
 13 |       "cell_type": "code",
 14 |       "execution_count": null,
 15 |       "metadata": {
 16 |         "id": "DA4ASNpIvTzd"
 17 |       },
 18 |       "outputs": [],
 19 |       "source": [
 20 |         "# https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/deepfloyd_if_free_tier_google_colab.ipynb modified\n",
 21 |         "\n",
 22 |         "!pip install -q -U diffusers~=0.16 transformers~=4.28 safetensors~=0.3 sentencepiece~=0.1 accelerate~=0.18 bitsandbytes~=0.38 torch~=2.0 huggingface_hub"
 23 |       ]
 24 |     },
 25 |     {
 26 |       "cell_type": "code",
 27 |       "execution_count": null,
 28 |       "metadata": {
 29 |         "id": "69qUFyBkwKs0"
 30 |       },
 31 |       "outputs": [],
 32 |       "source": [
 33 |         "from huggingface_hub import login\n",
 34 |         "login()"
 35 |       ]
 36 |     },
 37 |     {
 38 |       "cell_type": "code",
 39 |       "execution_count": null,
 40 |       "metadata": {},
 41 |       "outputs": [],
 42 |       "source": [
 43 |         "from transformers import T5EncoderModel\n",
 44 |         "\n",
 45 |         "text_encoder = T5EncoderModel.from_pretrained(\n",
 46 |         "    \"DeepFloyd/IF-I-XL-v1.0\",\n",
 47 |         "    subfolder=\"text_encoder\", \n",
 48 |         "    device_map=\"auto\", \n",
 49 |         "    load_in_8bit=True, \n",
 50 |         "    variant=\"8bit\"\n",
 51 |         ")\n",
 52 |         "\n",
 53 |         "from diffusers import DiffusionPipeline\n",
 54 |         "\n",
 55 |         "pipe = DiffusionPipeline.from_pretrained(\n",
 56 |         "    \"DeepFloyd/IF-I-XL-v1.0\", \n",
 57 |         "    text_encoder=text_encoder, # pass the previously instantiated 8bit text encoder\n",
 58 |         "    unet=None, \n",
 59 |         "    device_map=\"auto\",\n",
 60 |         "    safety_checker=None\n",
 61 |         ")\n",
 62 |         "\n",
 63 |         "prompt = 'a photograph of an astronaut riding a horse holding a sign that says \"Pixel\\'s in space\"'\n",
 64 |         "\n",
 65 |         "prompt_embeds, negative_embeds = pipe.encode_prompt(prompt)\n",
 66 |         "\n",
 67 |         "del text_encoder\n",
 68 |         "del pipe\n",
 69 |         "\n",
 70 |         "import gc\n",
 71 |         "import torch\n",
 72 |         "\n",
 73 |         "def flush():\n",
 74 |         "  gc.collect()\n",
 75 |         "  torch.cuda.empty_cache()"
 76 |       ]
 77 |     },
 78 |     {
 79 |       "cell_type": "code",
 80 |       "execution_count": null,
 81 |       "metadata": {},
 82 |       "outputs": [],
 83 |       "source": [
 84 |         "flush()\n",
 85 |         "\n",
 86 |         "pipe = DiffusionPipeline.from_pretrained(\n",
 87 |         "    \"DeepFloyd/IF-I-XL-v1.0\", \n",
 88 |         "    text_encoder=None, \n",
 89 |         "    variant=\"fp16\", \n",
 90 |         "    torch_dtype=torch.float16, \n",
 91 |         "    device_map=\"auto\",\n",
 92 |         "    safety_checker=None\n",
 93 |         ")\n",
 94 |         "\n",
 95 |         "generator = torch.Generator().manual_seed(1)\n",
 96 |         "\n",
 97 |         "image = pipe(\n",
 98 |         "    prompt_embeds=prompt_embeds,\n",
 99 |         "    negative_prompt_embeds=negative_embeds, \n",
100 |         "    output_type=\"pt\",\n",
101 |         "    generator=generator,\n",
102 |         ").images\n",
103 |         "\n",
104 |         "from diffusers.utils import pt_to_pil\n",
105 |         "\n",
106 |         "pil_image = pt_to_pil(image)\n",
107 |         "\n",
108 |         "pil_image[0]"
109 |       ]
110 |     },
111 |     {
112 |       "cell_type": "code",
113 |       "execution_count": null,
114 |       "metadata": {},
115 |       "outputs": [],
116 |       "source": [
117 |         "del pipe\n",
118 |         "flush()\n",
119 |         "\n",
120 |         "pipe = DiffusionPipeline.from_pretrained(\n",
121 |         "    \"DeepFloyd/IF-II-L-v1.0\", \n",
122 |         "    text_encoder=None, # no use of text encoder => memory savings!\n",
123 |         "    variant=\"fp16\", \n",
124 |         "    torch_dtype=torch.float16, \n",
125 |         "    device_map=\"auto\",\n",
126 |         "    safety_checker=None\n",
127 |         ")\n",
128 |         "\n",
129 |         "image = pipe(\n",
130 |         "    image=image, \n",
131 |         "    prompt_embeds=prompt_embeds, \n",
132 |         "    negative_prompt_embeds=negative_embeds, \n",
133 |         "    output_type=\"pt\",\n",
134 |         "    generator=generator,\n",
135 |         ").images\n",
136 |         "\n",
137 |         "pil_image = pt_to_pil(image)\n",
138 |         "\n",
139 |         "pil_image[0]"
140 |       ]
141 |     },
142 |     {
143 |       "cell_type": "code",
144 |       "execution_count": null,
145 |       "metadata": {},
146 |       "outputs": [],
147 |       "source": [
148 |         "del pipe\n",
149 |         "flush()\n",
150 |         "\n",
151 |         "pipe = DiffusionPipeline.from_pretrained(\n",
152 |         "    \"stabilityai/stable-diffusion-x4-upscaler\", \n",
153 |         "    torch_dtype=torch.float16, \n",
154 |         "    device_map=\"auto\",\n",
155 |         "    safety_checker=None\n",
156 |         ")\n",
157 |         "\n",
158 |         "pil_image = pipe(prompt, generator=generator, image=image).images\n",
159 |         "\n",
160 |         "pil_image[0]"
161 |       ]
162 |     }
163 |   ],
164 |   "metadata": {
165 |     "accelerator": "GPU",
166 |     "colab": {
167 |       "provenance": []
168 |     },
169 |     "gpuClass": "standard",
170 |     "kernelspec": {
171 |       "display_name": "Python 3",
172 |       "name": "python3"
173 |     },
174 |     "language_info": {
175 |       "name": "python"
176 |     }
177 |   },
178 |   "nbformat": 4,
179 |   "nbformat_minor": 0
180 | }
181 | 


--------------------------------------------------------------------------------
/DeepFloyd-IF-I-M-v1.0-Inpainting.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "id": "view-in-github"
  7 |       },
  8 |       "source": [
  9 |         "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-Inpainting.ipynb)"
 10 |       ]
 11 |     },
 12 |     {
 13 |       "cell_type": "code",
 14 |       "execution_count": null,
 15 |       "metadata": {
 16 |         "id": "DA4ASNpIvTzd"
 17 |       },
 18 |       "outputs": [],
 19 |       "source": [
 20 |         "# https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/deepfloyd_if_free_tier_google_colab.ipynb modified\n",
 21 |         "\n",
 22 |         "!pip install -q -U diffusers~=0.16 transformers~=4.28 safetensors~=0.3 sentencepiece~=0.1 accelerate~=0.18 bitsandbytes~=0.38 torch~=2.0 huggingface_hub"
 23 |       ]
 24 |     },
 25 |     {
 26 |       "cell_type": "code",
 27 |       "execution_count": null,
 28 |       "metadata": {
 29 |         "id": "69qUFyBkwKs0"
 30 |       },
 31 |       "outputs": [],
 32 |       "source": [
 33 |         "from huggingface_hub import login\n",
 34 |         "login()"
 35 |       ]
 36 |     },
 37 |     {
 38 |       "cell_type": "code",
 39 |       "execution_count": null,
 40 |       "metadata": {},
 41 |       "outputs": [],
 42 |       "source": [
 43 |         "import gc\n",
 44 |         "import torch\n",
 45 |         "\n",
 46 |         "def flush():\n",
 47 |         "  gc.collect()\n",
 48 |         "  torch.cuda.empty_cache()\n",
 49 |         "\n",
 50 |         "import requests\n",
 51 |         "\n",
 52 |         "url = \"https://i.imgflip.com/5j6x75.jpg\"\n",
 53 |         "response = requests.get(url)\n",
 54 |         "\n",
 55 |         "from PIL import Image\n",
 56 |         "from io import BytesIO\n",
 57 |         "\n",
 58 |         "original_image = Image.open(BytesIO(response.content)).convert(\"RGB\")\n",
 59 |         "original_image = original_image.resize((512, 768))\n",
 60 |         "\n",
 61 |         "from huggingface_hub import hf_hub_download\n",
 62 |         "\n",
 63 |         "mask_image = hf_hub_download(\"diffusers/docs-images\", repo_type=\"dataset\", filename=\"if/sign_man_mask.png\")\n",
 64 |         "mask_image = Image.open(mask_image)\n",
 65 |         "\n",
 66 |         "from PIL import Image\n",
 67 |         "import numpy as np\n",
 68 |         "\n",
 69 |         "height = 64\n",
 70 |         "width = 64\n",
 71 |         "\n",
 72 |         "example_mask = np.zeros((height, width), dtype=np.uint8)  # uint8: 255 does not fit in int8\n",
 73 |         "\n",
 74 |         "# Set masked pixels to 255\n",
 75 |         "example_mask[20:30, 30:40] = 255\n",
 76 |         "\n",
 77 |         "# Make sure to create the image in mode 'L'\n",
 78 |         "# meaning single channel grayscale\n",
 79 |         "example_mask = Image.fromarray(example_mask, mode='L')\n",
 80 |         "\n",
 81 |         "\n",
 82 |         "from transformers import T5EncoderModel\n",
 83 |         "\n",
 84 |         "text_encoder = T5EncoderModel.from_pretrained(\n",
 85 |         "    \"DeepFloyd/IF-I-XL-v1.0\",\n",
 86 |         "    subfolder=\"text_encoder\", \n",
 87 |         "    device_map=\"auto\", \n",
 88 |         "    load_in_8bit=True, \n",
 89 |         "    variant=\"8bit\"\n",
 90 |         ")\n",
 91 |         "\n",
 92 |         "from diffusers import IFInpaintingPipeline\n",
 93 |         "from diffusers.utils import pt_to_pil  # required by the pt_to_pil(image) call at the end of this cell\n",
 94 |         "pipe = IFInpaintingPipeline.from_pretrained(\n",
 95 |         "    \"DeepFloyd/IF-I-XL-v1.0\", \n",
 96 |         "    text_encoder=text_encoder, \n",
 97 |         "    unet=None, \n",
 98 |         "    device_map=\"auto\"\n",
 99 |         ")\n",
100 |         "\n",
101 |         "prompt = 'the text, \"just stack more layers\"'\n",
102 |         "\n",
103 |         "prompt_embeds, negative_embeds = pipe.encode_prompt(prompt)\n",
104 |         "\n",
105 |         "del text_encoder\n",
106 |         "del pipe\n",
107 |         "flush()\n",
108 |         "\n",
109 |         "pipe = IFInpaintingPipeline.from_pretrained(\n",
110 |         "    \"DeepFloyd/IF-I-XL-v1.0\", \n",
111 |         "    text_encoder=None, \n",
112 |         "    variant=\"fp16\", \n",
113 |         "    torch_dtype=torch.float16, \n",
114 |         "    device_map=\"auto\"\n",
115 |         ")\n",
116 |         "generator = torch.Generator().manual_seed(0)  # seeded RNG; also reused by the super-resolution cell\n",
117 |         "image = pipe(\n",
118 |         "    image=original_image,\n",
119 |         "    mask_image=mask_image,\n",
120 |         "    prompt_embeds=prompt_embeds,\n",
121 |         "    negative_prompt_embeds=negative_embeds, \n",
122 |         "    output_type=\"pt\",\n",
123 |         "    generator=generator,\n",
124 |         ").images\n",
125 |         "\n",
126 |         "pil_image = pt_to_pil(image)\n",
127 |         "pipe.watermarker.apply_watermark(pil_image, pipe.unet.config.sample_size)\n",
128 |         "\n",
129 |         "pil_image[0]"
130 |       ]
131 |     },
132 |     {
133 |       "cell_type": "code",
134 |       "execution_count": null,
135 |       "metadata": {},
136 |       "outputs": [],
137 |       "source": [
138 |         "del pipe\n",
139 |         "flush()\n",
140 |         "\n",
141 |         "from diffusers import IFInpaintingSuperResolutionPipeline\n",
142 |         "\n",
143 |         "pipe = IFInpaintingSuperResolutionPipeline.from_pretrained(\n",
144 |         "    \"DeepFloyd/IF-II-L-v1.0\", \n",
145 |         "    text_encoder=None, \n",
146 |         "    variant=\"fp16\", \n",
147 |         "    torch_dtype=torch.float16, \n",
148 |         "    device_map=\"auto\"\n",
149 |         ")\n",
150 |         "\n",
151 |         "image = pipe(\n",
152 |         "    image=image,\n",
153 |         "    original_image=original_image,\n",
154 |         "    mask_image=mask_image,\n",
155 |         "    prompt_embeds=prompt_embeds,\n",
156 |         "    negative_prompt_embeds=negative_embeds, \n",
157 |         "    generator=generator,\n",
158 |         ").images[0]\n",
159 |         "image"
160 |       ]
161 |     }
162 |   ],
163 |   "metadata": {
164 |     "accelerator": "GPU",
165 |     "colab": {
166 |       "provenance": []
167 |     },
168 |     "gpuClass": "standard",
169 |     "kernelspec": {
170 |       "display_name": "Python 3",
171 |       "name": "python3"
172 |     },
173 |     "language_info": {
174 |       "name": "python"
175 |     }
176 |   },
177 |   "nbformat": 4,
178 |   "nbformat_minor": 0
179 | }
180 | 


--------------------------------------------------------------------------------
/DeepFloyd-IF-I-M-v1.0-T5Embedder.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |   "cells": [
 3 |     {
 4 |       "cell_type": "markdown",
 5 |       "metadata": {
 6 |         "id": "view-in-github"
 7 |       },
 8 |       "source": [
 9 |         "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-T5Embedder.ipynb)"
10 |       ]
11 |     },
12 |     {
13 |       "cell_type": "code",
14 |       "execution_count": null,
15 |       "metadata": {
16 |         "id": "DA4ASNpIvTzd"
17 |       },
18 |       "outputs": [],
19 |       "source": [
20 |         "# https://www.kaggle.com/code/shonenkov/deepfloyd-if-4-3b-generator-of-pictures modified\n",
21 |         "\n",
22 |         "!pip install -q torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 torchtext==0.14.1 torchdata==0.5.1 --extra-index-url https://download.pytorch.org/whl/cu116 -U\n",
23 |         "!pip install -q -U diffusers~=0.16 transformers~=4.28 safetensors~=0.3 sentencepiece~=0.1 accelerate~=0.18 bitsandbytes~=0.38 huggingface_hub"
24 |       ]
25 |     },
26 |     {
27 |       "cell_type": "code",
28 |       "execution_count": null,
29 |       "metadata": {},
30 |       "outputs": [],
31 |       "source": [
32 |         "from transformers import T5EncoderModel\n",
33 |         "import os\n",
34 |         "hf_token = os.environ.get(\"HF_TOKEN\")  # set HF_TOKEN (or run huggingface_hub.login()) first; never commit access tokens\n",
35 |         "\n",
36 |         "text_encoder = T5EncoderModel.from_pretrained(\n",
37 |         "    \"DeepFloyd/IF-I-L-v1.0\",\n",
38 |         "    subfolder=\"text_encoder\", \n",
39 |         "    device_map=\"auto\", \n",
40 |         "    load_in_8bit=True, \n",
41 |         "    variant=\"8bit\",\n",
42 |         "    use_auth_token=hf_token\n",
43 |         ")\n",
44 |         "\n",
45 |         "from diffusers import DiffusionPipeline\n",
46 |         "\n",
47 |         "pipe = DiffusionPipeline.from_pretrained(\n",
48 |         "    \"DeepFloyd/IF-I-L-v1.0\", \n",
49 |         "    text_encoder=text_encoder,\n",
50 |         "    unet=None, \n",
51 |         "    device_map=\"auto\",\n",
52 |         "    safety_checker=None,\n",
53 |         "    use_auth_token=hf_token\n",
54 |         ")\n",
55 |         "\n",
56 |         "prompt = 'a photograph of an astronaut riding a horse holding a sign that says \"Pixel\\'s in space\"'\n",
57 |         "prompt_embeds, negative_embeds = pipe.encode_prompt(prompt)\n",
58 |         "\n",
59 |         "import numpy as np\n",
60 |         "prompt_embeds = prompt_embeds.cpu()\n",
61 |         "negative_embeds = negative_embeds.cpu()\n",
62 |         "np.save('prompt_embeds.npy', prompt_embeds)\n",
63 |         "np.save('negative_embeds.npy', negative_embeds)"
64 |       ]
65 |     }
66 |   ],
67 |   "metadata": {
68 |     "accelerator": "GPU",
69 |     "colab": {
70 |       "provenance": []
71 |     },
72 |     "gpuClass": "standard",
73 |     "kernelspec": {
74 |       "display_name": "Python 3",
75 |       "name": "python3"
76 |     },
77 |     "language_info": {
78 |       "name": "python"
79 |     }
80 |   },
81 |   "nbformat": 4,
82 |   "nbformat_minor": 0
83 | }
84 | 


--------------------------------------------------------------------------------
/DeepFloyd-IF-I-M-v1.0-core.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "id": "view-in-github"
  7 |       },
  8 |       "source": [
  9 |         "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-core.ipynb)"
 10 |       ]
 11 |     },
 12 |     {
 13 |       "cell_type": "code",
 14 |       "execution_count": null,
 15 |       "metadata": {
 16 |         "id": "DA4ASNpIvTzd"
 17 |       },
 18 |       "outputs": [],
 19 |       "source": [
 20 |         "# https://www.kaggle.com/code/shonenkov/deepfloyd-if-4-3b-generator-of-pictures modified\n",
 21 |         "\n",
 22 |         "!pip install -q torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 torchtext==0.14.1 torchdata==0.5.1 --extra-index-url https://download.pytorch.org/whl/cu116 -U\n",
 23 |         "!pip install -q xformers==0.0.16 triton==2.0.0 -U\n",
 24 |         "!pip install -q deepfloyd-if==1.0.1 \n",
 25 |         "!pip install -q git+https://github.com/openai/CLIP.git --no-deps\n",
 26 |         "!git clone https://huggingface.co/bakedpotat/prompts\n",
 27 |         "\n",
 28 |         "get_ipython().kernel.do_shutdown(True)"
 29 |       ]
 30 |     },
 31 |     {
 32 |       "cell_type": "code",
 33 |       "execution_count": null,
 34 |       "metadata": {
 35 |         "id": "69qUFyBkwKs0"
 36 |       },
 37 |       "outputs": [],
 38 |       "source": [
 39 |         "import os\n",
 40 |         "os.environ['FORCE_MEM_EFFICIENT_ATTN'] = \"1\"\n",
 41 |         "import sys\n",
 42 |         "import random\n",
 43 |         "\n",
 44 |         "import torch\n",
 45 |         "import numpy as np\n",
 46 |         "\n",
 47 |         "from deepfloyd_if.modules import IFStageI, IFStageII, StableStageIII\n",
 48 |         "\n",
 49 |         "hf_token = os.environ.get(\"HF_TOKEN\")  # set HF_TOKEN (or run huggingface_hub.login()) first; never commit access tokens\n",
 50 |         "device = 'cuda:0'\n",
 51 |         "if_I = IFStageI('IF-I-L-v1.0', device=device, hf_token=hf_token)\n",
 52 |         "if_II = IFStageII('IF-II-L-v1.0', device=device, hf_token=hf_token)\n",
 53 |         "if_III = StableStageIII('stable-diffusion-x4-upscaler', device=device)"
 54 |       ]
 55 |     },
 56 |     {
 57 |       "cell_type": "code",
 58 |       "execution_count": null,
 59 |       "metadata": {},
 60 |       "outputs": [],
 61 |       "source": [
 62 |         "prompts, t5_embs = [], []\n",
 63 |         "for prompt_idx in [1, 2, 3, 4]:\n",
 64 |         "    prompt = open(f'/content/prompts/{str(prompt_idx).zfill(4)}.txt').read().strip()\n",
 65 |         "    t5_numpy = np.load(f'/content/prompts/{str(prompt_idx).zfill(4)}.npy')\n",
 66 |         "    t5_embs.append(torch.from_numpy(t5_numpy).unsqueeze(0))\n",
 67 |         "    prompts.append(prompt)\n",
 68 |         "\n",
 69 |         "t5_embs = torch.cat(t5_embs).to(device)\n",
 70 |         "t5_embs.shape\n",
 71 |         "\n",
 72 |         "# Stage-I: 64px\n",
 73 |         "\n",
 74 |         "seed = 42\n",
 75 |         "\n",
 76 |         "stageI_generations, _meta = if_I.embeddings_to_image(\n",
 77 |         "    t5_embs, seed=seed, batch_repeat=1,\n",
 78 |         "    dynamic_thresholding_p=0.95,\n",
 79 |         "    dynamic_thresholding_c=1.5,\n",
 80 |         "    guidance_scale=7.0,\n",
 81 |         "    sample_loop='ddpm',\n",
 82 |         "    sample_timestep_respacing='smart50',\n",
 83 |         "    image_size=64,\n",
 84 |         "    aspect_ratio=\"1:1\",\n",
 85 |         "    progress=True,\n",
 86 |         "    disable_watermark=True,\n",
 87 |         ")\n",
 88 |         "pil_images_I = if_I.to_images(stageI_generations, disable_watermark=True)\n",
 89 |         "if_I.show(pil_images_I)"
 90 |       ]
 91 |     },
 92 |     {
 93 |       "cell_type": "code",
 94 |       "execution_count": null,
 95 |       "metadata": {},
 96 |       "outputs": [],
 97 |       "source": [
 98 |         "# Stage-II: 64px --> 256 px\n",
 99 |         "\n",
100 |         "stageII_generations, _meta = if_II.embeddings_to_image(\n",
101 |         "    stageI_generations,\n",
102 |         "    t5_embs, seed=seed, batch_repeat=1,\n",
103 |         "    dynamic_thresholding_p=0.95,\n",
104 |         "    dynamic_thresholding_c=1.0,\n",
105 |         "    aug_level=0.25,\n",
106 |         "    guidance_scale=4.0,\n",
107 |         "    image_scale=4.0,\n",
108 |         "    sample_loop='ddpm',\n",
109 |         "    sample_timestep_respacing='50',\n",
110 |         "    progress=True,\n",
111 |         ")\n",
112 |         "pil_images_II = if_II.to_images(stageII_generations, disable_watermark=True)\n",
113 |         "if_II.show(pil_images_II)"
114 |       ]
115 |     },
116 |     {
117 |       "cell_type": "code",
118 |       "execution_count": null,
119 |       "metadata": {},
120 |       "outputs": [],
121 |       "source": [
122 |         "# Stage-III: 256px --> 1024px\n",
123 |         "\n",
124 |         "stageIII_generations = []\n",
125 |         "for idx in range(len(stageII_generations)):\n",
126 |         "    if_III_kwargs = {}\n",
127 |         "    if_III_kwargs['prompt'] = prompts[idx:idx+1]\n",
128 |         "    if_III_kwargs['low_res'] = stageII_generations[idx:idx+1]\n",
129 |         "    if_III_kwargs['seed'] = seed\n",
130 |         "    if_III_kwargs['t5_embs'] = t5_embs[idx:idx+1]\n",
131 |         "    _stageIII_generations, _meta = if_III.embeddings_to_image(**if_III_kwargs)\n",
132 |         "    stageIII_generations.append(_stageIII_generations)\n",
133 |         "\n",
134 |         "stageIII_generations = torch.cat(stageIII_generations, 0)\n",
135 |         "pil_images_III = if_III.to_images(stageIII_generations, disable_watermark=True)\n",
136 |         "\n",
137 |         "for idx in range(len(prompts)):\n",
138 |         "    pil_img, prompt = pil_images_III[idx], prompts[idx]\n",
139 |         "    pil_img.save(f'{idx}.png')\n",
140 |         "    if_I.show([pil_img],size=14)\n",
141 |         "    print(prompt, '\\n'*3)"
142 |       ]
143 |     }
144 |   ],
145 |   "metadata": {
146 |     "accelerator": "GPU",
147 |     "colab": {
148 |       "provenance": []
149 |     },
150 |     "gpuClass": "standard",
151 |     "kernelspec": {
152 |       "display_name": "Python 3",
153 |       "name": "python3"
154 |     },
155 |     "language_info": {
156 |       "name": "python"
157 |     }
158 |   },
159 |   "nbformat": 4,
160 |   "nbformat_minor": 0
161 | }
162 | 


--------------------------------------------------------------------------------
/DeepFloyd-IF-I-M-v1.0-final.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "id": "view-in-github"
  7 |       },
  8 |       "source": [
  9 |         "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-final.ipynb)"
 10 |       ]
 11 |     },
 12 |     {
 13 |       "cell_type": "code",
 14 |       "execution_count": null,
 15 |       "metadata": {
 16 |         "id": "DA4ASNpIvTzd"
 17 |       },
 18 |       "outputs": [],
 19 |       "source": [
 20 |         "# Modified from https://www.kaggle.com/code/shonenkov/deepfloyd-if-4-3b-generator-of-pictures\n",
 21 |         "\n",
 22 |         "!pip install -q torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 torchtext==0.14.1 torchdata==0.5.1 --extra-index-url https://download.pytorch.org/whl/cu116 -U\n",
 23 |         "!pip install -q xformers==0.0.16 triton==2.0.0 -U\n",
 24 |         "!pip install -q deepfloyd-if==1.0.1 \n",
 25 |         "!pip install -q git+https://github.com/openai/CLIP.git --no-deps\n",
 26 |         "# !git clone https://huggingface.co/bakedpotat/prompts\n",
 27 |         "!pip install -q -U diffusers~=0.16 transformers~=4.28 safetensors~=0.3 sentencepiece~=0.1 accelerate~=0.18 bitsandbytes~=0.38 huggingface_hub"
 28 |       ]
 29 |     },
 30 |     {
 31 |       "cell_type": "code",
 32 |       "execution_count": null,
 33 |       "metadata": {},
 34 |       "outputs": [],
 35 |       "source": [
 36 |         "get_ipython().kernel.do_shutdown(True)"
 37 |       ]
 38 |     },
 39 |     {
 40 |       "cell_type": "code",
 41 |       "execution_count": null,
 42 |       "metadata": {},
 43 |       "outputs": [],
 44 |       "source": [
 45 |         "from transformers import T5EncoderModel\n",
 46 |         "\n",
 47 |         "hf_token = \"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\"\n",
 48 |         "\n",
 49 |         "text_encoder = T5EncoderModel.from_pretrained(\n",
 50 |         "    \"DeepFloyd/IF-I-L-v1.0\",\n",
 51 |         "    load_in_8bit=True,\n",
 52 |         "    subfolder=\"text_encoder\",\n",
 53 |         "    device_map=\"auto\",\n",
 54 |         "    variant=\"8bit\",\n",
 55 |         "    use_auth_token=hf_token\n",
 56 |         ")\n",
 57 |         "\n",
 58 |         "from diffusers import DiffusionPipeline\n",
 59 |         "\n",
 60 |         "pipe = DiffusionPipeline.from_pretrained(\n",
 61 |         "    \"DeepFloyd/IF-I-L-v1.0\", \n",
 62 |         "    text_encoder=text_encoder,\n",
 63 |         "    unet=None, \n",
 64 |         "    device_map=\"auto\",\n",
 65 |         "    safety_checker=None,\n",
 66 |         "    use_auth_token=hf_token\n",
 67 |         ")\n",
 68 |         "\n",
 69 |         "prompt = 'a photograph of an astronaut riding a horse holding a sign that says \"Pixel\\'s in space\"'\n",
 70 |         "prompt_embeds, negative_embeds = pipe.encode_prompt(prompt)\n",
 71 |         "\n",
 72 |         "import numpy as np\n",
 73 |         "prompt_embeds = prompt_embeds.cpu()\n",
 74 |         "negative_embeds = negative_embeds.cpu()\n",
 75 |         "np.save('prompt.npy', prompt_embeds)\n",
 76 |         "np.save('negative.npy', negative_embeds)\n",
 77 |         "\n",
 78 |         "get_ipython().kernel.do_shutdown(True)"
 79 |       ]
 80 |     },
 81 |     {
 82 |       "cell_type": "code",
 83 |       "execution_count": null,
 84 |       "metadata": {
 85 |         "id": "69qUFyBkwKs0"
 86 |       },
 87 |       "outputs": [],
 88 |       "source": [
 89 |         "import os\n",
 90 |         "os.environ['FORCE_MEM_EFFICIENT_ATTN'] = \"1\"\n",
 91 |         "import sys\n",
 92 |         "import random\n",
 93 |         "\n",
 94 |         "import torch\n",
 95 |         "import numpy as np\n",
 96 |         "\n",
 97 |         "from deepfloyd_if.modules import IFStageI, IFStageII, StableStageIII\n",
 98 |         "\n",
 99 |         "hf_token = \"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\"\n",
100 |         "device = 'cuda:0'\n",
101 |         "if_I = IFStageI('IF-I-L-v1.0', device=device, hf_token=hf_token)\n",
102 |         "if_II = IFStageII('IF-II-L-v1.0', device=device, hf_token=hf_token)\n",
103 |         "if_III = StableStageIII('stable-diffusion-x4-upscaler', device=device)"
104 |       ]
105 |     },
106 |     {
107 |       "cell_type": "code",
108 |       "execution_count": null,
109 |       "metadata": {},
110 |       "outputs": [],
111 |       "source": [
112 |         "prompts, t5_embs = [], []\n",
113 |         "prompt = 'a photograph of an astronaut riding a horse holding a sign that says \"Pixel\\'s in space\"'\n",
114 |         "t5_numpy = np.load(f'/content/prompt.npy')\n",
115 |         "t5_numpy = t5_numpy.reshape(77, 4096)\n",
116 |         "t5_embs.append(torch.from_numpy(t5_numpy).unsqueeze(0))\n",
117 |         "prompts.append(prompt)\n",
118 |         "\n",
119 |         "t5_embs = torch.cat(t5_embs).to(device)\n",
120 |         "t5_embs.shape\n",
121 |         "\n",
122 |         "# Stage-I: 64px\n",
123 |         "\n",
124 |         "seed = 42\n",
125 |         "\n",
126 |         "stageI_generations, _meta = if_I.embeddings_to_image(\n",
127 |         "    t5_embs, seed=seed, batch_repeat=1,\n",
128 |         "    dynamic_thresholding_p=0.95,\n",
129 |         "    dynamic_thresholding_c=1.5,\n",
130 |         "    guidance_scale=7.0,\n",
131 |         "    sample_loop='ddpm',\n",
132 |         "    sample_timestep_respacing='smart50',\n",
133 |         "    image_size=64,\n",
134 |         "    aspect_ratio=\"1:1\",\n",
135 |         "    progress=True,\n",
136 |         "    disable_watermark=True,\n",
137 |         ")\n",
138 |         "pil_images_I = if_I.to_images(stageI_generations, disable_watermark=True)\n",
139 |         "if_I.show(pil_images_I)"
140 |       ]
141 |     },
142 |     {
143 |       "cell_type": "code",
144 |       "execution_count": null,
145 |       "metadata": {},
146 |       "outputs": [],
147 |       "source": [
148 |         "# Stage-II: 64px --> 256px\n",
149 |         "\n",
150 |         "stageII_generations, _meta = if_II.embeddings_to_image(\n",
151 |         "    stageI_generations,\n",
152 |         "    t5_embs, seed=seed, batch_repeat=1,\n",
153 |         "    dynamic_thresholding_p=0.95,\n",
154 |         "    dynamic_thresholding_c=1.0,\n",
155 |         "    aug_level=0.25,\n",
156 |         "    guidance_scale=4.0,\n",
157 |         "    image_scale=4.0,\n",
158 |         "    sample_loop='ddpm',\n",
159 |         "    sample_timestep_respacing='50',\n",
160 |         "    progress=True,\n",
161 |         ")\n",
162 |         "pil_images_II = if_II.to_images(stageII_generations, disable_watermark=True)\n",
163 |         "if_II.show(pil_images_II)"
164 |       ]
165 |     },
166 |     {
167 |       "cell_type": "code",
168 |       "execution_count": null,
169 |       "metadata": {},
170 |       "outputs": [],
171 |       "source": [
172 |         "# Stage-III: 256px --> 1024px\n",
173 |         "\n",
174 |         "stageIII_generations = []\n",
175 |         "for idx in range(len(stageII_generations)):\n",
176 |         "    if_III_kwargs = {}\n",
177 |         "    if_III_kwargs['prompt'] = prompts[idx:idx+1]\n",
178 |         "    if_III_kwargs['low_res'] = stageII_generations[idx:idx+1]\n",
179 |         "    if_III_kwargs['seed'] = seed\n",
180 |         "    if_III_kwargs['t5_embs'] = t5_embs[idx:idx+1]\n",
181 |         "    _stageIII_generations, _meta = if_III.embeddings_to_image(**if_III_kwargs)\n",
182 |         "    stageIII_generations.append(_stageIII_generations)\n",
183 |         "\n",
184 |         "stageIII_generations = torch.cat(stageIII_generations, 0)\n",
185 |         "pil_images_III = if_III.to_images(stageIII_generations, disable_watermark=True)\n",
186 |         "\n",
187 |         "for idx in range(len(prompts)):\n",
188 |         "    pil_img, prompt = pil_images_III[idx], prompts[idx]\n",
189 |         "    pil_img.save(f'{idx}.png')\n",
190 |         "    if_I.show([pil_img],size=14)\n",
191 |         "    print(prompt, '\\n'*3)"
192 |       ]
193 |     }
194 |   ],
195 |   "metadata": {
196 |     "accelerator": "GPU",
197 |     "colab": {
198 |       "provenance": []
199 |     },
200 |     "gpuClass": "standard",
201 |     "kernelspec": {
202 |       "display_name": "Python 3",
203 |       "name": "python3"
204 |     },
205 |     "language_info": {
206 |       "name": "python"
207 |     }
208 |   },
209 |   "nbformat": 4,
210 |   "nbformat_minor": 0
211 | }
212 | 


--------------------------------------------------------------------------------
/DeepFloyd-IF-I-M-v1.0.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |   "cells": [
 3 |     {
 4 |       "cell_type": "markdown",
 5 |       "metadata": {
 6 |         "id": "view-in-github"
 7 |       },
 8 |       "source": [
 9 |         "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0.ipynb)"
10 |       ]
11 |     },
12 |     {
13 |       "cell_type": "code",
14 |       "execution_count": null,
15 |       "metadata": {
16 |         "id": "DA4ASNpIvTzd"
17 |       },
18 |       "outputs": [],
19 |       "source": [
20 |         "# Modified from https://huggingface.co/spaces/DeepFloyd/IF/blob/main/app.py\n",
21 |         "\n",
22 |         "!pip install -q huggingface_hub\n",
23 |         "from huggingface_hub import login\n",
24 |         "login()"
25 |       ]
26 |     },
27 |     {
28 |       "cell_type": "code",
29 |       "execution_count": null,
30 |       "metadata": {
31 |         "id": "69qUFyBkwKs0"
32 |       },
33 |       "outputs": [],
34 |       "source": [
35 |         "!git clone -b dev https://github.com/camenduru/DeepFloyd-IF-hf\n",
36 |         "%cd /content/DeepFloyd-IF-hf\n",
37 |         "!pip install -r requirements.txt\n",
38 |         "!python app.py"
39 |       ]
40 |     }
41 |   ],
42 |   "metadata": {
43 |     "accelerator": "GPU",
44 |     "colab": {
45 |       "provenance": []
46 |     },
47 |     "gpuClass": "standard",
48 |     "kernelspec": {
49 |       "display_name": "Python 3",
50 |       "name": "python3"
51 |     },
52 |     "language_info": {
53 |       "name": "python"
54 |     }
55 |   },
56 |   "nbformat": 4,
57 |   "nbformat_minor": 0
58 | }
59 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | This is free and unencumbered software released into the public domain.
 2 | 
 3 | Anyone is free to copy, modify, publish, use, compile, sell, or
 4 | distribute this software, either in source code form or as a compiled
 5 | binary, for any purpose, commercial or non-commercial, and by any
 6 | means.
 7 | 
 8 | In jurisdictions that recognize copyright laws, the author or authors
 9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <https://unlicense.org>
25 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 🐣 Please follow me for new updates https://twitter.com/camenduru <br />
 2 | 🔥 Please join our discord server https://discord.gg/k5BwmmvJJU <br />
 3 | 🥳 Please join my patreon community https://patreon.com/camenduru <br />
 4 | 
 5 | # 🚦 WIP 🚦
 6 | 
 7 | ## 🦒 Colab 
 8 | 
 9 | | Colab | Version
10 | | --- | --- |
11 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-final.ipynb) | Free Colab T4 Image Gen (Core Lib) (Recommended)
12 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0.ipynb) | Pro Colab A100 (Diffusers Lib) (Gradio)
13 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-Image.ipynb) | Free Colab T4 Image Gen (Diffusers Lib)
14 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-Image-Variation.ipynb) | Free Colab T4 Image Variation Gen (Diffusers Lib)
15 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/DeepFloyd-IF-colab/blob/main/DeepFloyd-IF-I-M-v1.0-Inpainting.ipynb) | Free Colab T4 Inpainting (Diffusers Lib)
16 | 
17 | 
18 | ## 🦆 Kaggle 
19 | 
20 | | Kaggle | Version
21 | | --- | --- |
22 | [![Open In Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://www.kaggle.com/code/camenduru/deep-floyd-if-kaggle-ipynb)  | Free Kaggle T4 Image Gen (Core Lib)
23 | 
24 | ## Tutorial
25 | 
26 | ### Free Colab (T4)
27 | https://www.youtube.com/watch?v=4zglOVF0jSk
28 | 
29 | ### Pro Colab (A100)
30 | https://www.youtube.com/watch?v=KRcXonSwa50
31 | 
32 | ## Main Repo
33 | https://github.com/deep-floyd/IF
34 | 
35 | ## Paper
36 | https://arxiv.org/abs/2205.11487
37 | 
38 | ## Code License
39 | https://github.com/deep-floyd/IF/blob/main/LICENSE
40 | 
41 | ## Model License 
42 | https://github.com/deep-floyd/IF/blob/main/LICENSE-MODEL
43 | 
44 | 
45 | ## Output
46 | ### Stage-I: 64px
47 | ![stage1](https://user-images.githubusercontent.com/54370274/235267686-bb20a748-077e-4d6d-9612-9dcb706d7f6f.png)
48 | 
49 | ### Stage-II: 64px --> 256 px
50 | ![stage2](https://user-images.githubusercontent.com/54370274/235267689-1131d701-6719-4d16-85c0-70f2859d7e58.png)
51 | 
52 | ### Stage-III: 256px --> 1024px
53 | a teddy bear looking curiously in the mirror, seeing a cat. 
54 | ![stage3-1](https://user-images.githubusercontent.com/54370274/235267830-44158274-804e-4717-9528-7c024f3644e4.png)
55 | 
56 | modern digital portrait of antique statue of venus in bikini only in style of cyberpunk glitchcore synthwave art, award prize winning best art masterpiece, reddit top art of all time, trending on artstation, minimalism, neon lady woman, noir glitch
57 | ![stage3-2](https://user-images.githubusercontent.com/54370274/235267832-62e940e2-3381-451f-923f-9f525b434cc7.png)
58 | 
59 | photo of dark temple, golden treasure, high detail, smoke, sharp, fog 
60 | ![stage3-3](https://user-images.githubusercontent.com/54370274/235267835-fd76c0c2-16fb-4f2a-a094-b8271907bbd5.png)
61 | 
62 | glowing mushrooms in a natural environment with smoke in the frame 
63 | ![stage3-4](https://user-images.githubusercontent.com/54370274/235267837-dd5239b1-7685-424c-be9b-35b29ee2b327.png)
64 | 


--------------------------------------------------------------------------------