├── README.md
├── FaceDetailer_jupyter.ipynb
├── InstantID_IPAdapter_ControlNet_jupyter.ipynb
└── InstantID_IPAdapter_ControlNet_FaceDetailer_jupyter.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | 🐣 Please follow me for new updates https://twitter.com/camenduru
2 | 🔥 Please join our discord server https://discord.gg/k5BwmmvJJU
3 | 🥳 Please join my patreon community https://patreon.com/camenduru
4 |
5 | ### 🍊 Jupyter Notebook
6 |
7 | | Notebook | Info |
8 | | --- | --- |
9 | [](https://colab.research.google.com/github/camenduru/InstantID-IPAdapter-ControlNet-jupyter/blob/main/InstantID_IPAdapter_ControlNet_jupyter.ipynb) | InstantID_IPAdapter_ControlNet_jupyter
The ComfyUI workflow in https://x.com/camenduru/status/1797122259133292576 has been converted to a Jupyter Notebook.
10 | [](https://colab.research.google.com/github/camenduru/InstantID-IPAdapter-ControlNet-jupyter/blob/main/InstantID_IPAdapter_ControlNet_FaceDetailer_jupyter.ipynb) | InstantID_IPAdapter_ControlNet_FaceDetailer_jupyter
11 | [](https://colab.research.google.com/github/camenduru/InstantID-IPAdapter-ControlNet-jupyter/blob/main/FaceDetailer_jupyter.ipynb) | FaceDetailer_jupyter
12 |
13 | ### 📋 Tutorial
14 |
15 | ComfyUI = TotoroUI (renamed, because ComfyUI is banned on Colab's free tier 😋)
16 |
17 | 
18 |
19 | ### 🧬 Code
20 | https://github.com/InstantID/InstantID
21 | https://github.com/tencent-ailab/IP-Adapter
22 | https://github.com/lllyasviel/ControlNet
23 | https://github.com/comfyanonymous/ComfyUI
24 | https://github.com/cubiq/ComfyUI_IPAdapter_plus
25 | https://github.com/cubiq/ComfyUI_essentials
26 | https://github.com/cubiq/ComfyUI_InstantID
27 | https://github.com/Ttl/ComfyUi_NNLatentUpscale
28 | https://github.com/ltdrdata/ComfyUI-Impact-Pack
29 | https://github.com/WASasquatch/was-node-suite-comfyui
30 |
31 | ### 📄 Paper
32 | https://arxiv.org/abs/2401.07519
33 | https://arxiv.org/abs/2308.06721
34 |
35 | ### 🌐 Page
36 | https://instantid.github.io/
37 | https://ip-adapter.github.io/
38 |
39 | ### 🖼 Output
40 | 
41 |
42 | ### 🏢 Sponsor
43 | https://runpod.io
44 |
--------------------------------------------------------------------------------
/FaceDetailer_jupyter.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "view-in-github"
7 | },
8 | "source": [
9 | "[](https://colab.research.google.com/github/camenduru/InstantID-IPAdapter-ControlNet-jupyter/blob/main/FaceDetailer_jupyter.ipynb)"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {
16 | "id": "VjYy0F2gZIPR"
17 | },
18 | "outputs": [],
19 | "source": [
20 | "%cd /content\n",
21 | "!git clone -b totoro https://github.com/camenduru/ComfyUI /content/TotoroUI\n",
22 | "!git clone -b totoro_v2 https://github.com/camenduru/ComfyUI_IPAdapter_plus /content/TotoroUI/IPAdapter\n",
23 | "!git clone -b totoro https://github.com/camenduru/ComfyUI_InstantID /content/TotoroUI/InstantID\n",
24 | "!git clone -b totoro https://github.com/camenduru/ComfyUI-Impact-Pack /content/TotoroUI/Impact\n",
25 | "\n",
26 | "!pip install -q torch==2.2.1+cu121 torchvision==0.17.1+cu121 torchaudio==2.2.1+cu121 torchtext==0.17.1 torchdata==0.7.1 --extra-index-url https://download.pytorch.org/whl/cu121\n",
27 | "!pip install -q torchsde einops diffusers accelerate xformers==0.0.25 insightface onnxruntime onnxruntime-gpu ultralytics==8.2.27 segment-anything piexif\n",
28 | "!apt -y install -qq aria2\n",
29 | "\n",
30 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://civitai.com/api/download/models/354657 -d /content/TotoroUI/models -o dreamshaperXL_lightningDPMSDE.safetensors\n",
31 | "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://civitai.com/api/download/models/470847 -d /content/TotoroUI/models -o raemuXL_v35Lightning.safetensors\n",
32 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/camenduru/IICF/resolve/main/ultralytics/bbox/Eyes.pt -d /content/TotoroUI/models/ultralytics/bbox -o Eyes.pt\n",
33 | "\n",
34 | "!wget https://huggingface.co/camenduru/IICF/resolve/main/test/anya.jpg -O /content/anya.jpg\n",
35 | "!wget https://huggingface.co/camenduru/IICF/resolve/main/test/pose_images.zip -O /content/pose_images.zip\n",
36 | "!unzip /content/pose_images.zip -d /content"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": null,
42 | "metadata": {},
43 | "outputs": [],
44 | "source": [
45 | "%cd /content/TotoroUI\n",
46 | "import sys\n",
47 | "sys.path.append('/content/TotoroUI/IPAdapter')\n",
48 | "import IPAdapterPlus\n",
49 | "sys.path.append('/content/TotoroUI/InstantID')\n",
50 | "import InstantID\n",
51 | "sys.path.append('/content/TotoroUI/Impact')\n",
52 | "import torch\n",
53 | "import numpy as np\n",
54 | "from PIL import Image\n",
55 | "import totoro\n",
56 | "import nodes\n",
57 | "import detailer\n",
58 | "import scipy\n",
59 | "import model_management\n",
60 | "import gc\n",
61 | "import random\n",
62 | "\n",
63 | "ran = random.randint(0, 65535)\n",
64 | "# ran = 10\n",
65 | "print(ran)\n",
66 | "\n",
67 | "with torch.inference_mode():\n",
68 | "    decoded = Image.open(\"/content/output_image.png\")  # image to detail; not downloaded above, so generate or upload it first\n",
69 | " decoded = detailer.pil2tensor(decoded)\n",
70 | "\n",
71 | " model_patcher, clip, vae, clipvision = totoro.sd.load_checkpoint_guess_config(\"/content/TotoroUI/models/dreamshaperXL_lightningDPMSDE.safetensors\", output_vae=True, output_clip=True, embedding_directory=None)\n",
72 | "\n",
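"    # FaceDetailer flow: a YOLO bbox model finds the eyes, each detection becomes a SEG\n",
"    # (cropped region + mask), and every SEG is re-sampled in place at low denoise.\n",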
73 | " model = detailer.load_yolo(\"/content/TotoroUI/models/ultralytics/bbox/Eyes.pt\")\n",
74 | " bbox_detector = detailer.UltraBBoxDetector(model)\n",
75 | "\n",
76 | " bbox_detector.setAux('face')\n",
77 | " segs = bbox_detector.detect(image=decoded, threshold=0.50, dilation=10, crop_factor=3.0, drop_size=10, detailer_hook=None)\n",
78 | " bbox_detector.setAux(None)\n",
79 | "\n",
80 | " face_tokens = clip.tokenize(\"perfect eyes\")\n",
81 | " face_cond, face_pooled = clip.encode_from_tokens(face_tokens, return_pooled=True)\n",
82 | " face_cond = [[face_cond, {\"pooled_output\": face_pooled}]]\n",
83 | " face_n_tokens = clip.tokenize(\"deformed pupils, deformed eyes, ugly eyes\")\n",
84 | " face_n_cond, face_n_pooled = clip.encode_from_tokens(face_n_tokens, return_pooled=True)\n",
85 | " face_n_cond = [[face_n_cond, {\"pooled_output\": face_n_pooled}]]\n",
86 | "\n",
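"    # do_detail crops each SEG (upscaled toward guide_size 384, capped at max_size 1024),\n",
"    # inpaints it at denoise 0.35 with the eye prompts, feathers the mask, and pastes it back.\n",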
87 | " enhanced_img, _, cropped_enhanced, cropped_enhanced_alpha, cnet_pil_list, new_segs = \\\n",
88 | " detailer.DetailerForEach.do_detail(image=decoded, segs=segs, model=model_patcher, clip=clip, vae=vae, guide_size=384, guide_size_for_bbox=True, max_size=1024, seed=ran, steps=4, cfg=1.0,\n",
89 | " sampler_name=\"dpmpp_sde\", scheduler=\"karras\", positive=face_cond, negative=face_n_cond, denoise=0.35, feather=5, noise_mask=True,\n",
90 | " force_inpaint=True, wildcard_opt=None, detailer_hook=None, refiner_ratio=0.2, refiner_model=None, refiner_clip=None, refiner_positive=None, refiner_negative=None,\n",
91 | " cycle=1, inpaint_model=False, noise_mask_feather=20)\n",
92 | "    upscaled_img = nodes.ImageScaleBy().upscale(image=enhanced_img, upscale_method=\"lanczos\", scale_by=1.50)\n",
93 | " adjust_img = detailer.image_filters(image=upscaled_img[0], brightness=0.0, contrast=1.0, saturation=1.0, sharpness=4, blur=0, gaussian_blur=0.0, edge_enhance=0.10, detail_enhance=True)\n",
94 | "\n",
95 | "Image.fromarray(np.array(adjust_img[0]*255, dtype=np.uint8)[0])"
96 | ]
97 | }
98 | ],
99 | "metadata": {
100 | "accelerator": "GPU",
101 | "colab": {
102 | "gpuType": "T4",
103 | "provenance": []
104 | },
105 | "kernelspec": {
106 | "display_name": "Python 3",
107 | "name": "python3"
108 | },
109 | "language_info": {
110 | "name": "python"
111 | }
112 | },
113 | "nbformat": 4,
114 | "nbformat_minor": 0
115 | }
116 |
--------------------------------------------------------------------------------
/InstantID_IPAdapter_ControlNet_jupyter.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "view-in-github"
7 | },
8 | "source": [
9 | "[](https://colab.research.google.com/github/camenduru/InstantID-IPAdapter-ControlNet-jupyter/blob/main/InstantID_IPAdapter_ControlNet_jupyter.ipynb)"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {
16 | "id": "VjYy0F2gZIPR"
17 | },
18 | "outputs": [],
19 | "source": [
20 | "%cd /content\n",
21 | "!git clone -b totoro https://github.com/camenduru/ComfyUI /content/TotoroUI\n",
22 | "!git clone -b totoro_v2 https://github.com/camenduru/ComfyUI_IPAdapter_plus /content/TotoroUI/IPAdapter\n",
23 | "!git clone -b totoro https://github.com/camenduru/ComfyUI_InstantID /content/TotoroUI/InstantID\n",
24 | "\n",
25 | "!pip install -q torch==2.2.1+cu121 torchvision==0.17.1+cu121 torchaudio==2.2.1+cu121 torchtext==0.17.1 torchdata==0.7.1 --extra-index-url https://download.pytorch.org/whl/cu121\n",
26 | "!pip install -q torchsde einops diffusers accelerate xformers==0.0.25 insightface onnxruntime onnxruntime-gpu\n",
27 | "!apt -y install -qq aria2\n",
28 | "\n",
29 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://civitai.com/api/download/models/470847 -d /content/TotoroUI/models -o raemuXL_v35Lightning.safetensors\n",
30 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/h94/IP-Adapter/resolve/main/models/image_encoder/model.safetensors -d /content/TotoroUI/models/clip_vision -o CLIP-ViT-H-14-laion2B-s32B-b79K.safetensors\n",
31 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/ip-adapter-plus-face_sdxl_vit-h.safetensors -d /content/TotoroUI/models/ipadapter -o ip-adapter-plus-face_sdxl_vit-h.safetensors\n",
32 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lllyasviel/sd_control_collection/resolve/main/thibaud_xl_openpose.safetensors -d /content/TotoroUI/models/controlnet -o thibaud_xl_openpose.safetensors\n",
33 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://github.com/Ttl/ComfyUi_NNLatentUpscale/raw/master/sdxl_resizer.pt -d /content/TotoroUI/models -o sdxl_resizer.pt\n",
34 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/1k3d68.onnx -d /content/TotoroUI/models/insightface/models/antelopev2 -o 1k3d68.onnx\n",
35 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/2d106det.onnx -d /content/TotoroUI/models/insightface/models/antelopev2 -o 2d106det.onnx\n",
36 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/genderage.onnx -d /content/TotoroUI/models/insightface/models/antelopev2 -o genderage.onnx\n",
37 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/glintr100.onnx -d /content/TotoroUI/models/insightface/models/antelopev2 -o glintr100.onnx\n",
38 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/scrfd_10g_bnkps.onnx -d /content/TotoroUI/models/insightface/models/antelopev2 -o scrfd_10g_bnkps.onnx\n",
39 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/InstantX/InstantID/resolve/main/ip-adapter.bin -d /content/TotoroUI/models/instantid -o ip-adapter.bin\n",
40 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/InstantX/InstantID/resolve/main/ControlNetModel/diffusion_pytorch_model.safetensors -d /content/TotoroUI/models/controlnet/SDXL/instantid -o diffusion_pytorch_model.safetensors\n",
41 | "\n",
42 | "!wget https://huggingface.co/camenduru/IICF/resolve/main/test/anya.jpg -O /content/anya.jpg\n",
43 | "!wget https://huggingface.co/camenduru/IICF/resolve/main/test/pose_images.zip -O /content/pose_images.zip\n",
44 | "!unzip /content/pose_images.zip -d /content"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": null,
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "%cd /content/TotoroUI\n",
54 | "import torch\n",
55 | "import numpy as np\n",
56 | "from PIL import Image\n",
57 | "import totoro\n",
58 | "import nodes\n",
59 | "import sys\n",
60 | "sys.path.append('/content/TotoroUI/IPAdapter')\n",
61 | "import IPAdapterPlus\n",
62 | "sys.path.append('/content/TotoroUI/InstantID')\n",
63 | "import InstantID\n",
64 | "import scipy\n",
66 | "from latent_resizer import LatentResizer\n",
67 | "from totoro import model_management\n",
68 | "import gc\n",
69 | "import random\n",
70 | "\n",
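"# NN latent upscale (weights from ComfyUi_NNLatentUpscale): resize the SDXL latent directly with a\n",
"# small learned resizer; 0.13025 is the SDXL latent scale factor, applied before and undone after.\n",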
71 | "def upscale(latent, scale):\n",
72 | " device = model_management.get_torch_device()\n",
73 | " samples = latent.to(device=device, dtype=torch.float16)\n",
74 | " model = LatentResizer.load_model('/content/TotoroUI/models/sdxl_resizer.pt', device, torch.float16)\n",
75 | " model.to(device=device)\n",
76 | "    latent_out = model(0.13025 * samples, scale=scale) / 0.13025\n",
77 | " latent_out = latent_out.to(device=\"cpu\")\n",
78 | " model.to(device=model_management.vae_offload_device())\n",
79 | " return ({\"samples\": latent_out},)\n",
80 | "\n",
81 | "with torch.no_grad():\n",
82 | " model_patcher, clip, vae, clipvision = totoro.sd.load_checkpoint_guess_config(\"/content/TotoroUI/models/raemuXL_v35Lightning.safetensors\", output_vae=True, output_clip=True, embedding_directory=None)\n",
83 | " IPAdapterPlus_model = IPAdapterPlus.IPAdapterUnifiedLoader().load_models(model_patcher, 'PLUS FACE (portraits)', lora_strength=0.0, provider=\"CUDA\")\n",
84 | " instantid = InstantID.InstantIDModelLoader().load_model(\"/content/TotoroUI/models/instantid/ip-adapter.bin\")\n",
85 | " insightface = InstantID.InstantIDFaceAnalysis().load_insight_face(\"CUDA\")\n",
86 | " instantid_control_net = totoro.controlnet.load_controlnet(\"/content/TotoroUI/models/controlnet/SDXL/instantid/diffusion_pytorch_model.safetensors\")\n",
87 | " output_image, output_mask = nodes.LoadImage().load_image(\"/content/anya.jpg\") \n",
88 | " image_kps, image_kps_mask = nodes.LoadImage().load_image(\"/content/pose_images/pose_kps_00008_.png\")\n",
89 | " image_dw, image_dw_mask = nodes.LoadImage().load_image(\"/content/pose_images/pose_dw_pose_00008_.png\")\n",
90 | " ip_model_patcher = IPAdapterPlus.IPAdapterAdvanced().apply_ipadapter(IPAdapterPlus_model[0], IPAdapterPlus_model[1], image=output_image, weight_type=\"style transfer\")\n",
91 | " tokens = clip.tokenize(\"1girl\")\n",
92 | " cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True)\n",
93 | " cond = [[cond, {\"pooled_output\": pooled}]]\n",
94 | " n_tokens = clip.tokenize(\"(nsfw:1.5), nipple, nude, naked, lowres, child, getty, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry, artist name, trademark, watermark, title, multiple view, reference sheet, mutated hands and fingers, poorly drawn face, mutation, deformed, ugly, bad proportions, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck, tatoo, amateur drawing, odd eyes, uneven eyes, unnatural face, uneven nostrils, crooked mouth, bad teeth, crooked teeth, photoshop, video game, censor, censored, ghost, b&w, weird colors, gradient background, spotty background, blurry background, ugly background, simple background, realistic, out of frame, extra objects, gross, ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of focus, blurry, very long body, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn eyes, cloned face, disfigured, deformed, cross-eye, extra limbs, missing limb, malformed hands, mutated, morbid, mutilated, disfigured, extra arms, extra hands, mangled fingers, contorted, conjoined, mismatched limbs, mismatched parts, bad perspective, black and white, oversaturated, undersaturated, bad shadow, cropped image, draft, grainy, pixelated\")\n",
95 | " n_cond, n_pooled = clip.encode_from_tokens(n_tokens, return_pooled=True)\n",
96 | " n_cond = [[n_cond, {\"pooled_output\": n_pooled}]]\n",
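"    # ApplyInstantID patches the model with the face embedding and wires its own ControlNet into\n",
"    # the conds; the OpenPose ControlNet is then chained on top of the positive conditioning.\n",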
97 | " work_model, instantid_cond, instantid_n_cond = InstantID.ApplyInstantID().apply_instantid(instantid=instantid[0], insightface=insightface[0], control_net=instantid_control_net, image=output_image, model=ip_model_patcher[0], positive=cond, negative=n_cond, start_at=0.0, end_at=1.0, weight=0.80, image_kps=image_kps)\n",
98 | " openpose_control_net = totoro.controlnet.load_controlnet(\"/content/TotoroUI/models/controlnet/thibaud_xl_openpose.safetensors\")\n",
99 | " openpose_cond = nodes.ControlNetApply().apply_controlnet(conditioning=instantid_cond, control_net=openpose_control_net, image=image_dw, strength=0.90)"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "instantid = None\n",
109 | "insightface = None\n",
110 | "instantid_control_net = None\n",
111 | "ip_model_patcher = None\n",
112 | "cond = None\n",
113 | "n_cond = None\n",
114 | "model_patcher = None\n",
115 | "clip = None\n",
116 | "clipvision = None\n",
117 | "IPAdapterPlus_model = None\n",
118 | "model_management.cleanup_models()\n",
119 | "gc.collect()\n",
120 | "model_management.soft_empty_cache()\n",
121 | "\n",
122 | "ran = random.randint(0, 65535)\n",
123 | "print(ran)\n",
124 | "\n",
125 | "with torch.no_grad():\n",
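"    # Empty 1024x1024 SDXL latent (4 channels, 1/8 spatial); the Lightning checkpoint only needs 4 steps at low cfg.\n",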
126 | " latent = {\"samples\":torch.zeros([1, 4, 1024 // 8, 1024 // 8])}\n",
127 | " sample = nodes.common_ksampler(model=work_model, \n",
128 | " seed=ran, \n",
129 | " steps=4, \n",
130 | " cfg=1.3, \n",
131 | " sampler_name=\"dpmpp_sde_gpu\", \n",
132 | " scheduler=\"karras\", \n",
133 | " positive=openpose_cond[0], \n",
134 | " negative=instantid_n_cond,\n",
135 | " latent=latent, \n",
136 | " denoise=0.95)\n",
137 | "\n",
138 | " with torch.inference_mode():\n",
139 | " sample = sample[0][\"samples\"].to(torch.float16)\n",
140 | " vae.first_stage_model.cuda()\n",
141 | " decoded = vae.decode_tiled(sample).detach()\n",
142 | "\n",
143 | "Image.fromarray(np.array(decoded.clamp(0, 1)*255, dtype=np.uint8)[0])"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": null,
149 | "metadata": {},
150 | "outputs": [],
151 | "source": [
152 | "with torch.no_grad():\n",
153 | " latent = upscale(sample, 1.5)\n",
154 | " sample = nodes.common_ksampler(model=work_model,\n",
155 | " seed=ran,\n",
156 | " steps=4, \n",
157 | " cfg=1.3, \n",
158 | " sampler_name=\"dpmpp_sde_gpu\", \n",
159 | " scheduler=\"karras\", \n",
160 | " positive=openpose_cond[0], \n",
161 | " negative=instantid_n_cond,\n",
162 | " latent=latent[0],\n",
163 | " denoise=0.55)\n",
164 | " with torch.inference_mode():\n",
165 | " sample = sample[0][\"samples\"].to(torch.float16)\n",
166 | " vae.first_stage_model.cuda()\n",
167 | " decoded = vae.decode_tiled(sample).detach()\n",
168 | "Image.fromarray(np.array(decoded.clamp(0, 1)*255, dtype=np.uint8)[0])"
169 | ]
170 | }
171 | ],
172 | "metadata": {
173 | "accelerator": "GPU",
174 | "colab": {
175 | "gpuType": "T4",
176 | "provenance": []
177 | },
178 | "kernelspec": {
179 | "display_name": "Python 3",
180 | "name": "python3"
181 | },
182 | "language_info": {
183 | "name": "python"
184 | }
185 | },
186 | "nbformat": 4,
187 | "nbformat_minor": 0
188 | }
189 |
--------------------------------------------------------------------------------
/InstantID_IPAdapter_ControlNet_FaceDetailer_jupyter.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "view-in-github"
7 | },
8 | "source": [
9 | "[](https://colab.research.google.com/github/camenduru/InstantID-IPAdapter-ControlNet-jupyter/blob/main/InstantID_IPAdapter_ControlNet_FaceDetailer_jupyter.ipynb)"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {
16 | "id": "VjYy0F2gZIPR"
17 | },
18 | "outputs": [],
19 | "source": [
20 | "%cd /content\n",
21 | "!git clone -b totoro https://github.com/camenduru/ComfyUI /content/TotoroUI\n",
22 | "!git clone -b totoro_v2 https://github.com/camenduru/ComfyUI_IPAdapter_plus /content/TotoroUI/IPAdapter\n",
23 | "!git clone -b totoro https://github.com/camenduru/ComfyUI_InstantID /content/TotoroUI/InstantID\n",
24 | "!git clone -b totoro https://github.com/camenduru/ComfyUI-Impact-Pack /content/TotoroUI/Impact\n",
25 | "\n",
26 | "!pip install -q torch==2.2.1+cu121 torchvision==0.17.1+cu121 torchaudio==2.2.1+cu121 torchtext==0.17.1 torchdata==0.7.1 --extra-index-url https://download.pytorch.org/whl/cu121\n",
27 | "!pip install -q torchsde einops diffusers accelerate xformers==0.0.25 insightface onnxruntime onnxruntime-gpu ultralytics==8.2.27 segment-anything piexif\n",
28 | "!apt -y install -qq aria2\n",
29 | "\n",
30 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://civitai.com/api/download/models/354657 -d /content/TotoroUI/models -o dreamshaperXL_lightningDPMSDE.safetensors\n",
31 | "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://civitai.com/api/download/models/470847 -d /content/TotoroUI/models -o raemuXL_v35Lightning.safetensors\n",
32 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/h94/IP-Adapter/resolve/main/models/image_encoder/model.safetensors -d /content/TotoroUI/models/clip_vision -o CLIP-ViT-H-14-laion2B-s32B-b79K.safetensors\n",
33 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/ip-adapter-plus-face_sdxl_vit-h.safetensors -d /content/TotoroUI/models/ipadapter -o ip-adapter-plus-face_sdxl_vit-h.safetensors\n",
34 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lllyasviel/sd_control_collection/resolve/main/thibaud_xl_openpose.safetensors -d /content/TotoroUI/models/controlnet -o thibaud_xl_openpose.safetensors\n",
35 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://github.com/Ttl/ComfyUi_NNLatentUpscale/raw/master/sdxl_resizer.pt -d /content/TotoroUI/models -o sdxl_resizer.pt\n",
36 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/1k3d68.onnx -d /content/TotoroUI/models/insightface/models/antelopev2 -o 1k3d68.onnx\n",
37 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/2d106det.onnx -d /content/TotoroUI/models/insightface/models/antelopev2 -o 2d106det.onnx\n",
38 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/genderage.onnx -d /content/TotoroUI/models/insightface/models/antelopev2 -o genderage.onnx\n",
39 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/glintr100.onnx -d /content/TotoroUI/models/insightface/models/antelopev2 -o glintr100.onnx\n",
40 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/scrfd_10g_bnkps.onnx -d /content/TotoroUI/models/insightface/models/antelopev2 -o scrfd_10g_bnkps.onnx\n",
41 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/InstantX/InstantID/resolve/main/ip-adapter.bin -d /content/TotoroUI/models/instantid -o ip-adapter.bin\n",
42 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/InstantX/InstantID/resolve/main/ControlNetModel/diffusion_pytorch_model.safetensors -d /content/TotoroUI/models/controlnet/SDXL/instantid -o diffusion_pytorch_model.safetensors\n",
43 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/camenduru/IICF/resolve/main/ultralytics/bbox/Eyes.pt -d /content/TotoroUI/models/ultralytics/bbox -o Eyes.pt\n",
44 | "\n",
45 | "!wget https://huggingface.co/camenduru/IICF/resolve/main/test/anya.jpg -O /content/anya.jpg\n",
46 | "!wget https://huggingface.co/camenduru/IICF/resolve/main/test/pose_images.zip -O /content/pose_images.zip\n",
47 | "!unzip /content/pose_images.zip -d /content"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": null,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "%cd /content/TotoroUI\n",
57 | "import sys\n",
58 | "sys.path.append('/content/TotoroUI/IPAdapter')\n",
59 | "import IPAdapterPlus\n",
60 | "sys.path.append('/content/TotoroUI/InstantID')\n",
61 | "import InstantID\n",
62 | "sys.path.append('/content/TotoroUI/Impact')\n",
63 | "import torch\n",
64 | "import numpy as np\n",
65 | "from PIL import Image\n",
66 | "import totoro\n",
67 | "import nodes\n",
68 | "import detailer\n",
69 | "import scipy\n",
70 | "import model_management\n",
71 | "import gc\n",
72 | "import random\n",
73 | "\n",
74 | "with torch.no_grad():\n",
75 | " model_patcher, clip, vae, clipvision = totoro.sd.load_checkpoint_guess_config(\"/content/TotoroUI/models/dreamshaperXL_lightningDPMSDE.safetensors\", output_vae=True, output_clip=True, embedding_directory=None)\n",
76 | " # model_patcher, clip, vae, clipvision = totoro.sd.load_checkpoint_guess_config(\"/content/TotoroUI/models/raemuXL_v35Lightning.safetensors\", output_vae=True, output_clip=True, embedding_directory=None)\n",
77 | " IPAdapterPlus_model = IPAdapterPlus.IPAdapterUnifiedLoader().load_models(model_patcher, 'PLUS FACE (portraits)', lora_strength=0.0, provider=\"CUDA\")\n",
78 | " instantid = InstantID.InstantIDModelLoader().load_model(\"/content/TotoroUI/models/instantid/ip-adapter.bin\")\n",
79 | " insightface = InstantID.InstantIDFaceAnalysis().load_insight_face(\"CUDA\")\n",
80 | " instantid_control_net = totoro.controlnet.load_controlnet(\"/content/TotoroUI/models/controlnet/SDXL/instantid/diffusion_pytorch_model.safetensors\")\n",
81 | " output_image, output_mask = nodes.LoadImage().load_image(\"/content/anya.jpg\") \n",
82 | " image_kps, image_kps_mask = nodes.LoadImage().load_image(\"/content/pose_images/headshot_kps_00003_.png\")\n",
83 | " image_dw, image_dw_mask = nodes.LoadImage().load_image(\"/content/pose_images/headshot_dw_pose_00003_.png\")\n",
84 | " ip_model_patcher = IPAdapterPlus.IPAdapterAdvanced().apply_ipadapter(IPAdapterPlus_model[0], IPAdapterPlus_model[1], image=output_image, weight_type=\"style transfer\")\n",
85 | " tokens = clip.tokenize(\"a portrait photo of a woman\")\n",
86 | " cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True)\n",
87 | " cond = [[cond, {\"pooled_output\": pooled}]]\n",
88 | " n_tokens = clip.tokenize(\"(nsfw:1.5), nipple, nude, naked, lowres, child, getty, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry, artist name, trademark, watermark, title, multiple view, reference sheet, mutated hands and fingers, poorly drawn face, mutation, deformed, ugly, bad proportions, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck, tatoo, amateur drawing, odd eyes, uneven eyes, unnatural face, uneven nostrils, crooked mouth, bad teeth, crooked teeth, photoshop, video game, censor, censored, ghost, b&w, weird colors, gradient background, spotty background, blurry background, ugly background, simple background, realistic, out of frame, extra objects, gross, ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of focus, blurry, very long body, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn eyes, cloned face, disfigured, deformed, cross-eye, extra limbs, missing limb, malformed hands, mutated, morbid, mutilated, disfigured, extra arms, extra hands, mangled fingers, contorted, conjoined, mismatched limbs, mismatched parts, bad perspective, black and white, oversaturated, undersaturated, bad shadow, cropped image, draft, grainy, pixelated\")\n",
89 | " n_cond, n_pooled = clip.encode_from_tokens(n_tokens, return_pooled=True)\n",
90 | " n_cond = [[n_cond, {\"pooled_output\": n_pooled}]]\n",
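"    # InstantID patches the model with the face embedding; the OpenPose ControlNet is chained after it.\n",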
91 | " work_model, instantid_cond, instantid_n_cond = InstantID.ApplyInstantID().apply_instantid(instantid=instantid[0], insightface=insightface[0], control_net=instantid_control_net, image=output_image, model=ip_model_patcher[0], positive=cond, negative=n_cond, start_at=0.0, end_at=1.0, weight=0.80, image_kps=image_kps)\n",
92 | " openpose_control_net = totoro.controlnet.load_controlnet(\"/content/TotoroUI/models/controlnet/thibaud_xl_openpose.safetensors\")\n",
93 | " openpose_cond = nodes.ControlNetApply().apply_controlnet(conditioning=instantid_cond, control_net=openpose_control_net, image=image_dw, strength=0.90)"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": null,
99 | "metadata": {},
100 | "outputs": [],
101 | "source": [
102 | "instantid = None\n",
103 | "insightface = None\n",
104 | "instantid_control_net = None\n",
105 | "ip_model_patcher = None\n",
106 | "cond = None\n",
107 | "n_cond = None\n",
108 | "model_patcher = None\n",
109 | "clip = None\n",
110 | "clipvision = None\n",
111 | "IPAdapterPlus_model = None\n",
112 | "model_management.cleanup_models()\n",
113 | "gc.collect()\n",
114 | "model_management.soft_empty_cache()\n",
115 | "\n",
116 | "ran = random.randint(0, 65535)\n",
117 | "# ran = 10\n",
118 | "print(ran)\n",
119 | "\n",
120 | "with torch.inference_mode():\n",
121 | " latent = {\"samples\":torch.zeros([1, 4, 1024 // 8, 1024 // 8])}\n",
122 | " sample = nodes.common_ksampler(model=work_model, \n",
123 | " seed=ran, \n",
124 | " steps=4, \n",
125 | " cfg=1.3, \n",
126 | " sampler_name=\"dpmpp_sde_gpu\", \n",
127 | " scheduler=\"karras\", \n",
128 | " positive=openpose_cond[0], \n",
129 | " negative=instantid_n_cond,\n",
130 | " latent=latent, \n",
131 | " denoise=0.95)\n",
132 | "\n",
133 | " sample = sample[0][\"samples\"].to(torch.float16)\n",
134 | " vae.first_stage_model.cuda()\n",
135 | " decoded = vae.decode(sample).detach()\n",
136 | "\n",
137 | "Image.fromarray(np.array(decoded.clamp(0, 1)*255, dtype=np.uint8)[0]).save(\"/content/output_image.png\")"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "metadata": {},
144 | "outputs": [],
145 | "source": [
146 | "exit()  # on a T4, restart the runtime here to free VRAM before the eye-detailer cell below"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": null,
152 | "metadata": {},
153 | "outputs": [],
154 | "source": [
155 | "%cd /content/TotoroUI\n",
156 | "import sys\n",
157 | "sys.path.append('/content/TotoroUI/IPAdapter')\n",
158 | "import IPAdapterPlus\n",
159 | "sys.path.append('/content/TotoroUI/InstantID')\n",
160 | "import InstantID\n",
161 | "sys.path.append('/content/TotoroUI/Impact')\n",
162 | "import torch\n",
163 | "import numpy as np\n",
164 | "from PIL import Image\n",
165 | "import totoro\n",
166 | "import nodes\n",
167 | "import detailer\n",
168 | "import scipy\n",
169 | "import model_management\n",
170 | "import gc\n",
171 | "import random\n",
172 | "\n",
173 | "ran = random.randint(0, 65535)\n",
174 | "# ran = 10\n",
175 | "print(ran)\n",
176 | "\n",
177 | "with torch.inference_mode():\n",
178 | "    decoded = Image.open(\"/content/output_image.png\")  # saved by the sampling cell above\n",
179 | " decoded = detailer.pil2tensor(decoded)\n",
180 | "\n",
181 | " model_patcher, clip, vae, clipvision = totoro.sd.load_checkpoint_guess_config(\"/content/TotoroUI/models/dreamshaperXL_lightningDPMSDE.safetensors\", output_vae=True, output_clip=True, embedding_directory=None)\n",
182 | "\n",
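"    # Eye detailer: YOLO bbox detection turns each eye into a SEG, which is then re-sampled in place.\n",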
183 | " model = detailer.load_yolo(\"/content/TotoroUI/models/ultralytics/bbox/Eyes.pt\")\n",
184 | " bbox_detector = detailer.UltraBBoxDetector(model)\n",
185 | "\n",
186 | " bbox_detector.setAux('face')\n",
187 | " segs = bbox_detector.detect(image=decoded, threshold=0.50, dilation=10, crop_factor=3.0, drop_size=10, detailer_hook=None)\n",
188 | " bbox_detector.setAux(None)\n",
189 | "\n",
190 | " face_tokens = clip.tokenize(\"perfect eyes\")\n",
191 | " face_cond, face_pooled = clip.encode_from_tokens(face_tokens, return_pooled=True)\n",
192 | " face_cond = [[face_cond, {\"pooled_output\": face_pooled}]]\n",
193 | " face_n_tokens = clip.tokenize(\"deformed pupils, deformed eyes, ugly eyes\")\n",
194 | " face_n_cond, face_n_pooled = clip.encode_from_tokens(face_n_tokens, return_pooled=True)\n",
195 | " face_n_cond = [[face_n_cond, {\"pooled_output\": face_n_pooled}]]\n",
196 | "\n",
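"    # Crop each SEG to guide_size, inpaint at denoise 0.35 with the eye prompts, and paste it back feathered.\n",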
197 | " enhanced_img, _, cropped_enhanced, cropped_enhanced_alpha, cnet_pil_list, new_segs = \\\n",
198 | " detailer.DetailerForEach.do_detail(image=decoded, segs=segs, model=model_patcher, clip=clip, vae=vae, guide_size=384, guide_size_for_bbox=True, max_size=1024, seed=ran, steps=4, cfg=1.0,\n",
199 | " sampler_name=\"dpmpp_sde\", scheduler=\"karras\", positive=face_cond, negative=face_n_cond, denoise=0.35, feather=5, noise_mask=True,\n",
200 | " force_inpaint=True, wildcard_opt=None, detailer_hook=None, refiner_ratio=0.2, refiner_model=None, refiner_clip=None, refiner_positive=None, refiner_negative=None,\n",
201 | " cycle=1, inpaint_model=False, noise_mask_feather=20)\n",
202 | "    upscaled_img = nodes.ImageScaleBy().upscale(image=enhanced_img, upscale_method=\"lanczos\", scale_by=1.50)\n",
203 | " adjust_img = detailer.image_filters(image=upscaled_img[0], brightness=0.0, contrast=1.0, saturation=1.0, sharpness=4, blur=0, gaussian_blur=0.0, edge_enhance=0.10, detail_enhance=True)\n",
204 | "\n",
205 | "Image.fromarray(np.array(adjust_img[0]*255, dtype=np.uint8)[0])"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": null,
211 | "metadata": {},
212 | "outputs": [],
213 | "source": [
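"# Optional polish pass: re-encode the detailed image and re-sample it at denoise 0.20 with the\n",
"# InstantID conditioning, then detail the eyes once more. This reuses work_model / instantid_cond\n",
"# from before the exit() above, so skip that restart (needs more VRAM than a T4 offers).\n",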
214 | "model_management.cleanup_models()\n",
215 | "gc.collect()\n",
216 | "model_management.soft_empty_cache()\n",
217 | "\n",
218 | "with torch.inference_mode():\n",
219 | " latent = { \"samples\": vae.encode(adjust_img[0]) } \n",
220 | " sample = nodes.common_ksampler(model=work_model, \n",
221 | " seed=ran, \n",
222 | " steps=4, \n",
223 | " cfg=1.3, \n",
224 | " sampler_name=\"dpmpp_sde_gpu\", \n",
225 | " scheduler=\"karras\", \n",
226 | " positive=instantid_cond, \n",
227 | " negative=instantid_n_cond,\n",
228 | " latent=latent, \n",
229 | " denoise=0.20)\n",
230 | "\n",
231 | " sample = sample[0][\"samples\"].to(torch.float16)\n",
232 | "\n",
233 | " vae.first_stage_model.cuda()\n",
234 | " decoded = vae.decode(sample).detach()\n",
235 | " enhanced_img, _, cropped_enhanced, cropped_enhanced_alpha, cnet_pil_list, new_segs = \\\n",
236 | " detailer.DetailerForEach.do_detail(image=decoded, segs=segs, model=model_patcher, clip=clip, vae=vae, guide_size=384, guide_size_for_bbox=True, max_size=1024, seed=ran, steps=4, cfg=1.0,\n",
237 | " sampler_name=\"dpmpp_sde\", scheduler=\"karras\", positive=face_cond, negative=face_n_cond, denoise=0.35, feather=5, noise_mask=True,\n",
238 | " force_inpaint=True, wildcard_opt=None, detailer_hook=None, refiner_ratio=0.2, refiner_model=None, refiner_clip=None, refiner_positive=None, refiner_negative=None,\n",
239 | " cycle=1, inpaint_model=False, noise_mask_feather=20)\n",
240 | "    upscaled_img = nodes.ImageScaleBy().upscale(image=enhanced_img, upscale_method=\"lanczos\", scale_by=1.50)\n",
241 | " adjust_img = detailer.image_filters(image=upscaled_img[0], brightness=0.0, contrast=1.0, saturation=1.0, sharpness=4, blur=0, gaussian_blur=0.0, edge_enhance=0.10, detail_enhance=True)\n",
242 | "Image.fromarray(np.array(adjust_img[0]*255, dtype=np.uint8)[0])"
243 | ]
244 | }
245 | ],
246 | "metadata": {
247 | "accelerator": "GPU",
248 | "colab": {
249 | "gpuType": "T4",
250 | "provenance": []
251 | },
252 | "kernelspec": {
253 | "display_name": "Python 3",
254 | "name": "python3"
255 | },
256 | "language_info": {
257 | "name": "python"
258 | }
259 | },
260 | "nbformat": 4,
261 | "nbformat_minor": 0
262 | }
263 |
--------------------------------------------------------------------------------