├── .gitignore
├── LICENSE
├── README.md
├── data
│   ├── input
│   │   ├── 1545.png
│   │   ├── 855.png
│   │   └── 94.png
│   ├── sample_prompts.txt
│   └── smpl_uv_mask.png
├── doc
│   ├── method.png
│   ├── overview.png
│   └── pipeline.png
├── infer_i2uv.py
├── infer_t2uv.py
├── model_i2t.py
├── output
│   ├── i2uv
│   │   ├── 1545.png
│   │   ├── 1545_a.png
│   │   ├── 855.png
│   │   ├── 855_a.png
│   │   ├── 94.png
│   │   └── 94_a.png
│   └── t2uv
│       ├── 0000.png
│       ├── 0001.png
│       ├── 0002.png
│       ├── 0003.png
│       ├── 0004.png
│       └── 0005.png
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by https://www.toptal.com/developers/gitignore/api/python
2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python
3 |
4 | ### Python ###
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 | cover/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | .pybuilder/
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # IPython
86 | profile_default/
87 | ipython_config.py
88 |
89 | # pyenv
90 | # For a library or package, you might want to ignore these files since the code is
91 | # intended to run in multiple environments; otherwise, check them in:
92 | # .python-version
93 |
94 | # pipenv
95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
98 | # install all needed dependencies.
99 | #Pipfile.lock
100 |
101 | # poetry
102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
103 | # This is especially recommended for binary packages to ensure reproducibility, and is more
104 | # commonly ignored for libraries.
105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
106 | #poetry.lock
107 |
108 | # pdm
109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
110 | #pdm.lock
111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
112 | # in version control.
113 | # https://pdm.fming.dev/#use-with-ide
114 | .pdm.toml
115 |
116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
117 | __pypackages__/
118 |
119 | # Celery stuff
120 | celerybeat-schedule
121 | celerybeat.pid
122 |
123 | # SageMath parsed files
124 | *.sage.py
125 |
126 | # Environments
127 | .env
128 | .venv
129 | env/
130 | venv/
131 | ENV/
132 | env.bak/
133 | venv.bak/
134 |
135 | # Spyder project settings
136 | .spyderproject
137 | .spyproject
138 |
139 | # Rope project settings
140 | .ropeproject
141 |
142 | # mkdocs documentation
143 | /site
144 |
145 | # mypy
146 | .mypy_cache/
147 | .dmypy.json
148 | dmypy.json
149 |
150 | # Pyre type checker
151 | .pyre/
152 |
153 | # pytype static type analyzer
154 | .pytype/
155 |
156 | # Cython debug symbols
157 | cython_debug/
158 |
159 | # PyCharm
160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
162 | # and can be added to the global gitignore or merged into this file. For a more nuclear
163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
164 | .idea/
165 |
166 | ### Python Patch ###
167 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
168 | poetry.toml
169 |
170 | # ruff
171 | .ruff_cache/
172 |
173 | # LSP config files
174 | pyrightconfig.json
175 |
176 | # End of https://www.toptal.com/developers/gitignore/api/python
177 |
178 | .vscode/
179 | .threestudio_cache/
180 | outputs/
181 | outputs-gradio/
182 |
183 | # pretrained model weights
184 | *.ckpt
185 | *.pt
186 | *.pth
187 |
188 | # wandb
189 | wandb/
190 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Kuma
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # [ECCV 2024 Oral] TexDreamer: Towards Zero-Shot High-Fidelity 3D Human Texture Generation
2 |
3 |
4 | ### [Project Page](https://ggxxii.github.io/texdreamer/) | [Paper (ArXiv)](https://arxiv.org/abs/2403.12906) | [ATLAS Dataset](https://huggingface.co/datasets/ggxxii/ATLAS)
5 |
6 |
7 | [Yufei Liu](https://ggxxii.github.io/)<sup>1</sup>,
8 | Junwei Zhu<sup>2</sup>,
9 | [Junshu Tang](https://junshutang.github.io/)<sup>3</sup>,
10 | Shijie Zhang<sup>4</sup>,
11 | [Jiangning Zhang](https://zhangzjn.github.io/)<sup>2</sup>,
12 | Weijian Cao<sup>2</sup>,
13 | Chengjie Wang<sup>2</sup>,
14 | Yunsheng Wu<sup>2</sup>,
15 | Dongjin Huang<sup>1*</sup>
16 |
17 |
18 | <sup>1</sup>Shanghai University, <sup>2</sup>Tencent Youtu Lab, <sup>3</sup>Shanghai Jiao Tong University, <sup>4</sup>Fudan University
19 |
20 | ## Updates
21 |
34 |
35 | [07/2024] TexDreamer is accepted to ECCV 2024!
36 |
37 |
38 |
39 |
44 |
45 | ## Installation
46 |
47 | We recommend using Anaconda to manage the Python environment. The setup commands below are provided for reference.
48 |
49 | ```bash
50 | git clone https://github.com/ggxxii/texdreamer.git
51 | cd texdreamer
52 | conda create -n texdreamer python=3.8
53 | conda activate texdreamer
54 | conda install pytorch==2.0.0 torchvision==0.15.0 torchaudio==2.0.0 pytorch-cuda=11.7 -c pytorch -c nvidia
55 | pip install -r requirements.txt
56 | ```
57 | Please also install xformers by following the instructions at https://github.com/facebookresearch/xformers.git.
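If a prebuilt wheel matching your torch/CUDA build is available, a plain pip install may be enough; treat this shortcut as an assumption to verify (an incompatible wheel can pull in a different torch), and fall back to building from source per the linked instructions.

```bash
# Common shortcut; verify the resolved xformers version is compatible
# with the pinned torch 2.0.0 / CUDA 11.7 environment above.
pip install xformers
```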
58 |
59 |
60 | ## Data Preparation
61 |
62 | ### Download TexDreamer Trained models
63 | You can download our model .zip from [Hugging Face](https://huggingface.co/ggxxii/TexDreamer).
64 | Put the downloaded models in the folder `texdreamer_u128_t16_origin`. The folder structure should look like:
65 |
66 | ```
67 | ./
68 | ├── ...
69 | └── texdreamer_u128_t16_origin/
70 |     ├── i2t
71 |     │   ├── i2t_decoder.pth
72 |     │   └── SMPL_NEUTRAL.pkl
73 |     ├── i2uv
74 |     │   ├── vision_encoder
75 |     │   │   ├── config.json
76 |     │   │   └── pytorch_model.bin
77 |     │   └── i2t_decoder.pth
78 |     ├── text_encoder
79 |     │   ├── adapter_config.json
80 |     │   └── adapter_model.bin
81 |     └── unet
82 |         ├── adapter_config.json
83 |         └── adapter_model.bin
84 | ```
85 |
86 | ## Generate Human Texture from Text
87 |
88 | ### From input .txt file
89 |
90 | We provide a .txt file with 6 sample prompts in `data/sample_prompts.txt`; the corresponding generation results are in `output/t2uv`.
91 |
92 | ```bash
93 | python infer_t2uv.py --lora_path texdreamer_u128_t16_origin --save_path output/t2uv --test_list data/sample_prompts.txt
94 | ```
95 | Since we load `stabilityai/stable-diffusion-2-1` from local files, you may need to download it first and change `cache_dir` in the function `get_lora_sd_pipeline`.
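For reference, a minimal one-off download sketch (run once with network access; the `cache_dir` value here is an example, so point it at whatever directory your `get_lora_sd_pipeline` uses):

```python
# Populates the local Hugging Face cache with the base model.
# "huggingface/hub" is an example cache location, not a value the repo mandates.
from diffusers import StableDiffusionPipeline

StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1",
    cache_dir="huggingface/hub",
)
```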
96 |
97 | ## Generate Human Texture from Image
98 |
99 | ### From input image folder
100 |
101 | We provide some sample images from the [Market-1501](https://zheng-lab.cecs.anu.edu.au/Project/project_reid.html) dataset in `data/input`; the corresponding generation results are in `output/i2uv`.
102 |
103 | Of course, you can also use your own images. The script pads each input to a square before encoding, so arbitrary aspect ratios are fine; note that only `.png` files are picked up.
104 |
105 | ```bash
106 | python infer_i2uv.py --lora_path texdreamer_u128_t16_origin --output_folder output/i2uv --test_folder data/input
107 | ```
108 |
109 |
110 | ## Citation
111 | If you find our work useful for your research, please consider citing the paper:
112 | ```
113 | @inproceedings{texdreamer,
114 | title={Texdreamer: Towards zero-shot high-fidelity 3d human texture generation},
115 | author={Liu, Yufei and Zhu, Junwei and Tang, Junshu and Zhang, Shijie and Zhang, Jiangning and Cao, Weijian and Wang, Chengjie and Wu, Yunsheng and Huang, Dongjin},
116 | booktitle={European Conference on Computer Vision},
117 | pages={184--202},
118 | year={2024},
119 | organization={Springer}
120 | }
121 | ```
--------------------------------------------------------------------------------
/data/input/1545.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/data/input/1545.png
--------------------------------------------------------------------------------
/data/input/855.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/data/input/855.png
--------------------------------------------------------------------------------
/data/input/94.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/data/input/94.png
--------------------------------------------------------------------------------
/data/sample_prompts.txt:
--------------------------------------------------------------------------------
1 | Deadpool, red-black costume
2 | Iron Man, red-gold armor
3 | Commodus, Roman tunic and laurel wreath
4 | pretty woman, stunning gold sequin gown, dark brown hair, young
5 | Tom Hardy, black tuxedo, buzz cut
6 | Stormtrooper
--------------------------------------------------------------------------------
/data/smpl_uv_mask.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/data/smpl_uv_mask.png
--------------------------------------------------------------------------------
/doc/method.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/doc/method.png
--------------------------------------------------------------------------------
/doc/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/doc/overview.png
--------------------------------------------------------------------------------
/doc/pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/doc/pipeline.png
--------------------------------------------------------------------------------
/infer_i2uv.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import random
4 | import argparse
5 |
6 | import torch
7 | import torch.nn.functional as F
8 | import torch.utils.checkpoint
9 |
10 | from accelerate.logging import get_logger
11 | from accelerate.utils import set_seed
12 | from diffusers import AutoencoderKL, DDPMScheduler, DiffusionPipeline, UNet2DConditionModel
13 | from diffusers import DDPMScheduler, PNDMScheduler, StableDiffusionPipeline, DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler
14 | from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
15 | from diffusers.optimization import get_scheduler
16 | from diffusers.utils import check_min_version
17 | from diffusers.utils.import_utils import is_xformers_available
18 | from huggingface_hub import HfFolder, Repository, whoami
19 | from PIL import Image
20 | from torchvision import transforms
21 | from tqdm.auto import tqdm
22 | from transformers import AutoTokenizer, PretrainedConfig, CLIPFeatureExtractor, CLIPProcessor, CLIPVisionModel
23 | from peft import PeftModel, LoraConfig, get_peft_model_state_dict, set_peft_model_state_dict
24 | from model_i2t import Image2Token
25 |
26 | # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
27 | check_min_version("0.10.0.dev0")
28 |
29 | logger = get_logger(__name__)
30 |
31 |
32 |
33 | def get_lora_sd_pipeline(
34 | ckpt_dir, base_model_name_or_path=None, dtype=torch.float16, device="cuda", adapter_name="default", cache_dir="huggingface/hub", local_files_only=True
35 | ):
36 | unet_sub_dir = os.path.join(ckpt_dir, "unet")
37 | text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
38 | if os.path.exists(text_encoder_sub_dir) and base_model_name_or_path is None:
39 | config = LoraConfig.from_pretrained(text_encoder_sub_dir)
40 | base_model_name_or_path = config.base_model_name_or_path
41 |
42 | if base_model_name_or_path is None:
43 | raise ValueError("Please specify the base model name or path")
44 |
45 | pipe = StableDiffusionPipeline.from_pretrained(
46 | base_model_name_or_path, torch_dtype=dtype, requires_safety_checker=False, cache_dir=cache_dir, local_files_only=local_files_only
47 | ).to(device)
48 | pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
49 |
50 | if os.path.exists(text_encoder_sub_dir):
51 | pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name)
52 |
53 | if dtype in (torch.float16, torch.bfloat16):
54 | pipe.unet.half()
55 | pipe.text_encoder.half()
56 |
57 | pipe.to(device)
58 | return pipe
59 |
60 |
61 | def load_adapter(pipe, ckpt_dir, adapter_name):
62 | unet_sub_dir = os.path.join(ckpt_dir, "unet")
63 | text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
64 | pipe.unet.load_adapter(unet_sub_dir, adapter_name=adapter_name)
65 | if os.path.exists(text_encoder_sub_dir):
66 | pipe.text_encoder.load_adapter(text_encoder_sub_dir, adapter_name=adapter_name)
67 |
68 |
69 | def set_adapter(pipe, adapter_name):
70 | pipe.unet.set_adapter(adapter_name)
71 | if isinstance(pipe.text_encoder, PeftModel):
72 | pipe.text_encoder.set_adapter(adapter_name)
73 |
74 |
75 | def merging_lora_with_base(pipe, ckpt_dir, adapter_name="default"):
76 | unet_sub_dir = os.path.join(ckpt_dir, "unet")
77 | text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
78 | if isinstance(pipe.unet, PeftModel):
79 | pipe.unet.set_adapter(adapter_name)
80 | else:
81 | pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
82 | pipe.unet = pipe.unet.merge_and_unload()
83 |
84 | if os.path.exists(text_encoder_sub_dir):
85 | if isinstance(pipe.text_encoder, PeftModel):
86 | pipe.text_encoder.set_adapter(adapter_name)
87 | else:
88 | pipe.text_encoder = PeftModel.from_pretrained(
89 | pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
90 | )
91 | pipe.text_encoder = pipe.text_encoder.merge_and_unload()
92 |
93 | return pipe
94 |
95 |
96 | def create_weighted_lora_adapter(pipe, adapters, weights, adapter_name="default"):
97 | pipe.unet.add_weighted_adapter(adapters, weights, adapter_name)
98 | if isinstance(pipe.text_encoder, PeftModel):
99 | pipe.text_encoder.add_weighted_adapter(adapters, weights, adapter_name)
100 |
101 | return pipe
102 |
103 |
104 | if __name__=='__main__':
105 | parser = argparse.ArgumentParser()
106 | parser.add_argument('--seed', type=int, default=777, help='Random seed')
107 | parser.add_argument('--lora_path', type=str, default="texdreamer_u128_t16_origin", help='Lora path')
108 | parser.add_argument('--cache_dir', type=str, default="huggingface", help='Cache directory for Huggingface models')
109 | parser.add_argument('--test_folder', type=str, default="data/input", help='Path to test folder')
110 | parser.add_argument('--output_folder', type=str, default="output/i2uv", help='Output folder for generated images')
111 |
112 | args = parser.parse_args()
113 |
114 | cache_dir = args.cache_dir
115 | myseed = args.seed
116 | MODEL_NAME = "stabilityai/stable-diffusion-2-1"
117 | CLIP_NAME="laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
118 |
119 | lora_path = args.lora_path
120 | i2uv_path = os.path.join(lora_path, "i2uv")
121 | local_files_only=True
122 | uv_mask = Image.open("data/smpl_uv_mask.png").convert("L")
123 |
124 |
125 | processor = CLIPProcessor.from_pretrained(CLIP_NAME, cache_dir=cache_dir, local_files_only=local_files_only)
126 | pipe = get_lora_sd_pipeline(lora_path, base_model_name_or_path=MODEL_NAME, adapter_name="hutex")
127 | set_adapter(pipe, adapter_name="hutex")
128 |
129 | pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
130 | pipe.safety_checker=None
131 |
132 |
133 | i2t_decoder = Image2Token()
134 | i2t_decoder.load_state_dict(torch.load(os.path.join(i2uv_path, "i2t_decoder.pth")))
135 | i2t_decoder.eval()
136 | i2t_decoder.to(pipe.device)
137 |
138 | i2uv_vision_encoder_path = os.path.join(i2uv_path, 'vision_encoder')
139 | if os.path.exists(i2uv_vision_encoder_path):
140 | CLIP_NAME=i2uv_vision_encoder_path
141 | vision_encoder = CLIPVisionModel.from_pretrained(CLIP_NAME, cache_dir=cache_dir, local_files_only=local_files_only)
142 | vision_encoder.eval()
143 | vision_encoder.to(pipe.device)
144 |
145 |
146 | test_folder = args.test_folder
147 | output_folder = args.output_folder
148 | os.makedirs(output_folder, exist_ok=True)
149 |
150 |
151 | for im_file in os.listdir(test_folder):
152 |
153 | if os.path.isdir(test_folder):
154 | folder_path = test_folder
155 | save_path = output_folder
156 | os.makedirs(save_path, exist_ok=True)
157 |
158 | if im_file.endswith('png'):
159 |
160 | im_pil = Image.open(os.path.join(folder_path, im_file))
161 | w,h=im_pil.size
162 | max_size = max(w,h)
163 | crop = Image.new("RGB", (max_size, max_size))
164 | crop.paste(im_pil, ((max_size-w)//2, (max_size-h)//2))
165 | # encode the padded image with CLIP, then map its tokens to 77 prompt embeddings
166 | with torch.no_grad():
167 | encoder_hidden_states = i2t_decoder(vision_encoder(processor(images=crop, return_tensors="pt")["pixel_values"].to(pipe.device)).last_hidden_state)
168 | set_seed(myseed)
169 | image = pipe(prompt_embeds=encoder_hidden_states, height=1024, width=1024, num_inference_steps=32, guidance_scale=2).images[0]
170 | image.save(os.path.join(save_path, im_file.replace('.jpg', '.png')))
171 |
172 | crop=crop.resize((1024,1024))
173 | show_img = Image.new("RGB", (1024*2,1024))
174 | show_img.paste(crop)
175 | show_img.paste(image, (1024,0))
176 | filename, extension = os.path.splitext(im_file)
177 | show_img.save(os.path.join(save_path, f"{filename}_a{extension}"))
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
--------------------------------------------------------------------------------
/infer_t2uv.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | from pathlib import Path
4 | from typing import Optional
5 | import argparse
6 |
7 | import torch
8 | import torch.nn.functional as F
9 | import torch.utils.checkpoint
10 |
11 | from accelerate.logging import get_logger
12 | from accelerate.utils import set_seed
13 | from diffusers import AutoencoderKL, DDPMScheduler, DiffusionPipeline, UNet2DConditionModel
14 | from diffusers import DDPMScheduler, PNDMScheduler, StableDiffusionPipeline, DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler
15 | from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
16 | from diffusers.optimization import get_scheduler
17 | from diffusers.utils import check_min_version
18 | from diffusers.utils.import_utils import is_xformers_available
19 | from huggingface_hub import HfFolder, Repository, whoami
20 | from PIL import Image
21 | from torchvision import transforms
22 | from tqdm.auto import tqdm
23 | from transformers import AutoTokenizer, PretrainedConfig, CLIPFeatureExtractor
24 | from peft import PeftModel, LoraConfig, get_peft_model_state_dict, set_peft_model_state_dict
25 |
26 | # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
27 | check_min_version("0.10.0.dev0")
28 |
29 | logger = get_logger(__name__)
30 |
31 |
32 |
33 |
34 | def get_lora_sd_pipeline(
35 | ckpt_dir, base_model_name_or_path=None, dtype=torch.float16, device="cuda", adapter_name="default", cache_dir="huggingface/hub", local_files_only=True
36 | ):
37 | unet_sub_dir = os.path.join(ckpt_dir, "unet")
38 | text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
39 | if os.path.exists(text_encoder_sub_dir) and base_model_name_or_path is None:
40 | config = LoraConfig.from_pretrained(text_encoder_sub_dir)
41 | base_model_name_or_path = config.base_model_name_or_path
42 |
43 | if base_model_name_or_path is None:
44 | raise ValueError("Please specify the base model name or path")
45 |
46 | pipe = StableDiffusionPipeline.from_pretrained(
47 | base_model_name_or_path, torch_dtype=dtype, requires_safety_checker=False, cache_dir=cache_dir, local_files_only=local_files_only
48 | ).to(device)
49 | pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
50 |
51 | if os.path.exists(text_encoder_sub_dir):
52 | pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name)
53 |
54 | if dtype in (torch.float16, torch.bfloat16):
55 | pipe.unet.half()
56 | pipe.text_encoder.half()
57 |
58 | pipe.to(device)
59 | return pipe
60 |
61 |
62 | def load_adapter(pipe, ckpt_dir, adapter_name):
63 | unet_sub_dir = os.path.join(ckpt_dir, "unet")
64 | text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
65 | pipe.unet.load_adapter(unet_sub_dir, adapter_name=adapter_name)
66 | if os.path.exists(text_encoder_sub_dir):
67 | pipe.text_encoder.load_adapter(text_encoder_sub_dir, adapter_name=adapter_name)
68 |
69 |
70 | def set_adapter(pipe, adapter_name):
71 | pipe.unet.set_adapter(adapter_name)
72 | if isinstance(pipe.text_encoder, PeftModel):
73 | pipe.text_encoder.set_adapter(adapter_name)
74 |
75 |
76 | def merging_lora_with_base(pipe, ckpt_dir, adapter_name="default"):
77 | unet_sub_dir = os.path.join(ckpt_dir, "unet")
78 | text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
79 | if isinstance(pipe.unet, PeftModel):
80 | pipe.unet.set_adapter(adapter_name)
81 | else:
82 | pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
83 | pipe.unet = pipe.unet.merge_and_unload()
84 |
85 | if os.path.exists(text_encoder_sub_dir):
86 | if isinstance(pipe.text_encoder, PeftModel):
87 | pipe.text_encoder.set_adapter(adapter_name)
88 | else:
89 | pipe.text_encoder = PeftModel.from_pretrained(
90 | pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
91 | )
92 | pipe.text_encoder = pipe.text_encoder.merge_and_unload()
93 |
94 | return pipe
95 |
96 |
97 | def create_weighted_lora_adapter(pipe, adapters, weights, adapter_name="default"):
98 | pipe.unet.add_weighted_adapter(adapters, weights, adapter_name)
99 | if isinstance(pipe.text_encoder, PeftModel):
100 | pipe.text_encoder.add_weighted_adapter(adapters, weights, adapter_name)
101 |
102 | return pipe
103 |
104 |
105 | if __name__=='__main__':
106 | parser = argparse.ArgumentParser()
107 | parser.add_argument('--seed', type=int, default=777, help='Random seed')
108 | parser.add_argument('--lora_path', type=str, default="texdreamer_u128_t16_origin", help='Lora path')
109 | parser.add_argument('--save_path', type=str, default="output/t2uv", help='Save path for generated images')
110 | parser.add_argument('--test_list', type=str, default="data/sample_prompts.txt", help='Path to input txt file')
111 |
112 | args = parser.parse_args()
113 |
114 | # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
115 | check_min_version("0.10.0.dev0")
116 |
117 | logger = get_logger(__name__)
118 |
119 | myseed = args.seed
120 | MODEL_NAME = "stabilityai/stable-diffusion-2-1"
121 | lora_path = args.lora_path
122 | save_path = args.save_path
123 | os.makedirs(save_path, exist_ok=True)
124 |
125 | uv_mask = Image.open("data/smpl_uv_mask.png").convert("L")
126 |
127 | positive_prompt = ", natural lighting, photo-realistic, 4k"
128 | negative_prompt = "overexposed, shadow, reflection, low quality, teeth, open mouth, eyes closed"
129 |
130 | pipe = get_lora_sd_pipeline(lora_path, base_model_name_or_path=MODEL_NAME, adapter_name="hutex")
131 |
132 | set_adapter(pipe, adapter_name="hutex")
133 |
134 | pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
135 | pipe.safety_checker = None
136 |
137 | ################### generate from .txt file ###################
138 | test_list = args.test_list
139 | idx = 0
140 | with open(test_list, 'r') as f:
141 | for line in f.readlines():
142 | prompt = 'hutex, ' + line.strip()
143 | with torch.no_grad():
144 | set_seed(myseed)
145 | images = pipe(prompt + positive_prompt, height=1024, width=1024, num_inference_steps=32, guidance_scale=7.5,
146 | negative_prompt=negative_prompt, num_images_per_prompt=1).images
147 |
148 | image = images[0]
149 | image.putalpha(uv_mask)
150 | image.save(os.path.join(save_path, '{:04d}.png'.format(idx)))
151 |
152 | idx += 1
153 |
154 |
155 |
156 |
--------------------------------------------------------------------------------
/model_i2t.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 |
6 | class Image2Token(nn.Module):
7 |
8 | def __init__(self, visual_hidden_size=1280, text_hidden_size=1024, max_length=77, num_layers=3):
9 | super(Image2Token, self).__init__()
10 |
11 | self.visual_proj = nn.Linear(visual_hidden_size, text_hidden_size)
12 |
13 | if num_layers>0:
14 | self.query = nn.Parameter(torch.randn((1, max_length, text_hidden_size)))
15 | decoder_layer = nn.TransformerDecoderLayer(d_model=text_hidden_size, nhead=text_hidden_size//64, batch_first=True)
16 | self.i2t = nn.TransformerDecoder(decoder_layer, num_layers=num_layers)
17 | else:
18 | self.i2t = None
19 |
20 | def forward(self, x):
21 | b,s,d=x.shape
22 | out = self.visual_proj(x)
23 | if self.i2t is not None:
24 | out = self.i2t(self.query.repeat(b,1,1), out)
25 |
26 | return out
27 |
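# Shape sketch (illustrative; an assumption based on the defaults above, not
# part of the released pipeline): a CLIP ViT-H/14 vision encoder emits
# last_hidden_state of shape (batch, 257, 1280). Image2Token projects it to
# 1024 dims and cross-attends 77 learned query tokens against it, producing
# (batch, 77, 1024) prompt embeddings for the SD 2.1 conditioning slot:
#
#   m = Image2Token()
#   y = m(torch.randn(2, 257, 1280))
#   assert y.shape == (2, 77, 1024)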
--------------------------------------------------------------------------------
/output/i2uv/1545.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/i2uv/1545.png
--------------------------------------------------------------------------------
/output/i2uv/1545_a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/i2uv/1545_a.png
--------------------------------------------------------------------------------
/output/i2uv/855.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/i2uv/855.png
--------------------------------------------------------------------------------
/output/i2uv/855_a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/i2uv/855_a.png
--------------------------------------------------------------------------------
/output/i2uv/94.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/i2uv/94.png
--------------------------------------------------------------------------------
/output/i2uv/94_a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/i2uv/94_a.png
--------------------------------------------------------------------------------
/output/t2uv/0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0000.png
--------------------------------------------------------------------------------
/output/t2uv/0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0001.png
--------------------------------------------------------------------------------
/output/t2uv/0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0002.png
--------------------------------------------------------------------------------
/output/t2uv/0003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0003.png
--------------------------------------------------------------------------------
/output/t2uv/0004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0004.png
--------------------------------------------------------------------------------
/output/t2uv/0005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0005.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | diffusers==0.18.2
2 | ninja==1.11.1
3 | omegaconf==2.3.0
4 | opencv-contrib-python==4.7.0.72
5 | opencv-python==4.7.0.72
6 | peft==0.11.1
7 | Pillow==9.5.0
8 | pytorch3d==0.7.4
9 | PyYAML==6.0
10 | safetensors==0.4.3
11 | scipy==1.10.1
12 | timm==0.6.13
13 | tokenizers==0.13.3
14 | tqdm==4.65.0
15 | transformers==4.28.1
16 | typing_extensions==4.5.0
17 | typing-inspect==0.9.0
18 |
19 |
--------------------------------------------------------------------------------