├── .gitignore ├── LICENSE ├── README.md ├── data ├── input │ ├── 1545.png │ ├── 855.png │ └── 94.png ├── sample_prompts.txt └── smpl_uv_mask.png ├── doc ├── method.png ├── overview.png └── pipeline.png ├── infer_i2uv.py ├── infer_t2uv.py ├── model_i2t.py ├── output ├── i2uv │ ├── 1545.png │ ├── 1545_a.png │ ├── 855.png │ ├── 855_a.png │ ├── 94.png │ └── 94_a.png └── t2uv │ ├── 0000.png │ ├── 0001.png │ ├── 0002.png │ ├── 0003.png │ ├── 0004.png │ └── 0005.png └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/python 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python 3 | 4 | ### Python ### 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # poetry 102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 103 | # This is especially recommended for binary packages to ensure reproducibility, and is more 104 | # commonly ignored for libraries. 105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 106 | #poetry.lock 107 | 108 | # pdm 109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
110 | #pdm.lock 111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 112 | # in version control. 113 | # https://pdm.fming.dev/#use-with-ide 114 | .pdm.toml 115 | 116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 117 | __pypackages__/ 118 | 119 | # Celery stuff 120 | celerybeat-schedule 121 | celerybeat.pid 122 | 123 | # SageMath parsed files 124 | *.sage.py 125 | 126 | # Environments 127 | .env 128 | .venv 129 | env/ 130 | venv/ 131 | ENV/ 132 | env.bak/ 133 | venv.bak/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | .spyproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | 142 | # mkdocs documentation 143 | /site 144 | 145 | # mypy 146 | .mypy_cache/ 147 | .dmypy.json 148 | dmypy.json 149 | 150 | # Pyre type checker 151 | .pyre/ 152 | 153 | # pytype static type analyzer 154 | .pytype/ 155 | 156 | # Cython debug symbols 157 | cython_debug/ 158 | 159 | # PyCharm 160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 162 | # and can be added to the global gitignore or merged into this file. For a more nuclear 163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
164 | .idea/ 165 | 166 | ### Python Patch ### 167 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 168 | poetry.toml 169 | 170 | # ruff 171 | .ruff_cache/ 172 | 173 | # LSP config files 174 | pyrightconfig.json 175 | 176 | # End of https://www.toptal.com/developers/gitignore/api/python 177 | 178 | .vscode/ 179 | .threestudio_cache/ 180 | outputs/ 181 | outputs-gradio/ 182 | 183 | # pretrained model weights 184 | *.ckpt 185 | *.pt 186 | *.pth 187 | 188 | # wandb 189 | wandb/ 190 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Kuma 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [ECCV 2024 Oral] TexDreamer: Towards Zero-Shot High-Fidelity 3D Human Texture Generation 2 | 3 | 4 | ### [Project Page](https://ggxxii.github.io/texdreamer/) | [Paper (ArXiv)](https://arxiv.org/abs/2403.12906) | [ATLAS Dataset](https://huggingface.co/datasets/ggxxii/ATLAS) 5 | 6 | 7 | [Yufei Liu](https://ggxxii.github.io/)1, 8 | Junwei Zhu 2, 9 | [Junshu Tang](https://junshutang.github.io/)3, 10 | Shijie Zhang 4, 11 | [Jiangning Zhang](https://zhangzjn.github.io/)2, 12 | Weijian Cao2, 13 | Chengjie Wang2, 14 | Yunsheng Wu2, 15 | Dongjin Huang1*
16 | 17 | 18 | 1Shanghai University, 2Tencent Youtu Lab, 3Shanghai Jiao Tong University, 4Fudan University 19 | 20 | ## Updates 21 | 34 | 35 | [07/2024] TexDreamer is accepted to ECCV 2024! 36 | 37 | 38 | 39 | 44 | 45 | ## Installation 46 | 47 | We recommend using anaconda to manage the python environment. The setup commands below are provided for your reference. 48 | 49 | ```bash 50 | git clone https://github.com/ggxxii/texdreamer.git 51 | cd texdreamer 52 | conda create -n texdreamer python=3.8 53 | conda activate texdreamer 54 | conda install pytorch==2.0.0 torchvision==0.15.0 torchaudio==2.0.0 pytorch-cuda=11.7 -c pytorch -c nvidia 55 | pip install -r requirements.txt 56 | ``` 57 | Please also install xformers by following the instructions at https://github.com/facebookresearch/xformers. 58 | 59 | 60 | ## Data Preparation 61 | 62 | ### Download TexDreamer Trained models 63 | You can download our model .zip from [Huggingface](https://huggingface.co/ggxxii/TexDreamer). 64 | Put the downloaded models in the folder `texdreamer_u128_t16_origin`. The folder structure should look like this: 65 | 66 | ``` 67 | ./ 68 | ├── ... 69 | └── texdreamer_u128_t16_origin/ 70 | ├── i2t 71 | ├── i2t_decoder.pth 72 | └── SMPL_NEUTRAL.pkl 73 | ├── i2uv 74 | ├── vision_encoder 75 | ├──config.json 76 | └──pytorch_model.bin 77 | └── i2t_decoder.pth 78 | ├── text_encoder 79 | ├── adapter_config.json 80 | └── adapter_model.bin 81 | ├── unet 82 | ├── adapter_config.json 83 | └── adapter_model.bin 84 | ``` 85 | 86 | ## Generate Human Texture from Text 87 | 88 | ### From input .txt file 89 | 90 | We provide a txt file with 6 sample prompts. You can find it in `data/sample_prompts.txt`. The sample generation results are in `output/t2uv`. 
91 | 92 | ```bash 93 | python infer_t2uv.py --lora_path texdreamer_u128_t16_origin --save_path output/t2uv --test_list data/sample_prompts.txt 94 | ``` 95 | Since we load stabilityai/stable-diffusion-2-1 from local files, you may first need to download it and change the `cache_dir` default of the function `get_lora_sd_pipeline` to point at your download location. 96 | 97 | ## Generate Human Texture from Image 98 | 99 | ### From input image folder 100 | 101 | We provide some sample images from the [Market-1501](https://zheng-lab.cecs.anu.edu.au/Project/project_reid.html) dataset. You can find them in `data/input`. The sample generation results are in `output/i2uv`. 102 | 103 | Of course you can also use your own images. 104 | 105 | ```bash 106 | python infer_i2uv.py --lora_path texdreamer_u128_t16_origin --output_folder output/i2uv --test_folder data/input 107 | ``` 108 | 109 | 110 | ## Citation 111 | If you find our work useful for your research, please consider citing the paper: 112 | ``` 113 | @inproceedings{texdreamer, 114 | title={Texdreamer: Towards zero-shot high-fidelity 3d human texture generation}, 115 | author={Liu, Yufei and Zhu, Junwei and Tang, Junshu and Zhang, Shijie and Zhang, Jiangning and Cao, Weijian and Wang, Chengjie and Wu, Yunsheng and Huang, Dongjin}, 116 | booktitle={European Conference on Computer Vision}, 117 | pages={184--202}, 118 | year={2024}, 119 | organization={Springer} 120 | } 121 | ``` -------------------------------------------------------------------------------- /data/input/1545.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/data/input/1545.png -------------------------------------------------------------------------------- /data/input/855.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/data/input/855.png 
-------------------------------------------------------------------------------- /data/input/94.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/data/input/94.png -------------------------------------------------------------------------------- /data/sample_prompts.txt: -------------------------------------------------------------------------------- 1 | Deadpool, red-black costume 2 | Iron Man, red-gold armor 3 | Commodus, Roman tunic and laurel wreath 4 | pretty woman, stunning gold sequin gown, dark brown hair, young 5 | Tom Hardy, black tuxedo, buzz cut 6 | Stormtrooper -------------------------------------------------------------------------------- /data/smpl_uv_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/data/smpl_uv_mask.png -------------------------------------------------------------------------------- /doc/method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/doc/method.png -------------------------------------------------------------------------------- /doc/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/doc/overview.png -------------------------------------------------------------------------------- /doc/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/doc/pipeline.png -------------------------------------------------------------------------------- /infer_i2uv.py: 
import argparse
import os
import random
import sys

import torch
import torch.nn.functional as F
import torch.utils.checkpoint

from accelerate.logging import get_logger
from accelerate.utils import set_seed
from diffusers import AutoencoderKL, DDPMScheduler, DiffusionPipeline, UNet2DConditionModel
from diffusers import DDPMScheduler, PNDMScheduler, StableDiffusionPipeline, DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
from diffusers.optimization import get_scheduler
from diffusers.utils import check_min_version
from diffusers.utils.import_utils import is_xformers_available
from huggingface_hub import HfFolder, Repository, whoami
from PIL import Image
from torchvision import transforms
from tqdm.auto import tqdm
from transformers import AutoTokenizer, PretrainedConfig, CLIPFeatureExtractor, CLIPProcessor, CLIPVisionModel
from peft import PeftModel, LoraConfig, get_peft_model_state_dict, set_peft_model_state_dict
from model_i2t import Image2Token

# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.10.0.dev0")

logger = get_logger(__name__)


def get_lora_sd_pipeline(
    ckpt_dir,
    base_model_name_or_path=None,
    dtype=torch.float16,
    device="cuda",
    adapter_name="default",
    cache_dir="huggingface/hub",
    local_files_only=True,
):
    """Load a Stable Diffusion pipeline and wrap its UNet/text encoder with LoRA adapters.

    Args:
        ckpt_dir: Directory holding a ``unet`` sub-directory and, optionally, a
            ``text_encoder`` sub-directory with the adapter files.
        base_model_name_or_path: Base model passed to
            ``StableDiffusionPipeline.from_pretrained``. When ``None`` it is read
            from the text-encoder adapter config, if that directory exists.
        dtype: ``torch_dtype`` used to load the base pipeline.
        device: Device the pipeline is moved to.
        adapter_name: Name under which the adapters are registered.
        cache_dir: Forwarded to ``from_pretrained``.
        local_files_only: Forwarded to ``from_pretrained``.

    Returns:
        The pipeline with ``pipe.unet`` (and ``pipe.text_encoder`` when an adapter
        directory exists) replaced by ``PeftModel`` wrappers.

    Raises:
        ValueError: If no base model is given and none can be read from the
            text-encoder adapter config.
    """
    unet_sub_dir = os.path.join(ckpt_dir, "unet")
    text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
    has_text_encoder_adapter = os.path.exists(text_encoder_sub_dir)

    if has_text_encoder_adapter and base_model_name_or_path is None:
        config = LoraConfig.from_pretrained(text_encoder_sub_dir)
        base_model_name_or_path = config.base_model_name_or_path

    if base_model_name_or_path is None:
        raise ValueError("Please specify the base model name or path")

    pipe = StableDiffusionPipeline.from_pretrained(
        base_model_name_or_path,
        torch_dtype=dtype,
        requires_safety_checker=False,
        cache_dir=cache_dir,
        local_files_only=local_files_only,
    ).to(device)
    pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)

    if has_text_encoder_adapter:
        pipe.text_encoder = PeftModel.from_pretrained(
            pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
        )

    if dtype in (torch.float16, torch.bfloat16):
        # Cast to the precision that was actually requested. `.half()` always
        # produces float16, which would leave a bfloat16 pipeline with
        # mismatched UNet/text-encoder dtypes.
        pipe.unet.to(dtype)
        pipe.text_encoder.to(dtype)

    pipe.to(device)
    return pipe


def load_adapter(pipe, ckpt_dir, adapter_name):
    """Load an additional adapter from ``ckpt_dir`` into an existing pipeline.

    The UNet adapter is always loaded from ``<ckpt_dir>/unet``; the text-encoder
    adapter is loaded only when ``<ckpt_dir>/text_encoder`` exists.
    """
    unet_sub_dir = os.path.join(ckpt_dir, "unet")
    text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
    pipe.unet.load_adapter(unet_sub_dir, adapter_name=adapter_name)
    if os.path.exists(text_encoder_sub_dir):
        if isinstance(pipe.text_encoder, PeftModel):
            pipe.text_encoder.load_adapter(text_encoder_sub_dir, adapter_name=adapter_name)
        else:
            # The pipeline was built from a checkpoint without a text-encoder
            # adapter, so the encoder is not wrapped yet; wrap it here instead
            # of calling `load_adapter` on the unwrapped model.
            pipe.text_encoder = PeftModel.from_pretrained(
                pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
            )


def set_adapter(pipe, adapter_name):
    """Activate ``adapter_name`` on the UNet and, if it is a ``PeftModel``, on the text encoder."""
    pipe.unet.set_adapter(adapter_name)
    if isinstance(pipe.text_encoder, PeftModel):
        pipe.text_encoder.set_adapter(adapter_name)
def merging_lora_with_base(pipe, ckpt_dir, adapter_name="default"):
    """Merge the named adapter into the pipeline's UNet and text encoder.

    For each of the two modules: if it is already a ``PeftModel`` the adapter is
    selected with ``set_adapter``; otherwise it is loaded from the matching
    sub-directory of ``ckpt_dir``. The module is then replaced by the result of
    ``merge_and_unload()``. The text encoder is only touched when
    ``<ckpt_dir>/text_encoder`` exists.

    Returns:
        The same ``pipe`` object, modified in place.
    """
    unet_sub_dir = os.path.join(ckpt_dir, "unet")
    text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
    if isinstance(pipe.unet, PeftModel):
        pipe.unet.set_adapter(adapter_name)
    else:
        pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
    pipe.unet = pipe.unet.merge_and_unload()

    if os.path.exists(text_encoder_sub_dir):
        if isinstance(pipe.text_encoder, PeftModel):
            pipe.text_encoder.set_adapter(adapter_name)
        else:
            pipe.text_encoder = PeftModel.from_pretrained(
                pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
            )
        pipe.text_encoder = pipe.text_encoder.merge_and_unload()

    return pipe


def create_weighted_lora_adapter(pipe, adapters, weights, adapter_name="default"):
    """Register a weighted combination of ``adapters`` under ``adapter_name``.

    Calls ``add_weighted_adapter`` on the UNet and, when the text encoder is a
    ``PeftModel``, on the text encoder as well. Returns ``pipe``.
    """
    pipe.unet.add_weighted_adapter(adapters, weights, adapter_name)
    if isinstance(pipe.text_encoder, PeftModel):
        pipe.text_encoder.add_weighted_adapter(adapters, weights, adapter_name)

    return pipe


if __name__=='__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=777, help='Random seed')
    parser.add_argument('--lora_path', type=str, default="texdreamer_u128_t16_origin", help='Lora path')
    parser.add_argument('--cache_dir', type=str, default="huggingface", help='Cache directory for Huggingface models')
    parser.add_argument('--test_folder', type=str, default="data/input", help='Path to test folder')
    # `--save_path` is accepted as an alias because the README and infer_t2uv.py use that name.
    parser.add_argument('--output_folder', '--save_path', dest='output_folder', type=str, default="output/i2uv",
                        help='Output folder for generated images')

    args = parser.parse_args()

    myseed = args.seed
    MODEL_NAME = "stabilityai/stable-diffusion-2-1"
    CLIP_NAME = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"

    lora_path = args.lora_path
    # Previously `cache_dir` was referenced below without ever being defined.
    cache_dir = args.cache_dir
    # Look for the image-to-UV weights next to the LoRA weights; fall back to the
    # original hard-coded location so existing layouts keep working.
    i2uv_path = os.path.join(lora_path, "i2uv")
    if not os.path.isdir(i2uv_path):
        i2uv_path = "texdreamer_u128_t16_origin/i2uv"
    local_files_only = True

    processor = CLIPProcessor.from_pretrained(CLIP_NAME, cache_dir=cache_dir, local_files_only=local_files_only)
    pipe = get_lora_sd_pipeline(lora_path, base_model_name_or_path=MODEL_NAME, adapter_name="hutex")
    set_adapter(pipe, adapter_name="hutex")

    pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
    pipe.safety_checker = None

    # Maps CLIP vision features to the token sequence fed to the UNet as prompt embeddings.
    i2t_decoder = Image2Token()
    # Load on CPU first so the checkpoint does not depend on the device it was saved from.
    i2t_decoder.load_state_dict(torch.load(os.path.join(i2uv_path, "i2t_decoder.pth"), map_location="cpu"))
    i2t_decoder.eval()
    i2t_decoder.to(pipe.device)

    # Prefer the vision encoder shipped with the checkpoint when it is present.
    i2uv_vision_encoder_path = os.path.join(i2uv_path, 'vision_encoder')
    if os.path.exists(i2uv_vision_encoder_path):
        CLIP_NAME = i2uv_vision_encoder_path
    vision_encoder = CLIPVisionModel.from_pretrained(CLIP_NAME, cache_dir=cache_dir, local_files_only=local_files_only)
    vision_encoder.eval()
    vision_encoder.to(pipe.device)

    test_folder = args.test_folder
    output_folder = args.output_folder
    os.makedirs(output_folder, exist_ok=True)

    image_extensions = ('.png', '.jpg', '.jpeg')

    # Sorted so runs are reproducible regardless of filesystem listing order.
    for im_file in sorted(os.listdir(test_folder)):
        filename, extension = os.path.splitext(im_file)
        if extension.lower() not in image_extensions:
            continue

        im_pil = Image.open(os.path.join(test_folder, im_file))
        # Paste the input centred on a square canvas before encoding.
        w, h = im_pil.size
        max_size = max(w, h)
        crop = Image.new("RGB", (max_size, max_size))
        crop.paste(im_pil, ((max_size - w) // 2, (max_size - h) // 2))

        with torch.no_grad():
            pixel_values = processor(images=crop, return_tensors="pt")["pixel_values"].to(pipe.device)
            encoder_hidden_states = i2t_decoder(vision_encoder(pixel_values).last_hidden_state)
            # Re-seed per image so every texture is generated from the same noise.
            set_seed(myseed)
            image = pipe(prompt_embeds=encoder_hidden_states, height=1024, width=1024,
                         num_inference_steps=32, guidance_scale=2).images[0]

        # The generated texture is always written as PNG.
        image.save(os.path.join(output_folder, filename + '.png'))

        # Side-by-side preview: input on the left, generated texture on the right.
        crop = crop.resize((1024, 1024))
        show_img = Image.new("RGB", (1024 * 2, 1024))
        show_img.paste(crop)
        show_img.paste(image, (1024, 0))
        show_img.save(os.path.join(output_folder, f"{filename}_a{extension}"))
check_min_version("0.10.0.dev0")

logger = get_logger(__name__)


def get_lora_sd_pipeline(
    ckpt_dir,
    base_model_name_or_path=None,
    dtype=torch.float16,
    device="cuda",
    adapter_name="default",
    cache_dir="huggingface/hub",
    local_files_only=True,
):
    """Load a Stable Diffusion pipeline and attach the LoRA adapters found in ``ckpt_dir``.

    Args:
        ckpt_dir: Directory with a ``unet`` adapter sub-directory and an optional
            ``text_encoder`` adapter sub-directory.
        base_model_name_or_path: Base model for
            ``StableDiffusionPipeline.from_pretrained``; when ``None`` it is taken
            from the text-encoder adapter config if that directory exists.
        dtype: ``torch_dtype`` used when loading the base pipeline.
        device: Target device for the pipeline.
        adapter_name: Name the adapters are registered under.
        cache_dir: Forwarded to ``from_pretrained``.
        local_files_only: Forwarded to ``from_pretrained``.

    Returns:
        The pipeline whose UNet (and text encoder, when an adapter directory
        exists) is wrapped in a ``PeftModel``.

    Raises:
        ValueError: If the base model is neither given nor discoverable from the
            text-encoder adapter config.
    """
    unet_sub_dir = os.path.join(ckpt_dir, "unet")
    text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
    has_text_encoder_adapter = os.path.exists(text_encoder_sub_dir)

    if has_text_encoder_adapter and base_model_name_or_path is None:
        config = LoraConfig.from_pretrained(text_encoder_sub_dir)
        base_model_name_or_path = config.base_model_name_or_path

    if base_model_name_or_path is None:
        raise ValueError("Please specify the base model name or path")

    pipe = StableDiffusionPipeline.from_pretrained(
        base_model_name_or_path,
        torch_dtype=dtype,
        requires_safety_checker=False,
        cache_dir=cache_dir,
        local_files_only=local_files_only,
    ).to(device)
    pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)

    if has_text_encoder_adapter:
        pipe.text_encoder = PeftModel.from_pretrained(
            pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
        )

    if dtype in (torch.float16, torch.bfloat16):
        # `.half()` would force float16 even when bfloat16 was requested; cast
        # to the requested dtype so all pipeline components agree.
        pipe.unet.to(dtype)
        pipe.text_encoder.to(dtype)

    pipe.to(device)
    return pipe


def load_adapter(pipe, ckpt_dir, adapter_name):
    """Load another adapter from ``ckpt_dir`` into the pipeline's UNet and text encoder.

    The text-encoder adapter is loaded only when ``<ckpt_dir>/text_encoder`` exists.
    """
    unet_sub_dir = os.path.join(ckpt_dir, "unet")
    text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
    pipe.unet.load_adapter(unet_sub_dir, adapter_name=adapter_name)
    if os.path.exists(text_encoder_sub_dir):
        if isinstance(pipe.text_encoder, PeftModel):
            pipe.text_encoder.load_adapter(text_encoder_sub_dir, adapter_name=adapter_name)
        else:
            # Text encoder has not been wrapped yet (the initial checkpoint had
            # no text-encoder adapter), so wrap it rather than calling
            # `load_adapter` on the unwrapped model.
            pipe.text_encoder = PeftModel.from_pretrained(
                pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
            )


def set_adapter(pipe, adapter_name):
    """Select ``adapter_name`` on the UNet and, if wrapped by PEFT, on the text encoder."""
    pipe.unet.set_adapter(adapter_name)
    if isinstance(pipe.text_encoder, PeftModel):
        pipe.text_encoder.set_adapter(adapter_name)
def merging_lora_with_base(pipe, ckpt_dir, adapter_name="default"):
    """Merge the named adapter into the pipeline's UNet and text encoder.

    Each module is either switched to ``adapter_name`` (if already a
    ``PeftModel``) or wrapped from the matching sub-directory of ``ckpt_dir``,
    then replaced by the result of ``merge_and_unload()``. The text encoder is
    processed only when ``<ckpt_dir>/text_encoder`` exists.

    Returns:
        The same ``pipe`` object, modified in place.
    """
    unet_sub_dir = os.path.join(ckpt_dir, "unet")
    text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
    if isinstance(pipe.unet, PeftModel):
        pipe.unet.set_adapter(adapter_name)
    else:
        pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
    pipe.unet = pipe.unet.merge_and_unload()

    if os.path.exists(text_encoder_sub_dir):
        if isinstance(pipe.text_encoder, PeftModel):
            pipe.text_encoder.set_adapter(adapter_name)
        else:
            pipe.text_encoder = PeftModel.from_pretrained(
                pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
            )
        pipe.text_encoder = pipe.text_encoder.merge_and_unload()

    return pipe


def create_weighted_lora_adapter(pipe, adapters, weights, adapter_name="default"):
    """Register a weighted combination of ``adapters`` under ``adapter_name``.

    Applied to the UNet and, when it is a ``PeftModel``, to the text encoder.
    Returns ``pipe``.
    """
    pipe.unet.add_weighted_adapter(adapters, weights, adapter_name)
    if isinstance(pipe.text_encoder, PeftModel):
        pipe.text_encoder.add_weighted_adapter(adapters, weights, adapter_name)

    return pipe


if __name__=='__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=777, help='Random seed')
    parser.add_argument('--lora_path', type=str, default="texdreamer_u128_t16_origin", help='Lora path')
    parser.add_argument('--save_path', type=str, default="output/t2uv", help='Save path for generated images')
    parser.add_argument('--test_list', type=str, default="data/sample_prompts.txt", help='Path to input txt file')

    args = parser.parse_args()

    # The diffusers version check and logger are already set up at module level,
    # so they are not repeated here.

    myseed = args.seed
    MODEL_NAME = "stabilityai/stable-diffusion-2-1"
    lora_path = args.lora_path
    save_path = args.save_path
    os.makedirs(save_path, exist_ok=True)

    # Written into the alpha channel of every generated texture below.
    uv_mask = Image.open("data/smpl_uv_mask.png").convert("L")

    positive_prompt = ", natural lighting, photo-realistic, 4k"
    negative_prompt = "overexposed, shadow, reflection, low quality, teeth, open mouth, eyes closed"

    pipe = get_lora_sd_pipeline(lora_path, base_model_name_or_path=MODEL_NAME, adapter_name="hutex")

    set_adapter(pipe, adapter_name="hutex")

    pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
    pipe.safety_checker = None

    ################### generate from .txt file ###################
    test_list = args.test_list
    with open(test_list, 'r', encoding='utf-8') as f:
        # One prompt per line; blank lines (e.g. a trailing newline) are skipped
        # so they neither produce an image nor consume an output index.
        prompt_lines = [line.strip() for line in f]
    prompt_lines = [line for line in prompt_lines if line]

    for idx, line in enumerate(prompt_lines):
        prompt = 'hutex, ' + line
        with torch.no_grad():
            # Re-seed per prompt so every texture starts from the same noise.
            set_seed(myseed)
            images = pipe(prompt + positive_prompt, height=1024, width=1024, num_inference_steps=32, guidance_scale=7.5,
                          negative_prompt=negative_prompt, num_images_per_prompt=1).images

        image = images[0]
        image.putalpha(uv_mask)
        image.save(os.path.join(save_path, '{:04d}.png'.format(idx)))
class Image2Token(nn.Module):
    """Project visual features to the text hidden size and decode them into a token sequence.

    Args:
        visual_hidden_size: Last-dimension size of the input features.
        text_hidden_size: Last-dimension size of the output.
        max_length: Number of learned query tokens, i.e. the output sequence
            length when ``num_layers > 0``.
        num_layers: Number of transformer decoder layers. With ``0`` the module
            is only the linear projection and no query parameter is created.
    """

    def __init__(self, visual_hidden_size=1280, text_hidden_size=1024, max_length=77, num_layers=3):
        super().__init__()

        self.visual_proj = nn.Linear(visual_hidden_size, text_hidden_size)

        if num_layers > 0:
            self.query = nn.Parameter(torch.randn((1, max_length, text_hidden_size)))
            # One head per 64 channels; clamp to 1 so hidden sizes below 64 do
            # not end up requesting zero attention heads.
            num_heads = max(1, text_hidden_size // 64)
            decoder_layer = nn.TransformerDecoderLayer(d_model=text_hidden_size, nhead=num_heads, batch_first=True)
            self.i2t = nn.TransformerDecoder(decoder_layer, num_layers=num_layers)
        else:
            self.i2t = None

    def forward(self, x):
        """Map ``x`` of shape (batch, seq, visual_hidden_size) to text-space tokens.

        Returns a tensor of shape (batch, max_length, text_hidden_size) when a
        decoder is present, otherwise (batch, seq, text_hidden_size).
        """
        # Unpacking keeps the original requirement that the input is 3-D.
        batch_size, _, _ = x.shape
        out = self.visual_proj(x)
        if self.i2t is not None:
            # `expand` broadcasts the learned queries over the batch as a view
            # instead of copying them once per sample.
            queries = self.query.expand(batch_size, -1, -1)
            out = self.i2t(queries, out)

        return out
-------------------------------------------------------------------------------- /output/i2uv/94_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/i2uv/94_a.png -------------------------------------------------------------------------------- /output/t2uv/0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0000.png -------------------------------------------------------------------------------- /output/t2uv/0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0001.png -------------------------------------------------------------------------------- /output/t2uv/0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0002.png -------------------------------------------------------------------------------- /output/t2uv/0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0003.png -------------------------------------------------------------------------------- /output/t2uv/0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0004.png -------------------------------------------------------------------------------- /output/t2uv/0005.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0005.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers==0.18.2 2 | ninja==1.11.1 3 | omegaconf==2.3.0 4 | opencv-contrib-python==4.7.0.72 5 | opencv-python==4.7.0.72 6 | peft==0.11.1 7 | Pillow==9.5.0 8 | pytorch3d==0.7.4 9 | PyYAML==6.0 10 | safetensors==0.4.3 11 | scipy==1.10.1 12 | timm==0.6.13 13 | tokenizers==0.13.3 14 | tqdm==4.65.0 15 | transformers==4.28.1 16 | typing_extensions==4.5.0 17 | typing-inspect==0.9.0 18 | 19 | --------------------------------------------------------------------------------