├── .gitignore
├── LICENSE
├── README.md
├── data
│   ├── input
│   │   ├── 1545.png
│   │   ├── 855.png
│   │   └── 94.png
│   ├── sample_prompts.txt
│   └── smpl_uv_mask.png
├── doc
│   ├── method.png
│   ├── overview.png
│   └── pipeline.png
├── infer_i2uv.py
├── infer_t2uv.py
├── model_i2t.py
├── output
│   ├── i2uv
│   │   ├── 1545.png
│   │   ├── 1545_a.png
│   │   ├── 855.png
│   │   ├── 855_a.png
│   │   ├── 94.png
│   │   └── 94_a.png
│   └── t2uv
│       ├── 0000.png
│       ├── 0001.png
│       ├── 0002.png
│       ├── 0003.png
│       ├── 0004.png
│       └── 0005.png
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by https://www.toptal.com/developers/gitignore/api/python
2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python
3 |
4 | ### Python ###
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 | cover/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | .pybuilder/
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # IPython
86 | profile_default/
87 | ipython_config.py
88 |
89 | # pyenv
90 | # For a library or package, you might want to ignore these files since the code is
91 | # intended to run in multiple environments; otherwise, check them in:
92 | # .python-version
93 |
94 | # pipenv
95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
98 | # install all needed dependencies.
99 | #Pipfile.lock
100 |
101 | # poetry
102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
103 | # This is especially recommended for binary packages to ensure reproducibility, and is more
104 | # commonly ignored for libraries.
105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
106 | #poetry.lock
107 |
108 | # pdm
109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
110 | #pdm.lock
111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
112 | # in version control.
113 | # https://pdm.fming.dev/#use-with-ide
114 | .pdm.toml
115 |
116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
117 | __pypackages__/
118 |
119 | # Celery stuff
120 | celerybeat-schedule
121 | celerybeat.pid
122 |
123 | # SageMath parsed files
124 | *.sage.py
125 |
126 | # Environments
127 | .env
128 | .venv
129 | env/
130 | venv/
131 | ENV/
132 | env.bak/
133 | venv.bak/
134 |
135 | # Spyder project settings
136 | .spyderproject
137 | .spyproject
138 |
139 | # Rope project settings
140 | .ropeproject
141 |
142 | # mkdocs documentation
143 | /site
144 |
145 | # mypy
146 | .mypy_cache/
147 | .dmypy.json
148 | dmypy.json
149 |
150 | # Pyre type checker
151 | .pyre/
152 |
153 | # pytype static type analyzer
154 | .pytype/
155 |
156 | # Cython debug symbols
157 | cython_debug/
158 |
159 | # PyCharm
160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
162 | # and can be added to the global gitignore or merged into this file. For a more nuclear
163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
164 | .idea/
165 |
166 | ### Python Patch ###
167 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
168 | poetry.toml
169 |
170 | # ruff
171 | .ruff_cache/
172 |
173 | # LSP config files
174 | pyrightconfig.json
175 |
176 | # End of https://www.toptal.com/developers/gitignore/api/python
177 |
178 | .vscode/
179 | .threestudio_cache/
180 | outputs/
181 | outputs-gradio/
182 |
183 | # pretrained model weights
184 | *.ckpt
185 | *.pt
186 | *.pth
187 |
188 | # wandb
189 | wandb/
190 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Kuma
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # [ECCV 2024 Oral] TexDreamer: Towards Zero-Shot High-Fidelity 3D Human Texture Generation
2 |
3 |
4 | ### [Project Page](https://ggxxii.github.io/texdreamer/) | [Paper (ArXiv)](https://arxiv.org/abs/2403.12906) | [ATLAS Dataset](https://huggingface.co/datasets/ggxxii/ATLAS)
5 |
6 |
7 | [Yufei Liu](https://ggxxii.github.io/)<sup>1</sup>,
8 | Junwei Zhu<sup>2</sup>,
9 | [Junshu Tang](https://junshutang.github.io/)<sup>3</sup>,
10 | Shijie Zhang<sup>4</sup>,
11 | [Jiangning Zhang](https://zhangzjn.github.io/)<sup>2</sup>,
12 | Weijian Cao<sup>2</sup>,
13 | Chengjie Wang<sup>2</sup>,
14 | Yunsheng Wu<sup>2</sup>,
15 | Dongjin Huang<sup>1*</sup>
16 |
17 |
18 | <sup>1</sup>Shanghai University, <sup>2</sup>Tencent Youtu Lab, <sup>3</sup>Shanghai Jiao Tong University, <sup>4</sup>Fudan University
19 |
20 | ## Updates
21 |
34 |
35 | [07/2024] TexDreamer is accepted to ECCV 2024!
36 |
37 |
38 |
39 |
44 |
45 | ## Installation
46 |
47 | We recommend using Anaconda to manage the Python environment. The setup commands below are provided for reference.
48 |
49 | ```bash
50 | git clone https://github.com/ggxxii/texdreamer.git
51 | cd texdreamer
52 | conda create -n texdreamer python=3.8
53 | conda activate texdreamer
54 | conda install pytorch==2.0.0 torchvision==0.15.0 torchaudio==2.0.0 pytorch-cuda=11.7 -c pytorch -c nvidia
55 | pip install -r requirements.txt
56 | ```
57 | Please also install xformers by following the instructions at https://github.com/facebookresearch/xformers.git.
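If a prebuilt wheel matching your torch/CUDA build is available, a plain pip install may be enough; treat this shortcut as an assumption to verify (an incompatible wheel can pull in a different torch), and fall back to building from source per the linked instructions.

```bash
# Common shortcut; verify the resolved xformers version is compatible
# with the pinned torch 2.0.0 / CUDA 11.7 environment above.
pip install xformers
```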
58 |
59 |
60 | ## Data Preparation
61 |
62 | ### Download TexDreamer Trained models
63 | You can download our model .zip from [Hugging Face](https://huggingface.co/ggxxii/TexDreamer).
64 | Put the downloaded models in the folder `texdreamer_u128_t16_origin`. The folder structure should look like:
65 |
66 | ```
67 | ./
68 | ├── ...
69 | └── texdreamer_u128_t16_origin/
70 |     ├── i2t
71 |     │   ├── i2t_decoder.pth
72 |     │   └── SMPL_NEUTRAL.pkl
73 |     ├── i2uv
74 |     │   ├── vision_encoder
75 |     │   │   ├── config.json
76 |     │   │   └── pytorch_model.bin
77 |     │   └── i2t_decoder.pth
78 |     ├── text_encoder
79 |     │   ├── adapter_config.json
80 |     │   └── adapter_model.bin
81 |     └── unet
82 |         ├── adapter_config.json
83 |         └── adapter_model.bin
84 | ```
85 |
86 | ## Generate Human Texture from Text
87 |
88 | ### From input .txt file
89 |
90 | We provide a .txt file with 6 sample prompts in `data/sample_prompts.txt`; the corresponding generation results are in `output/t2uv`.
91 |
92 | ```bash
93 | python infer_t2uv.py --lora_path texdreamer_u128_t16_origin --save_path output/t2uv --test_list data/sample_prompts.txt
94 | ```
95 | Since we load `stabilityai/stable-diffusion-2-1` from local files, you may need to download it first and change `cache_dir` in the function `get_lora_sd_pipeline`.
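For reference, a minimal one-off download sketch (run once with network access; the `cache_dir` value here is an example, so point it at whatever directory your `get_lora_sd_pipeline` uses):

```python
# Populates the local Hugging Face cache with the base model.
# "huggingface/hub" is an example cache location, not a value the repo mandates.
from diffusers import StableDiffusionPipeline

StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1",
    cache_dir="huggingface/hub",
)
```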
96 |
97 | ## Generate Human Texture from Image
98 |
99 | ### From input image folder
100 |
101 | We provide some sample images from the [Market-1501](https://zheng-lab.cecs.anu.edu.au/Project/project_reid.html) dataset in `data/input`; the corresponding generation results are in `output/i2uv`.
102 |
103 | Of course, you can also use your own images. The script pads each input to a square before encoding, so arbitrary aspect ratios are fine; note that only `.png` files are picked up.
104 |
105 | ```bash
106 | python infer_i2uv.py --lora_path texdreamer_u128_t16_origin --output_folder output/i2uv --test_folder data/input
107 | ```
108 |
109 |
110 | ## Citation
111 | If you find our work useful for your research, please consider citing the paper:
112 | ```
113 | @inproceedings{texdreamer,
114 | title={Texdreamer: Towards zero-shot high-fidelity 3d human texture generation},
115 | author={Liu, Yufei and Zhu, Junwei and Tang, Junshu and Zhang, Shijie and Zhang, Jiangning and Cao, Weijian and Wang, Chengjie and Wu, Yunsheng and Huang, Dongjin},
116 | booktitle={European Conference on Computer Vision},
117 | pages={184--202},
118 | year={2024},
119 | organization={Springer}
120 | }
121 | ```
--------------------------------------------------------------------------------
/data/input/1545.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/data/input/1545.png
--------------------------------------------------------------------------------
/data/input/855.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/data/input/855.png
--------------------------------------------------------------------------------
/data/input/94.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/data/input/94.png
--------------------------------------------------------------------------------
/data/sample_prompts.txt:
--------------------------------------------------------------------------------
1 | Deadpool, red-black costume
2 | Iron Man, red-gold armor
3 | Commodus, Roman tunic and laurel wreath
4 | pretty woman, stunning gold sequin gown, dark brown hair, young
5 | Tom Hardy, black tuxedo, buzz cut
6 | Stormtrooper
--------------------------------------------------------------------------------
/data/smpl_uv_mask.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/data/smpl_uv_mask.png
--------------------------------------------------------------------------------
/doc/method.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/doc/method.png
--------------------------------------------------------------------------------
/doc/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/doc/overview.png
--------------------------------------------------------------------------------
/doc/pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/doc/pipeline.png
--------------------------------------------------------------------------------
/infer_i2uv.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import random
4 | import argparse
5 |
6 | import torch
7 | import torch.nn.functional as F
8 | import torch.utils.checkpoint
9 |
10 | from accelerate.logging import get_logger
11 | from accelerate.utils import set_seed
12 | from diffusers import AutoencoderKL, DDPMScheduler, DiffusionPipeline, UNet2DConditionModel
13 | from diffusers import DDPMScheduler, PNDMScheduler, StableDiffusionPipeline, DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler
14 | from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
15 | from diffusers.optimization import get_scheduler
16 | from diffusers.utils import check_min_version
17 | from diffusers.utils.import_utils import is_xformers_available
18 | from huggingface_hub import HfFolder, Repository, whoami
19 | from PIL import Image
20 | from torchvision import transforms
21 | from tqdm.auto import tqdm
22 | from transformers import AutoTokenizer, PretrainedConfig, CLIPFeatureExtractor, CLIPProcessor, CLIPVisionModel
23 | from peft import PeftModel, LoraConfig, get_peft_model_state_dict, set_peft_model_state_dict
24 | from model_i2t import Image2Token
25 |
26 | # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
27 | check_min_version("0.10.0.dev0")
28 |
29 | logger = get_logger(__name__)
30 |
31 |
32 |
33 | def get_lora_sd_pipeline(
34 | ckpt_dir, base_model_name_or_path=None, dtype=torch.float16, device="cuda", adapter_name="default", cache_dir="huggingface/hub", local_files_only=True
35 | ):
36 | unet_sub_dir = os.path.join(ckpt_dir, "unet")
37 | text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
38 | if os.path.exists(text_encoder_sub_dir) and base_model_name_or_path is None:
39 | config = LoraConfig.from_pretrained(text_encoder_sub_dir)
40 | base_model_name_or_path = config.base_model_name_or_path
41 |
42 | if base_model_name_or_path is None:
43 | raise ValueError("Please specify the base model name or path")
44 |
45 | pipe = StableDiffusionPipeline.from_pretrained(
46 | base_model_name_or_path, torch_dtype=dtype, requires_safety_checker=False, cache_dir=cache_dir, local_files_only=local_files_only
47 | ).to(device)
48 | pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
49 |
50 | if os.path.exists(text_encoder_sub_dir):
51 | pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name)
52 |
53 | if dtype in (torch.float16, torch.bfloat16):
54 | pipe.unet.half()
55 | pipe.text_encoder.half()
56 |
57 | pipe.to(device)
58 | return pipe
59 |
60 |
61 | def load_adapter(pipe, ckpt_dir, adapter_name):
62 | unet_sub_dir = os.path.join(ckpt_dir, "unet")
63 | text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
64 | pipe.unet.load_adapter(unet_sub_dir, adapter_name=adapter_name)
65 | if os.path.exists(text_encoder_sub_dir):
66 | pipe.text_encoder.load_adapter(text_encoder_sub_dir, adapter_name=adapter_name)
67 |
68 |
69 | def set_adapter(pipe, adapter_name):
70 | pipe.unet.set_adapter(adapter_name)
71 | if isinstance(pipe.text_encoder, PeftModel):
72 | pipe.text_encoder.set_adapter(adapter_name)
73 |
74 |
75 | def merging_lora_with_base(pipe, ckpt_dir, adapter_name="default"):
76 | unet_sub_dir = os.path.join(ckpt_dir, "unet")
77 | text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
78 | if isinstance(pipe.unet, PeftModel):
79 | pipe.unet.set_adapter(adapter_name)
80 | else:
81 | pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
82 | pipe.unet = pipe.unet.merge_and_unload()
83 |
84 | if os.path.exists(text_encoder_sub_dir):
85 | if isinstance(pipe.text_encoder, PeftModel):
86 | pipe.text_encoder.set_adapter(adapter_name)
87 | else:
88 | pipe.text_encoder = PeftModel.from_pretrained(
89 | pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
90 | )
91 | pipe.text_encoder = pipe.text_encoder.merge_and_unload()
92 |
93 | return pipe
94 |
95 |
96 | def create_weighted_lora_adapter(pipe, adapters, weights, adapter_name="default"):
97 | pipe.unet.add_weighted_adapter(adapters, weights, adapter_name)
98 | if isinstance(pipe.text_encoder, PeftModel):
99 | pipe.text_encoder.add_weighted_adapter(adapters, weights, adapter_name)
100 |
101 | return pipe
102 |
103 |
104 | if __name__=='__main__':
105 | parser = argparse.ArgumentParser()
106 | parser.add_argument('--seed', type=int, default=777, help='Random seed')
107 | parser.add_argument('--lora_path', type=str, default="texdreamer_u128_t16_origin", help='Lora path')
108 | parser.add_argument('--cache_dir', type=str, default="huggingface", help='Cache directory for Huggingface models')
109 | parser.add_argument('--test_folder', type=str, default="data/input", help='Path to test folder')
110 | parser.add_argument('--output_folder', type=str, default="output/i2uv", help='Output folder for generated images')
111 |
112 | args = parser.parse_args()
113 |
114 | cache_dir = args.cache_dir
115 | myseed = args.seed
116 | MODEL_NAME = "stabilityai/stable-diffusion-2-1"
117 | CLIP_NAME="laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
118 |
119 | lora_path = args.lora_path
120 | i2uv_path = os.path.join(lora_path, "i2uv")
121 | local_files_only=True
122 | uv_mask = Image.open("data/smpl_uv_mask.png").convert("L")
123 |
124 |
125 | processor = CLIPProcessor.from_pretrained(CLIP_NAME, cache_dir=cache_dir, local_files_only=local_files_only)
126 | pipe = get_lora_sd_pipeline(lora_path, base_model_name_or_path=MODEL_NAME, adapter_name="hutex")
127 | set_adapter(pipe, adapter_name="hutex")
128 |
129 | pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
130 | pipe.safety_checker=None
131 |
132 |
133 | i2t_decoder = Image2Token()
134 | i2t_decoder.load_state_dict(torch.load(os.path.join(i2uv_path, "i2t_decoder.pth")))
135 | i2t_decoder.eval()
136 | i2t_decoder.to(pipe.device)
137 |
138 | i2uv_vision_encoder_path = os.path.join(i2uv_path, 'vision_encoder')
139 | if os.path.exists(i2uv_vision_encoder_path):
140 | CLIP_NAME=i2uv_vision_encoder_path
141 | vision_encoder = CLIPVisionModel.from_pretrained(CLIP_NAME, cache_dir=cache_dir, local_files_only=local_files_only)
142 | vision_encoder.eval()
143 | vision_encoder.to(pipe.device)
144 |
145 |
146 | test_folder = args.test_folder
147 | output_folder = args.output_folder
148 | os.makedirs(output_folder, exist_ok=True)
149 |
150 |
151 | for im_file in os.listdir(test_folder):
152 |
153 | if os.path.isdir(test_folder):
154 | folder_path = test_folder
155 | save_path = output_folder
156 | os.makedirs(save_path, exist_ok=True)
157 |
158 | if im_file.endswith('png'):
159 |
160 | im_pil = Image.open(os.path.join(folder_path, im_file))
161 | w,h=im_pil.size
162 | max_size = max(w,h)
163 | crop = Image.new("RGB", (max_size, max_size))
164 | crop.paste(im_pil, ((max_size-w)//2, (max_size-h)//2))
165 | # encode the padded image with CLIP, then map its tokens to 77 prompt embeddings
166 | with torch.no_grad():
167 | encoder_hidden_states = i2t_decoder(vision_encoder(processor(images=crop, return_tensors="pt")["pixel_values"].to(pipe.device)).last_hidden_state)
168 | set_seed(myseed)
169 | image = pipe(prompt_embeds=encoder_hidden_states, height=1024, width=1024, num_inference_steps=32, guidance_scale=2).images[0]
170 | image.save(os.path.join(save_path, im_file.replace('.jpg', '.png')))
171 |
172 | crop=crop.resize((1024,1024))
173 | show_img = Image.new("RGB", (1024*2,1024))
174 | show_img.paste(crop)
175 | show_img.paste(image, (1024,0))
176 | filename, extension = os.path.splitext(im_file)
177 | show_img.save(os.path.join(save_path, f"{filename}_a{extension}"))
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
--------------------------------------------------------------------------------
/infer_t2uv.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | from pathlib import Path
4 | from typing import Optional
5 | import argparse
6 |
7 | import torch
8 | import torch.nn.functional as F
9 | import torch.utils.checkpoint
10 |
11 | from accelerate.logging import get_logger
12 | from accelerate.utils import set_seed
13 | from diffusers import AutoencoderKL, DDPMScheduler, DiffusionPipeline, UNet2DConditionModel
14 | from diffusers import DDPMScheduler, PNDMScheduler, StableDiffusionPipeline, DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler
15 | from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
16 | from diffusers.optimization import get_scheduler
17 | from diffusers.utils import check_min_version
18 | from diffusers.utils.import_utils import is_xformers_available
19 | from huggingface_hub import HfFolder, Repository, whoami
20 | from PIL import Image
21 | from torchvision import transforms
22 | from tqdm.auto import tqdm
23 | from transformers import AutoTokenizer, PretrainedConfig, CLIPFeatureExtractor
24 | from peft import PeftModel, LoraConfig, get_peft_model_state_dict, set_peft_model_state_dict
25 |
26 | # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
27 | check_min_version("0.10.0.dev0")
28 |
29 | logger = get_logger(__name__)
30 |
31 |
32 |
33 |
34 | def get_lora_sd_pipeline(
35 | ckpt_dir, base_model_name_or_path=None, dtype=torch.float16, device="cuda", adapter_name="default", cache_dir="huggingface/hub", local_files_only=True
36 | ):
37 | unet_sub_dir = os.path.join(ckpt_dir, "unet")
38 | text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
39 | if os.path.exists(text_encoder_sub_dir) and base_model_name_or_path is None:
40 | config = LoraConfig.from_pretrained(text_encoder_sub_dir)
41 | base_model_name_or_path = config.base_model_name_or_path
42 |
43 | if base_model_name_or_path is None:
44 | raise ValueError("Please specify the base model name or path")
45 |
46 | pipe = StableDiffusionPipeline.from_pretrained(
47 | base_model_name_or_path, torch_dtype=dtype, requires_safety_checker=False, cache_dir=cache_dir, local_files_only=local_files_only
48 | ).to(device)
49 | pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
50 |
51 | if os.path.exists(text_encoder_sub_dir):
52 | pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name)
53 |
54 | if dtype in (torch.float16, torch.bfloat16):
55 | pipe.unet.half()
56 | pipe.text_encoder.half()
57 |
58 | pipe.to(device)
59 | return pipe
60 |
61 |
62 | def load_adapter(pipe, ckpt_dir, adapter_name):
63 | unet_sub_dir = os.path.join(ckpt_dir, "unet")
64 | text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
65 | pipe.unet.load_adapter(unet_sub_dir, adapter_name=adapter_name)
66 | if os.path.exists(text_encoder_sub_dir):
67 | pipe.text_encoder.load_adapter(text_encoder_sub_dir, adapter_name=adapter_name)
68 |
69 |
70 | def set_adapter(pipe, adapter_name):
71 | pipe.unet.set_adapter(adapter_name)
72 | if isinstance(pipe.text_encoder, PeftModel):
73 | pipe.text_encoder.set_adapter(adapter_name)
74 |
75 |
76 | def merging_lora_with_base(pipe, ckpt_dir, adapter_name="default"):
77 | unet_sub_dir = os.path.join(ckpt_dir, "unet")
78 | text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
79 | if isinstance(pipe.unet, PeftModel):
80 | pipe.unet.set_adapter(adapter_name)
81 | else:
82 | pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
83 | pipe.unet = pipe.unet.merge_and_unload()
84 |
85 | if os.path.exists(text_encoder_sub_dir):
86 | if isinstance(pipe.text_encoder, PeftModel):
87 | pipe.text_encoder.set_adapter(adapter_name)
88 | else:
89 | pipe.text_encoder = PeftModel.from_pretrained(
90 | pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
91 | )
92 | pipe.text_encoder = pipe.text_encoder.merge_and_unload()
93 |
94 | return pipe
95 |
96 |
97 | def create_weighted_lora_adapter(pipe, adapters, weights, adapter_name="default"):
98 | pipe.unet.add_weighted_adapter(adapters, weights, adapter_name)
99 | if isinstance(pipe.text_encoder, PeftModel):
100 | pipe.text_encoder.add_weighted_adapter(adapters, weights, adapter_name)
101 |
102 | return pipe
103 |
104 |
105 | if __name__=='__main__':
106 | parser = argparse.ArgumentParser()
107 | parser.add_argument('--seed', type=int, default=777, help='Random seed')
108 | parser.add_argument('--lora_path', type=str, default="texdreamer_u128_t16_origin", help='Lora path')
109 | parser.add_argument('--save_path', type=str, default="output/t2uv", help='Save path for generated images')
110 | parser.add_argument('--test_list', type=str, default="data/sample_prompts.txt", help='Path to input txt file')
111 |
112 | args = parser.parse_args()
113 |
114 | # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
115 | check_min_version("0.10.0.dev0")
116 |
117 | logger = get_logger(__name__)
118 |
119 | myseed = args.seed
120 | MODEL_NAME = "stabilityai/stable-diffusion-2-1"
121 | lora_path = args.lora_path
122 | save_path = args.save_path
123 | os.makedirs(save_path, exist_ok=True)
124 |
125 | uv_mask = Image.open("data/smpl_uv_mask.png").convert("L")
126 |
127 | positive_prompt = ", natural lighting, photo-realistic, 4k"
128 | negative_prompt = "overexposed, shadow, reflection, low quality, teeth, open mouth, eyes closed"
129 |
130 | pipe = get_lora_sd_pipeline(lora_path, base_model_name_or_path=MODEL_NAME, adapter_name="hutex")
131 |
132 | set_adapter(pipe, adapter_name="hutex")
133 |
134 | pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
135 | pipe.safety_checker = None
136 |
137 | ################### generate from .txt file ###################
138 | test_list = args.test_list
139 | idx = 0
140 | with open(test_list, 'r') as f:
141 | for line in f.readlines():
142 | prompt = 'hutex, ' + line.strip()
143 | with torch.no_grad():
144 | set_seed(myseed)
145 | images = pipe(prompt + positive_prompt, height=1024, width=1024, num_inference_steps=32, guidance_scale=7.5,
146 | negative_prompt=negative_prompt, num_images_per_prompt=1).images
147 |
148 | image = images[0]
149 | image.putalpha(uv_mask)
150 | image.save(os.path.join(save_path, '{:04d}.png'.format(idx)))
151 |
152 | idx += 1
153 |
154 |
155 |
156 |
--------------------------------------------------------------------------------
/model_i2t.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 |
6 | class Image2Token(nn.Module):
7 |
8 | def __init__(self, visual_hidden_size=1280, text_hidden_size=1024, max_length=77, num_layers=3):
9 | super(Image2Token, self).__init__()
10 |
11 | self.visual_proj = nn.Linear(visual_hidden_size, text_hidden_size)
12 |
13 | if num_layers>0:
14 | self.query = nn.Parameter(torch.randn((1, max_length, text_hidden_size)))
15 | decoder_layer = nn.TransformerDecoderLayer(d_model=text_hidden_size, nhead=text_hidden_size//64, batch_first=True)
16 | self.i2t = nn.TransformerDecoder(decoder_layer, num_layers=num_layers)
17 | else:
18 | self.i2t = None
19 |
20 | def forward(self, x):
21 | b,s,d=x.shape
22 | out = self.visual_proj(x)
23 | if self.i2t is not None:
24 | out = self.i2t(self.query.repeat(b,1,1), out)
25 |
26 | return out
27 |
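# Shape sketch (illustrative; an assumption based on the defaults above, not
# part of the released pipeline): a CLIP ViT-H/14 vision encoder emits
# last_hidden_state of shape (batch, 257, 1280). Image2Token projects it to
# 1024 dims and cross-attends 77 learned query tokens against it, producing
# (batch, 77, 1024) prompt embeddings for the SD 2.1 conditioning slot:
#
#   m = Image2Token()
#   y = m(torch.randn(2, 257, 1280))
#   assert y.shape == (2, 77, 1024)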
--------------------------------------------------------------------------------
/output/i2uv/1545.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/i2uv/1545.png
--------------------------------------------------------------------------------
/output/i2uv/1545_a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/i2uv/1545_a.png
--------------------------------------------------------------------------------
/output/i2uv/855.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/i2uv/855.png
--------------------------------------------------------------------------------
/output/i2uv/855_a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/i2uv/855_a.png
--------------------------------------------------------------------------------
/output/i2uv/94.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/i2uv/94.png
--------------------------------------------------------------------------------
/output/i2uv/94_a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/i2uv/94_a.png
--------------------------------------------------------------------------------
/output/t2uv/0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0000.png
--------------------------------------------------------------------------------
/output/t2uv/0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0001.png
--------------------------------------------------------------------------------
/output/t2uv/0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0002.png
--------------------------------------------------------------------------------
/output/t2uv/0003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0003.png
--------------------------------------------------------------------------------
/output/t2uv/0004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0004.png
--------------------------------------------------------------------------------
/output/t2uv/0005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ggxxii/texdreamer/e8b9cd76ab4f8f49f3c940b31d6bdf2c427173a3/output/t2uv/0005.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | diffusers==0.18.2
2 | ninja==1.11.1
3 | omegaconf==2.3.0
4 | opencv-contrib-python==4.7.0.72
5 | opencv-python==4.7.0.72
6 | peft==0.11.1
7 | Pillow==9.5.0
8 | pytorch3d==0.7.4
9 | PyYAML==6.0
10 | safetensors==0.4.3
11 | scipy==1.10.1
12 | timm==0.6.13
13 | tokenizers==0.13.3
14 | tqdm==4.65.0
15 | transformers==4.28.1
16 | typing_extensions==4.5.0
17 | typing-inspect==0.9.0
18 |
19 |
--------------------------------------------------------------------------------