├── .gitignore ├── hy3dpaint ├── DifferentiableRenderer │ ├── __init__.py │ ├── compile_mesh_painter.sh │ ├── dist │ │ ├── mesh_inpaint_processor-0.0.0-cp310-cp310-win_amd64.whl │ │ ├── mesh_inpaint_processor-0.0.0-cp311-cp311-win_amd64.whl │ │ ├── mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl │ │ └── mesh_inpaint_processor-0.0.0-cp311-cp311-linux_x86_64.whl │ ├── setup.py │ └── camera_utils.py ├── custom_rasterizer │ ├── lib │ │ └── custom_rasterizer_kernel │ │ │ ├── __init__.py │ │ │ ├── rasterizer.h │ │ │ ├── rasterizer_gpu.cu │ │ │ └── rasterizer.cpp │ ├── custom_rasterizer │ │ ├── __init__.py │ │ └── render.py │ ├── dist │ │ ├── custom_rasterizer-0.1-cp310-cp310-win_amd64.whl │ │ ├── custom_rasterizer-0.1-cp311-cp311-win_amd64.whl │ │ ├── custom_rasterizer-0.1-cp312-cp312-win_amd64.whl │ │ └── custom_rasterizer-0.1-cp311-cp311-linux_x86_64.whl │ └── setup.py ├── 4.0 │ └── python │ │ └── lib │ │ └── site-packages │ │ └── extern_draco.dll ├── src │ ├── data │ │ ├── dataloader │ │ │ ├── pbr_data_format.txt │ │ │ └── objaverse_loader_forTexturePBR.py │ │ ├── __init__.py │ │ └── objaverse_hunyuan.py │ ├── __init__.py │ └── utils │ │ ├── __init__.py │ │ └── train_util.py ├── utils │ ├── __init__.py │ ├── uvwrap_utils.py │ ├── simplify_mesh_utils.py │ ├── image_super_utils.py │ ├── torchvision_fix.py │ ├── multiview_utils.py │ └── pipeline_utils.py ├── cfgs │ └── hunyuan-paint-pbr.yaml ├── hunyuanpaintpbr │ └── __init__.py ├── demo.py ├── README.md └── convert_utils.py ├── hy3dshape ├── hy3dshape │ ├── utils │ │ ├── trainings │ │ │ ├── __init__.py │ │ │ ├── lr_scheduler.py │ │ │ ├── peft.py │ │ │ └── mesh.py │ │ ├── visualizers │ │ │ ├── __init__.py │ │ │ ├── html_util.py │ │ │ └── color_util.py │ │ ├── __init__.py │ │ ├── ema.py │ │ ├── misc.py │ │ └── utils.py │ ├── models │ │ ├── denoisers │ │ │ ├── __init__.py │ │ │ └── moe_layers.py │ │ ├── autoencoders │ │ │ ├── __init__.py │ │ │ ├── attention_processors.py │ │ │ └── surface_extractors.py │ │ ├── __init__.py │ │ └── diffusion │ │ │ └── transport │ │ │ ├── utils.py │ │ │ ├── __init__.py │ │ │ └── integrators.py │ ├── __init__.py │ ├── rembg.py │ ├── meshlib.py │ ├── data │ │ └── utils.py │ ├── preprocessors.py │ └── postprocessors.py ├── minimal_demo.py ├── minimal_vae_demo.py └── configs │ ├── hunyuan3ddit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-512.yaml │ ├── hunyuan3ddit-full-params-finetuning-flowmatching-dinog518-bf16-lr1e5-512.yaml │ ├── hunyuandit-finetuning-flowmatching-dinog518-bf16-lr1e5-4096.yaml │ ├── hunyuandit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-4096.yaml │ └── hunyuandit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-512.yaml ├── __init__.py ├── requirements.txt ├── configs ├── dit_config.yaml ├── dit_config_mini.yaml └── dit_config_2_1.yaml ├── workflow_examples └── Batch_Generator.json └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/trainings/__init__.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/visualizers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/custom_rasterizer/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | from .render import rasterize, interpolate 3 | """ 4 | from .render import * 5 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS 2 | 3 | __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"] -------------------------------------------------------------------------------- /hy3dpaint/4.0/python/lib/site-packages/extern_draco.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/4.0/python/lib/site-packages/extern_draco.dll -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/compile_mesh_painter.sh: -------------------------------------------------------------------------------- 1 | c++ -O3 -Wall -shared -std=c++11 -fPIC `python -m pybind11 --includes` mesh_inpaint_processor.cpp -o mesh_inpaint_processor`python3-config --extension-suffix` -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp310-cp310-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp310-cp310-win_amd64.whl -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp311-cp311-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp311-cp311-win_amd64.whl -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp312-cp312-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp312-cp312-win_amd64.whl -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp311-cp311-linux_x86_64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp311-cp311-linux_x86_64.whl -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 
-*- 2 | 3 | from .misc import get_config_from_file 4 | from .misc import instantiate_from_config 5 | from .utils import get_logger, logger, synchronize_timer, smart_load_model 6 | -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp310-cp310-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp310-cp310-win_amd64.whl -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp311-cp311-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp311-cp311-win_amd64.whl -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp311-cp311-linux_x86_64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp311-cp311-linux_x86_64.whl -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | trimesh 2 | pymeshlab 3 | pygltflib 4 | xatlas 5 | open3d 6 | omegaconf 7 | pyyaml 8 | configargparse 9 | transformers 10 | diffusers 11 | accelerate 12 | pytorch-lightning 13 | opencv-python 14 | huggingface-hub 15 | safetensors 16 | scikit-image 17 | pybind11 18 | timm 19 | 20 | meshlib 21 | -------------------------------------------------------------------------------- /hy3dpaint/src/data/dataloader/pbr_data_format.txt: -------------------------------------------------------------------------------- 1 | +-----------------+----------------------------------+ 2 | | Key | Value | 3 | +-----------------+----------------------------------+ 4 | | images_cond | torch.Size([2, 2, 3, 512, 512]) | 5 | | images_albedo | torch.Size([2, 6, 3, 512, 512]) | 6 | | images_mr | torch.Size([2, 6, 3, 512, 512]) | 7 | | images_normal | torch.Size([2, 6, 3, 512, 512]) | 8 | | images_position | torch.Size([2, 6, 3, 512, 512]) | 9 | | caption | ['high quality', 'high quality'] | 10 | +-----------------+----------------------------------+ -------------------------------------------------------------------------------- /hy3dpaint/src/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | -------------------------------------------------------------------------------- /hy3dpaint/src/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | -------------------------------------------------------------------------------- /hy3dpaint/src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | -------------------------------------------------------------------------------- /hy3dpaint/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/denoisers/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from .hunyuan3ddit import Hunyuan3DDiT 16 | -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from setuptools.command.build_ext import build_ext 3 | import sys 4 | import os 5 | import pybind11 6 | class BuildExt(build_ext): 7 | def build_extensions(self): 8 | if sys.platform == 'win32': 9 | # Windows-specific compiler flags 10 | for ext in self.extensions: 11 | ext.extra_compile_args = ['/O2', '/Wall'] 12 | else: 13 | # Linux/Mac flags 14 | for ext in self.extensions: 15 | ext.extra_compile_args = ['-O3', '-Wall', '-fPIC'] 16 | build_ext.build_extensions(self) 17 | 18 | setup( 19 | name="mesh_inpaint_processor", 20 | ext_modules=[ 21 | Extension( 22 | "mesh_inpaint_processor", 23 | ["mesh_inpaint_processor.cpp"], 24 | include_dirs=[ 25 | pybind11.get_include(), 26 | pybind11.get_include(user=True) 27 | ], 28 | language='c++' 29 | ), 30 | ], 31 | cmdclass={'build_ext': BuildExt}, 32 | ) -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from .pipelines import Hunyuan3DDiTPipeline, Hunyuan3DDiTFlowMatchingPipeline 16 | from .postprocessors import FaceReducer, FloaterRemover, DegenerateFaceRemover, MeshSimplifier 17 | from .preprocessors import ImageProcessorV2, IMAGE_PROCESSORS, DEFAULT_IMAGEPROCESSOR 18 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/rembg.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from PIL import Image 16 | from rembg import remove, new_session 17 | 18 | 19 | class BackgroundRemover(): 20 | def __init__(self): 21 | self.session = new_session() 22 | 23 | def __call__(self, image: Image.Image): 24 | output = remove(image, session=self.session, bgcolor=[255, 255, 255, 0]) 25 | return output 26 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/autoencoders/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 
8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from .attention_blocks import CrossAttentionDecoder 16 | from .attention_processors import FlashVDMCrossAttentionProcessor, CrossAttentionProcessor, \ 17 | FlashVDMTopMCrossAttentionProcessor 18 | from .model import ShapeVAE, VectsetVAE 19 | from .surface_extractors import SurfaceExtractors, MCSurfaceExtractor, DMCSurfaceExtractor, Latent2MeshOutput 20 | from .volume_decoders import HierarchicalVolumeDecoding, FlashVDMVolumeDecoding, VanillaVolumeDecoder 21 | -------------------------------------------------------------------------------- /hy3dpaint/cfgs/hunyuan-paint-pbr.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 5.0e-05 3 | target: hunyuanpaintpbr.model.HunyuanPaint 4 | params: 5 | num_view: 6 6 | view_size: 512 7 | drop_cond_prob: 0.1 8 | 9 | noise_in_channels: 12 10 | 11 | stable_diffusion_config: 12 | pretrained_model_name_or_path: stabilityai/stable-diffusion-2-1 13 | custom_pipeline: ./hunyuanpaintpbr 14 | 15 | 16 | data: 17 | target: src.data.objaverse_hunyuan.DataModuleFromConfig 18 | params: 19 | batch_size: 1 20 | num_workers: 4 21 | train: 22 | - 23 | target: src.data.dataloader.objaverse_loader_forTexturePBR.TextureDataset 24 | params: 25 | num_view: 6 26 | json_path: train_examples/examples.json 27 | validation: 28 | - 29 | target: src.data.dataloader.objaverse_loader_forTexturePBR.TextureDataset 30 | params: 31 | num_view: 6 32 | json_path: train_examples/examples.json 33 | 34 | lightning: 35 | modelcheckpoint: 36 | params: 37 | every_n_train_steps: 10000 38 | save_top_k: -1 39 | save_last: true 40 | callbacks: {} 41 | 42 | trainer: 43 | benchmark: true 44 | max_epochs: -1 45 | gradient_clip_val: 1.0 46 | val_check_interval: 1000 47 | num_sanity_val_steps: 0 48 | accumulate_grad_batches: 1 49 | check_val_every_n_epoch: null # if not set this, validation does not run 50 | 51 | init_control_from: 52 | resume_from: 53 | -------------------------------------------------------------------------------- /hy3dpaint/utils/uvwrap_utils.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 
8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import trimesh 16 | import xatlas 17 | 18 | 19 | def mesh_uv_wrap(mesh): 20 | if isinstance(mesh, trimesh.Scene): 21 | mesh = mesh.dump(concatenate=True) 22 | 23 | if len(mesh.faces) > 500000000: 24 | raise ValueError("The mesh has more than 500,000,000 faces, which is not supported.") 25 | 26 | vmapping, indices, uvs = xatlas.parametrize(mesh.vertices, mesh.faces) 27 | 28 | mesh.vertices = mesh.vertices[vmapping] 29 | mesh.faces = indices 30 | mesh.visual.uv = uvs 31 | 32 | return mesh 33 | -------------------------------------------------------------------------------- /hy3dshape/minimal_demo.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from PIL import Image 16 | 17 | from hy3dshape.rembg import BackgroundRemover 18 | from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline 19 | 20 | model_path = 'tencent/Hunyuan3D-2.1' 21 | pipeline_shapegen = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(model_path) 22 | 23 | image_path = 'demos/demo.png' 24 | image = Image.open(image_path).convert("RGBA") 25 | if image.mode == 'RGB': 26 | rembg = BackgroundRemover() 27 | image = rembg(image) 28 | 29 | mesh = pipeline_shapegen(image=image)[0] 30 | mesh.export('demo.glb') 31 | -------------------------------------------------------------------------------- /hy3dpaint/hunyuanpaintpbr/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 
8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from .pipeline import HunyuanPaintPipeline 16 | from .unet.model import HunyuanPaint 17 | from .unet.modules import ( 18 | Dino_v2, 19 | Basic2p5DTransformerBlock, 20 | ImageProjModel, 21 | UNet2p5DConditionModel, 22 | ) 23 | from .unet.attn_processor import ( 24 | PoseRoPEAttnProcessor2_0, 25 | SelfAttnProcessor2_0, 26 | RefAttnProcessor2_0, 27 | ) 28 | 29 | __all__ = [ 30 | 'HunyuanPaintPipeline', 31 | 'HunyuanPaint', 32 | 'Dino_v2', 33 | 'Basic2p5DTransformerBlock', 34 | 'ImageProjModel', 35 | 'UNet2p5DConditionModel', 36 | 'PoseRoPEAttnProcessor2_0', 37 | 'SelfAttnProcessor2_0', 38 | 'RefAttnProcessor2_0', 39 | ] 40 | -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/custom_rasterizer/render.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import custom_rasterizer_kernel 16 | import torch 17 | 18 | 19 | def rasterize(pos, tri, resolution, clamp_depth=torch.zeros(0), use_depth_prior=0): 20 | assert pos.device == tri.device 21 | findices, barycentric = custom_rasterizer_kernel.rasterize_image( 22 | pos[0], tri, clamp_depth, resolution[1], resolution[0], 1e-6, use_depth_prior 23 | ) 24 | return findices, barycentric 25 | 26 | 27 | def interpolate(col, findices, barycentric, tri): 28 | f = findices - 1 + (findices == 0) 29 | vcol = col[0, tri.long()[f.long()]] 30 | result = barycentric.view(*barycentric.shape, 1) * vcol 31 | result = torch.sum(result, axis=-2) 32 | return result.view(1, *result.shape) 33 | -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/setup.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 
5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from setuptools import setup, find_packages 16 | import torch 17 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension 18 | 19 | # build custom rasterizer 20 | 21 | custom_rasterizer_module = CUDAExtension( 22 | "custom_rasterizer_kernel", 23 | [ 24 | "lib/custom_rasterizer_kernel/rasterizer.cpp", 25 | "lib/custom_rasterizer_kernel/grid_neighbor.cpp", 26 | "lib/custom_rasterizer_kernel/rasterizer_gpu.cu", 27 | ], 28 | ) 29 | 30 | setup( 31 | packages=find_packages(), 32 | version="0.1", 33 | name="custom_rasterizer", 34 | include_package_data=True, 35 | package_dir={"": "."}, 36 | ext_modules=[ 37 | custom_rasterizer_module, 38 | ], 39 | cmdclass={"build_ext": BuildExtension}, 40 | ) 41 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Open Source Model Licensed under the Apache License Version 2.0 2 | # and Other Licenses of the Third-Party Components therein: 3 | # The below Model in this distribution may have been modified by THL A29 Limited 4 | # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. 5 | 6 | # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. 7 | # The below software and/or models in this distribution may have been 8 | # modified by THL A29 Limited ("Tencent Modifications"). 9 | # All Tencent Modifications are Copyright (C) THL A29 Limited. 10 | 11 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 12 | # except for the third-party components listed below. 13 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 14 | # in the repsective licenses of these third-party components. 15 | # Users must comply with all terms and conditions of original licenses of these third-party 16 | # components and must ensure that the usage of the third party components adheres to 17 | # all relevant laws and regulations. 18 | 19 | # For avoidance of doubts, Hunyuan 3D means the large language models and 20 | # their software and algorithms, including trained model weights, parameters (including 21 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 22 | # fine-tuning enabling code and other elements of the foregoing made publicly available 23 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
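# Re-exports the core model components used by the shape pipeline: the ShapeVAE
# autoencoder, the DINO/CLIP image conditioners, and the Hunyuan3DDiT denoiser.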
24 | 25 | 26 | from .autoencoders import ShapeVAE 27 | from .conditioner import DualImageEncoder, SingleImageEncoder, DinoImageEncoder, CLIPImageEncoder 28 | from .denoisers import Hunyuan3DDiT 29 | -------------------------------------------------------------------------------- /hy3dpaint/demo.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from textureGenPipeline import Hunyuan3DPaintPipeline, Hunyuan3DPaintConfig 16 | 17 | try: 18 | from utils.torchvision_fix import apply_fix 19 | 20 | apply_fix() 21 | except ImportError: 22 | print("Warning: torchvision_fix module not found, proceeding without compatibility fix") 23 | except Exception as e: 24 | print(f"Warning: Failed to apply torchvision fix: {e}") 25 | 26 | 27 | if __name__ == "__main__": 28 | 29 | max_num_view = 6 # can be 6 to 9 30 | resolution = 768 # can be 768 or 512 31 | 32 | conf = Hunyuan3DPaintConfig(max_num_view, resolution) 33 | paint_pipeline = Hunyuan3DPaintPipeline(conf) 34 | output_mesh_path = paint_pipeline(mesh_path="./assets/FireElementalMonster.obj", image_path="./assets/FireElementalMonster.png") 35 | print(f"Output mesh path: {output_mesh_path}") 36 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/meshlib.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
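# Mesh post-processing via meshlib: build an mrmesh from numpy faces/vertices,
# pack it optimally, decimate it with the supplied settings, and return the
# result as a trimesh.Trimesh.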
14 | 15 | import numpy as np 16 | import meshlib.mrmeshnumpy as mrmeshnumpy 17 | import meshlib.mrmeshpy as mrmeshpy 18 | import trimesh 19 | 20 | def postprocessmesh(vertices: np.array, faces: np.array, settings): 21 | print('Generating Meshlib Mesh ...') 22 | mesh = mrmeshnumpy.meshFromFacesVerts(faces, vertices) 23 | print('Packing Optimally ...') 24 | mesh.packOptimally() 25 | print('Decimating ...') 26 | mrmeshpy.decimateMesh(mesh, settings) 27 | 28 | out_verts = mrmeshnumpy.getNumpyVerts(mesh) 29 | out_faces = mrmeshnumpy.getNumpyFaces(mesh.topology) 30 | 31 | mesh = trimesh.Trimesh(vertices=out_verts, faces=out_faces) 32 | print(f"Reduced faces, resulting in {mesh.vertices.shape[0]} vertices and {mesh.faces.shape[0]} faces") 33 | 34 | return mesh 35 | 36 | 37 | -------------------------------------------------------------------------------- /hy3dpaint/utils/simplify_mesh_utils.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import trimesh 16 | import pymeshlab 17 | 18 | 19 | def remesh_mesh(mesh_path, remesh_path): 20 | mesh = mesh_simplify_trimesh(mesh_path, remesh_path) 21 | 22 | 23 | def mesh_simplify_trimesh(inputpath, outputpath, target_count=50000): 24 | # 先去除离散面 25 | ms = pymeshlab.MeshSet() 26 | if inputpath.endswith(".glb"): 27 | ms.load_new_mesh(inputpath, load_in_a_single_layer=True) 28 | else: 29 | ms.load_new_mesh(inputpath) 30 | ms.save_current_mesh(outputpath.replace(".glb", ".obj"), save_textures=False) 31 | # 调用减面函数 32 | courent = trimesh.load(outputpath.replace(".glb", ".obj"), force="mesh") 33 | face_num = courent.faces.shape[0] 34 | 35 | if face_num > target_count: 36 | courent = courent.simplify_quadric_decimation(target_count) 37 | courent.export(outputpath) 38 | -------------------------------------------------------------------------------- /hy3dpaint/utils/image_super_utils.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 
8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import numpy as np 16 | from PIL import Image 17 | 18 | 19 | class imageSuperNet: 20 | def __init__(self, config) -> None: 21 | from realesrgan import RealESRGANer 22 | from basicsr.archs.rrdbnet_arch import RRDBNet 23 | 24 | model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4) 25 | upsampler = RealESRGANer( 26 | scale=4, 27 | model_path=config.realesrgan_ckpt_path, 28 | dni_weight=None, 29 | model=model, 30 | tile=0, 31 | tile_pad=10, 32 | pre_pad=0, 33 | half=True, 34 | gpu_id=None, 35 | ) 36 | self.upsampler = upsampler 37 | 38 | def __call__(self, image): 39 | output, _ = self.upsampler.enhance(np.array(image)) 40 | output = Image.fromarray(output) 41 | return output 42 | -------------------------------------------------------------------------------- /configs/dit_config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | target: .hy3dgen.shapegen.models.Hunyuan3DDiT 3 | params: 4 | in_channels: 64 5 | context_in_dim: 1536 6 | hidden_size: 1024 7 | mlp_ratio: 4.0 8 | num_heads: 16 9 | depth: 16 10 | depth_single_blocks: 32 11 | axes_dim: [ 64 ] 12 | theta: 10000 13 | qkv_bias: True 14 | guidance_embed: False 15 | 16 | vae: 17 | target: .hy3dgen.shapegen.models.ShapeVAE 18 | params: 19 | num_latents: 3072 20 | embed_dim: 64 21 | num_freqs: 8 22 | include_pi: false 23 | heads: 16 24 | width: 1024 25 | num_decoder_layers: 16 26 | qkv_bias: false 27 | qk_norm: true 28 | scale_factor: 0.9990943042622529 29 | 30 | conditioner: 31 | target: .hy3dgen.shapegen.models.SingleImageEncoder 32 | params: 33 | main_image_encoder: 34 | type: DinoImageEncoder # dino giant 35 | kwargs: 36 | config: 37 | attention_probs_dropout_prob: 0.0 38 | drop_path_rate: 0.0 39 | hidden_act: gelu 40 | hidden_dropout_prob: 0.0 41 | hidden_size: 1536 42 | image_size: 518 43 | initializer_range: 0.02 44 | layer_norm_eps: 1.e-6 45 | layerscale_value: 1.0 46 | mlp_ratio: 4 47 | model_type: dinov2 48 | num_attention_heads: 24 49 | num_channels: 3 50 | num_hidden_layers: 40 51 | patch_size: 14 52 | qkv_bias: true 53 | torch_dtype: float32 54 | use_swiglu_ffn: true 55 | image_size: 518 56 | 57 | scheduler: 58 | target: .hy3dgen.shapegen.schedulers.FlowMatchEulerDiscreteScheduler 59 | params: 60 | num_train_timesteps: 1000 61 | 62 | image_processor: 63 | target: .hy3dgen.shapegen.preprocessors.ImageProcessorV2 64 | params: 65 | size: 512 66 | border_ratio: 0.15 67 | 68 | pipeline: 69 | target: .hy3dgen.shapegen.pipelines.Hunyuan3DDiTFlowMatchingPipeline 70 | -------------------------------------------------------------------------------- /configs/dit_config_mini.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | target: .hy3dgen.shapegen.models.Hunyuan3DDiT 3 | params: 4 | in_channels: 64 5 | context_in_dim: 1536 6 | hidden_size: 1024 7 | mlp_ratio: 4.0 8 | num_heads: 16 9 | depth: 8 10 | depth_single_blocks: 16 11 | axes_dim: [ 64 ] 12 | theta: 10000 13 | qkv_bias: True 14 | guidance_embed: False 15 | 16 | vae: 17 | target: 
.hy3dgen.shapegen.models.ShapeVAE 18 | params: 19 | num_latents: 512 20 | embed_dim: 64 21 | num_freqs: 8 22 | include_pi: false 23 | heads: 16 24 | width: 1024 25 | num_decoder_layers: 16 26 | qkv_bias: false 27 | qk_norm: true 28 | scale_factor: 1.0188137142395404 29 | 30 | conditioner: 31 | target: .hy3dgen.shapegen.models.SingleImageEncoder 32 | params: 33 | main_image_encoder: 34 | type: DinoImageEncoder # dino giant 35 | kwargs: 36 | config: 37 | attention_probs_dropout_prob: 0.0 38 | drop_path_rate: 0.0 39 | hidden_act: gelu 40 | hidden_dropout_prob: 0.0 41 | hidden_size: 1536 42 | image_size: 518 43 | initializer_range: 0.02 44 | layer_norm_eps: 1.e-6 45 | layerscale_value: 1.0 46 | mlp_ratio: 4 47 | model_type: dinov2 48 | num_attention_heads: 24 49 | num_channels: 3 50 | num_hidden_layers: 40 51 | patch_size: 14 52 | qkv_bias: true 53 | torch_dtype: float32 54 | use_swiglu_ffn: true 55 | image_size: 518 56 | 57 | scheduler: 58 | target: .hy3dgen.shapegen.schedulers.FlowMatchEulerDiscreteScheduler 59 | params: 60 | num_train_timesteps: 1000 61 | 62 | image_processor: 63 | target: .hy3dgen.shapegen.preprocessors.ImageProcessorV2 64 | params: 65 | size: 512 66 | border_ratio: 0.15 67 | 68 | pipeline: 69 | target: .hy3dgen.shapegen.pipelines.Hunyuan3DDiTFlowMatchingPipeline 70 | -------------------------------------------------------------------------------- /hy3dshape/minimal_vae_demo.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
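# Minimal VAE round-trip demo: sample a surface from demos/demo.glb with
# SharpEdgeSurfaceLoader, encode it with the pretrained ShapeVAE, decode the
# latents and extract a mesh with latents2mesh (marching cubes), then export
# the result to output.obj.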
14 | 15 | import torch 16 | 17 | from hy3dshape.surface_loaders import SharpEdgeSurfaceLoader 18 | from hy3dshape.models.autoencoders import ShapeVAE 19 | from hy3dshape.pipelines import export_to_trimesh 20 | 21 | 22 | vae = ShapeVAE.from_pretrained( 23 | 'tencent/Hunyuan3D-2.1', 24 | use_safetensors=False, 25 | variant='fp16', 26 | ) 27 | 28 | 29 | loader = SharpEdgeSurfaceLoader( 30 | num_sharp_points=0, 31 | num_uniform_points=81920, 32 | ) 33 | mesh_demo = 'demos/demo.glb' 34 | surface = loader(mesh_demo).to('cuda', dtype=torch.float16) 35 | print(surface.shape) 36 | 37 | latents = vae.encode(surface) 38 | latents = vae.decode(latents) 39 | mesh = vae.latents2mesh( 40 | latents, 41 | output_type='trimesh', 42 | bounds=1.01, 43 | mc_level=0.0, 44 | num_chunks=20000, 45 | octree_resolution=256, 46 | mc_algo='mc', 47 | enable_pbar=True 48 | ) 49 | 50 | mesh = export_to_trimesh(mesh)[0] 51 | mesh.export('output.obj') 52 | -------------------------------------------------------------------------------- /hy3dpaint/src/utils/train_util.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import importlib 16 | 17 | 18 | def count_params(model, verbose=False): 19 | total_params = sum(p.numel() for p in model.parameters()) 20 | if verbose: 21 | print(f"{model.__class__.__name__} has {total_params*1.e-6:.2f} M params.") 22 | return total_params 23 | 24 | 25 | def instantiate_from_config(config): 26 | if not "target" in config: 27 | if config == "__is_first_stage__": 28 | return None 29 | elif config == "__is_unconditional__": 30 | return None 31 | raise KeyError("Expected key `target` to instantiate.") 32 | return get_obj_from_str(config["target"])(**config.get("params", dict())) 33 | 34 | 35 | def get_obj_from_str(string, reload=False): 36 | module, cls = string.rsplit(".", 1) 37 | if reload: 38 | module_imp = importlib.import_module(module) 39 | importlib.reload(module_imp) 40 | return getattr(importlib.import_module(module, package=None), cls) 41 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/visualizers/html_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 4 | # except for the third-party components listed below. 5 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 6 | # in the repsective licenses of these third-party components. 
7 | # Users must comply with all terms and conditions of original licenses of these third-party 8 | # components and must ensure that the usage of the third party components adheres to 9 | # all relevant laws and regulations. 10 | 11 | # For avoidance of doubts, Hunyuan 3D means the large language models and 12 | # their software and algorithms, including trained model weights, parameters (including 13 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 14 | # fine-tuning enabling code and other elements of the foregoing made publicly available 15 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 16 | 17 | import io 18 | import base64 19 | import numpy as np 20 | from PIL import Image 21 | 22 | 23 | def to_html_frame(content): 24 | 25 | html_frame = f""" 26 | 27 | 28 | {content} 29 | 30 | 31 | """ 32 | 33 | return html_frame 34 | 35 | 36 | def to_single_row_table(caption: str, content: str): 37 | 38 | table_html = f""" 39 | 40 | 41 | 42 | 43 | 44 |
<caption>{caption}</caption>
<tr><td>{content}</td></tr>
45 | """ 46 | 47 | return table_html 48 | 49 | 50 | def to_image_embed_tag(image: np.ndarray): 51 | 52 | # Convert np.ndarray to bytes 53 | img = Image.fromarray(image) 54 | raw_bytes = io.BytesIO() 55 | img.save(raw_bytes, "PNG") 56 | 57 | # Encode bytes to base64 58 | image_base64 = base64.b64encode(raw_bytes.getvalue()).decode("utf-8") 59 | 60 | image_tag = f""" 61 | Embedded Image 62 | """ 63 | 64 | return image_tag 65 | -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h: -------------------------------------------------------------------------------- 1 | #ifndef RASTERIZER_H_ 2 | #define RASTERIZER_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include // For CUDA context 8 | #include 9 | #define INT64 uint64_t 10 | #define MAXINT 2147483647 11 | 12 | __host__ __device__ inline float calculateSignedArea2(float* a, float* b, float* c) { 13 | return ((c[0] - a[0]) * (b[1] - a[1]) - (b[0] - a[0]) * (c[1] - a[1])); 14 | } 15 | 16 | __host__ __device__ inline void calculateBarycentricCoordinate(float* a, float* b, float* c, float* p, 17 | float* barycentric) 18 | { 19 | float beta_tri = calculateSignedArea2(a, p, c); 20 | float gamma_tri = calculateSignedArea2(a, b, p); 21 | float area = calculateSignedArea2(a, b, c); 22 | if (area == 0) { 23 | barycentric[0] = -1.0; 24 | barycentric[1] = -1.0; 25 | barycentric[2] = -1.0; 26 | return; 27 | } 28 | float tri_inv = 1.0 / area; 29 | float beta = beta_tri * tri_inv; 30 | float gamma = gamma_tri * tri_inv; 31 | float alpha = 1.0 - beta - gamma; 32 | barycentric[0] = alpha; 33 | barycentric[1] = beta; 34 | barycentric[2] = gamma; 35 | } 36 | 37 | __host__ __device__ inline bool isBarycentricCoordInBounds(float* barycentricCoord) { 38 | return barycentricCoord[0] >= 0.0 && barycentricCoord[0] <= 1.0 && 39 | barycentricCoord[1] >= 0.0 && barycentricCoord[1] <= 1.0 && 40 | barycentricCoord[2] >= 0.0 && barycentricCoord[2] <= 1.0; 41 | } 42 | 43 | std::vector rasterize_image_gpu(torch::Tensor V, torch::Tensor F, torch::Tensor D, 44 | int width, int height, float occlusion_truncation, int use_depth_prior); 45 | 46 | std::vector> build_hierarchy(std::vector view_layer_positions, std::vector view_layer_normals, int num_level, int resolution); 47 | 48 | std::vector> build_hierarchy_with_feat( 49 | std::vector view_layer_positions, 50 | std::vector view_layer_normals, 51 | std::vector view_layer_feats, 52 | int num_level, int resolution); 53 | 54 | #endif -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/diffusion/transport/utils.py: -------------------------------------------------------------------------------- 1 | # This file includes code derived from the SiT project (https://github.com/willisma/SiT), 2 | # which is licensed under the MIT License. 3 | # 4 | # MIT License 5 | # 6 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | import torch as th 27 | 28 | class EasyDict: 29 | 30 | def __init__(self, sub_dict): 31 | for k, v in sub_dict.items(): 32 | setattr(self, k, v) 33 | 34 | def __getitem__(self, key): 35 | return getattr(self, key) 36 | 37 | def mean_flat(x): 38 | """ 39 | Take the mean over all non-batch dimensions. 40 | """ 41 | return th.mean(x, dim=list(range(1, len(x.size())))) 42 | 43 | def log_state(state): 44 | result = [] 45 | 46 | sorted_state = dict(sorted(state.items())) 47 | for key, value in sorted_state.items(): 48 | # Check if the value is an instance of a class 49 | if " 0: 39 | if n % self.verbosity_interval == 0: 40 | print(f"current step: {n}, recent lr-multiplier: {self.f_start}") 41 | if n < self.lr_warm_up_steps: 42 | f = (self.f_max - self.f_start) / self.lr_warm_up_steps * n + self.f_start 43 | self.last_f = f 44 | return f 45 | else: 46 | t = (n - self.lr_warm_up_steps) / (self.lr_max_decay_steps - self.lr_warm_up_steps) 47 | t = min(t, 1.0) 48 | f = self.f_min + 0.5 * (self.f_max - self.f_min) * (1 + np.cos(t * np.pi)) 49 | self.last_f = f 50 | return f 51 | 52 | def __call__(self, n, **kwargs): 53 | return self.schedule(n, **kwargs) 54 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_updates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_updates 14 | else torch.tensor(-1, dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | # remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.', '_____') 20 | self.m_name2s_name.update({name: s_name}) 21 | self.register_buffer(s_name, p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def forward(self, model): 26 | decay = self.decay 27 | 28 | if self.num_updates >= 0: 29 | self.num_updates += 1 30 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 31 | 32 | 
one_minus_decay = 1.0 - decay 33 | 34 | with torch.no_grad(): 35 | m_param = dict(model.named_parameters()) 36 | shadow_params = dict(self.named_buffers()) 37 | 38 | for key in m_param: 39 | if m_param[key].requires_grad: 40 | sname = self.m_name2s_name[key] 41 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 42 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 43 | else: 44 | assert not key in self.m_name2s_name 45 | 46 | def copy_to(self, model): 47 | m_param = dict(model.named_parameters()) 48 | shadow_params = dict(self.named_buffers()) 49 | for key in m_param: 50 | if m_param[key].requires_grad: 51 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 52 | else: 53 | assert not key in self.m_name2s_name 54 | 55 | def store(self, model): 56 | """ 57 | Save the current parameters for restoring later. 58 | Args: 59 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 60 | temporarily stored. 61 | """ 62 | self.collected_params = [param.clone() for param in model.parameters()] 63 | 64 | def restore(self, model): 65 | """ 66 | Restore the parameters stored with the `store` method. 67 | Useful to validate the model with EMA parameters without affecting the 68 | original optimization process. Store the parameters before the 69 | `copy_to` method. After validation (or model saving), use this to 70 | restore the former parameters. 71 | Args: 72 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 73 | updated with the stored parameters. 74 | """ 75 | for c_param, param in zip(self.collected_params, model.parameters()): 76 | param.data.copy_(c_param.data) 77 | -------------------------------------------------------------------------------- /hy3dpaint/src/data/objaverse_hunyuan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 4 | # except for the third-party components listed below. 5 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 6 | # in the repsective licenses of these third-party components. 7 | # Users must comply with all terms and conditions of original licenses of these third-party 8 | # components and must ensure that the usage of the third party components adheres to 9 | # all relevant laws and regulations. 10 | 11 | # For avoidance of doubts, Hunyuan 3D means the large language models and 12 | # their software and algorithms, including trained model weights, parameters (including 13 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 14 | # fine-tuning enabling code and other elements of the foregoing made publicly available 15 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
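# Hedged usage sketch (illustrative only, not called anywhere in this repository):
# how the DataModuleFromConfig defined below is typically configured. The dataset
# target path inside `loader_cfg` is a hypothetical placeholder.
def _example_build_datamodule(loader_cfg=None):
    loader_cfg = loader_cfg or {
        "target": "src.data.dataloader.objaverse_loader_forTexturePBR.TextureDataset",  # hypothetical target
        "params": {"root": "train_examples"},
    }
    dm = DataModuleFromConfig(batch_size=8, num_workers=4, train=[loader_cfg], validation=[loader_cfg])
    dm.setup("fit")  # instantiates every config listed under train/validation
    # train_dataloader() wraps the datasets in ConcatDataset + DistributedSampler,
    # so torch.distributed must be initialised before it is called.
    return dm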
16 | 17 | import pytorch_lightning as pl 18 | from torch.utils.data import Dataset, ConcatDataset, DataLoader 19 | from torch.utils.data.distributed import DistributedSampler 20 | 21 | 22 | class DataModuleFromConfig(pl.LightningDataModule): 23 | def __init__( 24 | self, 25 | batch_size=8, 26 | num_workers=4, 27 | train=None, 28 | validation=None, 29 | test=None, 30 | **kwargs, 31 | ): 32 | super().__init__() 33 | 34 | self.batch_size = batch_size 35 | self.num_workers = num_workers 36 | 37 | self.dataset_configs = dict() 38 | if train is not None: 39 | self.dataset_configs["train"] = train 40 | if validation is not None: 41 | self.dataset_configs["validation"] = validation 42 | if test is not None: 43 | self.dataset_configs["test"] = test 44 | 45 | def setup(self, stage): 46 | from src.utils.train_util import instantiate_from_config 47 | 48 | if stage in ["fit"]: 49 | dataset_dict = {} 50 | for k in self.dataset_configs: 51 | dataset_dict[k] = [] 52 | for loader in self.dataset_configs[k]: 53 | dataset_dict[k].append(instantiate_from_config(loader)) 54 | self.datasets = dataset_dict 55 | print(self.datasets) 56 | else: 57 | raise NotImplementedError 58 | 59 | def train_dataloader(self): 60 | datasets = ConcatDataset(self.datasets["train"]) 61 | sampler = DistributedSampler(datasets) 62 | return DataLoader( 63 | datasets, 64 | batch_size=self.batch_size, 65 | num_workers=self.num_workers, 66 | shuffle=False, 67 | sampler=sampler, 68 | prefetch_factor=2, 69 | pin_memory=True, 70 | ) 71 | 72 | def val_dataloader(self): 73 | datasets = ConcatDataset(self.datasets["validation"]) 74 | sampler = DistributedSampler(datasets) 75 | return DataLoader(datasets, batch_size=4, num_workers=self.num_workers, shuffle=False, sampler=sampler) 76 | 77 | def test_dataloader(self): 78 | datasets = ConcatDataset(self.datasets["test"]) 79 | return DataLoader(datasets, batch_size=self.batch_size, num_workers=self.num_workers, shuffle=False) 80 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/diffusion/transport/__init__.py: -------------------------------------------------------------------------------- 1 | # This file includes code derived from the SiT project (https://github.com/willisma/SiT), 2 | # which is licensed under the MIT License. 3 | # 4 | # MIT License 5 | # 6 | # Copyright (c) Meta Platforms, Inc. and affiliates. 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
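# Hedged usage sketch mirroring scheduler_cfg in the training YAMLs (Linear path,
# velocity prediction, euler ODE with 50 steps). The `sample_ode` factory follows the
# upstream SiT-style Sampler API and is assumed to be unchanged in transport.py.
def _example_flow_matching_sampler():
    transport = create_transport(path_type="Linear", prediction="velocity")  # eps forced to 0 for this combination
    sampler = Sampler(transport)
    sample_fn = sampler.sample_ode(sampling_method="euler", num_steps=50)
    return sample_fn  # typically invoked as sample_fn(noisy_latents, denoiser, **model_kwargs)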
25 | 26 | from .transport import Transport, ModelType, WeightType, PathType, Sampler 27 | 28 | 29 | def create_transport( 30 | path_type='Linear', 31 | prediction="velocity", 32 | loss_weight=None, 33 | train_eps=None, 34 | sample_eps=None, 35 | train_sample_type="uniform", 36 | mean = 0.0, 37 | std = 1.0, 38 | shift_scale = 1.0, 39 | ): 40 | """function for creating Transport object 41 | **Note**: model prediction defaults to velocity 42 | Args: 43 | - path_type: type of path to use; default to linear 44 | - learn_score: set model prediction to score 45 | - learn_noise: set model prediction to noise 46 | - velocity_weighted: weight loss by velocity weight 47 | - likelihood_weighted: weight loss by likelihood weight 48 | - train_eps: small epsilon for avoiding instability during training 49 | - sample_eps: small epsilon for avoiding instability during sampling 50 | """ 51 | 52 | if prediction == "noise": 53 | model_type = ModelType.NOISE 54 | elif prediction == "score": 55 | model_type = ModelType.SCORE 56 | else: 57 | model_type = ModelType.VELOCITY 58 | 59 | if loss_weight == "velocity": 60 | loss_type = WeightType.VELOCITY 61 | elif loss_weight == "likelihood": 62 | loss_type = WeightType.LIKELIHOOD 63 | else: 64 | loss_type = WeightType.NONE 65 | 66 | path_choice = { 67 | "Linear": PathType.LINEAR, 68 | "GVP": PathType.GVP, 69 | "VP": PathType.VP, 70 | } 71 | 72 | path_type = path_choice[path_type] 73 | 74 | if (path_type in [PathType.VP]): 75 | train_eps = 1e-5 if train_eps is None else train_eps 76 | sample_eps = 1e-3 if train_eps is None else sample_eps 77 | elif (path_type in [PathType.GVP, PathType.LINEAR] and model_type != ModelType.VELOCITY): 78 | train_eps = 1e-3 if train_eps is None else train_eps 79 | sample_eps = 1e-3 if train_eps is None else sample_eps 80 | else: # velocity & [GVP, LINEAR] is stable everywhere 81 | train_eps = 0 82 | sample_eps = 0 83 | 84 | # create flow state 85 | state = Transport( 86 | model_type=model_type, 87 | path_type=path_type, 88 | loss_type=loss_type, 89 | train_eps=train_eps, 90 | sample_eps=sample_eps, 91 | train_sample_type=train_sample_type, 92 | mean=mean, 93 | std=std, 94 | shift_scale =shift_scale, 95 | ) 96 | 97 | return state 98 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/trainings/peft.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 4 | # except for the third-party components listed below. 5 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 6 | # in the repsective licenses of these third-party components. 7 | # Users must comply with all terms and conditions of original licenses of these third-party 8 | # components and must ensure that the usage of the third party components adheres to 9 | # all relevant laws and regulations. 10 | 11 | # For avoidance of doubts, Hunyuan 3D means the large language models and 12 | # their software and algorithms, including trained model weights, parameters (including 13 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 14 | # fine-tuning enabling code and other elements of the foregoing made publicly available 15 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
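# Hedged usage sketch: attaching the PeftSaveCallback defined below to a Lightning
# Trainer. `peft_model`, `lit_module` and `datamodule` are hypothetical objects supplied
# by the caller (the LoRA-wrapped denoiser, the LightningModule and the data module).
def _example_train_with_lora_checkpoints(peft_model, lit_module, datamodule):
    import pytorch_lightning as pl
    callback = PeftSaveCallback(peft_model=peft_model, save_dir="logs/lora", save_every_n_steps=500)
    trainer = pl.Trainer(max_epochs=1, callbacks=[callback])
    trainer.fit(lit_module, datamodule=datamodule)  # LoRA weights land in logs/lora/step_*/ and epoch_*/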
16 | 17 | import os 18 | from pytorch_lightning.callbacks import Callback 19 | from omegaconf import OmegaConf, ListConfig 20 | 21 | class PeftSaveCallback(Callback): 22 | def __init__(self, peft_model, save_dir: str, save_every_n_steps: int = None): 23 | super().__init__() 24 | self.peft_model = peft_model 25 | self.save_dir = save_dir 26 | self.save_every_n_steps = save_every_n_steps 27 | os.makedirs(self.save_dir, exist_ok=True) 28 | 29 | def recursive_convert(self, obj): 30 | from omegaconf import OmegaConf, ListConfig 31 | if isinstance(obj, (OmegaConf, ListConfig)): 32 | return OmegaConf.to_container(obj, resolve=True) 33 | elif isinstance(obj, dict): 34 | return {k: self.recursive_convert(v) for k, v in obj.items()} 35 | elif isinstance(obj, list): 36 | return [self.recursive_convert(i) for i in obj] 37 | elif isinstance(obj, type): 38 | # 避免修改类对象 39 | return obj 40 | elif hasattr(obj, '__dict__'): 41 | for attr_name, attr_value in vars(obj).items(): 42 | setattr(obj, attr_name, self.recursive_convert(attr_value)) 43 | return obj 44 | else: 45 | return obj 46 | 47 | # def recursive_convert(self, obj): 48 | # if isinstance(obj, (OmegaConf, ListConfig)): 49 | # return OmegaConf.to_container(obj, resolve=True) 50 | # elif isinstance(obj, dict): 51 | # return {k: self.recursive_convert(v) for k, v in obj.items()} 52 | # elif isinstance(obj, list): 53 | # return [self.recursive_convert(i) for i in obj] 54 | # elif hasattr(obj, '__dict__'): 55 | # for attr_name, attr_value in vars(obj).items(): 56 | # setattr(obj, attr_name, self.recursive_convert(attr_value)) 57 | # return obj 58 | # else: 59 | # return obj 60 | 61 | def _convert_peft_config(self): 62 | pc = self.peft_model.peft_config 63 | self.peft_model.peft_config = self.recursive_convert(pc) 64 | 65 | def on_train_epoch_end(self, trainer, pl_module): 66 | self._convert_peft_config() 67 | save_path = os.path.join(self.save_dir, f"epoch_{trainer.current_epoch}") 68 | self.peft_model.save_pretrained(save_path) 69 | print(f"[PeftSaveCallback] Saved LoRA weights to {save_path}") 70 | 71 | def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx): 72 | if self.save_every_n_steps is not None: 73 | global_step = trainer.global_step 74 | if global_step % self.save_every_n_steps == 0 and global_step > 0: 75 | self._convert_peft_config() 76 | save_path = os.path.join(self.save_dir, f"step_{global_step}") 77 | self.peft_model.save_pretrained(save_path) 78 | print(f"[PeftSaveCallback] Saved LoRA weights to {save_path}") 79 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/misc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import importlib 4 | from omegaconf import OmegaConf, DictConfig, ListConfig 5 | 6 | import torch 7 | import torch.distributed as dist 8 | from typing import Union 9 | 10 | 11 | def get_config_from_file(config_file: str) -> Union[DictConfig, ListConfig]: 12 | config_file = OmegaConf.load(config_file) 13 | 14 | if 'base_config' in config_file.keys(): 15 | if config_file['base_config'] == "default_base": 16 | base_config = OmegaConf.create() 17 | # base_config = get_default_config() 18 | elif config_file['base_config'].endswith(".yaml"): 19 | base_config = get_config_from_file(config_file['base_config']) 20 | else: 21 | raise ValueError(f"{config_file} must be `.yaml` file or it contains `base_config` key.") 22 | 23 | config_file = {key: value for key, value in config_file if key 
!= "base_config"} 24 | 25 | return OmegaConf.merge(base_config, config_file) 26 | 27 | return config_file 28 | 29 | 30 | def get_obj_from_str(string, reload=False): 31 | module, cls = string.rsplit(".", 1) 32 | if reload: 33 | module_imp = importlib.import_module(module) 34 | importlib.reload(module_imp) 35 | return getattr(importlib.import_module(module, package=None), cls) 36 | 37 | 38 | def get_obj_from_config(config): 39 | if "target" not in config: 40 | raise KeyError("Expected key `target` to instantiate.") 41 | 42 | return get_obj_from_str(config["target"]) 43 | 44 | 45 | def instantiate_from_config(config, **kwargs): 46 | if "target" not in config: 47 | raise KeyError("Expected key `target` to instantiate.") 48 | 49 | cls = get_obj_from_str(config["target"]) 50 | 51 | if config.get("from_pretrained", None): 52 | return cls.from_pretrained( 53 | config["from_pretrained"], 54 | use_safetensors=config.get('use_safetensors', False), 55 | variant=config.get('variant', 'fp16')) 56 | 57 | params = config.get("params", dict()) 58 | # params.update(kwargs) 59 | # instance = cls(**params) 60 | kwargs.update(params) 61 | instance = cls(**kwargs) 62 | 63 | return instance 64 | 65 | 66 | def disabled_train(self, mode=True): 67 | """Overwrite model.train with this function to make sure train/eval mode 68 | does not change anymore.""" 69 | return self 70 | 71 | 72 | def instantiate_non_trainable_model(config): 73 | model = instantiate_from_config(config) 74 | model = model.eval() 75 | model.train = disabled_train 76 | for param in model.parameters(): 77 | param.requires_grad = False 78 | 79 | return model 80 | 81 | 82 | def is_dist_avail_and_initialized(): 83 | if not dist.is_available(): 84 | return False 85 | if not dist.is_initialized(): 86 | return False 87 | return True 88 | 89 | 90 | def get_rank(): 91 | if not is_dist_avail_and_initialized(): 92 | return 0 93 | return dist.get_rank() 94 | 95 | 96 | def get_world_size(): 97 | if not is_dist_avail_and_initialized(): 98 | return 1 99 | return dist.get_world_size() 100 | 101 | 102 | def all_gather_batch(tensors): 103 | """ 104 | Performs all_gather operation on the provided tensors. 105 | """ 106 | # Queue the gathered tensors 107 | world_size = get_world_size() 108 | # There is no need for reduction in the single-proc case 109 | if world_size == 1: 110 | return tensors 111 | tensor_list = [] 112 | output_tensor = [] 113 | for tensor in tensors: 114 | tensor_all = [torch.ones_like(tensor) for _ in range(world_size)] 115 | dist.all_gather( 116 | tensor_all, 117 | tensor, 118 | async_op=False # performance opt 119 | ) 120 | 121 | tensor_list.append(tensor_all) 122 | 123 | for tensor_all in tensor_list: 124 | output_tensor.append(torch.cat(tensor_all, dim=0)) 125 | return output_tensor 126 | -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/camera_utils.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 
8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import math 16 | 17 | import numpy as np 18 | import torch 19 | 20 | 21 | def transform_pos(mtx, pos, keepdim=False): 22 | t_mtx = torch.from_numpy(mtx).to(pos.device) if isinstance(mtx, np.ndarray) else mtx 23 | if pos.shape[-1] == 3: 24 | posw = torch.cat([pos, torch.ones([pos.shape[0], 1]).to(pos.device)], axis=1) 25 | else: 26 | posw = pos 27 | 28 | if keepdim: 29 | return torch.matmul(posw, t_mtx.t())[...] 30 | else: 31 | return torch.matmul(posw, t_mtx.t())[None, ...] 32 | 33 | 34 | def get_mv_matrix(elev, azim, camera_distance, center=None): 35 | elev = -elev 36 | azim += 90 37 | 38 | elev_rad = math.radians(elev) 39 | azim_rad = math.radians(azim) 40 | 41 | camera_position = np.array( 42 | [ 43 | camera_distance * math.cos(elev_rad) * math.cos(azim_rad), 44 | camera_distance * math.cos(elev_rad) * math.sin(azim_rad), 45 | camera_distance * math.sin(elev_rad), 46 | ] 47 | ) 48 | 49 | if center is None: 50 | center = np.array([0, 0, 0]) 51 | else: 52 | center = np.array(center) 53 | 54 | lookat = center - camera_position 55 | lookat = lookat / np.linalg.norm(lookat) 56 | 57 | up = np.array([0, 0, 1.0]) 58 | right = np.cross(lookat, up) 59 | right = right / np.linalg.norm(right) 60 | up = np.cross(right, lookat) 61 | up = up / np.linalg.norm(up) 62 | 63 | c2w = np.concatenate([np.stack([right, up, -lookat], axis=-1), camera_position[:, None]], axis=-1) 64 | 65 | w2c = np.zeros((4, 4)) 66 | w2c[:3, :3] = np.transpose(c2w[:3, :3], (1, 0)) 67 | w2c[:3, 3:] = -np.matmul(np.transpose(c2w[:3, :3], (1, 0)), c2w[:3, 3:]) 68 | w2c[3, 3] = 1.0 69 | 70 | return w2c.astype(np.float32) 71 | 72 | 73 | def get_orthographic_projection_matrix(left=-1, right=1, bottom=-1, top=1, near=0, far=2): 74 | """ 75 | 计算正交投影矩阵。 76 | 77 | 参数: 78 | left (float): 投影区域左侧边界。 79 | right (float): 投影区域右侧边界。 80 | bottom (float): 投影区域底部边界。 81 | top (float): 投影区域顶部边界。 82 | near (float): 投影区域近裁剪面距离。 83 | far (float): 投影区域远裁剪面距离。 84 | 85 | 返回: 86 | numpy.ndarray: 正交投影矩阵。 87 | """ 88 | ortho_matrix = np.eye(4, dtype=np.float32) 89 | ortho_matrix[0, 0] = 2 / (right - left) 90 | ortho_matrix[1, 1] = 2 / (top - bottom) 91 | ortho_matrix[2, 2] = -2 / (far - near) 92 | ortho_matrix[0, 3] = -(right + left) / (right - left) 93 | ortho_matrix[1, 3] = -(top + bottom) / (top - bottom) 94 | ortho_matrix[2, 3] = -(far + near) / (far - near) 95 | return ortho_matrix 96 | 97 | 98 | def get_perspective_projection_matrix(fovy, aspect_wh, near, far): 99 | fovy_rad = math.radians(fovy) 100 | return np.array( 101 | [ 102 | [1.0 / (math.tan(fovy_rad / 2.0) * aspect_wh), 0, 0, 0], 103 | [0, 1.0 / math.tan(fovy_rad / 2.0), 0, 0], 104 | [0, 0, -(far + near) / (far - near), -2.0 * far * near / (far - near)], 105 | [0, 0, -1, 0], 106 | ] 107 | ).astype(np.float32) 108 | -------------------------------------------------------------------------------- /hy3dpaint/README.md: -------------------------------------------------------------------------------- 1 | # Hunyuan3D-Paint 2.1 2 | 3 | Hunyuan3D-Paint 2.1 is a high quality PBR texture generation model for 3D meshes, powered by 
[RomanTex](https://github.com/oakshy/RomanTex) and [MaterialMVP](https://github.com/ZebinHe/MaterialMVP/). 4 | 5 | 6 | ## Quick Inference 7 | You need to manually download the RealESRGAN weight to the `ckpt` folder using the following command: 8 | ```bash 9 | wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P ckpt 10 | ``` 11 | 12 | Given a 3D mesh `mesh.glb` and a reference image `image.png`, you can run inference using the following command. The result will be saved as `textured_mesh.glb`. 13 | 14 | ```bash 15 | python3 demo.py 16 | ``` 17 | **Optional arguments in `demo.py`:** 18 | 19 | - `max_num_view` : Maximum number of views, adaptively selected by the model (integer between 6 and 12) 20 | 21 | - `resolution` : Resolution of the generated PBR textures (512 or 768) 22 | 23 | **Memory Recommendation:** For `max_num_view=6` and `resolution=512`, we recommend using a GPU with at least **21GB VRAM**. 24 | 25 | ## Training 26 | 27 | ### Data Preparation 28 | We provide a sample data item in `train_examples` for an overfitting training test. The data structure should be organized as follows: 29 | 30 | ``` 31 | train_examples/ 32 | ├── examples.json 33 | └── 001/ 34 | ├── render_tex/ # Rendered PBR target images 35 | │ ├── 000.png # Rendered views (RGB images) 36 | │ ├── 000_albedo.png # Albedo maps for each view 37 | │ ├── 000_mr.png # Metallic-Roughness maps for each view, R and G channels 38 | │ ├── 000_normal.png # Normal maps 39 | │ ├── 000_normal.jpg # Normal maps (JPEG variant) 40 | │ ├── 000_pos.png # Position maps 41 | │ ├── 000_pos.jpg # Position maps (JPEG variant) 42 | │ ├── 001.png # Additional views... 43 | │ ├── 001_albedo.png 44 | │ ├── 001_mr.png 45 | │ ├── 001_normal.png 46 | │ ├── 001_pos.png 47 | │ └── ... # More views (002, 003, 004, 005, ...) 48 | └── render_cond/ # Rendered reference images (at least two light conditions should be rendered to facilitate the consistency loss) 49 | ├── 000_light_AL.png # Light condition 1 (Area Light) 50 | ├── 000_light_ENVMAP.png # Light condition 2 (Environment map) 51 | ├── 000_light_PL.png # Light condition 3 (Point Light) 52 | ├── 001_light_AL.png 53 | ├── 001_light_ENVMAP.png 54 | ├── 001_light_PL.png 55 | └── ... # More lighting conditions (002-005, ...)
56 | ``` 57 | 58 | Each training example contains: 59 | - **render_tex/**: Multi-view renderings with PBR material properties 60 | - Main RGB images (`XXX.png`) 61 | - Albedo maps (`XXX_albedo.png`) 62 | - Metallic-Roughness maps (`XXX_mr.png`) 63 | - Normal maps (`XXX_normal.png/jpg`) 64 | - Position maps (`XXX_pos.png/jpg`) 65 | - Camera transforms (`transforms.json`) 66 | - **render_cond/**: Lighting condition maps for each view 67 | - Ambient lighting (`XXX_light_AL.png`) 68 | - Environment map lighting (`XXX_light_ENVMAP.png`) 69 | - Point lighting (`XXX_light_PL.png`) 70 | 71 | ### Launch Training 72 | 73 | 74 | ```bash 75 | python3 train.py --base 'cfgs/hunyuan-paint-pbr.yaml' --name overfit --logdir logs/ 76 | ``` 77 | 78 | ## BibTeX 79 | 80 | If you found Hunyuan3D-Paint 2.1 helpful, please cite our papers: 81 | 82 | ```bibtex 83 | @article{feng2025romantex, 84 | title={RomanTex: Decoupling 3D-aware Rotary Positional Embedded Multi-Attention Network for Texture Synthesis}, 85 | author={Feng, Yifei and Yang, Mingxin and Yang, Shuhui and Zhang, Sheng and Yu, Jiaao and Zhao, Zibo and Liu, Yuhong and Jiang, Jie and Guo, Chunchao}, 86 | journal={arXiv preprint arXiv:2503.19011}, 87 | year={2025} 88 | } 89 | 90 | @article{he2025materialmvp, 91 | title={MaterialMVP: Illumination-Invariant Material Generation via Multi-view PBR Diffusion}, 92 | author={He, Zebin and Yang, Mingxin and Yang, Shuhui and Tang, Yixuan and Wang, Tao and Zhang, Kaihao and Chen, Guanying and Liu, Yuhong and Jiang, Jie and Guo, Chunchao and Luo, Wenhan}, 93 | journal={arXiv preprint arXiv:2503.10289}, 94 | year={2025} 95 | } 96 | ``` 97 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/autoencoders/attention_processors.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
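# Hedged sketch: the module below swaps torch.nn.functional.scaled_dot_product_attention
# for SageAttention when the environment variable CA_USE_SAGEATTN is "1" at import time,
# so the flag has to be set before the first import. The dotted module path is assumed
# from the repository layout.
def _example_enable_sageattention():
    import os
    import importlib
    os.environ["CA_USE_SAGEATTN"] = "1"  # requires `pip install sageattention`
    import hy3dshape.models.autoencoders.attention_processors as ap
    importlib.reload(ap)  # re-evaluates the module-level switch with the flag set
    return ap.scaled_dot_product_attention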
14 | 15 | import os 16 | 17 | import torch 18 | import torch.nn.functional as F 19 | 20 | scaled_dot_product_attention = F.scaled_dot_product_attention 21 | if os.environ.get('CA_USE_SAGEATTN', '0') == '1': 22 | try: 23 | from sageattention import sageattn 24 | except ImportError: 25 | raise ImportError('Please install the package "sageattention" to use this USE_SAGEATTN.') 26 | scaled_dot_product_attention = sageattn 27 | 28 | 29 | class CrossAttentionProcessor: 30 | def __call__(self, attn, q, k, v): 31 | out = scaled_dot_product_attention(q, k, v) 32 | return out 33 | 34 | 35 | class FlashVDMCrossAttentionProcessor: 36 | def __init__(self, topk=None): 37 | self.topk = topk 38 | 39 | def __call__(self, attn, q, k, v): 40 | if k.shape[-2] == 3072: 41 | topk = 1024 42 | elif k.shape[-2] == 512: 43 | topk = 256 44 | else: 45 | topk = k.shape[-2] // 3 46 | 47 | if self.topk is True: 48 | q1 = q[:, :, ::100, :] 49 | sim = q1 @ k.transpose(-1, -2) 50 | sim = torch.mean(sim, -2) 51 | topk_ind = torch.topk(sim, dim=-1, k=topk).indices.squeeze(-2).unsqueeze(-1) 52 | topk_ind = topk_ind.expand(-1, -1, -1, v.shape[-1]) 53 | v0 = torch.gather(v, dim=-2, index=topk_ind) 54 | k0 = torch.gather(k, dim=-2, index=topk_ind) 55 | out = scaled_dot_product_attention(q, k0, v0) 56 | elif self.topk is False: 57 | out = scaled_dot_product_attention(q, k, v) 58 | else: 59 | idx, counts = self.topk 60 | start = 0 61 | outs = [] 62 | for grid_coord, count in zip(idx, counts): 63 | end = start + count 64 | q_chunk = q[:, :, start:end, :] 65 | k0, v0 = self.select_topkv(q_chunk, k, v, topk) 66 | out = scaled_dot_product_attention(q_chunk, k0, v0) 67 | outs.append(out) 68 | start += count 69 | out = torch.cat(outs, dim=-2) 70 | self.topk = False 71 | return out 72 | 73 | def select_topkv(self, q_chunk, k, v, topk): 74 | q1 = q_chunk[:, :, ::50, :] 75 | sim = q1 @ k.transpose(-1, -2) 76 | sim = torch.mean(sim, -2) 77 | topk_ind = torch.topk(sim, dim=-1, k=topk).indices.squeeze(-2).unsqueeze(-1) 78 | topk_ind = topk_ind.expand(-1, -1, -1, v.shape[-1]) 79 | v0 = torch.gather(v, dim=-2, index=topk_ind) 80 | k0 = torch.gather(k, dim=-2, index=topk_ind) 81 | return k0, v0 82 | 83 | 84 | class FlashVDMTopMCrossAttentionProcessor(FlashVDMCrossAttentionProcessor): 85 | def select_topkv(self, q_chunk, k, v, topk): 86 | q1 = q_chunk[:, :, ::30, :] 87 | sim = q1 @ k.transpose(-1, -2) 88 | # sim = sim.to(torch.float32) 89 | sim = sim.softmax(-1) 90 | sim = torch.mean(sim, 1) 91 | activated_token = torch.where(sim > 1e-6)[2] 92 | index = torch.unique(activated_token, return_counts=True)[0].unsqueeze(0).unsqueeze(0).unsqueeze(-1) 93 | index = index.expand(-1, v.shape[1], -1, v.shape[-1]) 94 | v0 = torch.gather(v, dim=-2, index=index) 95 | k0 = torch.gather(k, dim=-2, index=index) 96 | return k0, v0 97 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/trainings/mesh.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 4 | # except for the third-party components listed below. 5 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 6 | # in the repsective licenses of these third-party components. 
7 | # Users must comply with all terms and conditions of original licenses of these third-party 8 | # components and must ensure that the usage of the third party components adheres to 9 | # all relevant laws and regulations. 10 | 11 | # For avoidance of doubts, Hunyuan 3D means the large language models and 12 | # their software and algorithms, including trained model weights, parameters (including 13 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 14 | # fine-tuning enabling code and other elements of the foregoing made publicly available 15 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 16 | 17 | import os 18 | import cv2 19 | import numpy as np 20 | import PIL.Image 21 | from typing import Optional 22 | 23 | import trimesh 24 | 25 | 26 | def save_obj(pointnp_px3, facenp_fx3, fname): 27 | fid = open(fname, "w") 28 | write_str = "" 29 | for pidx, p in enumerate(pointnp_px3): 30 | pp = p 31 | write_str += "v %f %f %f\n" % (pp[0], pp[1], pp[2]) 32 | 33 | for i, f in enumerate(facenp_fx3): 34 | f1 = f + 1 35 | write_str += "f %d %d %d\n" % (f1[0], f1[1], f1[2]) 36 | fid.write(write_str) 37 | fid.close() 38 | return 39 | 40 | 41 | def savemeshtes2(pointnp_px3, tcoords_px2, facenp_fx3, facetex_fx3, tex_map, fname): 42 | fol, na = os.path.split(fname) 43 | na, _ = os.path.splitext(na) 44 | 45 | matname = "%s/%s.mtl" % (fol, na) 46 | fid = open(matname, "w") 47 | fid.write("newmtl material_0\n") 48 | fid.write("Kd 1 1 1\n") 49 | fid.write("Ka 0 0 0\n") 50 | fid.write("Ks 0.4 0.4 0.4\n") 51 | fid.write("Ns 10\n") 52 | fid.write("illum 2\n") 53 | fid.write("map_Kd %s.png\n" % na) 54 | fid.close() 55 | #### 56 | 57 | fid = open(fname, "w") 58 | fid.write("mtllib %s.mtl\n" % na) 59 | 60 | for pidx, p3 in enumerate(pointnp_px3): 61 | pp = p3 62 | fid.write("v %f %f %f\n" % (pp[0], pp[1], pp[2])) 63 | 64 | for pidx, p2 in enumerate(tcoords_px2): 65 | pp = p2 66 | fid.write("vt %f %f\n" % (pp[0], pp[1])) 67 | 68 | fid.write("usemtl material_0\n") 69 | for i, f in enumerate(facenp_fx3): 70 | f1 = f + 1 71 | f2 = facetex_fx3[i] + 1 72 | fid.write("f %d/%d %d/%d %d/%d\n" % (f1[0], f2[0], f1[1], f2[1], f1[2], f2[2])) 73 | fid.close() 74 | 75 | PIL.Image.fromarray(np.ascontiguousarray(tex_map), "RGB").save( 76 | os.path.join(fol, "%s.png" % na)) 77 | 78 | return 79 | 80 | 81 | class MeshOutput(object): 82 | 83 | def __init__(self, 84 | mesh_v: np.ndarray, 85 | mesh_f: np.ndarray, 86 | vertex_colors: Optional[np.ndarray] = None, 87 | uvs: Optional[np.ndarray] = None, 88 | mesh_tex_idx: Optional[np.ndarray] = None, 89 | tex_map: Optional[np.ndarray] = None): 90 | 91 | self.mesh_v = mesh_v 92 | self.mesh_f = mesh_f 93 | self.vertex_colors = vertex_colors 94 | self.uvs = uvs 95 | self.mesh_tex_idx = mesh_tex_idx 96 | self.tex_map = tex_map 97 | 98 | def contain_uv_texture(self): 99 | return (self.uvs is not None) and (self.mesh_tex_idx is not None) and (self.tex_map is not None) 100 | 101 | def contain_vertex_colors(self): 102 | return self.vertex_colors is not None 103 | 104 | def export(self, fname): 105 | 106 | if self.contain_uv_texture(): 107 | savemeshtes2( 108 | self.mesh_v, 109 | self.uvs, 110 | self.mesh_f, 111 | self.mesh_tex_idx, 112 | self.tex_map, 113 | fname 114 | ) 115 | 116 | elif self.contain_vertex_colors(): 117 | mesh_obj = trimesh.Trimesh(vertices=self.mesh_v, faces=self.mesh_f, vertex_colors=self.vertex_colors) 118 | mesh_obj.export(fname) 119 | 120 | else: 121 | save_obj( 122 | self.mesh_v, 123 | 
self.mesh_f, 124 | fname 125 | ) 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /hy3dpaint/convert_utils.py: -------------------------------------------------------------------------------- 1 | import trimesh 2 | import pygltflib 3 | import numpy as np 4 | from PIL import Image 5 | import base64 6 | import io 7 | 8 | 9 | def combine_metallic_roughness(metallic_path, roughness_path, output_path): 10 | """ 11 | 将metallic和roughness贴图合并为一张贴图 12 | GLB格式要求metallic在B通道,roughness在G通道 13 | """ 14 | # 加载贴图 15 | metallic_img = Image.open(metallic_path).convert("L") # 转为灰度 16 | roughness_img = Image.open(roughness_path).convert("L") # 转为灰度 17 | 18 | # 确保尺寸一致 19 | if metallic_img.size != roughness_img.size: 20 | roughness_img = roughness_img.resize(metallic_img.size) 21 | 22 | # 创建RGB图像 23 | width, height = metallic_img.size 24 | combined = Image.new("RGB", (width, height)) 25 | 26 | # 转为numpy数组便于操作 27 | metallic_array = np.array(metallic_img) 28 | roughness_array = np.array(roughness_img) 29 | 30 | # 创建合并的数组 (R, G, B) = (AO, Roughness, Metallic) 31 | combined_array = np.zeros((height, width, 3), dtype=np.uint8) 32 | combined_array[:, :, 0] = 255 # R通道:AO (如果没有AO贴图,设为白色) 33 | combined_array[:, :, 1] = roughness_array # G通道:Roughness 34 | combined_array[:, :, 2] = metallic_array # B通道:Metallic 35 | 36 | # 转回PIL图像并保存 37 | combined = Image.fromarray(combined_array) 38 | combined.save(output_path) 39 | return output_path 40 | 41 | 42 | def create_glb_with_pbr_materials(obj_path, textures_dict, output_path): 43 | """ 44 | 使用pygltflib创建包含完整PBR材质的GLB文件 45 | 46 | textures_dict = { 47 | 'albedo': 'path/to/albedo.png', 48 | 'metallic': 'path/to/metallic.png', 49 | 'roughness': 'path/to/roughness.png', 50 | 'normal': 'path/to/normal.png', # 可选 51 | 'ao': 'path/to/ao.png' # 可选 52 | } 53 | """ 54 | # 1. 加载OBJ文件 55 | mesh = trimesh.load(obj_path) 56 | 57 | # 2. 先导出为临时GLB 58 | temp_glb = "temp.glb" 59 | mesh.export(temp_glb) 60 | 61 | # 3. 加载GLB文件进行材质编辑 62 | gltf = pygltflib.GLTF2().load(temp_glb) 63 | 64 | # 4. 准备纹理数据 65 | def image_to_data_uri(image_path): 66 | """将图像转换为data URI""" 67 | with open(image_path, "rb") as f: 68 | image_data = f.read() 69 | encoded = base64.b64encode(image_data).decode() 70 | return f"data:image/png;base64,{encoded}" 71 | 72 | # 5. 合并metallic和roughness 73 | if "metallic" in textures_dict and "roughness" in textures_dict: 74 | mr_combined_path = "mr_combined.png" 75 | combine_metallic_roughness(textures_dict["metallic"], textures_dict["roughness"], mr_combined_path) 76 | textures_dict["metallicRoughness"] = mr_combined_path 77 | 78 | # 6. 添加图像到GLTF 79 | images = [] 80 | textures = [] 81 | 82 | texture_mapping = { 83 | "albedo": "baseColorTexture", 84 | "metallicRoughness": "metallicRoughnessTexture", 85 | "normal": "normalTexture", 86 | "ao": "occlusionTexture", 87 | } 88 | 89 | for tex_type, tex_path in textures_dict.items(): 90 | if tex_type in texture_mapping and tex_path: 91 | # 添加图像 92 | image = pygltflib.Image(uri=image_to_data_uri(tex_path)) 93 | images.append(image) 94 | 95 | # 添加纹理 96 | texture = pygltflib.Texture(source=len(images) - 1) 97 | textures.append(texture) 98 | 99 | # 7. 
创建PBR材质 100 | pbr_metallic_roughness = pygltflib.PbrMetallicRoughness( 101 | baseColorFactor=[1.0, 1.0, 1.0, 1.0], metallicFactor=1.0, roughnessFactor=1.0 102 | ) 103 | 104 | # 设置纹理索引 105 | texture_index = 0 106 | if "albedo" in textures_dict: 107 | pbr_metallic_roughness.baseColorTexture = pygltflib.TextureInfo(index=texture_index) 108 | texture_index += 1 109 | 110 | if "metallicRoughness" in textures_dict: 111 | pbr_metallic_roughness.metallicRoughnessTexture = pygltflib.TextureInfo(index=texture_index) 112 | texture_index += 1 113 | 114 | # 创建材质 115 | material = pygltflib.Material(name="PBR_Material", pbrMetallicRoughness=pbr_metallic_roughness) 116 | 117 | # 添加法线贴图 118 | if "normal" in textures_dict: 119 | material.normalTexture = pygltflib.NormalTextureInfo(index=texture_index) 120 | texture_index += 1 121 | 122 | # 添加AO贴图 123 | if "ao" in textures_dict: 124 | material.occlusionTexture = pygltflib.OcclusionTextureInfo(index=texture_index) 125 | 126 | # 8. 更新GLTF 127 | gltf.images = images 128 | gltf.textures = textures 129 | gltf.materials = [material] 130 | 131 | # 确保mesh使用材质 132 | if gltf.meshes: 133 | for primitive in gltf.meshes[0].primitives: 134 | primitive.material = 0 135 | 136 | # 9. 保存最终GLB 137 | gltf.save(output_path) 138 | print(f"PBR GLB文件已保存: {output_path}") 139 | 140 | 141 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/utils.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import logging 16 | import os 17 | from functools import wraps 18 | 19 | import torch 20 | 21 | 22 | def get_logger(name): 23 | logger = logging.getLogger(name) 24 | logger.setLevel(logging.INFO) 25 | 26 | console_handler = logging.StreamHandler() 27 | console_handler.setLevel(logging.INFO) 28 | 29 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') 30 | console_handler.setFormatter(formatter) 31 | logger.addHandler(console_handler) 32 | return logger 33 | 34 | 35 | logger = get_logger('hy3dgen.shapgen') 36 | 37 | 38 | class synchronize_timer: 39 | """ Synchronized timer to count the inference time of `nn.Module.forward`. 40 | 41 | Supports both context manager and decorator usage. 
42 | 43 | Example as context manager: 44 | ```python 45 | with synchronize_timer('name') as t: 46 | run() 47 | ``` 48 | 49 | Example as decorator: 50 | ```python 51 | @synchronize_timer('Export to trimesh') 52 | def export_to_trimesh(mesh_output): 53 | pass 54 | ``` 55 | """ 56 | 57 | def __init__(self, name=None): 58 | self.name = name 59 | 60 | def __enter__(self): 61 | """Context manager entry: start timing.""" 62 | if os.environ.get('HY3DGEN_DEBUG', '0') == '1': 63 | self.start = torch.cuda.Event(enable_timing=True) 64 | self.end = torch.cuda.Event(enable_timing=True) 65 | self.start.record() 66 | return lambda: self.time 67 | 68 | def __exit__(self, exc_type, exc_value, exc_tb): 69 | """Context manager exit: stop timing and log results.""" 70 | if os.environ.get('HY3DGEN_DEBUG', '0') == '1': 71 | self.end.record() 72 | torch.cuda.synchronize() 73 | self.time = self.start.elapsed_time(self.end) 74 | if self.name is not None: 75 | logger.info(f'{self.name} takes {self.time} ms') 76 | 77 | def __call__(self, func): 78 | """Decorator: wrap the function to time its execution.""" 79 | 80 | @wraps(func) 81 | def wrapper(*args, **kwargs): 82 | with self: 83 | result = func(*args, **kwargs) 84 | return result 85 | 86 | return wrapper 87 | 88 | 89 | def smart_load_model( 90 | model_path, 91 | subfolder, 92 | use_safetensors, 93 | variant, 94 | ): 95 | original_model_path = model_path 96 | # try local path 97 | base_dir = os.environ.get('HY3DGEN_MODELS', '~/.cache/hy3dgen') 98 | model_fld = os.path.expanduser(os.path.join(base_dir, model_path)) 99 | model_path = os.path.expanduser(os.path.join(base_dir, model_path, subfolder)) 100 | logger.info(f'Try to load model from local path: {model_path}') 101 | if not os.path.exists(model_path): 102 | logger.info('Model path not exists, try to download from huggingface') 103 | try: 104 | from huggingface_hub import snapshot_download 105 | # 只下载指定子目录 106 | path = snapshot_download( 107 | repo_id=original_model_path, 108 | allow_patterns=[f"{subfolder}/*"], # 关键修改:模式匹配子文件夹 109 | local_dir=model_fld 110 | ) 111 | model_path = os.path.join(path, subfolder) # 保持路径拼接逻辑不变 112 | except ImportError: 113 | logger.warning( 114 | "You need to install HuggingFace Hub to load models from the hub." 115 | ) 116 | raise RuntimeError(f"Model path {model_path} not found") 117 | except Exception as e: 118 | raise e 119 | 120 | if not os.path.exists(model_path): 121 | raise FileNotFoundError(f"Model path {original_model_path} not found") 122 | 123 | extension = 'ckpt' if not use_safetensors else 'safetensors' 124 | variant = '' if variant is None else f'.{variant}' 125 | ckpt_name = f'model{variant}.{extension}' 126 | config_path = os.path.join(model_path, 'config.yaml') 127 | ckpt_path = os.path.join(model_path, ckpt_name) 128 | return config_path, ckpt_path 129 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/diffusion/transport/integrators.py: -------------------------------------------------------------------------------- 1 | # This file includes code derived from the SiT project (https://github.com/willisma/SiT), 2 | # which is licensed under the MIT License. 3 | # 4 | # MIT License 5 | # 6 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | import numpy as np 27 | import torch as th 28 | import torch.nn as nn 29 | from torchdiffeq import odeint 30 | from functools import partial 31 | from tqdm import tqdm 32 | 33 | class sde: 34 | """SDE solver class""" 35 | def __init__( 36 | self, 37 | drift, 38 | diffusion, 39 | *, 40 | t0, 41 | t1, 42 | num_steps, 43 | sampler_type, 44 | ): 45 | assert t0 < t1, "SDE sampler has to be in forward time" 46 | 47 | self.num_timesteps = num_steps 48 | self.t = th.linspace(t0, t1, num_steps) 49 | self.dt = self.t[1] - self.t[0] 50 | self.drift = drift 51 | self.diffusion = diffusion 52 | self.sampler_type = sampler_type 53 | 54 | def __Euler_Maruyama_step(self, x, mean_x, t, model, **model_kwargs): 55 | w_cur = th.randn(x.size()).to(x) 56 | t = th.ones(x.size(0)).to(x) * t 57 | dw = w_cur * th.sqrt(self.dt) 58 | drift = self.drift(x, t, model, **model_kwargs) 59 | diffusion = self.diffusion(x, t) 60 | mean_x = x + drift * self.dt 61 | x = mean_x + th.sqrt(2 * diffusion) * dw 62 | return x, mean_x 63 | 64 | def __Heun_step(self, x, _, t, model, **model_kwargs): 65 | w_cur = th.randn(x.size()).to(x) 66 | dw = w_cur * th.sqrt(self.dt) 67 | t_cur = th.ones(x.size(0)).to(x) * t 68 | diffusion = self.diffusion(x, t_cur) 69 | xhat = x + th.sqrt(2 * diffusion) * dw 70 | K1 = self.drift(xhat, t_cur, model, **model_kwargs) 71 | xp = xhat + self.dt * K1 72 | K2 = self.drift(xp, t_cur + self.dt, model, **model_kwargs) 73 | return xhat + 0.5 * self.dt * (K1 + K2), xhat # at last time point we do not perform the heun step 74 | 75 | def __forward_fn(self): 76 | """TODO: generalize here by adding all private functions ending with steps to it""" 77 | sampler_dict = { 78 | "Euler": self.__Euler_Maruyama_step, 79 | "Heun": self.__Heun_step, 80 | } 81 | 82 | try: 83 | sampler = sampler_dict[self.sampler_type] 84 | except: 85 | raise NotImplementedError("Smapler type not implemented.") 86 | 87 | return sampler 88 | 89 | def sample(self, init, model, **model_kwargs): 90 | """forward loop of sde""" 91 | x = init 92 | mean_x = init 93 | samples = [] 94 | sampler = self.__forward_fn() 95 | for ti in self.t[:-1]: 96 | with th.no_grad(): 97 | x, mean_x = sampler(x, mean_x, ti, model, **model_kwargs) 98 | samples.append(x) 99 | 100 | return samples 101 | 102 | class ode: 103 | """ODE solver class""" 104 | def __init__( 105 | self, 106 | drift, 107 | *, 108 | t0, 109 | t1, 110 | sampler_type, 
111 | num_steps, 112 | atol, 113 | rtol, 114 | ): 115 | assert t0 < t1, "ODE sampler has to be in forward time" 116 | 117 | self.drift = drift 118 | self.t = th.linspace(t0, t1, num_steps) 119 | self.atol = atol 120 | self.rtol = rtol 121 | self.sampler_type = sampler_type 122 | 123 | def sample(self, x, model, **model_kwargs): 124 | 125 | device = x[0].device if isinstance(x, tuple) else x.device 126 | def _fn(t, x): 127 | t = th.ones(x[0].size(0)).to(device) * t if isinstance(x, tuple) else th.ones(x.size(0)).to(device) * t 128 | model_output = self.drift(x, t, model, **model_kwargs) 129 | return model_output 130 | 131 | t = self.t.to(device) 132 | atol = [self.atol] * len(x) if isinstance(x, tuple) else [self.atol] 133 | rtol = [self.rtol] * len(x) if isinstance(x, tuple) else [self.rtol] 134 | samples = odeint( 135 | _fn, 136 | x, 137 | t, 138 | method=self.sampler_type, 139 | atol=atol, 140 | rtol=rtol 141 | ) 142 | return samples 143 | -------------------------------------------------------------------------------- /hy3dpaint/utils/torchvision_fix.py: -------------------------------------------------------------------------------- 1 | # Torchvision compatibility fix for functional_tensor module 2 | # This file helps resolve compatibility issues between different torchvision versions 3 | 4 | import sys 5 | import torch 6 | import torchvision 7 | 8 | def fix_torchvision_functional_tensor(): 9 | """ 10 | Fix torchvision.transforms.functional_tensor import issue 11 | """ 12 | try: 13 | # Check if the module exists in the expected location 14 | import torchvision.transforms.functional_tensor 15 | print("torchvision.transforms.functional_tensor is available") 16 | return True 17 | except ImportError: 18 | print("torchvision.transforms.functional_tensor not found, applying compatibility fix...") 19 | 20 | try: 21 | # Create a mock functional_tensor module with the required functions 22 | import torchvision.transforms.functional as F 23 | 24 | class FunctionalTensorMock: 25 | """Mock module to replace functional_tensor""" 26 | 27 | @staticmethod 28 | def _get_grayscale_weights(img): 29 | """Helper to create grayscale weights based on image dimensions""" 30 | weights = torch.tensor([0.299, 0.587, 0.114], device=img.device, dtype=img.dtype) 31 | return weights.view(1, 3, 1, 1) if len(img.shape) == 4 else weights.view(3, 1, 1) 32 | 33 | @staticmethod 34 | def _try_import_fallback(module_names, attr_name): 35 | """Helper to try importing from multiple modules""" 36 | for module_name in module_names: 37 | try: 38 | module = __import__(module_name, fromlist=[attr_name]) 39 | if hasattr(module, attr_name): 40 | return getattr(module, attr_name) 41 | except ImportError: 42 | continue 43 | return None 44 | 45 | @staticmethod 46 | def rgb_to_grayscale(img, num_output_channels=1): 47 | """Convert RGB image to grayscale""" 48 | if hasattr(F, 'rgb_to_grayscale'): 49 | return F.rgb_to_grayscale(img, num_output_channels) 50 | 51 | # Fallback implementation 52 | weights = FunctionalTensorMock._get_grayscale_weights(img) 53 | grayscale = torch.sum(img * weights, dim=-3, keepdim=True) 54 | 55 | if num_output_channels == 3: 56 | repeat_dims = (1, 3, 1, 1) if len(img.shape) == 4 else (3, 1, 1) 57 | grayscale = grayscale.repeat(*repeat_dims) 58 | 59 | return grayscale 60 | 61 | @staticmethod 62 | def resize(img, size, interpolation=2, antialias=None): 63 | """Resize function wrapper""" 64 | # Try v2.functional first, then regular functional, then torch.nn.functional 65 | resize_func = 
FunctionalTensorMock._try_import_fallback([ 66 | 'torchvision.transforms.v2.functional', 67 | 'torchvision.transforms.functional' 68 | ], 'resize') 69 | 70 | if resize_func: 71 | try: 72 | return resize_func(img, size, interpolation=interpolation, antialias=antialias) 73 | except TypeError: 74 | # Fallback for older versions without antialias parameter 75 | return resize_func(img, size, interpolation=interpolation) 76 | 77 | # Final fallback using torch.nn.functional 78 | import torch.nn.functional as torch_F 79 | size = (size, size) if isinstance(size, int) else size 80 | img_input = img.unsqueeze(0) if len(img.shape) == 3 else img 81 | return torch_F.interpolate(img_input, size=size, mode='bilinear', align_corners=False) 82 | 83 | def __getattr__(self, name): 84 | """Fallback to regular functional module""" 85 | func = self._try_import_fallback([ 86 | 'torchvision.transforms.functional', 87 | 'torchvision.transforms.v2.functional' 88 | ], name) 89 | 90 | if func: 91 | return func 92 | 93 | raise AttributeError(f"'{name}' not found in functional_tensor mock") 94 | 95 | # Create the mock module instance and monkey patch 96 | sys.modules['torchvision.transforms.functional_tensor'] = FunctionalTensorMock() 97 | print("Applied compatibility fix: created functional_tensor mock module") 98 | return True 99 | 100 | except Exception as e: 101 | print(f"Failed to create functional_tensor mock: {e}") 102 | return False 103 | 104 | def apply_fix(): 105 | """Apply the torchvision compatibility fix""" 106 | print(f"Torchvision version: {torchvision.__version__}") 107 | return fix_torchvision_functional_tensor() 108 | 109 | if __name__ == "__main__": 110 | apply_fix() 111 | -------------------------------------------------------------------------------- /hy3dshape/configs/hunyuan3ddit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-512.yaml: -------------------------------------------------------------------------------- 1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518" 2 | 3 | training: 4 | steps: 10_0000_0000 5 | use_amp: true 6 | amp_type: "bf16" 7 | base_lr: 1e-4 8 | gradient_clip_val: 1.0 9 | gradient_clip_algorithm: "norm" 10 | every_n_train_steps: 2000 # 5000 11 | val_check_interval: 50 # 4096 12 | limit_val_batches: 16 13 | 14 | dataset: 15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule 16 | params: 17 | #! Base setting 18 | batch_size: 2 19 | num_workers: 8 20 | val_num_workers: 4 21 | 22 | # Data 23 | train_data_list: tools/mini_trainset/preprocessed 24 | val_data_list: tools/mini_trainset/preprocessed 25 | 26 | #! Image loading 27 | cond_stage_key: "image" # image / text / image_text 28 | image_size: 518 29 | mean: &mean [0.5, 0.5, 0.5] 30 | std: &std [0.5, 0.5, 0.5] 31 | 32 | #! Point cloud sampling 33 | pc_size: &pc_size 10240 34 | pc_sharpedge_size: &pc_sharpedge_size 10240 35 | sharpedge_label: &sharpedge_label true 36 | return_normal: true 37 | 38 | #! 
Augmentation 39 | padding: true 40 | 41 | model: 42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser 43 | params: 44 | first_stage_key: "surface" 45 | cond_stage_key: "image" 46 | scale_by_std: false 47 | z_scale_factor: &z_scale_factor 0.9990943042622529 # 1 / 1.0009065167661184 48 | torch_compile: false 49 | 50 | # ema_config: 51 | # ema_model: LitEma 52 | # ema_decay: 0.999 53 | # ema_inference: false 54 | 55 | first_stage_config: 56 | target: hy3dshape.models.autoencoders.ShapeVAE 57 | from_pretrained: tencent/Hunyuan3D-2.1 58 | params: 59 | num_latents: &num_latents 512 60 | embed_dim: 64 61 | num_freqs: 8 62 | include_pi: false 63 | heads: 16 64 | width: 1024 65 | point_feats: 4 66 | num_decoder_layers: 16 67 | pc_size: *pc_size 68 | pc_sharpedge_size: *pc_sharpedge_size 69 | qkv_bias: false 70 | qk_norm: true 71 | scale_factor: *z_scale_factor 72 | geo_decoder_mlp_expand_ratio: 4 73 | geo_decoder_downsample_ratio: 1 74 | geo_decoder_ln_post: true 75 | 76 | cond_stage_config: 77 | target: hy3dshape.models.conditioner.SingleImageEncoder 78 | params: 79 | main_image_encoder: 80 | type: DinoImageEncoder # dino giant 81 | kwargs: 82 | config: 83 | attention_probs_dropout_prob: 0.0 84 | drop_path_rate: 0.0 85 | hidden_act: gelu 86 | hidden_dropout_prob: 0.0 87 | hidden_size: 1536 88 | image_size: 518 89 | initializer_range: 0.02 90 | layer_norm_eps: 1.e-6 91 | layerscale_value: 1.0 92 | mlp_ratio: 4 93 | model_type: dinov2 94 | num_attention_heads: 24 95 | num_channels: 3 96 | num_hidden_layers: 40 97 | patch_size: 14 98 | qkv_bias: true 99 | torch_dtype: float32 100 | use_swiglu_ffn: true 101 | image_size: 518 102 | 103 | denoiser_cfg: 104 | target: hy3dshape.models.denoisers.hunyuan3ddit.Hunyuan3DDiT 105 | params: 106 | input_size: *num_latents 107 | context_in_dim: 1536 108 | hidden_size: 1024 109 | mlp_ratio: 4.0 110 | num_heads: 16 111 | depth: 8 112 | depth_single_blocks: 16 113 | axes_dim: [64] 114 | theta: 10000 115 | qkv_bias: true 116 | use_pe: false 117 | force_norm_fp32: true 118 | 119 | scheduler_cfg: 120 | transport: 121 | target: hy3dshape.models.diffusion.transport.create_transport 122 | params: 123 | path_type: Linear 124 | prediction: velocity 125 | sampler: 126 | target: hy3dshape.models.diffusion.transport.Sampler 127 | params: {} 128 | ode_params: 129 | sampling_method: euler # dopri5 ... 
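        # euler integrates on a fixed grid of num_steps points; adaptive solvers such as
        # dopri5 instead rely on the atol/rtol tolerances handled by the ode class in
        # transport/integrators.py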
130 | num_steps: &num_steps 50 131 | 132 | optimizer_cfg: 133 | optimizer: 134 | target: torch.optim.AdamW 135 | params: 136 | betas: [0.9, 0.99] 137 | eps: 1.e-6 138 | weight_decay: 1.e-2 139 | 140 | scheduler: 141 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler 142 | params: 143 | warm_up_steps: 50 # 5000 144 | f_start: 1.e-6 145 | f_min: 1.e-3 146 | f_max: 1.0 147 | 148 | pipeline_cfg: 149 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline 150 | 151 | image_processor_cfg: 152 | target: hy3dshape.preprocessors.ImageProcessorV2 153 | params: {} 154 | 155 | callbacks: 156 | logger: 157 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger 158 | params: 159 | step_frequency: 100 # 10000 160 | num_samples: 1 161 | sample_times: 1 162 | mean: *mean 163 | std: *std 164 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01] 165 | octree_depth: 8 166 | num_chunks: 50000 167 | mc_level: 0.0 168 | 169 | file_loggers: 170 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger 171 | params: 172 | step_frequency: 50 # 5000 173 | test_data_path: "tools/mini_testset/images.json" 174 | -------------------------------------------------------------------------------- /workflow_examples/Batch_Generator.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "5ad9bf67-cabe-4ef4-8e0c-bbeee0fc546f", 3 | "revision": 0, 4 | "last_node_id": 8, 5 | "last_link_id": 3, 6 | "nodes": [ 7 | { 8 | "id": 6, 9 | "type": "Hy3D21CameraConfig", 10 | "pos": [ 11 | -706.4094848632812, 12 | 305.74383544921875 13 | ], 14 | "size": [ 15 | 382.7560729980469, 16 | 133.63636779785156 17 | ], 18 | "flags": {}, 19 | "order": 0, 20 | "mode": 0, 21 | "inputs": [], 22 | "outputs": [ 23 | { 24 | "name": "camera_config", 25 | "type": "HY3D21CAMERA", 26 | "links": [ 27 | 1 28 | ] 29 | } 30 | ], 31 | "properties": { 32 | "aux_id": "visualbruno/ComfyUI-Hunyuan3d-2-1", 33 | "ver": "e439689e4b67fb2af5f487ee26ef3a710be92658", 34 | "Node name for S&R": "Hy3D21CameraConfig", 35 | "widget_ue_connectable": {} 36 | }, 37 | "widgets_values": [ 38 | "0, 90, 180, 270, 0, 180, 45, 315", 39 | "0, 0, 0, 0, 90, -90, 0, 0", 40 | "1, 0.5, 1, 0.5, 1, 1, 0.1, 0.1", 41 | 1.1000000000000003 42 | ] 43 | }, 44 | { 45 | "id": 4, 46 | "type": "Hy3D21MeshGenerationBatch", 47 | "pos": [ 48 | -732.0762939453125, 49 | 516.9437255859375 50 | ], 51 | "size": [ 52 | 427.05511474609375, 53 | 622 54 | ], 55 | "flags": {}, 56 | "order": 1, 57 | "mode": 0, 58 | "inputs": [], 59 | "outputs": [ 60 | { 61 | "name": "input_folder", 62 | "type": "STRING", 63 | "links": [ 64 | 2 65 | ] 66 | }, 67 | { 68 | "name": "output_folder", 69 | "type": "STRING", 70 | "links": [ 71 | 3 72 | ] 73 | }, 74 | { 75 | "name": "processed_input_images", 76 | "type": "STRING", 77 | "links": null 78 | }, 79 | { 80 | "name": "processed_output_meshes", 81 | "type": "STRING", 82 | "links": null 83 | } 84 | ], 85 | "properties": { 86 | "aux_id": "visualbruno/ComfyUI-Hunyuan3d-2-1", 87 | "ver": "a1133c7ff88dd2b8c6a85344ffe7acbaa58ec8d4", 88 | "Node name for S&R": "Hy3D21MeshGenerationBatch", 89 | "widget_ue_connectable": {} 90 | }, 91 | "widgets_values": [ 92 | "C:\\Travaux\\Test", 93 | "C:\\Travaux\\Test\\3D", 94 | "Hunyuan3D-vae-v2-1-fp16.ckpt", 95 | "hunyuan3d-dit-v2-1-fp16.ckpt", 96 | 50, 97 | 7.5, 98 | "sdpa", 99 | 1.01, 100 | 384, 101 | 128000, 102 | 0, 103 | "dmc", 104 | true, 105 | 200000, 106 | 1388, 107 | "randomize", 108 | true, 109 | "obj", 
110 | false, 111 | true, 112 | true, 113 | false 114 | ] 115 | }, 116 | { 117 | "id": 5, 118 | "type": "Hy3D21GenerateMultiViewsBatch", 119 | "pos": [ 120 | -167.3360137939453, 121 | 428.5770568847656 122 | ], 123 | "size": [ 124 | 592.5423583984375, 125 | 464.7333679199219 126 | ], 127 | "flags": {}, 128 | "order": 2, 129 | "mode": 0, 130 | "inputs": [ 131 | { 132 | "name": "camera_config", 133 | "type": "HY3D21CAMERA", 134 | "link": 1 135 | }, 136 | { 137 | "name": "input_images_folder", 138 | "shape": 7, 139 | "type": "STRING", 140 | "widget": { 141 | "name": "input_images_folder" 142 | }, 143 | "link": 2 144 | }, 145 | { 146 | "name": "input_meshes_folder", 147 | "shape": 7, 148 | "type": "STRING", 149 | "widget": { 150 | "name": "input_meshes_folder" 151 | }, 152 | "link": 3 153 | } 154 | ], 155 | "outputs": [ 156 | { 157 | "name": "processed_meshes", 158 | "type": "STRING", 159 | "links": null 160 | } 161 | ], 162 | "properties": { 163 | "aux_id": "visualbruno/ComfyUI-Hunyuan3d-2-1", 164 | "ver": "f966762862e112b35dfe2e846bfb153f0dd6cae4", 165 | "Node name for S&R": "Hy3D21GenerateMultiViewsBatch", 166 | "widget_ue_connectable": {} 167 | }, 168 | "widgets_values": [ 169 | "C:\\Travaux\\Test\\Meshes", 170 | 512, 171 | 10, 172 | 3, 173 | 2048, 174 | true, 175 | 411413629, 176 | "randomize", 177 | true, 178 | false, 179 | true, 180 | "CustomModel", 181 | "003_realSR_BSRGAN_DFO_s64w8_SwinIR-M_x4_GAN.pth", 182 | "", 183 | "", 184 | "", 185 | "" 186 | ] 187 | } 188 | ], 189 | "links": [ 190 | [ 191 | 1, 192 | 6, 193 | 0, 194 | 5, 195 | 0, 196 | "HY3D21CAMERA" 197 | ], 198 | [ 199 | 2, 200 | 4, 201 | 0, 202 | 5, 203 | 1, 204 | "STRING" 205 | ], 206 | [ 207 | 3, 208 | 4, 209 | 1, 210 | 5, 211 | 2, 212 | "STRING" 213 | ] 214 | ], 215 | "groups": [], 216 | "config": {}, 217 | "extra": { 218 | "ue_links": [], 219 | "ds": { 220 | "scale": 0.826446280991736, 221 | "offset": [ 222 | 1020.0376340132016, 223 | -189.85887715515295 224 | ] 225 | }, 226 | "links_added_by_ue": [], 227 | "frontendVersion": "1.23.4" 228 | }, 229 | "version": 0.4 230 | } -------------------------------------------------------------------------------- /hy3dshape/configs/hunyuan3ddit-full-params-finetuning-flowmatching-dinog518-bf16-lr1e5-512.yaml: -------------------------------------------------------------------------------- 1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518" 2 | 3 | training: 4 | steps: 10_0000_0000 5 | use_amp: true 6 | amp_type: "bf16" 7 | base_lr: 1.e-5 8 | gradient_clip_val: 1.0 9 | gradient_clip_algorithm: "norm" 10 | every_n_train_steps: 2000 # 5000 11 | val_check_interval: 50 # 4096 12 | limit_val_batches: 16 13 | 14 | dataset: 15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule 16 | params: 17 | #! Base setting 18 | batch_size: 4 19 | num_workers: 8 20 | val_num_workers: 4 21 | 22 | # Data 23 | train_data_list: tools/mini_trainset/preprocessed 24 | val_data_list: tools/mini_trainset/preprocessed 25 | 26 | #! Image loading 27 | cond_stage_key: "image" # image / text / image_text 28 | image_size: 518 29 | mean: &mean [0.5, 0.5, 0.5] 30 | std: &std [0.5, 0.5, 0.5] 31 | 32 | #! Point cloud sampling 33 | pc_size: &pc_size 30720 34 | pc_sharpedge_size: &pc_sharpedge_size 30720 35 | sharpedge_label: &sharpedge_label true 36 | return_normal: true 37 | 38 | #! 
Augmentation 39 | padding: true 40 | 41 | model: 42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser 43 | params: 44 | first_stage_key: "surface" 45 | cond_stage_key: "image" 46 | scale_by_std: false 47 | z_scale_factor: &z_scale_factor 0.9990943042622529 # 1 / 1.0009065167661184 48 | torch_compile: false 49 | 50 | # ema_config: 51 | # ema_model: LitEma 52 | # ema_decay: 0.999 53 | # ema_inference: false 54 | 55 | first_stage_config: 56 | target: hy3dshape.models.autoencoders.ShapeVAE 57 | from_pretrained: tencent/Hunyuan3D-2.1 58 | params: 59 | num_latents: &num_latents 512 60 | embed_dim: 64 61 | num_freqs: 8 62 | include_pi: false 63 | heads: 16 64 | width: 1024 65 | point_feats: 4 66 | num_decoder_layers: 16 67 | pc_size: *pc_size 68 | pc_sharpedge_size: *pc_sharpedge_size 69 | qkv_bias: false 70 | qk_norm: true 71 | scale_factor: *z_scale_factor 72 | geo_decoder_mlp_expand_ratio: 4 73 | geo_decoder_downsample_ratio: 1 74 | geo_decoder_ln_post: true 75 | 76 | cond_stage_config: 77 | target: hy3dshape.models.conditioner.SingleImageEncoder 78 | params: 79 | main_image_encoder: 80 | type: DinoImageEncoder # dino giant 81 | kwargs: 82 | config: 83 | attention_probs_dropout_prob: 0.0 84 | drop_path_rate: 0.0 85 | hidden_act: gelu 86 | hidden_dropout_prob: 0.0 87 | hidden_size: 1536 88 | image_size: 518 89 | initializer_range: 0.02 90 | layer_norm_eps: 1.e-6 91 | layerscale_value: 1.0 92 | mlp_ratio: 4 93 | model_type: dinov2 94 | num_attention_heads: 24 95 | num_channels: 3 96 | num_hidden_layers: 40 97 | patch_size: 14 98 | qkv_bias: true 99 | torch_dtype: float32 100 | use_swiglu_ffn: true 101 | image_size: 518 102 | 103 | denoiser_cfg: 104 | target: hy3dshape.models.denoisers.hunyuan3ddit.Hunyuan3DDiT 105 | params: 106 | ckpt_path: ~/.cache/hy3dgen/tencent/Hunyuan3D-2-1-Shape/dit/model.fp16.ckpt 107 | input_size: *num_latents 108 | context_in_dim: 1536 109 | hidden_size: 1024 110 | mlp_ratio: 4.0 111 | num_heads: 16 112 | depth: 16 113 | depth_single_blocks: 32 114 | axes_dim: [64] 115 | theta: 10000 116 | qkv_bias: true 117 | use_pe: false 118 | force_norm_fp32: true 119 | 120 | scheduler_cfg: 121 | transport: 122 | target: hy3dshape.models.diffusion.transport.create_transport 123 | params: 124 | path_type: Linear 125 | prediction: velocity 126 | sampler: 127 | target: hy3dshape.models.diffusion.transport.Sampler 128 | params: {} 129 | ode_params: 130 | sampling_method: euler # dopri5 ... 
131 | num_steps: &num_steps 50 132 | 133 | optimizer_cfg: 134 | optimizer: 135 | target: torch.optim.AdamW 136 | params: 137 | betas: [0.9, 0.99] 138 | eps: 1.e-6 139 | weight_decay: 1.e-2 140 | 141 | scheduler: 142 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler 143 | params: 144 | warm_up_steps: 50 # 5000 145 | f_start: 1.e-6 146 | f_min: 1.e-3 147 | f_max: 1.0 148 | 149 | pipeline_cfg: 150 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline 151 | 152 | image_processor_cfg: 153 | target: hy3dshape.preprocessors.ImageProcessorV2 154 | params: {} 155 | 156 | callbacks: 157 | logger: 158 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger 159 | params: 160 | step_frequency: 100 # 10000 161 | num_samples: 1 162 | sample_times: 1 163 | mean: *mean 164 | std: *std 165 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01] 166 | octree_depth: 8 167 | num_chunks: 50000 168 | mc_level: 0.0 169 | 170 | file_loggers: 171 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger 172 | params: 173 | step_frequency: 50 # 5000 174 | test_data_path: "tools/mini_testset/images.json" 175 | -------------------------------------------------------------------------------- /hy3dshape/configs/hunyuandit-finetuning-flowmatching-dinog518-bf16-lr1e5-4096.yaml: -------------------------------------------------------------------------------- 1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518" 2 | 3 | training: 4 | steps: 10_0000_0000 5 | use_amp: true 6 | amp_type: "bf16" 7 | base_lr: 1e-5 8 | gradient_clip_val: 1.0 9 | gradient_clip_algorithm: "norm" 10 | every_n_train_steps: 2000 # 5000 11 | val_check_interval: 50 # 4096 12 | limit_val_batches: 16 13 | 14 | dataset: 15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule 16 | params: 17 | #! Base setting 18 | batch_size: 4 19 | num_workers: 8 20 | val_num_workers: 4 21 | 22 | # Data 23 | train_data_list: tools/mini_trainset/preprocessed 24 | val_data_list: tools/mini_trainset/preprocessed 25 | 26 | #! Image loading 27 | cond_stage_key: "image" # image / text / image_text 28 | image_size: 518 29 | mean: &mean [0.5, 0.5, 0.5] 30 | std: &std [0.5, 0.5, 0.5] 31 | 32 | #! Point cloud sampling 33 | pc_size: &pc_size 81920 34 | pc_sharpedge_size: &pc_sharpedge_size 0 35 | sharpedge_label: &sharpedge_label true 36 | return_normal: true 37 | 38 | #! 
Augmentation 39 | padding: true 40 | 41 | model: 42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser 43 | params: 44 | first_stage_key: "surface" 45 | cond_stage_key: "image" 46 | scale_by_std: false 47 | z_scale_factor: &z_scale_factor 1.0039506158752403 48 | torch_compile: false 49 | 50 | # ema_config: 51 | # ema_model: LitEma 52 | # ema_decay: 0.999 53 | # ema_inference: false 54 | 55 | first_stage_config: 56 | target: hy3dshape.models.autoencoders.ShapeVAE 57 | from_pretrained: tencent/Hunyuan3D-2.1 58 | params: 59 | num_latents: &num_latents 4096 60 | embed_dim: 64 61 | num_freqs: 8 62 | include_pi: false 63 | heads: 16 64 | width: 1024 65 | num_encoder_layers: 8 66 | num_decoder_layers: 16 67 | qkv_bias: false 68 | qk_norm: true 69 | scale_factor: *z_scale_factor 70 | geo_decoder_mlp_expand_ratio: 4 71 | geo_decoder_downsample_ratio: 1 72 | geo_decoder_ln_post: true 73 | point_feats: 4 74 | pc_size: *pc_size 75 | pc_sharpedge_size: *pc_sharpedge_size 76 | 77 | cond_stage_config: 78 | target: hy3dshape.models.conditioner.SingleImageEncoder 79 | params: 80 | main_image_encoder: 81 | type: DinoImageEncoder # dino large 82 | kwargs: 83 | config: 84 | attention_probs_dropout_prob: 0.0 85 | drop_path_rate: 0.0 86 | hidden_act: gelu 87 | hidden_dropout_prob: 0.0 88 | hidden_size: 1024 89 | image_size: 518 90 | initializer_range: 0.02 91 | layer_norm_eps: 1.e-6 92 | layerscale_value: 1.0 93 | mlp_ratio: 4 94 | model_type: dinov2 95 | num_attention_heads: 16 96 | num_channels: 3 97 | num_hidden_layers: 24 98 | patch_size: 14 99 | qkv_bias: true 100 | torch_dtype: float32 101 | use_swiglu_ffn: false 102 | image_size: 518 103 | use_cls_token: true 104 | 105 | 106 | denoiser_cfg: 107 | target: hy3dshape.models.denoisers.hunyuandit.HunYuanDiTPlain 108 | params: 109 | input_size: *num_latents 110 | in_channels: 64 111 | hidden_size: 2048 112 | context_dim: 1024 113 | depth: 21 114 | num_heads: 16 115 | qk_norm: true 116 | text_len: 1370 117 | with_decoupled_ca: false 118 | use_attention_pooling: false 119 | qk_norm_type: 'rms' 120 | qkv_bias: false 121 | use_pos_emb: false 122 | num_moe_layers: 6 123 | num_experts: 8 124 | moe_top_k: 2 125 | 126 | scheduler_cfg: 127 | transport: 128 | target: hy3dshape.models.diffusion.transport.create_transport 129 | params: 130 | path_type: Linear 131 | prediction: velocity 132 | sampler: 133 | target: hy3dshape.models.diffusion.transport.Sampler 134 | params: {} 135 | ode_params: 136 | sampling_method: euler # dopri5 ... 
137 | num_steps: &num_steps 50 138 | 139 | optimizer_cfg: 140 | optimizer: 141 | target: torch.optim.AdamW 142 | params: 143 | betas: [0.9, 0.99] 144 | eps: 1.e-6 145 | weight_decay: 1.e-2 146 | 147 | scheduler: 148 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler 149 | params: 150 | warm_up_steps: 50 # 5000 151 | f_start: 1.e-6 152 | f_min: 1.e-3 153 | f_max: 1.0 154 | 155 | pipeline_cfg: 156 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline 157 | 158 | image_processor_cfg: 159 | target: hy3dshape.preprocessors.ImageProcessorV2 160 | params: {} 161 | 162 | callbacks: 163 | logger: 164 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger 165 | params: 166 | step_frequency: 100 # 10000 167 | num_samples: 1 168 | sample_times: 1 169 | mean: *mean 170 | std: *std 171 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01] 172 | octree_depth: 8 173 | num_chunks: 50000 174 | mc_level: 0.0 175 | 176 | file_loggers: 177 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger 178 | params: 179 | step_frequency: 50 # 5000 180 | test_data_path: "tools/mini_testset/images.json" 181 | -------------------------------------------------------------------------------- /hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-4096.yaml: -------------------------------------------------------------------------------- 1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518" 2 | 3 | training: 4 | steps: 10_0000_0000 5 | use_amp: true 6 | amp_type: "bf16" 7 | base_lr: 1e-4 8 | gradient_clip_val: 1.0 9 | gradient_clip_algorithm: "norm" 10 | every_n_train_steps: 2000 # 5000 11 | val_check_interval: 50 # 4096 12 | limit_val_batches: 16 13 | 14 | dataset: 15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule 16 | params: 17 | #! Base setting 18 | batch_size: 2 19 | num_workers: 8 20 | val_num_workers: 4 21 | 22 | # Data 23 | train_data_list: tools/mini_trainset/preprocessed 24 | val_data_list: tools/mini_trainset/preprocessed 25 | 26 | #! Image loading 27 | cond_stage_key: "image" # image / text / image_text 28 | image_size: 518 29 | mean: &mean [0.5, 0.5, 0.5] 30 | std: &std [0.5, 0.5, 0.5] 31 | 32 | #! Point cloud sampling 33 | pc_size: &pc_size 81920 34 | pc_sharpedge_size: &pc_sharpedge_size 0 35 | sharpedge_label: &sharpedge_label true 36 | return_normal: true 37 | 38 | #! 
Augmentation 39 | padding: true 40 | 41 | model: 42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser 43 | params: 44 | first_stage_key: "surface" 45 | cond_stage_key: "image" 46 | scale_by_std: false 47 | z_scale_factor: &z_scale_factor 1.0039506158752403 48 | torch_compile: false 49 | 50 | # ema_config: 51 | # ema_model: LitEma 52 | # ema_decay: 0.999 53 | # ema_inference: false 54 | 55 | first_stage_config: 56 | target: hy3dshape.models.autoencoders.ShapeVAE 57 | from_pretrained: tencent/Hunyuan3D-2.1 58 | params: 59 | num_latents: &num_latents 4096 60 | embed_dim: 64 61 | num_freqs: 8 62 | include_pi: false 63 | heads: 16 64 | width: 1024 65 | num_encoder_layers: 8 66 | num_decoder_layers: 16 67 | qkv_bias: false 68 | qk_norm: true 69 | scale_factor: *z_scale_factor 70 | geo_decoder_mlp_expand_ratio: 4 71 | geo_decoder_downsample_ratio: 1 72 | geo_decoder_ln_post: true 73 | point_feats: 4 74 | pc_size: *pc_size 75 | pc_sharpedge_size: *pc_sharpedge_size 76 | 77 | cond_stage_config: 78 | target: hy3dshape.models.conditioner.SingleImageEncoder 79 | params: 80 | main_image_encoder: 81 | type: DinoImageEncoder # dino large 82 | kwargs: 83 | config: 84 | attention_probs_dropout_prob: 0.0 85 | drop_path_rate: 0.0 86 | hidden_act: gelu 87 | hidden_dropout_prob: 0.0 88 | hidden_size: 1024 89 | image_size: 518 90 | initializer_range: 0.02 91 | layer_norm_eps: 1.e-6 92 | layerscale_value: 1.0 93 | mlp_ratio: 4 94 | model_type: dinov2 95 | num_attention_heads: 16 96 | num_channels: 3 97 | num_hidden_layers: 24 98 | patch_size: 14 99 | qkv_bias: true 100 | torch_dtype: float32 101 | use_swiglu_ffn: false 102 | image_size: 518 103 | use_cls_token: true 104 | 105 | 106 | denoiser_cfg: 107 | target: hy3dshape.models.denoisers.hunyuandit.HunYuanDiTPlain 108 | params: 109 | input_size: *num_latents 110 | in_channels: 64 111 | hidden_size: 2048 112 | context_dim: 1024 113 | depth: 11 114 | num_heads: 16 115 | qk_norm: true 116 | text_len: 1370 117 | with_decoupled_ca: false 118 | use_attention_pooling: false 119 | qk_norm_type: 'rms' 120 | qkv_bias: false 121 | use_pos_emb: false 122 | num_moe_layers: 6 123 | num_experts: 8 124 | moe_top_k: 2 125 | 126 | scheduler_cfg: 127 | transport: 128 | target: hy3dshape.models.diffusion.transport.create_transport 129 | params: 130 | path_type: Linear 131 | prediction: velocity 132 | sampler: 133 | target: hy3dshape.models.diffusion.transport.Sampler 134 | params: {} 135 | ode_params: 136 | sampling_method: euler # dopri5 ... 
137 | num_steps: &num_steps 50 138 | 139 | optimizer_cfg: 140 | optimizer: 141 | target: torch.optim.AdamW 142 | params: 143 | betas: [0.9, 0.99] 144 | eps: 1.e-6 145 | weight_decay: 1.e-2 146 | 147 | scheduler: 148 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler 149 | params: 150 | warm_up_steps: 50 # 5000 151 | f_start: 1.e-6 152 | f_min: 1.e-3 153 | f_max: 1.0 154 | 155 | pipeline_cfg: 156 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline 157 | 158 | image_processor_cfg: 159 | target: hy3dshape.preprocessors.ImageProcessorV2 160 | params: {} 161 | 162 | callbacks: 163 | logger: 164 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger 165 | params: 166 | step_frequency: 100 # 10000 167 | num_samples: 1 168 | sample_times: 1 169 | mean: *mean 170 | std: *std 171 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01] 172 | octree_depth: 8 173 | num_chunks: 50000 174 | mc_level: 0.0 175 | 176 | file_loggers: 177 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger 178 | params: 179 | step_frequency: 50 # 5000 180 | test_data_path: "tools/mini_testset/images.json" 181 | -------------------------------------------------------------------------------- /hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-512.yaml: -------------------------------------------------------------------------------- 1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518" 2 | 3 | training: 4 | steps: 10_0000_0000 5 | use_amp: true 6 | amp_type: "bf16" 7 | base_lr: 1e-4 8 | gradient_clip_val: 1.0 9 | gradient_clip_algorithm: "norm" 10 | every_n_train_steps: 2000 # 5000 11 | val_check_interval: 50 # 4096 12 | limit_val_batches: 16 13 | 14 | dataset: 15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule 16 | params: 17 | #! Base setting 18 | batch_size: 2 19 | num_workers: 8 20 | val_num_workers: 4 21 | 22 | # Data 23 | train_data_list: tools/mini_trainset/preprocessed 24 | val_data_list: tools/mini_trainset/preprocessed 25 | 26 | #! Image loading 27 | cond_stage_key: "image" # image / text / image_text 28 | image_size: 518 29 | mean: &mean [0.5, 0.5, 0.5] 30 | std: &std [0.5, 0.5, 0.5] 31 | 32 | #! Point cloud sampling 33 | pc_size: &pc_size 81920 34 | pc_sharpedge_size: &pc_sharpedge_size 0 35 | sharpedge_label: &sharpedge_label true 36 | return_normal: true 37 | 38 | #! 
Augmentation 39 | padding: true 40 | 41 | model: 42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser 43 | params: 44 | first_stage_key: "surface" 45 | cond_stage_key: "image" 46 | scale_by_std: false 47 | z_scale_factor: &z_scale_factor 1.0039506158752403 48 | torch_compile: false 49 | 50 | # ema_config: 51 | # ema_model: LitEma 52 | # ema_decay: 0.999 53 | # ema_inference: false 54 | 55 | first_stage_config: 56 | target: hy3dshape.models.autoencoders.ShapeVAE 57 | from_pretrained: tencent/Hunyuan3D-2.1 58 | params: 59 | num_latents: &num_latents 512 60 | embed_dim: 64 61 | num_freqs: 8 62 | include_pi: false 63 | heads: 16 64 | width: 1024 65 | num_encoder_layers: 8 66 | num_decoder_layers: 16 67 | qkv_bias: false 68 | qk_norm: true 69 | scale_factor: *z_scale_factor 70 | geo_decoder_mlp_expand_ratio: 4 71 | geo_decoder_downsample_ratio: 1 72 | geo_decoder_ln_post: true 73 | point_feats: 4 74 | pc_size: *pc_size 75 | pc_sharpedge_size: *pc_sharpedge_size 76 | 77 | cond_stage_config: 78 | target: hy3dshape.models.conditioner.SingleImageEncoder 79 | params: 80 | main_image_encoder: 81 | type: DinoImageEncoder # dino large 82 | kwargs: 83 | config: 84 | attention_probs_dropout_prob: 0.0 85 | drop_path_rate: 0.0 86 | hidden_act: gelu 87 | hidden_dropout_prob: 0.0 88 | hidden_size: 1024 89 | image_size: 518 90 | initializer_range: 0.02 91 | layer_norm_eps: 1.e-6 92 | layerscale_value: 1.0 93 | mlp_ratio: 4 94 | model_type: dinov2 95 | num_attention_heads: 16 96 | num_channels: 3 97 | num_hidden_layers: 24 98 | patch_size: 14 99 | qkv_bias: true 100 | torch_dtype: float32 101 | use_swiglu_ffn: false 102 | image_size: 518 103 | use_cls_token: true 104 | 105 | 106 | denoiser_cfg: 107 | target: hy3dshape.models.denoisers.hunyuandit.HunYuanDiTPlain 108 | params: 109 | input_size: *num_latents 110 | in_channels: 64 111 | hidden_size: 768 112 | context_dim: 1024 113 | depth: 6 114 | num_heads: 12 115 | qk_norm: true 116 | text_len: 1370 117 | with_decoupled_ca: false 118 | use_attention_pooling: false 119 | qk_norm_type: 'rms' 120 | qkv_bias: false 121 | use_pos_emb: false 122 | num_moe_layers: 3 123 | num_experts: 4 124 | moe_top_k: 2 125 | 126 | scheduler_cfg: 127 | transport: 128 | target: hy3dshape.models.diffusion.transport.create_transport 129 | params: 130 | path_type: Linear 131 | prediction: velocity 132 | sampler: 133 | target: hy3dshape.models.diffusion.transport.Sampler 134 | params: {} 135 | ode_params: 136 | sampling_method: euler # dopri5 ... 
137 | num_steps: &num_steps 50 138 | 139 | optimizer_cfg: 140 | optimizer: 141 | target: torch.optim.AdamW 142 | params: 143 | betas: [0.9, 0.99] 144 | eps: 1.e-6 145 | weight_decay: 1.e-2 146 | 147 | scheduler: 148 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler 149 | params: 150 | warm_up_steps: 50 # 5000 151 | f_start: 1.e-6 152 | f_min: 1.e-3 153 | f_max: 1.0 154 | 155 | pipeline_cfg: 156 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline 157 | 158 | image_processor_cfg: 159 | target: hy3dshape.preprocessors.ImageProcessorV2 160 | params: {} 161 | 162 | callbacks: 163 | logger: 164 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger 165 | params: 166 | step_frequency: 100 # 10000 167 | num_samples: 1 168 | sample_times: 1 169 | mean: *mean 170 | std: *std 171 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01] 172 | octree_depth: 8 173 | num_chunks: 50000 174 | mc_level: 0.0 175 | 176 | file_loggers: 177 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger 178 | params: 179 | step_frequency: 50 # 5000 180 | test_data_path: "tools/mini_testset/images.json" 181 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🌀 ComfyUI Wrapper for [Hunyuan3D-2.1](https://github.com/Tencent-Hunyuan/Hunyuan3D-2.1) 2 | 3 | > **ComfyUI integration** for Tencent's powerful **Hunyuan3D-2.1** model. Supports textured 3D generation with optional high-quality UV mapping. 4 | 5 | --- 6 | 7 | ## 📦 Repository & Models 8 | 9 | * **GitHub:** [Tencent-Hunyuan/Hunyuan3D-2.1](https://github.com/Tencent-Hunyuan/Hunyuan3D-2.1) 10 | * **Model Weights (HuggingFace):** 11 | 👉 [Main page](https://huggingface.co/tencent/Hunyuan3D-2.1/tree/main) 12 | 13 | ### 🔧 Required Checkpoints 14 | 15 | Place the following checkpoints into the corresponding folders under your `ComfyUI` directory: 16 | 17 | ``` 18 | ComfyUI/ 19 | ├── models/ 20 | │ ├── diffusion_models/ 21 | │ │ └── hunyuan3d-dit-v2-1.ckpt 22 | │ ├── vae/ 23 | │ │ └── hunyuan3d-vae-v2-1.ckpt 24 | ``` 25 | 26 | --- 27 | 28 | ## ⚙️ Installation Guide 29 | 30 | > Tested on **Windows 11** with **Python 3.12** and **Torch >= 2.6.0 + cu126**. Compatible with the latest ComfyUI Portable release. 31 | 32 | ### 1. Install Python Dependencies 33 | 34 | For a standard Python environment: 35 | 36 | ```bash 37 | python -m pip install -r ComfyUI/custom_nodes/ComfyUI-Hunyuan3d-2-1/requirements.txt 38 | ``` 39 | 40 | For **ComfyUI Portable**: 41 | 42 | ```bash 43 | python_embeded\python.exe -m pip install -r ComfyUI\custom_nodes\ComfyUI-Hunyuan3d-2-1\requirements.txt 44 | ``` 45 | 46 | --- 47 | 48 | ### 2. Install or Compile Texture Generation Modules 49 | 50 | Two critical C++ extensions need to be installed: the **custom rasterizer** and the **differentiable renderer**.
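Whichever option you choose below, you can afterwards verify that both extensions import correctly from the Python environment ComfyUI runs on. This is only a minimal sanity check; the module names `custom_rasterizer` and `mesh_inpaint_processor` are assumed from the wheel filenames and the compile script, so adjust them if your build differs.

```bash
# Minimal import check (ComfyUI Portable shown; use plain `python` for a standard environment)
python_embeded\python.exe -c "import custom_rasterizer, mesh_inpaint_processor; print('texture modules OK')"
```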
51 | 52 | #### Option A: Use Precompiled Wheels (Recommended) 53 | 54 | #### Custom Rasterizer 55 | 56 | You will find precompiled wheels in the `hy3dpaint\custom_rasterizer\dist` folder 57 | 58 | For standard Python: 59 | 60 | For example, if you are on Python 3.12: 61 | 62 | ```bash 63 | pip install custom_rasterizer-0.1-cp312-cp312-win_amd64.whl 64 | ``` 65 | 66 | For ComfyUI Portable: 67 | 68 | ```bash 69 | python_embeded\python.exe -m pip install ComfyUI\custom_nodes\ComfyUI-Hunyuan3d-2-1\hy3dpaint\custom_rasterizer\dist\custom_rasterizer-0.1-cp312-cp312-win_amd64.whl 70 | ``` 71 | 72 | #### Differentiable Renderer 73 | 74 | You will find precompiled wheels in the `hy3dpaint\DifferentiableRenderer\dist` folder 75 | 76 | For standard Python: 77 | 78 | For example, if you are on Python 3.12: 79 | 80 | ```bash 81 | pip install mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl 82 | ``` 83 | 84 | For ComfyUI Portable: 85 | 86 | ```bash 87 | python_embeded\python.exe -m pip install ComfyUI\custom_nodes\ComfyUI-Hunyuan3d-2-1\hy3dpaint\DifferentiableRenderer\dist\mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl 88 | ``` 89 | 90 | --- 91 | 92 | #### Option B: Manual Compilation (for advanced users) 93 | 94 | ```bash 95 | # Compile custom rasterizer 96 | cd ComfyUI/custom_nodes/ComfyUI-Hunyuan3d-2-1/hy3dpaint/custom_rasterizer 97 | python setup.py install 98 | 99 | # Compile differentiable renderer 100 | cd ../DifferentiableRenderer 101 | python setup.py install 102 | ``` 103 | 104 | --- 105 | 106 | ## 🩻 Optional: Fix UV Wrapping for High Poly Meshes (Patched Xatlas) 107 | 108 | This upgrade improves UV unwrapping stability for complex meshes. 109 | 110 | ```bash 111 | # Step 1: Uninstall existing xatlas 112 | python_embeded\python.exe -m pip uninstall xatlas 113 | 114 | # Step 2: Clone updated xatlas-python wrapper 115 | cd ComfyUI_windows_portable 116 | git clone --recursive https://github.com/mworchel/xatlas-python.git 117 | 118 | # Step 3: Replace internal xatlas source 119 | cd xatlas-python\extern 120 | del /s /q xatlas 121 | git clone --recursive https://github.com/jpcy/xatlas 122 | 123 | # Step 4: Patch source file 124 | # In xatlas-python/extern/xatlas/source/xatlas/xatlas.cpp: 125 | # Line 6774: change `#if 0` → `//#if 0` 126 | # Line 6778: change `#endif` → `//#endif` 127 | 128 | # Step 5: Install patched xatlas wrapper 129 | cd ../../..
130 | python_embeded\python.exe -m pip install .\xatlas-python\ 131 | ``` 132 | Alternatively, the same procedure as a single PowerShell command: 133 | ```powershell 134 | python_embeded\python.exe -m pip uninstall -y xatlas; ` 135 | cd ComfyUI_windows_portable; ` 136 | if (Test-Path xatlas-python) { Remove-Item xatlas-python -Recurse -Force }; ` 137 | git clone --recursive https://github.com/mworchel/xatlas-python.git; ` 138 | cd xatlas-python\extern; ` 139 | if (Test-Path xatlas) { Remove-Item xatlas -Recurse -Force }; ` 140 | git clone --recursive https://github.com/jpcy/xatlas; ` 141 | (Get-Content .\xatlas\source\xatlas\xatlas.cpp) -replace '#if 0', '//#if 0' -replace '#endif', '//#endif' | Set-Content .\xatlas\source\xatlas\xatlas.cpp; ` 142 | cd ..\..\..; ` 143 | python_embeded\python.exe -m pip install .\xatlas-python\ 144 | ``` 145 | 146 | --- 147 | 148 | ## 📂 Directory Overview 149 | 150 | ``` 151 | ComfyUI/ 152 | ├── custom_nodes/ 153 | │ └── ComfyUI-Hunyuan3d-2-1/ 154 | │ ├── hy3dpaint/ 155 | │ │ ├── custom_rasterizer/ # Custom rasterizer module 156 | │ │ │ ├── setup.py 157 | │ │ │ └── dist/ # Precompiled wheels 158 | │ │ ├── DifferentiableRenderer/ # Differentiable renderer 159 | │ │ │ ├── setup.py 160 | │ │ │ └── dist/ # Precompiled wheels 161 | ├── models/ 162 | │ ├── diffusion_models/ 163 | │ │ └── [hunyuan3d-dit-v2-1.ckpt](https://huggingface.co/tencent/Hunyuan3D-2.1/tree/main/hunyuan3d-dit-v2-1) 164 | │ └── vae/ 165 | │ └── [hunyuan3d-vae-v2-1.ckpt](https://huggingface.co/tencent/Hunyuan3D-2.1/tree/main/hunyuan3d-vae-v2-1) 166 | ├── xatlas-python/ # Patched UV unwrapper (optional) 167 | │ └── extern/ 168 | │ └── xatlas/ 169 | ``` 170 | 171 | --- 172 | 173 | ## 🙏 Acknowledgements 174 | 175 | * **[kijai](https://github.com/kijai/ComfyUI-Hunyuan3DWrapper)** — Original wrapper developer for Hunyuan3D v2.0 176 | * TrueMike, Agee, Palindar, and the vibrant Discord community 177 | * Tencent team for the incredible [Hunyuan3D-2.1](https://github.com/Tencent-Hunyuan/Hunyuan3D-2.1) model 178 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/data/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved. 4 | # This file is part of the WebDataset library. 5 | # See the LICENSE file for licensing terms (BSD-style).
6 | 7 | 8 | """Miscellaneous utility functions.""" 9 | 10 | import importlib 11 | import itertools as itt 12 | import os 13 | import re 14 | import sys 15 | from typing import Any, Callable, Iterator, Union 16 | import torch 17 | import numpy as np 18 | 19 | 20 | def make_seed(*args): 21 | seed = 0 22 | for arg in args: 23 | seed = (seed * 31 + hash(arg)) & 0x7FFFFFFF 24 | return seed 25 | 26 | 27 | class PipelineStage: 28 | def invoke(self, *args, **kw): 29 | raise NotImplementedError 30 | 31 | 32 | def identity(x: Any) -> Any: 33 | """Return the argument as is.""" 34 | return x 35 | 36 | 37 | def safe_eval(s: str, expr: str = "{}"): 38 | """Evaluate the given expression more safely.""" 39 | if re.sub("[^A-Za-z0-9_]", "", s) != s: 40 | raise ValueError(f"safe_eval: illegal characters in: '{s}'") 41 | return eval(expr.format(s)) 42 | 43 | 44 | def lookup_sym(sym: str, modules: list): 45 | """Look up a symbol in a list of modules.""" 46 | for mname in modules: 47 | module = importlib.import_module(mname, package="webdataset") 48 | result = getattr(module, sym, None) 49 | if result is not None: 50 | return result 51 | return None 52 | 53 | 54 | def repeatedly0( 55 | loader: Iterator, nepochs: int = sys.maxsize, nbatches: int = sys.maxsize 56 | ): 57 | """Repeatedly returns batches from a DataLoader.""" 58 | for _ in range(nepochs): 59 | yield from itt.islice(loader, nbatches) 60 | 61 | 62 | def guess_batchsize(batch: Union[tuple, list]): 63 | """Guess the batch size by looking at the length of the first element in a tuple.""" 64 | return len(batch[0]) 65 | 66 | 67 | def repeatedly( 68 | source: Iterator, 69 | nepochs: int = None, 70 | nbatches: int = None, 71 | nsamples: int = None, 72 | batchsize: Callable[..., int] = guess_batchsize, 73 | ): 74 | """Repeatedly yield samples from an iterator.""" 75 | epoch = 0 76 | batch = 0 77 | total = 0 78 | while True: 79 | for sample in source: 80 | yield sample 81 | batch += 1 82 | if nbatches is not None and batch >= nbatches: 83 | return 84 | if nsamples is not None: 85 | total += guess_batchsize(sample) 86 | if total >= nsamples: 87 | return 88 | epoch += 1 89 | if nepochs is not None and epoch >= nepochs: 90 | return 91 | 92 | 93 | def pytorch_worker_info(group=None): # sourcery skip: use-contextlib-suppress 94 | """Return node and worker info for PyTorch and some distributed environments.""" 95 | rank = 0 96 | world_size = 1 97 | worker = 0 98 | num_workers = 1 99 | if "RANK" in os.environ and "WORLD_SIZE" in os.environ: 100 | rank = int(os.environ["RANK"]) 101 | world_size = int(os.environ["WORLD_SIZE"]) 102 | else: 103 | try: 104 | import torch.distributed 105 | 106 | if torch.distributed.is_available() and torch.distributed.is_initialized(): 107 | group = group or torch.distributed.group.WORLD 108 | rank = torch.distributed.get_rank(group=group) 109 | world_size = torch.distributed.get_world_size(group=group) 110 | except ModuleNotFoundError: 111 | pass 112 | if "WORKER" in os.environ and "NUM_WORKERS" in os.environ: 113 | worker = int(os.environ["WORKER"]) 114 | num_workers = int(os.environ["NUM_WORKERS"]) 115 | else: 116 | try: 117 | import torch.utils.data 118 | 119 | worker_info = torch.utils.data.get_worker_info() 120 | if worker_info is not None: 121 | worker = worker_info.id 122 | num_workers = worker_info.num_workers 123 | except ModuleNotFoundError: 124 | pass 125 | 126 | return rank, world_size, worker, num_workers 127 | 128 | 129 | def pytorch_worker_seed(group=None): 130 | """Compute a distinct, deterministic RNG seed for each 
worker and node.""" 131 | rank, world_size, worker, num_workers = pytorch_worker_info(group=group) 132 | return rank * 1000 + worker 133 | 134 | def worker_init_fn(_): 135 | worker_info = torch.utils.data.get_worker_info() 136 | worker_id = worker_info.id 137 | 138 | # dataset = worker_info.dataset 139 | # split_size = dataset.num_records // worker_info.num_workers 140 | # # reset num_records to the true number to retain reliable length information 141 | # dataset.sample_ids = dataset.valid_ids[worker_id * split_size:(worker_id + 1) * split_size] 142 | # current_id = np.random.choice(len(np.random.get_state()[1]), 1) 143 | # return np.random.seed(np.random.get_state()[1][current_id] + worker_id) 144 | 145 | return np.random.seed(np.random.get_state()[1][0] + worker_id) 146 | 147 | 148 | def collation_fn(samples, combine_tensors=True, combine_scalars=True): 149 | """ 150 | 151 | Args: 152 | samples (list[dict]): 153 | combine_tensors: 154 | combine_scalars: 155 | 156 | Returns: 157 | 158 | """ 159 | 160 | result = {} 161 | 162 | keys = samples[0].keys() 163 | 164 | for key in keys: 165 | result[key] = [] 166 | 167 | for sample in samples: 168 | for key in keys: 169 | val = sample[key] 170 | result[key].append(val) 171 | 172 | for key in keys: 173 | val_list = result[key] 174 | if isinstance(val_list[0], (int, float)): 175 | if combine_scalars: 176 | result[key] = np.array(result[key]) 177 | 178 | elif isinstance(val_list[0], torch.Tensor): 179 | if combine_tensors: 180 | result[key] = torch.stack(val_list) 181 | 182 | elif isinstance(val_list[0], np.ndarray): 183 | if combine_tensors: 184 | result[key] = np.stack(val_list) 185 | 186 | return result 187 | -------------------------------------------------------------------------------- /hy3dpaint/utils/multiview_utils.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
14 | 15 | import os 16 | import torch 17 | import random 18 | import numpy as np 19 | from PIL import Image 20 | from typing import List 21 | import huggingface_hub 22 | from omegaconf import OmegaConf 23 | from diffusers import DiffusionPipeline 24 | from diffusers import EulerAncestralDiscreteScheduler, DDIMScheduler, UniPCMultistepScheduler 25 | from ..hunyuanpaintpbr.pipeline import HunyuanPaintPipeline 26 | 27 | 28 | class multiviewDiffusionNet: 29 | def __init__(self, config) -> None: 30 | self.device = config.device 31 | 32 | cfg_path = config.multiview_cfg_path 33 | custom_pipeline = config.custom_pipeline 34 | cfg = OmegaConf.load(cfg_path) 35 | self.cfg = cfg 36 | self.mode = self.cfg.model.params.stable_diffusion_config.custom_pipeline[2:] 37 | 38 | model_path = huggingface_hub.snapshot_download( 39 | repo_id=config.multiview_pretrained_path, 40 | allow_patterns=["hunyuan3d-paintpbr-v2-1/*"], 41 | ) 42 | 43 | model_path = os.path.join(model_path, "hunyuan3d-paintpbr-v2-1") 44 | 45 | pipeline = HunyuanPaintPipeline.from_pretrained( 46 | model_path, 47 | torch_dtype=torch.float16 48 | ) 49 | 50 | pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config, timestep_spacing="trailing") 51 | pipeline.set_progress_bar_config(disable=False) 52 | pipeline.eval() 53 | setattr(pipeline, "view_size", cfg.model.params.get("view_size", 320)) 54 | pipeline.enable_model_cpu_offload() 55 | self.pipeline = pipeline.to(self.device) 56 | self.pipeline.enable_vae_slicing() 57 | self.pipeline.enable_vae_tiling() 58 | 59 | if hasattr(self.pipeline.unet, "use_dino") and self.pipeline.unet.use_dino: 60 | from ..hunyuanpaintpbr.unet.modules import Dino_v2 61 | self.dino_v2 = Dino_v2(config.dino_ckpt_path).to(torch.float16) 62 | self.dino_v2 = self.dino_v2.to(self.device) 63 | 64 | def seed_everything(self, seed): 65 | random.seed(seed) 66 | np.random.seed(seed) 67 | torch.manual_seed(seed) 68 | os.environ["PL_GLOBAL_SEED"] = str(seed) 69 | 70 | @torch.no_grad() 71 | def __call__(self, images, conditions, prompt=None, custom_view_size=None, resize_input=False, num_steps=10, guidance_scale=3.0, seed=0): 72 | pils = self.forward_one( 73 | images, conditions, prompt=prompt, custom_view_size=custom_view_size, resize_input=resize_input, num_steps=num_steps, guidance_scale=guidance_scale, seed=seed 74 | ) 75 | return pils 76 | 77 | def forward_one(self, input_images, control_images, prompt=None, custom_view_size=None, resize_input=False, num_steps=10, guidance_scale=3.0, seed=0): 78 | self.seed_everything(seed) 79 | custom_view_size = custom_view_size if custom_view_size is not None else self.pipeline.view_size 80 | 81 | if not isinstance(input_images, List): 82 | input_images = [input_images] 83 | 84 | if not resize_input: 85 | input_images = [ 86 | input_image.resize((self.pipeline.view_size, self.pipeline.view_size)) for input_image in input_images 87 | ] 88 | else: 89 | input_images = [input_image.resize((custom_view_size, custom_view_size)) for input_image in input_images] 90 | 91 | for i in range(len(control_images)): 92 | control_images[i] = control_images[i].resize((custom_view_size, custom_view_size)) 93 | if control_images[i].mode == "L": 94 | control_images[i] = control_images[i].point(lambda x: 255 if x > 1 else 0, mode="1") 95 | kwargs = dict(generator=torch.Generator(device=self.pipeline.device).manual_seed(0)) 96 | 97 | num_view = len(control_images) // 2 98 | normal_image = [[control_images[i] for i in range(num_view)]] 99 | position_image = [[control_images[i + 
num_view] for i in range(num_view)]] 100 | 101 | kwargs["width"] = custom_view_size 102 | kwargs["height"] = custom_view_size 103 | kwargs["num_in_batch"] = num_view 104 | kwargs["images_normal"] = normal_image 105 | kwargs["images_position"] = position_image 106 | 107 | if hasattr(self.pipeline.unet, "use_dino") and self.pipeline.unet.use_dino: 108 | dino_hidden_states = self.dino_v2(input_images[0]) 109 | kwargs["dino_hidden_states"] = dino_hidden_states 110 | 111 | sync_condition = None 112 | 113 | infer_steps_dict = { 114 | "EulerAncestralDiscreteScheduler": 10, 115 | "UniPCMultistepScheduler": 10, 116 | "DDIMScheduler": 10, 117 | "ShiftSNRScheduler": 10, 118 | } 119 | 120 | mvd_image = self.pipeline( 121 | input_images[0:1], 122 | num_inference_steps=num_steps, 123 | prompt=prompt, 124 | sync_condition=sync_condition, 125 | guidance_scale=guidance_scale, 126 | **kwargs, 127 | ).images 128 | 129 | if "pbr" in self.mode: 130 | mvd_image = {"albedo": mvd_image[:num_view], "mr": mvd_image[num_view:]} 131 | # mvd_image = {'albedo':mvd_image[:num_view]} 132 | else: 133 | mvd_image = {"hdr": mvd_image} 134 | 135 | return mvd_image 136 | -------------------------------------------------------------------------------- /hy3dpaint/utils/pipeline_utils.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
14 | 15 | import torch 16 | import numpy as np 17 | 18 | 19 | class ViewProcessor: 20 | def __init__(self, config, render): 21 | self.config = config 22 | self.render = render 23 | 24 | def render_normal_multiview(self, camera_elevs, camera_azims, use_abs_coor=True): 25 | normal_maps = [] 26 | for elev, azim in zip(camera_elevs, camera_azims): 27 | normal_map = self.render.render_normal(elev, azim, use_abs_coor=use_abs_coor, return_type="pl") 28 | normal_maps.append(normal_map) 29 | 30 | return normal_maps 31 | 32 | def render_position_multiview(self, camera_elevs, camera_azims): 33 | position_maps = [] 34 | for elev, azim in zip(camera_elevs, camera_azims): 35 | position_map = self.render.render_position(elev, azim, return_type="pl") 36 | position_maps.append(position_map) 37 | 38 | return position_maps 39 | 40 | def bake_view_selection( 41 | self, candidate_camera_elevs, candidate_camera_azims, candidate_view_weights, max_selected_view_num 42 | ): 43 | 44 | original_resolution = self.render.default_resolution 45 | self.render.set_default_render_resolution(1024) 46 | 47 | selected_camera_elevs = [] 48 | selected_camera_azims = [] 49 | selected_view_weights = [] 50 | selected_alpha_maps = [] 51 | viewed_tri_idxs = [] 52 | viewed_masks = [] 53 | 54 | # 计算每个三角片的面积 55 | face_areas = self.render.get_face_areas(from_one_index=True) 56 | total_area = face_areas.sum() 57 | face_area_ratios = face_areas / total_area 58 | 59 | candidate_view_num = len(candidate_camera_elevs) 60 | self.render.set_boundary_unreliable_scale(2) 61 | 62 | for elev, azim in zip(candidate_camera_elevs, candidate_camera_azims): 63 | viewed_tri_idx = self.render.render_alpha(elev, azim, return_type="np") 64 | viewed_tri_idxs.append(set(np.unique(viewed_tri_idx.flatten()))) 65 | viewed_masks.append(viewed_tri_idx[0, :, :, 0] > 0) 66 | 67 | is_selected = [False for _ in range(candidate_view_num)] 68 | total_viewed_tri_idxs = set() 69 | total_viewed_area = 0.0 70 | 71 | for idx in range(6): 72 | selected_camera_elevs.append(candidate_camera_elevs[idx]) 73 | selected_camera_azims.append(candidate_camera_azims[idx]) 74 | selected_view_weights.append(candidate_view_weights[idx]) 75 | selected_alpha_maps.append(viewed_masks[idx]) 76 | is_selected[idx] = True 77 | total_viewed_tri_idxs.update(viewed_tri_idxs[idx]) 78 | 79 | total_viewed_area = face_area_ratios[list(total_viewed_tri_idxs)].sum() 80 | for iter in range(max_selected_view_num - len(selected_view_weights)): 81 | max_inc = 0 82 | max_idx = -1 83 | 84 | for idx, (elev, azim, weight) in enumerate( 85 | zip(candidate_camera_elevs, candidate_camera_azims, candidate_view_weights) 86 | ): 87 | if is_selected[idx]: 88 | continue 89 | new_tri_idxs = viewed_tri_idxs[idx] - total_viewed_tri_idxs 90 | new_inc_area = face_area_ratios[list(new_tri_idxs)].sum() 91 | 92 | if new_inc_area > max_inc: 93 | max_inc = new_inc_area 94 | max_idx = idx 95 | 96 | if max_inc > 0.0001: 97 | is_selected[max_idx] = True 98 | selected_camera_elevs.append(candidate_camera_elevs[max_idx]) 99 | selected_camera_azims.append(candidate_camera_azims[max_idx]) 100 | selected_view_weights.append(candidate_view_weights[max_idx]) 101 | selected_alpha_maps.append(viewed_masks[max_idx]) 102 | total_viewed_tri_idxs = total_viewed_tri_idxs.union(viewed_tri_idxs[max_idx]) 103 | total_viewed_area += max_inc 104 | else: 105 | break 106 | 107 | self.render.set_default_render_resolution(original_resolution) 108 | 109 | return selected_camera_elevs, selected_camera_azims, selected_view_weights 110 | 111 | def 
bake_from_multiview(self, views, camera_elevs, camera_azims, view_weights): 112 | project_textures, project_weighted_cos_maps = [], [] 113 | project_boundary_maps = [] 114 | 115 | for view, camera_elev, camera_azim, weight in zip(views, camera_elevs, camera_azims, view_weights): 116 | project_texture, project_cos_map, project_boundary_map = self.render.back_project( 117 | view, camera_elev, camera_azim 118 | ) 119 | project_cos_map = weight * (project_cos_map**self.config.bake_exp) 120 | project_textures.append(project_texture) 121 | project_weighted_cos_maps.append(project_cos_map) 122 | project_boundary_maps.append(project_boundary_map) 123 | texture, ori_trust_map = self.render.fast_bake_texture(project_textures, project_weighted_cos_maps) 124 | return texture, ori_trust_map > 1e-8 125 | 126 | def texture_inpaint(self, texture, mask, vertex_inpaint=True, method="NS", default=None, ): 127 | if default is not None: 128 | mask = mask.astype(bool) 129 | inpaint_value = torch.tensor(default, dtype=texture.dtype, device=texture.device) 130 | texture[~mask] = inpaint_value 131 | else: 132 | texture_np = self.render.uv_inpaint(texture, mask, vertex_inpaint, method) 133 | texture = torch.tensor(texture_np / 255).float().to(texture.device) 134 | 135 | return texture 136 | -------------------------------------------------------------------------------- /hy3dpaint/src/data/dataloader/objaverse_loader_forTexturePBR.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
14 | 15 | import os 16 | import time 17 | import glob 18 | import json 19 | import random 20 | import numpy as np 21 | import torch 22 | from .loader_util import BaseDataset 23 | 24 | 25 | class TextureDataset(BaseDataset): 26 | 27 | def __init__( 28 | self, json_path, num_view=6, image_size=512, lighting_suffix_pool=["light_PL", "light_AL", "light_ENVMAP"] 29 | ): 30 | self.data = list() 31 | self.num_view = num_view 32 | self.image_size = image_size 33 | self.lighting_suffix_pool = lighting_suffix_pool 34 | if isinstance(json_path, str): 35 | json_path = [json_path] 36 | for jp in json_path: 37 | with open(jp) as f: 38 | self.data.extend(json.load(f)) 39 | print("============= length of dataset %d =============" % len(self.data)) 40 | 41 | def __getitem__(self, index): 42 | try_sleep_interval = 20 43 | total_try_num = 100 44 | cnt = try_sleep_interval * total_try_num 45 | # try: 46 | images_ref = list() 47 | images_albedo = list() 48 | images_mr = list() 49 | images_normal = list() 50 | images_position = list() 51 | bg_white = [1.0, 1.0, 1.0] 52 | bg_black = [0.0, 0.0, 0.0] 53 | bg_gray = [127 / 255.0, 127 / 255.0, 127 / 255.0] 54 | dirx = self.data[index] 55 | 56 | condition_dict = {} 57 | 58 | # 6view 59 | fix_num_view = self.num_view 60 | available_views = [] 61 | for ext in ["*_albedo.png", "*_albedo.jpg", "*_albedo.jpeg"]: 62 | available_views.extend(glob.glob(os.path.join(dirx, "render_tex", ext))) 63 | cond_images = ( 64 | glob.glob(os.path.join(dirx, "render_cond", "*.png")) 65 | + glob.glob(os.path.join(dirx, "render_cond", "*.jpg")) 66 | + glob.glob(os.path.join(dirx, "render_cond", "*.jpeg")) 67 | ) 68 | 69 | # 确保有足够的样本 70 | if len(available_views) < fix_num_view: 71 | print( 72 | f"Warning: Only {len(available_views)} views available, but {fix_num_view} requested." 73 | "Using all available views." 
74 | ) 75 | images_gen = available_views 76 | else: 77 | images_gen = random.sample(available_views, fix_num_view) 78 | 79 | if not cond_images: 80 | raise ValueError(f"No condition images found in {os.path.join(dirx, 'render_cond')}") 81 | ref_image_path = random.choice(cond_images) 82 | light_suffix = None 83 | for suffix in self.lighting_suffix_pool: 84 | if suffix in ref_image_path: 85 | light_suffix = suffix 86 | break 87 | if light_suffix is None: 88 | raise ValueError(f"light suffix not found in {ref_image_path}") 89 | ref_image_diff_light_path = random.choice( 90 | [ 91 | ref_image_path.replace(light_suffix, tar_suffix) 92 | for tar_suffix in self.lighting_suffix_pool 93 | if tar_suffix != light_suffix 94 | ] 95 | ) 96 | images_ref_paths = [ref_image_path, ref_image_diff_light_path] 97 | 98 | # Data aug 99 | bg_c_record = None 100 | for i, image_ref in enumerate(images_ref_paths): 101 | if random.random() < 0.6: 102 | bg_c = bg_gray 103 | else: 104 | if random.random() < 0.5: 105 | bg_c = bg_black 106 | else: 107 | bg_c = bg_white 108 | if i == 0: 109 | bg_c_record = bg_c 110 | image, alpha = self.load_image(image_ref, bg_c_record) 111 | image = self.augment_image(image, bg_c_record).float() 112 | images_ref.append(image) 113 | condition_dict["images_cond"] = torch.stack(images_ref, dim=0).float() 114 | 115 | for i, image_gen in enumerate(images_gen): 116 | images_albedo.append(self.augment_image(self.load_image(image_gen, bg_gray)[0], bg_gray)) 117 | images_mr.append( 118 | self.augment_image(self.load_image(image_gen.replace("_albedo", "_mr"), bg_gray)[0], bg_gray) 119 | ) 120 | images_normal.append( 121 | self.augment_image(self.load_image(image_gen.replace("_albedo", "_normal"), bg_gray)[0], bg_gray) 122 | ) 123 | images_position.append( 124 | self.augment_image(self.load_image(image_gen.replace("_albedo", "_pos"), bg_gray)[0], bg_gray) 125 | ) 126 | 127 | condition_dict["images_albedo"] = torch.stack(images_albedo, dim=0).float() 128 | condition_dict["images_mr"] = torch.stack(images_mr, dim=0).float() 129 | condition_dict["images_normal"] = torch.stack(images_normal, dim=0).float() 130 | condition_dict["images_position"] = torch.stack(images_position, dim=0).float() 131 | condition_dict["name"] = dirx # .replace('/', '_') 132 | return condition_dict # (N, 3, H, W) 133 | 134 | # except Exception as e: 135 | # print(e, self.data[index]) 136 | # # exit() 137 | 138 | 139 | if __name__ == "__main__": 140 | dataset = TextureDataset(json_path=["../../../train_examples/examples.json"]) 141 | print("images_cond", dataset[0]["images_cond"].shape) 142 | print("images_albedo", dataset[0]["images_albedo"].shape) 143 | print("images_mr", dataset[0]["images_mr"].shape) 144 | print("images_normal", dataset[0]["images_normal"].shape) 145 | print("images_position", dataset[0]["images_position"].shape) 146 | print("name", dataset[0]["name"]) 147 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/preprocessors.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 
5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import cv2 16 | import numpy as np 17 | import torch 18 | from PIL import Image 19 | from einops import repeat, rearrange 20 | 21 | 22 | def array_to_tensor(np_array): 23 | image_pt = torch.tensor(np_array).float() 24 | image_pt = image_pt / 255 * 2 - 1 25 | image_pt = rearrange(image_pt, "h w c -> c h w") 26 | image_pts = repeat(image_pt, "c h w -> b c h w", b=1) 27 | return image_pts 28 | 29 | 30 | class ImageProcessorV2: 31 | def __init__(self, size=512, border_ratio=None): 32 | self.size = size 33 | self.border_ratio = border_ratio 34 | 35 | @staticmethod 36 | def recenter(image, border_ratio: float = 0.2): 37 | """ recenter an image to leave some empty space at the image border. 38 | 39 | Args: 40 | image (ndarray): input image, float/uint8 [H, W, 3/4] 41 | mask (ndarray): alpha mask, bool [H, W] 42 | border_ratio (float, optional): border ratio, image will be resized to (1 - border_ratio). Defaults to 0.2. 43 | 44 | Returns: 45 | ndarray: output image, float/uint8 [H, W, 3/4] 46 | """ 47 | 48 | if image.shape[-1] == 4: 49 | mask = image[..., 3] 50 | else: 51 | mask = np.ones_like(image[..., 0:1]) * 255 52 | image = np.concatenate([image, mask], axis=-1) 53 | mask = mask[..., 0] 54 | 55 | H, W, C = image.shape 56 | 57 | size = max(H, W) 58 | result = np.zeros((size, size, C), dtype=np.uint8) 59 | 60 | coords = np.nonzero(mask) 61 | x_min, x_max = coords[0].min(), coords[0].max() 62 | y_min, y_max = coords[1].min(), coords[1].max() 63 | h = x_max - x_min 64 | w = y_max - y_min 65 | if h == 0 or w == 0: 66 | raise ValueError('input image is empty') 67 | desired_size = int(size * (1 - border_ratio)) 68 | scale = desired_size / max(h, w) 69 | h2 = int(h * scale) 70 | w2 = int(w * scale) 71 | x2_min = (size - h2) // 2 72 | x2_max = x2_min + h2 73 | 74 | y2_min = (size - w2) // 2 75 | y2_max = y2_min + w2 76 | 77 | result[x2_min:x2_max, y2_min:y2_max] = cv2.resize(image[x_min:x_max, y_min:y_max], (w2, h2), 78 | interpolation=cv2.INTER_AREA) 79 | 80 | bg = np.ones((result.shape[0], result.shape[1], 3), dtype=np.uint8) * 255 81 | 82 | mask = result[..., 3:].astype(np.float32) / 255 83 | result = result[..., :3] * mask + bg * (1 - mask) 84 | 85 | mask = mask * 255 86 | result = result.clip(0, 255).astype(np.uint8) 87 | mask = mask.clip(0, 255).astype(np.uint8) 88 | return result, mask 89 | 90 | def load_image(self, image, border_ratio=0.15, to_tensor=True): 91 | if isinstance(image, str): 92 | image = cv2.imread(image, cv2.IMREAD_UNCHANGED) 93 | image, mask = self.recenter(image, border_ratio=border_ratio) 94 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 95 | elif isinstance(image, Image.Image): 96 | image = image.convert("RGBA") 97 | image = np.asarray(image) 98 | image, mask = self.recenter(image, border_ratio=border_ratio) 99 | 100 | image = cv2.resize(image, (self.size, self.size), 
interpolation=cv2.INTER_CUBIC) 101 | mask = cv2.resize(mask, (self.size, self.size), interpolation=cv2.INTER_NEAREST) 102 | mask = mask[..., np.newaxis] 103 | 104 | if to_tensor: 105 | image = array_to_tensor(image) 106 | mask = array_to_tensor(mask) 107 | return image, mask 108 | 109 | def __call__(self, image, border_ratio=0.15, to_tensor=True, **kwargs): 110 | if self.border_ratio is not None: 111 | border_ratio = self.border_ratio 112 | image, mask = self.load_image(image, border_ratio=border_ratio, to_tensor=to_tensor) 113 | outputs = { 114 | 'image': image, 115 | 'mask': mask 116 | } 117 | return outputs 118 | 119 | 120 | class MVImageProcessorV2(ImageProcessorV2): 121 | """ 122 | view order: front, front clockwise 90, back, front clockwise 270 123 | """ 124 | return_view_idx = True 125 | 126 | def __init__(self, size=512, border_ratio=None): 127 | super().__init__(size, border_ratio) 128 | self.view2idx = { 129 | 'front': 0, 130 | 'left': 1, 131 | 'back': 2, 132 | 'right': 3 133 | } 134 | 135 | def __call__(self, image_dict, border_ratio=0.15, to_tensor=True, **kwargs): 136 | if self.border_ratio is not None: 137 | border_ratio = self.border_ratio 138 | 139 | images = [] 140 | masks = [] 141 | view_idxs = [] 142 | for idx, (view_tag, image) in enumerate(image_dict.items()): 143 | view_idxs.append(self.view2idx[view_tag]) 144 | image, mask = self.load_image(image, border_ratio=border_ratio, to_tensor=to_tensor) 145 | images.append(image) 146 | masks.append(mask) 147 | 148 | zipped_lists = zip(view_idxs, images, masks) 149 | sorted_zipped_lists = sorted(zipped_lists) 150 | view_idxs, images, masks = zip(*sorted_zipped_lists) 151 | 152 | image = torch.cat(images, 0).unsqueeze(0) 153 | mask = torch.cat(masks, 0).unsqueeze(0) 154 | outputs = { 155 | 'image': image, 156 | 'mask': mask, 157 | 'view_idxs': view_idxs 158 | } 159 | return outputs 160 | 161 | 162 | IMAGE_PROCESSORS = { 163 | "v2": ImageProcessorV2, 164 | 'mv_v2': MVImageProcessorV2, 165 | } 166 | 167 | DEFAULT_IMAGEPROCESSOR = 'v2' 168 | -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu: -------------------------------------------------------------------------------- 1 | #include "rasterizer.h" 2 | 3 | __device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) { 4 | float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0])); 5 | float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0])); 6 | float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1])); 7 | float y_max = std::max(vt0[1], std::max(vt1[1],vt2[1])); 8 | 9 | for (int px = x_min; px < x_max + 1; ++px) { 10 | if (px < 0 || px >= width) 11 | continue; 12 | for (int py = y_min; py < y_max + 1; ++py) { 13 | if (py < 0 || py >= height) 14 | continue; 15 | float vt[2] = {px + 0.5f, py + 0.5f}; 16 | float baryCentricCoordinate[3]; 17 | calculateBarycentricCoordinate(vt0, vt1, vt2, vt, baryCentricCoordinate); 18 | if (isBarycentricCoordInBounds(baryCentricCoordinate)) { 19 | int pixel = py * width + px; 20 | if (zbuffer == 0) { 21 | atomicExch(reinterpret_cast(&zbuffer[pixel]),static_cast(idx + 1)); 22 | continue; 23 | } 24 | float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2]; 25 | float depth_thres = 0; 26 | if (d) { 27 | depth_thres = d[pixel] * 0.49999f + 0.5f + occlusion_truncation; 28 | } 29 | 30 | int 
z_quantize = depth * (2<<17); 31 | INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1); 32 | if (depth < depth_thres) 33 | continue; 34 | atomicMin(reinterpret_cast(&zbuffer[pixel]),static_cast(token)); 35 | } 36 | } 37 | } 38 | } 39 | 40 | __global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces, 41 | float* barycentric_map) 42 | { 43 | int pix = blockIdx.x * blockDim.x + threadIdx.x; 44 | if (pix >= width * height) 45 | return; 46 | INT64 f = zbuffer[pix] % MAXINT; 47 | if (f == (MAXINT-1)) { 48 | findices[pix] = 0; 49 | barycentric_map[pix * 3] = 0; 50 | barycentric_map[pix * 3 + 1] = 0; 51 | barycentric_map[pix * 3 + 2] = 0; 52 | return; 53 | } 54 | findices[pix] = f; 55 | f -= 1; 56 | float barycentric[3] = {0, 0, 0}; 57 | if (f >= 0) { 58 | float vt[2] = {float(pix % width) + 0.5f, float(pix / width) + 0.5f}; 59 | float* vt0_ptr = V + (F[f * 3] * 4); 60 | float* vt1_ptr = V + (F[f * 3 + 1] * 4); 61 | float* vt2_ptr = V + (F[f * 3 + 2] * 4); 62 | 63 | float vt0[2] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f}; 64 | float vt1[2] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f}; 65 | float vt2[2] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f}; 66 | 67 | calculateBarycentricCoordinate(vt0, vt1, vt2, vt, barycentric); 68 | 69 | barycentric[0] = barycentric[0] / vt0_ptr[3]; 70 | barycentric[1] = barycentric[1] / vt1_ptr[3]; 71 | barycentric[2] = barycentric[2] / vt2_ptr[3]; 72 | float w = 1.0f / (barycentric[0] + barycentric[1] + barycentric[2]); 73 | barycentric[0] *= w; 74 | barycentric[1] *= w; 75 | barycentric[2] *= w; 76 | 77 | } 78 | barycentric_map[pix * 3] = barycentric[0]; 79 | barycentric_map[pix * 3 + 1] = barycentric[1]; 80 | barycentric_map[pix * 3 + 2] = barycentric[2]; 81 | } 82 | 83 | __global__ void rasterizeImagecoordsKernelGPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces) 84 | { 85 | int f = blockIdx.x * blockDim.x + threadIdx.x; 86 | if (f >= num_faces) 87 | return; 88 | 89 | float* vt0_ptr = V + (F[f * 3] * 4); 90 | float* vt1_ptr = V + (F[f * 3 + 1] * 4); 91 | float* vt2_ptr = V + (F[f * 3 + 2] * 4); 92 | 93 | float vt0[3] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f, vt0_ptr[2] / vt0_ptr[3] * 0.49999f + 0.5f}; 94 | float vt1[3] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f, vt1_ptr[2] / vt1_ptr[3] * 0.49999f + 0.5f}; 95 | float vt2[3] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f, vt2_ptr[2] / vt2_ptr[3] * 0.49999f + 0.5f}; 96 | 97 | rasterizeTriangleGPU(f, vt0, vt1, vt2, width, height, zbuffer, d, occlusion_trunc); 98 | } 99 | 100 | std::vector rasterize_image_gpu(torch::Tensor V, torch::Tensor F, torch::Tensor D, 101 | int width, int height, float occlusion_truncation, int use_depth_prior) 102 | { 103 | int device_id = V.get_device(); 104 | cudaSetDevice(device_id); 105 | int num_faces = F.size(0); 106 | int num_vertices = V.size(0); 107 | auto options = 
torch::TensorOptions().dtype(torch::kInt32).device(torch::kCUDA, device_id).requires_grad(false); 108 | auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA, device_id).requires_grad(false); 109 | auto findices = torch::zeros({height, width}, options); 110 | INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1); 111 | auto z_min = torch::ones({height, width}, INT64_options) * (int64_t)maxint; 112 | 113 | if (!use_depth_prior) { 114 | rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr(), F.data_ptr(), 0, 115 | (INT64*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces); 116 | } else { 117 | rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr(), F.data_ptr(), D.data_ptr(), 118 | (INT64*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces); 119 | } 120 | 121 | auto float_options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, device_id).requires_grad(false); 122 | auto barycentric = torch::zeros({height, width, 3}, float_options); 123 | barycentricFromImgcoordGPU<<<(width * height + 255)/256, 256>>>(V.data_ptr(), F.data_ptr(), 124 | findices.data_ptr(), (INT64*)z_min.data_ptr(), width, height, num_vertices, num_faces, barycentric.data_ptr()); 125 | 126 | return {findices, barycentric}; 127 | } 128 | -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp: -------------------------------------------------------------------------------- 1 | #include "rasterizer.h" 2 | 3 | void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) { 4 | float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0])); 5 | float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0])); 6 | float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1])); 7 | float y_max = std::max(vt0[1], std::max(vt1[1],vt2[1])); 8 | 9 | for (int px = x_min; px < x_max + 1; ++px) { 10 | if (px < 0 || px >= width) 11 | continue; 12 | for (int py = y_min; py < y_max + 1; ++py) { 13 | if (py < 0 || py >= height) 14 | continue; 15 | float vt[2] = {px + 0.5f, py + 0.5f}; 16 | float baryCentricCoordinate[3]; 17 | calculateBarycentricCoordinate(vt0, vt1, vt2, vt, baryCentricCoordinate); 18 | if (isBarycentricCoordInBounds(baryCentricCoordinate)) { 19 | int pixel = py * width + px; 20 | if (zbuffer == 0) { 21 | zbuffer[pixel] = (INT64)(idx + 1); 22 | continue; 23 | } 24 | 25 | float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2]; 26 | float depth_thres = 0; 27 | if (d) { 28 | depth_thres = d[pixel] * 0.49999f + 0.5f + occlusion_truncation; 29 | } 30 | 31 | int z_quantize = depth * (2<<17); 32 | INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1); 33 | if (depth < depth_thres) 34 | continue; 35 | zbuffer[pixel] = std::min(zbuffer[pixel], token); 36 | } 37 | } 38 | } 39 | } 40 | 41 | void barycentricFromImgcoordCPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces, 42 | float* barycentric_map, int pix) 43 | { 44 | INT64 f = zbuffer[pix] % MAXINT; 45 | if (f == (MAXINT-1)) { 46 | findices[pix] = 0; 47 | barycentric_map[pix * 3] = 0; 48 | barycentric_map[pix * 3 + 1] = 0; 49 | barycentric_map[pix * 3 + 2] = 0; 50 | return; 51 | } 52 | 
findices[pix] = f; 53 | f -= 1; 54 | float barycentric[3] = {0, 0, 0}; 55 | if (f >= 0) { 56 | float vt[2] = {float(pix % width) + 0.5f, float(pix / width) + 0.5f}; 57 | float* vt0_ptr = V + (F[f * 3] * 4); 58 | float* vt1_ptr = V + (F[f * 3 + 1] * 4); 59 | float* vt2_ptr = V + (F[f * 3 + 2] * 4); 60 | 61 | float vt0[2] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f}; 62 | float vt1[2] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f}; 63 | float vt2[2] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f}; 64 | 65 | calculateBarycentricCoordinate(vt0, vt1, vt2, vt, barycentric); 66 | 67 | barycentric[0] = barycentric[0] / vt0_ptr[3]; 68 | barycentric[1] = barycentric[1] / vt1_ptr[3]; 69 | barycentric[2] = barycentric[2] / vt2_ptr[3]; 70 | float w = 1.0f / (barycentric[0] + barycentric[1] + barycentric[2]); 71 | barycentric[0] *= w; 72 | barycentric[1] *= w; 73 | barycentric[2] *= w; 74 | 75 | } 76 | barycentric_map[pix * 3] = barycentric[0]; 77 | barycentric_map[pix * 3 + 1] = barycentric[1]; 78 | barycentric_map[pix * 3 + 2] = barycentric[2]; 79 | } 80 | 81 | void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces, int f) 82 | { 83 | float* vt0_ptr = V + (F[f * 3] * 4); 84 | float* vt1_ptr = V + (F[f * 3 + 1] * 4); 85 | float* vt2_ptr = V + (F[f * 3 + 2] * 4); 86 | 87 | float vt0[3] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f, vt0_ptr[2] / vt0_ptr[3] * 0.49999f + 0.5f}; 88 | float vt1[3] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f, vt1_ptr[2] / vt1_ptr[3] * 0.49999f + 0.5f}; 89 | float vt2[3] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f, vt2_ptr[2] / vt2_ptr[3] * 0.49999f + 0.5f}; 90 | 91 | rasterizeTriangleCPU(f, vt0, vt1, vt2, width, height, zbuffer, d, occlusion_trunc); 92 | } 93 | 94 | std::vector rasterize_image_cpu(torch::Tensor V, torch::Tensor F, torch::Tensor D, 95 | int width, int height, float occlusion_truncation, int use_depth_prior) 96 | { 97 | int num_faces = F.size(0); 98 | int num_vertices = V.size(0); 99 | auto options = torch::TensorOptions().dtype(torch::kInt32).requires_grad(false); 100 | auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false); 101 | auto findices = torch::zeros({height, width}, options); 102 | INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1); 103 | auto z_min = torch::ones({height, width}, INT64_options) * (int64_t)maxint; 104 | 105 | if (!use_depth_prior) { 106 | for (int i = 0; i < num_faces; ++i) { 107 | rasterizeImagecoordsKernelCPU(V.data_ptr(), F.data_ptr(), 0, 108 | (INT64*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces, i); 109 | } 110 | } else { 111 | for (int i = 0; i < num_faces; ++i) 112 | rasterizeImagecoordsKernelCPU(V.data_ptr(), F.data_ptr(), D.data_ptr(), 113 | (INT64*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces, i); 114 | } 115 | 116 | auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false); 117 | auto barycentric = 
torch::zeros({height, width, 3}, float_options); 118 | for (int i = 0; i < width * height; ++i) 119 | barycentricFromImgcoordCPU(V.data_ptr(), F.data_ptr(), 120 | findices.data_ptr(), (INT64*)z_min.data_ptr(), width, height, num_vertices, num_faces, barycentric.data_ptr(), i); 121 | 122 | return {findices, barycentric}; 123 | } 124 | 125 | std::vector rasterize_image(torch::Tensor V, torch::Tensor F, torch::Tensor D, 126 | int width, int height, float occlusion_truncation, int use_depth_prior) 127 | { 128 | int device_id = V.get_device(); 129 | if (device_id == -1) 130 | return rasterize_image_cpu(V, F, D, width, height, occlusion_truncation, use_depth_prior); 131 | else 132 | return rasterize_image_gpu(V, F, D, width, height, occlusion_truncation, use_depth_prior); 133 | } 134 | 135 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 136 | m.def("rasterize_image", &rasterize_image, "Custom image rasterization"); 137 | m.def("build_hierarchy", &build_hierarchy, "Custom image rasterization"); 138 | m.def("build_hierarchy_with_feat", &build_hierarchy_with_feat, "Custom image rasterization"); 139 | } 140 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/autoencoders/surface_extractors.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from typing import Union, Tuple, List 16 | 17 | import numpy as np 18 | import torch 19 | from skimage import measure 20 | 21 | 22 | class Latent2MeshOutput: 23 | def __init__(self, mesh_v=None, mesh_f=None): 24 | self.mesh_v = mesh_v 25 | self.mesh_f = mesh_f 26 | 27 | 28 | def center_vertices(vertices): 29 | """Translate the vertices so that bounding box is centered at zero.""" 30 | vert_min = vertices.min(dim=0)[0] 31 | vert_max = vertices.max(dim=0)[0] 32 | vert_center = 0.5 * (vert_min + vert_max) 33 | return vertices - vert_center 34 | 35 | 36 | class SurfaceExtractor: 37 | def _compute_box_stat(self, bounds: Union[Tuple[float], List[float], float], octree_resolution: int): 38 | """ 39 | Compute grid size, bounding box minimum coordinates, and bounding box size based on input 40 | bounds and resolution. 41 | 42 | Args: 43 | bounds (Union[Tuple[float], List[float], float]): Bounding box coordinates or a single 44 | float representing half side length. 45 | If float, bounds are assumed symmetric around zero in all axes. 46 | Expected format if list/tuple: [xmin, ymin, zmin, xmax, ymax, zmax]. 47 | octree_resolution (int): Resolution of the octree grid. 
48 | 49 | Returns: 50 | grid_size (List[int]): Grid size along each axis (x, y, z), each equal to octree_resolution + 1. 51 | bbox_min (np.ndarray): Minimum coordinates of the bounding box (xmin, ymin, zmin). 52 | bbox_size (np.ndarray): Size of the bounding box along each axis (xmax - xmin, etc.). 53 | """ 54 | if isinstance(bounds, float): 55 | bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds] 56 | 57 | bbox_min, bbox_max = np.array(bounds[0:3]), np.array(bounds[3:6]) 58 | bbox_size = bbox_max - bbox_min 59 | grid_size = [int(octree_resolution) + 1, int(octree_resolution) + 1, int(octree_resolution) + 1] 60 | return grid_size, bbox_min, bbox_size 61 | 62 | def run(self, *args, **kwargs): 63 | """ 64 | Abstract method to extract surface mesh from grid logits. 65 | 66 | This method should be implemented by subclasses. 67 | 68 | Raises: 69 | NotImplementedError: Always, since this is an abstract method. 70 | """ 71 | return NotImplementedError 72 | 73 | def __call__(self, grid_logits, **kwargs): 74 | """ 75 | Process a batch of grid logits to extract surface meshes. 76 | 77 | Args: 78 | grid_logits (torch.Tensor): Batch of grid logits with shape (batch_size, ...). 79 | **kwargs: Additional keyword arguments passed to the `run` method. 80 | 81 | Returns: 82 | List[Optional[Latent2MeshOutput]]: List of mesh outputs for each grid in the batch. 83 | If extraction fails for a grid, None is appended at that position. 84 | """ 85 | outputs = [] 86 | for i in range(grid_logits.shape[0]): 87 | try: 88 | vertices, faces = self.run(grid_logits[i], **kwargs) 89 | vertices = vertices.astype(np.float32) 90 | faces = np.ascontiguousarray(faces) 91 | outputs.append(Latent2MeshOutput(mesh_v=vertices, mesh_f=faces)) 92 | 93 | except Exception: 94 | import traceback 95 | traceback.print_exc() 96 | outputs.append(None) 97 | 98 | return outputs 99 | 100 | 101 | class MCSurfaceExtractor(SurfaceExtractor): 102 | def run(self, grid_logit, *, mc_level, bounds, octree_resolution, **kwargs): 103 | """ 104 | Extract surface mesh using the Marching Cubes algorithm. 105 | 106 | Args: 107 | grid_logit (torch.Tensor): 3D grid logits tensor representing the scalar field. 108 | mc_level (float): The level (iso-value) at which to extract the surface. 109 | bounds (Union[Tuple[float], List[float], float]): Bounding box coordinates or half side length. 110 | octree_resolution (int): Resolution of the octree grid. 111 | **kwargs: Additional keyword arguments (ignored). 112 | 113 | Returns: 114 | Tuple[np.ndarray, np.ndarray]: Tuple containing: 115 | - vertices (np.ndarray): Extracted mesh vertices, scaled and translated to bounding 116 | box coordinates. 117 | - faces (np.ndarray): Extracted mesh faces (triangles). 118 | """ 119 | vertices, faces, normals, _ = measure.marching_cubes(grid_logit.cpu().numpy(), 120 | mc_level, 121 | method="lewiner") 122 | grid_size, bbox_min, bbox_size = self._compute_box_stat(bounds, octree_resolution) 123 | vertices = vertices / grid_size * bbox_size + bbox_min 124 | return vertices, faces 125 | 126 | 127 | class DMCSurfaceExtractor(SurfaceExtractor): 128 | def run(self, grid_logit, *, octree_resolution, **kwargs): 129 | """ 130 | Extract surface mesh using Differentiable Marching Cubes (DMC) algorithm. 131 | 132 | Args: 133 | grid_logit (torch.Tensor): 3D grid logits tensor representing the scalar field. 134 | octree_resolution (int): Resolution of the octree grid. 135 | **kwargs: Additional keyword arguments (ignored). 
136 | 137 | Returns: 138 | Tuple[np.ndarray, np.ndarray]: Tuple containing: 139 | - vertices (np.ndarray): Extracted mesh vertices, centered and converted to numpy. 140 | - faces (np.ndarray): Extracted mesh faces (triangles), with reversed vertex order. 141 | 142 | Raises: 143 | ImportError: If the 'diso' package is not installed. 144 | """ 145 | device = grid_logit.device 146 | if not hasattr(self, 'dmc'): 147 | try: 148 | from diso import DiffDMC 149 | self.dmc = DiffDMC(dtype=torch.float32).to(device) 150 | except: 151 | raise ImportError("Please install diso via `pip install diso`, or set mc_algo to 'mc'") 152 | sdf = -grid_logit / octree_resolution 153 | sdf = sdf.to(torch.float32).contiguous() 154 | verts, faces = self.dmc(sdf, deform=None, return_quads=False, normalize=True) 155 | verts = center_vertices(verts) 156 | vertices = verts.detach().cpu().numpy() 157 | faces = faces.detach().cpu().numpy()[:, ::-1] 158 | return vertices, faces 159 | 160 | 161 | SurfaceExtractors = { 162 | 'mc': MCSurfaceExtractor, 163 | 'dmc': DMCSurfaceExtractor, 164 | } 165 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/postprocessors.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
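# ---------------------------------------------------------------------------
# Usage sketch (hedged) for the surface extractors defined above in
# surface_extractors.py. The import path follows this repository's layout,
# and the random grid, mc_level, bounds and octree_resolution values are
# illustrative assumptions rather than defaults taken from this code.
#
#   import torch
#   from hy3dshape.models.autoencoders.surface_extractors import SurfaceExtractors
#
#   octree_resolution = 256
#   grid_logits = torch.randn(1, octree_resolution + 1,
#                             octree_resolution + 1, octree_resolution + 1)
#   extractor = SurfaceExtractors['mc']()      # or 'dmc' if the optional diso package is available
#   outputs = extractor(grid_logits,
#                       mc_level=0.0,          # marching-cubes iso-value (assumed)
#                       bounds=1.01,           # half side length of the bounding box (assumed)
#                       octree_resolution=octree_resolution)
#   mesh = outputs[0]                          # Latent2MeshOutput, or None if extraction failed
#   if mesh is not None:
#       print(mesh.mesh_v.shape, mesh.mesh_f.shape)
# ---------------------------------------------------------------------------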
14 | 15 | import os 16 | import tempfile 17 | from typing import Union 18 | 19 | import numpy as np 20 | import pymeshlab 21 | import torch 22 | import trimesh 23 | 24 | from .models.autoencoders import Latent2MeshOutput 25 | from .utils import synchronize_timer 26 | 27 | 28 | def load_mesh(path): 29 | if path.endswith(".glb"): 30 | mesh = trimesh.load(path) 31 | else: 32 | mesh = pymeshlab.MeshSet() 33 | mesh.load_new_mesh(path) 34 | return mesh 35 | 36 | 37 | def reduce_face(mesh: pymeshlab.MeshSet, max_facenum: int = 200000): 38 | if max_facenum > mesh.current_mesh().face_number(): 39 | return mesh 40 | 41 | mesh.apply_filter( 42 | "meshing_decimation_quadric_edge_collapse", 43 | targetfacenum=max_facenum, 44 | qualitythr=1.0, 45 | preserveboundary=True, 46 | boundaryweight=3, 47 | preservenormal=True, 48 | preservetopology=True, 49 | autoclean=True 50 | ) 51 | return mesh 52 | 53 | 54 | def remove_floater(mesh: pymeshlab.MeshSet): 55 | mesh.apply_filter("compute_selection_by_small_disconnected_components_per_face", 56 | nbfaceratio=0.005) 57 | mesh.apply_filter("compute_selection_transfer_face_to_vertex", inclusive=False) 58 | mesh.apply_filter("meshing_remove_selected_vertices_and_faces") 59 | return mesh 60 | 61 | 62 | def pymeshlab2trimesh(mesh: pymeshlab.MeshSet): 63 | with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file: 64 | mesh.save_current_mesh(temp_file.name) 65 | mesh = trimesh.load(temp_file.name) 66 | # 检查加载的对象类型 67 | if isinstance(mesh, trimesh.Scene): 68 | combined_mesh = trimesh.Trimesh() 69 | # 如果是Scene,遍历所有的geometry并合并 70 | for geom in mesh.geometry.values(): 71 | combined_mesh = trimesh.util.concatenate([combined_mesh, geom]) 72 | mesh = combined_mesh 73 | return mesh 74 | 75 | 76 | def trimesh2pymeshlab(mesh: trimesh.Trimesh): 77 | with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file: 78 | if isinstance(mesh, trimesh.scene.Scene): 79 | for idx, obj in enumerate(mesh.geometry.values()): 80 | if idx == 0: 81 | temp_mesh = obj 82 | else: 83 | temp_mesh = temp_mesh + obj 84 | mesh = temp_mesh 85 | mesh.export(temp_file.name) 86 | mesh = pymeshlab.MeshSet() 87 | mesh.load_new_mesh(temp_file.name) 88 | return mesh 89 | 90 | 91 | def export_mesh(input, output): 92 | if isinstance(input, pymeshlab.MeshSet): 93 | mesh = output 94 | elif isinstance(input, Latent2MeshOutput): 95 | output = Latent2MeshOutput() 96 | output.mesh_v = output.current_mesh().vertex_matrix() 97 | output.mesh_f = output.current_mesh().face_matrix() 98 | mesh = output 99 | else: 100 | mesh = pymeshlab2trimesh(output) 101 | return mesh 102 | 103 | 104 | def import_mesh(mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str]) -> pymeshlab.MeshSet: 105 | if isinstance(mesh, str): 106 | mesh = load_mesh(mesh) 107 | elif isinstance(mesh, Latent2MeshOutput): 108 | mesh = pymeshlab.MeshSet() 109 | mesh_pymeshlab = pymeshlab.Mesh(vertex_matrix=mesh.mesh_v, face_matrix=mesh.mesh_f) 110 | mesh.add_mesh(mesh_pymeshlab, "converted_mesh") 111 | 112 | if isinstance(mesh, (trimesh.Trimesh, trimesh.scene.Scene)): 113 | mesh = trimesh2pymeshlab(mesh) 114 | 115 | return mesh 116 | 117 | 118 | class FaceReducer: 119 | @synchronize_timer('FaceReducer') 120 | def __call__( 121 | self, 122 | mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str], 123 | max_facenum: int = 40000 124 | ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh]: 125 | ms = import_mesh(mesh) 126 | ms = reduce_face(ms, max_facenum=max_facenum) 127 | mesh = export_mesh(mesh, ms) 128 | 
return mesh 129 | 130 | 131 | class FloaterRemover: 132 | @synchronize_timer('FloaterRemover') 133 | def __call__( 134 | self, 135 | mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str], 136 | ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput]: 137 | ms = import_mesh(mesh) 138 | ms = remove_floater(ms) 139 | mesh = export_mesh(mesh, ms) 140 | return mesh 141 | 142 | 143 | class DegenerateFaceRemover: 144 | @synchronize_timer('DegenerateFaceRemover') 145 | def __call__( 146 | self, 147 | mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str], 148 | ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput]: 149 | ms = import_mesh(mesh) 150 | 151 | with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file: 152 | ms.save_current_mesh(temp_file.name) 153 | ms = pymeshlab.MeshSet() 154 | ms.load_new_mesh(temp_file.name) 155 | 156 | mesh = export_mesh(mesh, ms) 157 | return mesh 158 | 159 | 160 | def mesh_normalize(mesh): 161 | """ 162 | Normalize mesh vertices to sphere 163 | """ 164 | scale_factor = 1.2 165 | vtx_pos = np.asarray(mesh.vertices) 166 | max_bb = (vtx_pos - 0).max(0)[0] 167 | min_bb = (vtx_pos - 0).min(0)[0] 168 | 169 | center = (max_bb + min_bb) / 2 170 | 171 | scale = torch.norm(torch.tensor(vtx_pos - center, dtype=torch.float32), dim=1).max() * 2.0 172 | 173 | vtx_pos = (vtx_pos - center) * (scale_factor / float(scale)) 174 | mesh.vertices = vtx_pos 175 | 176 | return mesh 177 | 178 | 179 | class MeshSimplifier: 180 | def __init__(self, executable: str = None): 181 | if executable is None: 182 | CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) 183 | executable = os.path.join(CURRENT_DIR, "mesh_simplifier.bin") 184 | self.executable = executable 185 | 186 | @synchronize_timer('MeshSimplifier') 187 | def __call__( 188 | self, 189 | mesh: Union[trimesh.Trimesh], 190 | ) -> Union[trimesh.Trimesh]: 191 | with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as temp_input: 192 | with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as temp_output: 193 | mesh.export(temp_input.name) 194 | os.system(f'{self.executable} {temp_input.name} {temp_output.name}') 195 | ms = trimesh.load(temp_output.name, process=False) 196 | if isinstance(ms, trimesh.Scene): 197 | combined_mesh = trimesh.Trimesh() 198 | for geom in ms.geometry.values(): 199 | combined_mesh = trimesh.util.concatenate([combined_mesh, geom]) 200 | ms = combined_mesh 201 | ms = mesh_normalize(ms) 202 | return ms 203 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/denoisers/moe_layers.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 
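# ---------------------------------------------------------------------------
# Usage sketch (hedged) for the mesh post-processing classes defined above in
# postprocessors.py. The import path follows this repository's layout; the
# file names are placeholders and the face budget is only illustrative.
#
#   import trimesh
#   from hy3dshape.postprocessors import FloaterRemover, DegenerateFaceRemover, FaceReducer
#
#   mesh = trimesh.load("raw_shape.glb", force="mesh")   # placeholder input path
#   mesh = FloaterRemover()(mesh)                  # drop small disconnected components
#   mesh = DegenerateFaceRemover()(mesh)           # round-trip the mesh through pymeshlab
#   mesh = FaceReducer()(mesh, max_facenum=40000)  # quadric edge-collapse decimation
#   mesh.export("clean_shape.glb")                 # placeholder output path
# ---------------------------------------------------------------------------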
8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import torch 16 | import torch.nn as nn 17 | import numpy as np 18 | import math 19 | from timm.models.vision_transformer import PatchEmbed, Attention, Mlp 20 | 21 | import torch.nn.functional as F 22 | from diffusers.models.attention import FeedForward 23 | 24 | class AddAuxiliaryLoss(torch.autograd.Function): 25 | """ 26 | The trick function of adding auxiliary (aux) loss, 27 | which includes the gradient of the aux loss during backpropagation. 28 | """ 29 | @staticmethod 30 | def forward(ctx, x, loss): 31 | assert loss.numel() == 1 32 | ctx.dtype = loss.dtype 33 | ctx.required_aux_loss = loss.requires_grad 34 | return x 35 | 36 | @staticmethod 37 | def backward(ctx, grad_output): 38 | grad_loss = None 39 | if ctx.required_aux_loss: 40 | grad_loss = torch.ones(1, dtype=ctx.dtype, device=grad_output.device) 41 | return grad_output, grad_loss 42 | 43 | class MoEGate(nn.Module): 44 | def __init__(self, embed_dim, num_experts=16, num_experts_per_tok=2, aux_loss_alpha=0.01): 45 | super().__init__() 46 | self.top_k = num_experts_per_tok 47 | self.n_routed_experts = num_experts 48 | 49 | self.scoring_func = 'softmax' 50 | self.alpha = aux_loss_alpha 51 | self.seq_aux = False 52 | 53 | # topk selection algorithm 54 | self.norm_topk_prob = False 55 | self.gating_dim = embed_dim 56 | self.weight = nn.Parameter(torch.empty((self.n_routed_experts, self.gating_dim))) 57 | self.reset_parameters() 58 | 59 | def reset_parameters(self) -> None: 60 | import torch.nn.init as init 61 | init.kaiming_uniform_(self.weight, a=math.sqrt(5)) 62 | 63 | def forward(self, hidden_states): 64 | bsz, seq_len, h = hidden_states.shape 65 | # print(bsz, seq_len, h) 66 | ### compute gating score 67 | hidden_states = hidden_states.view(-1, h) 68 | logits = F.linear(hidden_states, self.weight, None) 69 | if self.scoring_func == 'softmax': 70 | scores = logits.softmax(dim=-1) 71 | else: 72 | raise NotImplementedError(f'insupportable scoring function for MoE gating: {self.scoring_func}') 73 | 74 | ### select top-k experts 75 | topk_weight, topk_idx = torch.topk(scores, k=self.top_k, dim=-1, sorted=False) 76 | 77 | ### norm gate to sum 1 78 | if self.top_k > 1 and self.norm_topk_prob: 79 | denominator = topk_weight.sum(dim=-1, keepdim=True) + 1e-20 80 | topk_weight = topk_weight / denominator 81 | 82 | ### expert-level computation auxiliary loss 83 | if self.training and self.alpha > 0.0: 84 | scores_for_aux = scores 85 | aux_topk = self.top_k 86 | # always compute aux loss based on the naive greedy topk method 87 | topk_idx_for_aux_loss = topk_idx.view(bsz, -1) 88 | if self.seq_aux: 89 | scores_for_seq_aux = scores_for_aux.view(bsz, seq_len, -1) 90 | ce = torch.zeros(bsz, self.n_routed_experts, device=hidden_states.device) 91 | ce.scatter_add_( 92 | 1, 93 | topk_idx_for_aux_loss, 94 | torch.ones( 95 | bsz, seq_len * aux_topk, 96 | device=hidden_states.device 97 | ) 98 | ).div_(seq_len * aux_topk / self.n_routed_experts) 99 | aux_loss = (ce * scores_for_seq_aux.mean(dim = 1)).sum(dim = 1).mean() 100 | aux_loss = aux_loss * self.alpha 101 | else: 102 | mask_ce = 
F.one_hot(topk_idx_for_aux_loss.view(-1), 103 | num_classes=self.n_routed_experts) 104 | ce = mask_ce.float().mean(0) 105 | Pi = scores_for_aux.mean(0) 106 | fi = ce * self.n_routed_experts 107 | aux_loss = (Pi * fi).sum() * self.alpha 108 | else: 109 | aux_loss = None 110 | return topk_idx, topk_weight, aux_loss 111 | 112 | class MoEBlock(nn.Module): 113 | def __init__(self, dim, num_experts=8, moe_top_k=2, 114 | activation_fn = "gelu", dropout=0.0, final_dropout = False, 115 | ff_inner_dim = None, ff_bias = True): 116 | super().__init__() 117 | self.moe_top_k = moe_top_k 118 | self.experts = nn.ModuleList([ 119 | FeedForward(dim,dropout=dropout, 120 | activation_fn=activation_fn, 121 | final_dropout=final_dropout, 122 | inner_dim=ff_inner_dim, 123 | bias=ff_bias) 124 | for i in range(num_experts)]) 125 | self.gate = MoEGate(embed_dim=dim, num_experts=num_experts, num_experts_per_tok=moe_top_k) 126 | 127 | self.shared_experts = FeedForward(dim,dropout=dropout, activation_fn=activation_fn, 128 | final_dropout=final_dropout, inner_dim=ff_inner_dim, 129 | bias=ff_bias) 130 | 131 | def initialize_weight(self): 132 | pass 133 | 134 | def forward(self, hidden_states): 135 | identity = hidden_states 136 | orig_shape = hidden_states.shape 137 | topk_idx, topk_weight, aux_loss = self.gate(hidden_states) 138 | 139 | hidden_states = hidden_states.view(-1, hidden_states.shape[-1]) 140 | flat_topk_idx = topk_idx.view(-1) 141 | if self.training: 142 | hidden_states = hidden_states.repeat_interleave(self.moe_top_k, dim=0) 143 | y = torch.empty_like(hidden_states, dtype=hidden_states.dtype) 144 | for i, expert in enumerate(self.experts): 145 | tmp = expert(hidden_states[flat_topk_idx == i]) 146 | y[flat_topk_idx == i] = tmp.to(hidden_states.dtype) 147 | y = (y.view(*topk_weight.shape, -1) * topk_weight.unsqueeze(-1)).sum(dim=1) 148 | y = y.view(*orig_shape) 149 | y = AddAuxiliaryLoss.apply(y, aux_loss) 150 | else: 151 | y = self.moe_infer(hidden_states, flat_topk_idx, topk_weight.view(-1, 1)).view(*orig_shape) 152 | y = y + self.shared_experts(identity) 153 | return y 154 | 155 | 156 | @torch.no_grad() 157 | def moe_infer(self, x, flat_expert_indices, flat_expert_weights): 158 | expert_cache = torch.zeros_like(x) 159 | idxs = flat_expert_indices.argsort() 160 | tokens_per_expert = flat_expert_indices.bincount().cpu().numpy().cumsum(0) 161 | token_idxs = idxs // self.moe_top_k 162 | for i, end_idx in enumerate(tokens_per_expert): 163 | start_idx = 0 if i == 0 else tokens_per_expert[i-1] 164 | if start_idx == end_idx: 165 | continue 166 | expert = self.experts[i] 167 | exp_token_idx = token_idxs[start_idx:end_idx] 168 | expert_tokens = x[exp_token_idx] 169 | expert_out = expert(expert_tokens) 170 | expert_out.mul_(flat_expert_weights[idxs[start_idx:end_idx]]) 171 | 172 | # for fp16 and other dtype 173 | expert_cache = expert_cache.to(expert_out.dtype) 174 | expert_cache.scatter_reduce_(0, exp_token_idx.view(-1, 1).repeat(1, x.shape[-1]), 175 | expert_out, 176 | reduce='sum') 177 | return expert_cache 178 | --------------------------------------------------------------------------------
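A minimal, hedged sketch of how the MoE layers above are exercised: MoEBlock routes each token to its top-k expert feed-forward networks and adds a shared expert on top. The import path follows this repository's layout, and the dimensions and batch shape below are illustrative assumptions rather than values used by the Hunyuan 3D denoiser.

import torch
from hy3dshape.models.denoisers.moe_layers import MoEBlock

# Illustrative sizes; the real denoiser configures dim / num_experts elsewhere.
block = MoEBlock(dim=1024, num_experts=8, moe_top_k=2)
block.eval()                          # eval mode takes the moe_infer() path

tokens = torch.randn(2, 16, 1024)     # (batch, sequence length, hidden dim)
with torch.no_grad():
    out = block(tokens)               # top-2 routed experts + shared expert
print(out.shape)                      # torch.Size([2, 16, 1024])

# During training, MoEGate additionally returns a load-balancing auxiliary loss
# that AddAuxiliaryLoss splices into the backward pass of the block's output.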