├── .gitignore ├── hy3dpaint ├── DifferentiableRenderer │ ├── __init__.py │ ├── compile_mesh_painter.sh │ ├── dist │ │ ├── mesh_inpaint_processor-0.0.0-cp310-cp310-win_amd64.whl │ │ ├── mesh_inpaint_processor-0.0.0-cp311-cp311-win_amd64.whl │ │ ├── mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl │ │ └── mesh_inpaint_processor-0.0.0-cp311-cp311-linux_x86_64.whl │ ├── setup.py │ └── camera_utils.py ├── custom_rasterizer │ ├── lib │ │ └── custom_rasterizer_kernel │ │ │ ├── __init__.py │ │ │ ├── rasterizer.h │ │ │ ├── rasterizer_gpu.cu │ │ │ └── rasterizer.cpp │ ├── custom_rasterizer │ │ ├── __init__.py │ │ └── render.py │ ├── dist │ │ ├── custom_rasterizer-0.1-cp310-cp310-win_amd64.whl │ │ ├── custom_rasterizer-0.1-cp311-cp311-win_amd64.whl │ │ ├── custom_rasterizer-0.1-cp312-cp312-win_amd64.whl │ │ └── custom_rasterizer-0.1-cp311-cp311-linux_x86_64.whl │ └── setup.py ├── 4.0 │ └── python │ │ └── lib │ │ └── site-packages │ │ └── extern_draco.dll ├── src │ ├── data │ │ ├── dataloader │ │ │ ├── pbr_data_format.txt │ │ │ └── objaverse_loader_forTexturePBR.py │ │ ├── __init__.py │ │ └── objaverse_hunyuan.py │ ├── __init__.py │ └── utils │ │ ├── __init__.py │ │ └── train_util.py ├── utils │ ├── __init__.py │ ├── uvwrap_utils.py │ ├── simplify_mesh_utils.py │ ├── image_super_utils.py │ ├── torchvision_fix.py │ ├── multiview_utils.py │ └── pipeline_utils.py ├── cfgs │ └── hunyuan-paint-pbr.yaml ├── hunyuanpaintpbr │ └── __init__.py ├── demo.py ├── README.md └── convert_utils.py ├── hy3dshape ├── hy3dshape │ ├── utils │ │ ├── trainings │ │ │ ├── __init__.py │ │ │ ├── lr_scheduler.py │ │ │ ├── peft.py │ │ │ └── mesh.py │ │ ├── visualizers │ │ │ ├── __init__.py │ │ │ ├── html_util.py │ │ │ └── color_util.py │ │ ├── __init__.py │ │ ├── ema.py │ │ ├── misc.py │ │ └── utils.py │ ├── models │ │ ├── denoisers │ │ │ ├── __init__.py │ │ │ └── moe_layers.py │ │ ├── autoencoders │ │ │ ├── __init__.py │ │ │ ├── attention_processors.py │ │ │ └── surface_extractors.py │ │ ├── __init__.py │ │ └── diffusion │ │ │ └── transport │ │ │ ├── utils.py │ │ │ ├── __init__.py │ │ │ └── integrators.py │ ├── __init__.py │ ├── rembg.py │ ├── meshlib.py │ ├── data │ │ └── utils.py │ ├── preprocessors.py │ └── postprocessors.py ├── minimal_demo.py ├── minimal_vae_demo.py └── configs │ ├── hunyuan3ddit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-512.yaml │ ├── hunyuan3ddit-full-params-finetuning-flowmatching-dinog518-bf16-lr1e5-512.yaml │ ├── hunyuandit-finetuning-flowmatching-dinog518-bf16-lr1e5-4096.yaml │ ├── hunyuandit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-4096.yaml │ └── hunyuandit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-512.yaml ├── __init__.py ├── requirements.txt ├── configs ├── dit_config.yaml ├── dit_config_mini.yaml └── dit_config_2_1.yaml ├── workflow_examples └── Batch_Generator.json └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/trainings/__init__.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/visualizers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/custom_rasterizer/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | from .render import rasterize, interpolate 3 | """ 4 | from .render import * 5 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS 2 | 3 | __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"] -------------------------------------------------------------------------------- /hy3dpaint/4.0/python/lib/site-packages/extern_draco.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/4.0/python/lib/site-packages/extern_draco.dll -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/compile_mesh_painter.sh: -------------------------------------------------------------------------------- 1 | c++ -O3 -Wall -shared -std=c++11 -fPIC `python -m pybind11 --includes` mesh_inpaint_processor.cpp -o mesh_inpaint_processor`python3-config --extension-suffix` -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp310-cp310-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp310-cp310-win_amd64.whl -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp311-cp311-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp311-cp311-win_amd64.whl -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp312-cp312-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp312-cp312-win_amd64.whl -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp311-cp311-linux_x86_64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp311-cp311-linux_x86_64.whl -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 
-*- 2 | 3 | from .misc import get_config_from_file 4 | from .misc import instantiate_from_config 5 | from .utils import get_logger, logger, synchronize_timer, smart_load_model 6 | -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp310-cp310-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp310-cp310-win_amd64.whl -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp311-cp311-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp311-cp311-win_amd64.whl -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp311-cp311-linux_x86_64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp311-cp311-linux_x86_64.whl -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | trimesh 2 | pymeshlab 3 | pygltflib 4 | xatlas 5 | open3d 6 | omegaconf 7 | pyyaml 8 | configargparse 9 | transformers 10 | diffusers 11 | accelerate 12 | pytorch-lightning 13 | opencv-python 14 | huggingface-hub 15 | safetensors 16 | scikit-image 17 | pybind11 18 | timm 19 | 20 | meshlib 21 | -------------------------------------------------------------------------------- /hy3dpaint/src/data/dataloader/pbr_data_format.txt: -------------------------------------------------------------------------------- 1 | +-----------------+----------------------------------+ 2 | | Key | Value | 3 | +-----------------+----------------------------------+ 4 | | images_cond | torch.Size([2, 2, 3, 512, 512]) | 5 | | images_albedo | torch.Size([2, 6, 3, 512, 512]) | 6 | | images_mr | torch.Size([2, 6, 3, 512, 512]) | 7 | | images_normal | torch.Size([2, 6, 3, 512, 512]) | 8 | | images_position | torch.Size([2, 6, 3, 512, 512]) | 9 | | caption | ['high quality', 'high quality'] | 10 | +-----------------+----------------------------------+ -------------------------------------------------------------------------------- /hy3dpaint/src/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | -------------------------------------------------------------------------------- /hy3dpaint/src/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | -------------------------------------------------------------------------------- /hy3dpaint/src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | -------------------------------------------------------------------------------- /hy3dpaint/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/denoisers/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from .hunyuan3ddit import Hunyuan3DDiT 16 | -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from setuptools.command.build_ext import build_ext 3 | import sys 4 | import os 5 | import pybind11 6 | class BuildExt(build_ext): 7 | def build_extensions(self): 8 | if sys.platform == 'win32': 9 | # Windows-specific compiler flags 10 | for ext in self.extensions: 11 | ext.extra_compile_args = ['/O2', '/Wall'] 12 | else: 13 | # Linux/Mac flags 14 | for ext in self.extensions: 15 | ext.extra_compile_args = ['-O3', '-Wall', '-fPIC'] 16 | build_ext.build_extensions(self) 17 | 18 | setup( 19 | name="mesh_inpaint_processor", 20 | ext_modules=[ 21 | Extension( 22 | "mesh_inpaint_processor", 23 | ["mesh_inpaint_processor.cpp"], 24 | include_dirs=[ 25 | pybind11.get_include(), 26 | pybind11.get_include(user=True) 27 | ], 28 | language='c++' 29 | ), 30 | ], 31 | cmdclass={'build_ext': BuildExt}, 32 | ) -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from .pipelines import Hunyuan3DDiTPipeline, Hunyuan3DDiTFlowMatchingPipeline 16 | from .postprocessors import FaceReducer, FloaterRemover, DegenerateFaceRemover, MeshSimplifier 17 | from .preprocessors import ImageProcessorV2, IMAGE_PROCESSORS, DEFAULT_IMAGEPROCESSOR 18 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/rembg.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from PIL import Image 16 | from rembg import remove, new_session 17 | 18 | 19 | class BackgroundRemover(): 20 | def __init__(self): 21 | self.session = new_session() 22 | 23 | def __call__(self, image: Image.Image): 24 | output = remove(image, session=self.session, bgcolor=[255, 255, 255, 0]) 25 | return output 26 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/autoencoders/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 
8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from .attention_blocks import CrossAttentionDecoder 16 | from .attention_processors import FlashVDMCrossAttentionProcessor, CrossAttentionProcessor, \ 17 | FlashVDMTopMCrossAttentionProcessor 18 | from .model import ShapeVAE, VectsetVAE 19 | from .surface_extractors import SurfaceExtractors, MCSurfaceExtractor, DMCSurfaceExtractor, Latent2MeshOutput 20 | from .volume_decoders import HierarchicalVolumeDecoding, FlashVDMVolumeDecoding, VanillaVolumeDecoder 21 | -------------------------------------------------------------------------------- /hy3dpaint/cfgs/hunyuan-paint-pbr.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 5.0e-05 3 | target: hunyuanpaintpbr.model.HunyuanPaint 4 | params: 5 | num_view: 6 6 | view_size: 512 7 | drop_cond_prob: 0.1 8 | 9 | noise_in_channels: 12 10 | 11 | stable_diffusion_config: 12 | pretrained_model_name_or_path: stabilityai/stable-diffusion-2-1 13 | custom_pipeline: ./hunyuanpaintpbr 14 | 15 | 16 | data: 17 | target: src.data.objaverse_hunyuan.DataModuleFromConfig 18 | params: 19 | batch_size: 1 20 | num_workers: 4 21 | train: 22 | - 23 | target: src.data.dataloader.objaverse_loader_forTexturePBR.TextureDataset 24 | params: 25 | num_view: 6 26 | json_path: train_examples/examples.json 27 | validation: 28 | - 29 | target: src.data.dataloader.objaverse_loader_forTexturePBR.TextureDataset 30 | params: 31 | num_view: 6 32 | json_path: train_examples/examples.json 33 | 34 | lightning: 35 | modelcheckpoint: 36 | params: 37 | every_n_train_steps: 10000 38 | save_top_k: -1 39 | save_last: true 40 | callbacks: {} 41 | 42 | trainer: 43 | benchmark: true 44 | max_epochs: -1 45 | gradient_clip_val: 1.0 46 | val_check_interval: 1000 47 | num_sanity_val_steps: 0 48 | accumulate_grad_batches: 1 49 | check_val_every_n_epoch: null # if not set this, validation does not run 50 | 51 | init_control_from: 52 | resume_from: 53 | -------------------------------------------------------------------------------- /hy3dpaint/utils/uvwrap_utils.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 
8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import trimesh 16 | import xatlas 17 | 18 | 19 | def mesh_uv_wrap(mesh): 20 | if isinstance(mesh, trimesh.Scene): 21 | mesh = mesh.dump(concatenate=True) 22 | 23 | if len(mesh.faces) > 500000000: 24 | raise ValueError("The mesh has more than 500,000,000 faces, which is not supported.") 25 | 26 | vmapping, indices, uvs = xatlas.parametrize(mesh.vertices, mesh.faces) 27 | 28 | mesh.vertices = mesh.vertices[vmapping] 29 | mesh.faces = indices 30 | mesh.visual.uv = uvs 31 | 32 | return mesh 33 | -------------------------------------------------------------------------------- /hy3dshape/minimal_demo.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from PIL import Image 16 | 17 | from hy3dshape.rembg import BackgroundRemover 18 | from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline 19 | 20 | model_path = 'tencent/Hunyuan3D-2.1' 21 | pipeline_shapegen = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(model_path) 22 | 23 | image_path = 'demos/demo.png' 24 | image = Image.open(image_path).convert("RGBA") 25 | if image.mode == 'RGB': 26 | rembg = BackgroundRemover() 27 | image = rembg(image) 28 | 29 | mesh = pipeline_shapegen(image=image)[0] 30 | mesh.export('demo.glb') 31 | -------------------------------------------------------------------------------- /hy3dpaint/hunyuanpaintpbr/__init__.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 
8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from .pipeline import HunyuanPaintPipeline 16 | from .unet.model import HunyuanPaint 17 | from .unet.modules import ( 18 | Dino_v2, 19 | Basic2p5DTransformerBlock, 20 | ImageProjModel, 21 | UNet2p5DConditionModel, 22 | ) 23 | from .unet.attn_processor import ( 24 | PoseRoPEAttnProcessor2_0, 25 | SelfAttnProcessor2_0, 26 | RefAttnProcessor2_0, 27 | ) 28 | 29 | __all__ = [ 30 | 'HunyuanPaintPipeline', 31 | 'HunyuanPaint', 32 | 'Dino_v2', 33 | 'Basic2p5DTransformerBlock', 34 | 'ImageProjModel', 35 | 'UNet2p5DConditionModel', 36 | 'PoseRoPEAttnProcessor2_0', 37 | 'SelfAttnProcessor2_0', 38 | 'RefAttnProcessor2_0', 39 | ] 40 | -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/custom_rasterizer/render.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import custom_rasterizer_kernel 16 | import torch 17 | 18 | 19 | def rasterize(pos, tri, resolution, clamp_depth=torch.zeros(0), use_depth_prior=0): 20 | assert pos.device == tri.device 21 | findices, barycentric = custom_rasterizer_kernel.rasterize_image( 22 | pos[0], tri, clamp_depth, resolution[1], resolution[0], 1e-6, use_depth_prior 23 | ) 24 | return findices, barycentric 25 | 26 | 27 | def interpolate(col, findices, barycentric, tri): 28 | f = findices - 1 + (findices == 0) 29 | vcol = col[0, tri.long()[f.long()]] 30 | result = barycentric.view(*barycentric.shape, 1) * vcol 31 | result = torch.sum(result, axis=-2) 32 | return result.view(1, *result.shape) 33 | -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/setup.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 
5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from setuptools import setup, find_packages 16 | import torch 17 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension 18 | 19 | # build custom rasterizer 20 | 21 | custom_rasterizer_module = CUDAExtension( 22 | "custom_rasterizer_kernel", 23 | [ 24 | "lib/custom_rasterizer_kernel/rasterizer.cpp", 25 | "lib/custom_rasterizer_kernel/grid_neighbor.cpp", 26 | "lib/custom_rasterizer_kernel/rasterizer_gpu.cu", 27 | ], 28 | ) 29 | 30 | setup( 31 | packages=find_packages(), 32 | version="0.1", 33 | name="custom_rasterizer", 34 | include_package_data=True, 35 | package_dir={"": "."}, 36 | ext_modules=[ 37 | custom_rasterizer_module, 38 | ], 39 | cmdclass={"build_ext": BuildExtension}, 40 | ) 41 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Open Source Model Licensed under the Apache License Version 2.0 2 | # and Other Licenses of the Third-Party Components therein: 3 | # The below Model in this distribution may have been modified by THL A29 Limited 4 | # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. 5 | 6 | # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. 7 | # The below software and/or models in this distribution may have been 8 | # modified by THL A29 Limited ("Tencent Modifications"). 9 | # All Tencent Modifications are Copyright (C) THL A29 Limited. 10 | 11 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 12 | # except for the third-party components listed below. 13 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 14 | # in the repsective licenses of these third-party components. 15 | # Users must comply with all terms and conditions of original licenses of these third-party 16 | # components and must ensure that the usage of the third party components adheres to 17 | # all relevant laws and regulations. 18 | 19 | # For avoidance of doubts, Hunyuan 3D means the large language models and 20 | # their software and algorithms, including trained model weights, parameters (including 21 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 22 | # fine-tuning enabling code and other elements of the foregoing made publicly available 23 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
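# Re-exports the core model components used by the shape pipeline: the ShapeVAE
# autoencoder, the DINO/CLIP image conditioners, and the Hunyuan3DDiT denoiser.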
24 | 25 | 26 | from .autoencoders import ShapeVAE 27 | from .conditioner import DualImageEncoder, SingleImageEncoder, DinoImageEncoder, CLIPImageEncoder 28 | from .denoisers import Hunyuan3DDiT 29 | -------------------------------------------------------------------------------- /hy3dpaint/demo.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from textureGenPipeline import Hunyuan3DPaintPipeline, Hunyuan3DPaintConfig 16 | 17 | try: 18 | from utils.torchvision_fix import apply_fix 19 | 20 | apply_fix() 21 | except ImportError: 22 | print("Warning: torchvision_fix module not found, proceeding without compatibility fix") 23 | except Exception as e: 24 | print(f"Warning: Failed to apply torchvision fix: {e}") 25 | 26 | 27 | if __name__ == "__main__": 28 | 29 | max_num_view = 6 # can be 6 to 9 30 | resolution = 768 # can be 768 or 512 31 | 32 | conf = Hunyuan3DPaintConfig(max_num_view, resolution) 33 | paint_pipeline = Hunyuan3DPaintPipeline(conf) 34 | output_mesh_path = paint_pipeline(mesh_path="./assets/FireElementalMonster.obj", image_path="./assets/FireElementalMonster.png") 35 | print(f"Output mesh path: {output_mesh_path}") 36 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/meshlib.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
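# Mesh post-processing via meshlib: build an mrmesh from numpy faces/vertices,
# pack it optimally, decimate it with the supplied settings, and return the
# result as a trimesh.Trimesh.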
14 | 15 | import numpy as np 16 | import meshlib.mrmeshnumpy as mrmeshnumpy 17 | import meshlib.mrmeshpy as mrmeshpy 18 | import trimesh 19 | 20 | def postprocessmesh(vertices: np.array, faces: np.array, settings): 21 | print('Generating Meshlib Mesh ...') 22 | mesh = mrmeshnumpy.meshFromFacesVerts(faces, vertices) 23 | print('Packing Optimally ...') 24 | mesh.packOptimally() 25 | print('Decimating ...') 26 | mrmeshpy.decimateMesh(mesh, settings) 27 | 28 | out_verts = mrmeshnumpy.getNumpyVerts(mesh) 29 | out_faces = mrmeshnumpy.getNumpyFaces(mesh.topology) 30 | 31 | mesh = trimesh.Trimesh(vertices=out_verts, faces=out_faces) 32 | print(f"Reduced faces, resulting in {mesh.vertices.shape[0]} vertices and {mesh.faces.shape[0]} faces") 33 | 34 | return mesh 35 | 36 | 37 | -------------------------------------------------------------------------------- /hy3dpaint/utils/simplify_mesh_utils.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import trimesh 16 | import pymeshlab 17 | 18 | 19 | def remesh_mesh(mesh_path, remesh_path): 20 | mesh = mesh_simplify_trimesh(mesh_path, remesh_path) 21 | 22 | 23 | def mesh_simplify_trimesh(inputpath, outputpath, target_count=50000): 24 | # 先去除离散面 25 | ms = pymeshlab.MeshSet() 26 | if inputpath.endswith(".glb"): 27 | ms.load_new_mesh(inputpath, load_in_a_single_layer=True) 28 | else: 29 | ms.load_new_mesh(inputpath) 30 | ms.save_current_mesh(outputpath.replace(".glb", ".obj"), save_textures=False) 31 | # 调用减面函数 32 | courent = trimesh.load(outputpath.replace(".glb", ".obj"), force="mesh") 33 | face_num = courent.faces.shape[0] 34 | 35 | if face_num > target_count: 36 | courent = courent.simplify_quadric_decimation(target_count) 37 | courent.export(outputpath) 38 | -------------------------------------------------------------------------------- /hy3dpaint/utils/image_super_utils.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 
8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import numpy as np 16 | from PIL import Image 17 | 18 | 19 | class imageSuperNet: 20 | def __init__(self, config) -> None: 21 | from realesrgan import RealESRGANer 22 | from basicsr.archs.rrdbnet_arch import RRDBNet 23 | 24 | model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4) 25 | upsampler = RealESRGANer( 26 | scale=4, 27 | model_path=config.realesrgan_ckpt_path, 28 | dni_weight=None, 29 | model=model, 30 | tile=0, 31 | tile_pad=10, 32 | pre_pad=0, 33 | half=True, 34 | gpu_id=None, 35 | ) 36 | self.upsampler = upsampler 37 | 38 | def __call__(self, image): 39 | output, _ = self.upsampler.enhance(np.array(image)) 40 | output = Image.fromarray(output) 41 | return output 42 | -------------------------------------------------------------------------------- /configs/dit_config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | target: .hy3dgen.shapegen.models.Hunyuan3DDiT 3 | params: 4 | in_channels: 64 5 | context_in_dim: 1536 6 | hidden_size: 1024 7 | mlp_ratio: 4.0 8 | num_heads: 16 9 | depth: 16 10 | depth_single_blocks: 32 11 | axes_dim: [ 64 ] 12 | theta: 10000 13 | qkv_bias: True 14 | guidance_embed: False 15 | 16 | vae: 17 | target: .hy3dgen.shapegen.models.ShapeVAE 18 | params: 19 | num_latents: 3072 20 | embed_dim: 64 21 | num_freqs: 8 22 | include_pi: false 23 | heads: 16 24 | width: 1024 25 | num_decoder_layers: 16 26 | qkv_bias: false 27 | qk_norm: true 28 | scale_factor: 0.9990943042622529 29 | 30 | conditioner: 31 | target: .hy3dgen.shapegen.models.SingleImageEncoder 32 | params: 33 | main_image_encoder: 34 | type: DinoImageEncoder # dino giant 35 | kwargs: 36 | config: 37 | attention_probs_dropout_prob: 0.0 38 | drop_path_rate: 0.0 39 | hidden_act: gelu 40 | hidden_dropout_prob: 0.0 41 | hidden_size: 1536 42 | image_size: 518 43 | initializer_range: 0.02 44 | layer_norm_eps: 1.e-6 45 | layerscale_value: 1.0 46 | mlp_ratio: 4 47 | model_type: dinov2 48 | num_attention_heads: 24 49 | num_channels: 3 50 | num_hidden_layers: 40 51 | patch_size: 14 52 | qkv_bias: true 53 | torch_dtype: float32 54 | use_swiglu_ffn: true 55 | image_size: 518 56 | 57 | scheduler: 58 | target: .hy3dgen.shapegen.schedulers.FlowMatchEulerDiscreteScheduler 59 | params: 60 | num_train_timesteps: 1000 61 | 62 | image_processor: 63 | target: .hy3dgen.shapegen.preprocessors.ImageProcessorV2 64 | params: 65 | size: 512 66 | border_ratio: 0.15 67 | 68 | pipeline: 69 | target: .hy3dgen.shapegen.pipelines.Hunyuan3DDiTFlowMatchingPipeline 70 | -------------------------------------------------------------------------------- /configs/dit_config_mini.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | target: .hy3dgen.shapegen.models.Hunyuan3DDiT 3 | params: 4 | in_channels: 64 5 | context_in_dim: 1536 6 | hidden_size: 1024 7 | mlp_ratio: 4.0 8 | num_heads: 16 9 | depth: 8 10 | depth_single_blocks: 16 11 | axes_dim: [ 64 ] 12 | theta: 10000 13 | qkv_bias: True 14 | guidance_embed: False 15 | 16 | vae: 17 | target: 
.hy3dgen.shapegen.models.ShapeVAE 18 | params: 19 | num_latents: 512 20 | embed_dim: 64 21 | num_freqs: 8 22 | include_pi: false 23 | heads: 16 24 | width: 1024 25 | num_decoder_layers: 16 26 | qkv_bias: false 27 | qk_norm: true 28 | scale_factor: 1.0188137142395404 29 | 30 | conditioner: 31 | target: .hy3dgen.shapegen.models.SingleImageEncoder 32 | params: 33 | main_image_encoder: 34 | type: DinoImageEncoder # dino giant 35 | kwargs: 36 | config: 37 | attention_probs_dropout_prob: 0.0 38 | drop_path_rate: 0.0 39 | hidden_act: gelu 40 | hidden_dropout_prob: 0.0 41 | hidden_size: 1536 42 | image_size: 518 43 | initializer_range: 0.02 44 | layer_norm_eps: 1.e-6 45 | layerscale_value: 1.0 46 | mlp_ratio: 4 47 | model_type: dinov2 48 | num_attention_heads: 24 49 | num_channels: 3 50 | num_hidden_layers: 40 51 | patch_size: 14 52 | qkv_bias: true 53 | torch_dtype: float32 54 | use_swiglu_ffn: true 55 | image_size: 518 56 | 57 | scheduler: 58 | target: .hy3dgen.shapegen.schedulers.FlowMatchEulerDiscreteScheduler 59 | params: 60 | num_train_timesteps: 1000 61 | 62 | image_processor: 63 | target: .hy3dgen.shapegen.preprocessors.ImageProcessorV2 64 | params: 65 | size: 512 66 | border_ratio: 0.15 67 | 68 | pipeline: 69 | target: .hy3dgen.shapegen.pipelines.Hunyuan3DDiTFlowMatchingPipeline 70 | -------------------------------------------------------------------------------- /hy3dshape/minimal_vae_demo.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
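# Minimal VAE round-trip demo: sample a surface from demos/demo.glb with
# SharpEdgeSurfaceLoader, encode it with the pretrained ShapeVAE, decode the
# latents and extract a mesh with latents2mesh (marching cubes), then export
# the result to output.obj.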
14 | 15 | import torch 16 | 17 | from hy3dshape.surface_loaders import SharpEdgeSurfaceLoader 18 | from hy3dshape.models.autoencoders import ShapeVAE 19 | from hy3dshape.pipelines import export_to_trimesh 20 | 21 | 22 | vae = ShapeVAE.from_pretrained( 23 | 'tencent/Hunyuan3D-2.1', 24 | use_safetensors=False, 25 | variant='fp16', 26 | ) 27 | 28 | 29 | loader = SharpEdgeSurfaceLoader( 30 | num_sharp_points=0, 31 | num_uniform_points=81920, 32 | ) 33 | mesh_demo = 'demos/demo.glb' 34 | surface = loader(mesh_demo).to('cuda', dtype=torch.float16) 35 | print(surface.shape) 36 | 37 | latents = vae.encode(surface) 38 | latents = vae.decode(latents) 39 | mesh = vae.latents2mesh( 40 | latents, 41 | output_type='trimesh', 42 | bounds=1.01, 43 | mc_level=0.0, 44 | num_chunks=20000, 45 | octree_resolution=256, 46 | mc_algo='mc', 47 | enable_pbar=True 48 | ) 49 | 50 | mesh = export_to_trimesh(mesh)[0] 51 | mesh.export('output.obj') 52 | -------------------------------------------------------------------------------- /hy3dpaint/src/utils/train_util.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import importlib 16 | 17 | 18 | def count_params(model, verbose=False): 19 | total_params = sum(p.numel() for p in model.parameters()) 20 | if verbose: 21 | print(f"{model.__class__.__name__} has {total_params*1.e-6:.2f} M params.") 22 | return total_params 23 | 24 | 25 | def instantiate_from_config(config): 26 | if not "target" in config: 27 | if config == "__is_first_stage__": 28 | return None 29 | elif config == "__is_unconditional__": 30 | return None 31 | raise KeyError("Expected key `target` to instantiate.") 32 | return get_obj_from_str(config["target"])(**config.get("params", dict())) 33 | 34 | 35 | def get_obj_from_str(string, reload=False): 36 | module, cls = string.rsplit(".", 1) 37 | if reload: 38 | module_imp = importlib.import_module(module) 39 | importlib.reload(module_imp) 40 | return getattr(importlib.import_module(module, package=None), cls) 41 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/visualizers/html_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 4 | # except for the third-party components listed below. 5 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 6 | # in the repsective licenses of these third-party components. 
7 | # Users must comply with all terms and conditions of original licenses of these third-party 8 | # components and must ensure that the usage of the third party components adheres to 9 | # all relevant laws and regulations. 10 | 11 | # For avoidance of doubts, Hunyuan 3D means the large language models and 12 | # their software and algorithms, including trained model weights, parameters (including 13 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 14 | # fine-tuning enabling code and other elements of the foregoing made publicly available 15 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 16 | 17 | import io 18 | import base64 19 | import numpy as np 20 | from PIL import Image 21 | 22 | 23 | def to_html_frame(content): 24 | 25 | html_frame = f""" 26 | 27 | 28 | {content} 29 | 30 | 31 | """ 32 | 33 | return html_frame 34 | 35 | 36 | def to_single_row_table(caption: str, content: str): 37 | 38 | table_html = f""" 39 | 40 | 41 | 42 | 43 | 44 |
<caption>{caption}</caption>
<tr><td>{content}</td></tr>
45 | """ 46 | 47 | return table_html 48 | 49 | 50 | def to_image_embed_tag(image: np.ndarray): 51 | 52 | # Convert np.ndarray to bytes 53 | img = Image.fromarray(image) 54 | raw_bytes = io.BytesIO() 55 | img.save(raw_bytes, "PNG") 56 | 57 | # Encode bytes to base64 58 | image_base64 = base64.b64encode(raw_bytes.getvalue()).decode("utf-8") 59 | 60 | image_tag = f""" 61 | Embedded Image 62 | """ 63 | 64 | return image_tag 65 | -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h: -------------------------------------------------------------------------------- 1 | #ifndef RASTERIZER_H_ 2 | #define RASTERIZER_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include // For CUDA context 8 | #include 9 | #define INT64 uint64_t 10 | #define MAXINT 2147483647 11 | 12 | __host__ __device__ inline float calculateSignedArea2(float* a, float* b, float* c) { 13 | return ((c[0] - a[0]) * (b[1] - a[1]) - (b[0] - a[0]) * (c[1] - a[1])); 14 | } 15 | 16 | __host__ __device__ inline void calculateBarycentricCoordinate(float* a, float* b, float* c, float* p, 17 | float* barycentric) 18 | { 19 | float beta_tri = calculateSignedArea2(a, p, c); 20 | float gamma_tri = calculateSignedArea2(a, b, p); 21 | float area = calculateSignedArea2(a, b, c); 22 | if (area == 0) { 23 | barycentric[0] = -1.0; 24 | barycentric[1] = -1.0; 25 | barycentric[2] = -1.0; 26 | return; 27 | } 28 | float tri_inv = 1.0 / area; 29 | float beta = beta_tri * tri_inv; 30 | float gamma = gamma_tri * tri_inv; 31 | float alpha = 1.0 - beta - gamma; 32 | barycentric[0] = alpha; 33 | barycentric[1] = beta; 34 | barycentric[2] = gamma; 35 | } 36 | 37 | __host__ __device__ inline bool isBarycentricCoordInBounds(float* barycentricCoord) { 38 | return barycentricCoord[0] >= 0.0 && barycentricCoord[0] <= 1.0 && 39 | barycentricCoord[1] >= 0.0 && barycentricCoord[1] <= 1.0 && 40 | barycentricCoord[2] >= 0.0 && barycentricCoord[2] <= 1.0; 41 | } 42 | 43 | std::vector rasterize_image_gpu(torch::Tensor V, torch::Tensor F, torch::Tensor D, 44 | int width, int height, float occlusion_truncation, int use_depth_prior); 45 | 46 | std::vector> build_hierarchy(std::vector view_layer_positions, std::vector view_layer_normals, int num_level, int resolution); 47 | 48 | std::vector> build_hierarchy_with_feat( 49 | std::vector view_layer_positions, 50 | std::vector view_layer_normals, 51 | std::vector view_layer_feats, 52 | int num_level, int resolution); 53 | 54 | #endif -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/diffusion/transport/utils.py: -------------------------------------------------------------------------------- 1 | # This file includes code derived from the SiT project (https://github.com/willisma/SiT), 2 | # which is licensed under the MIT License. 3 | # 4 | # MIT License 5 | # 6 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | import torch as th 27 | 28 | class EasyDict: 29 | 30 | def __init__(self, sub_dict): 31 | for k, v in sub_dict.items(): 32 | setattr(self, k, v) 33 | 34 | def __getitem__(self, key): 35 | return getattr(self, key) 36 | 37 | def mean_flat(x): 38 | """ 39 | Take the mean over all non-batch dimensions. 40 | """ 41 | return th.mean(x, dim=list(range(1, len(x.size())))) 42 | 43 | def log_state(state): 44 | result = [] 45 | 46 | sorted_state = dict(sorted(state.items())) 47 | for key, value in sorted_state.items(): 48 | # Check if the value is an instance of a class 49 | if " 0: 39 | if n % self.verbosity_interval == 0: 40 | print(f"current step: {n}, recent lr-multiplier: {self.f_start}") 41 | if n < self.lr_warm_up_steps: 42 | f = (self.f_max - self.f_start) / self.lr_warm_up_steps * n + self.f_start 43 | self.last_f = f 44 | return f 45 | else: 46 | t = (n - self.lr_warm_up_steps) / (self.lr_max_decay_steps - self.lr_warm_up_steps) 47 | t = min(t, 1.0) 48 | f = self.f_min + 0.5 * (self.f_max - self.f_min) * (1 + np.cos(t * np.pi)) 49 | self.last_f = f 50 | return f 51 | 52 | def __call__(self, n, **kwargs): 53 | return self.schedule(n, **kwargs) 54 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_updates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_updates 14 | else torch.tensor(-1, dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | # remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.', '_____') 20 | self.m_name2s_name.update({name: s_name}) 21 | self.register_buffer(s_name, p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def forward(self, model): 26 | decay = self.decay 27 | 28 | if self.num_updates >= 0: 29 | self.num_updates += 1 30 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 31 | 32 | 
one_minus_decay = 1.0 - decay 33 | 34 | with torch.no_grad(): 35 | m_param = dict(model.named_parameters()) 36 | shadow_params = dict(self.named_buffers()) 37 | 38 | for key in m_param: 39 | if m_param[key].requires_grad: 40 | sname = self.m_name2s_name[key] 41 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 42 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 43 | else: 44 | assert not key in self.m_name2s_name 45 | 46 | def copy_to(self, model): 47 | m_param = dict(model.named_parameters()) 48 | shadow_params = dict(self.named_buffers()) 49 | for key in m_param: 50 | if m_param[key].requires_grad: 51 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 52 | else: 53 | assert not key in self.m_name2s_name 54 | 55 | def store(self, model): 56 | """ 57 | Save the current parameters for restoring later. 58 | Args: 59 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 60 | temporarily stored. 61 | """ 62 | self.collected_params = [param.clone() for param in model.parameters()] 63 | 64 | def restore(self, model): 65 | """ 66 | Restore the parameters stored with the `store` method. 67 | Useful to validate the model with EMA parameters without affecting the 68 | original optimization process. Store the parameters before the 69 | `copy_to` method. After validation (or model saving), use this to 70 | restore the former parameters. 71 | Args: 72 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 73 | updated with the stored parameters. 74 | """ 75 | for c_param, param in zip(self.collected_params, model.parameters()): 76 | param.data.copy_(c_param.data) 77 | -------------------------------------------------------------------------------- /hy3dpaint/src/data/objaverse_hunyuan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 4 | # except for the third-party components listed below. 5 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 6 | # in the repsective licenses of these third-party components. 7 | # Users must comply with all terms and conditions of original licenses of these third-party 8 | # components and must ensure that the usage of the third party components adheres to 9 | # all relevant laws and regulations. 10 | 11 | # For avoidance of doubts, Hunyuan 3D means the large language models and 12 | # their software and algorithms, including trained model weights, parameters (including 13 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 14 | # fine-tuning enabling code and other elements of the foregoing made publicly available 15 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
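# Hedged usage sketch (illustrative only, not called anywhere in this repository):
# how the DataModuleFromConfig defined below is typically configured. The dataset
# target path inside `loader_cfg` is a hypothetical placeholder.
def _example_build_datamodule(loader_cfg=None):
    loader_cfg = loader_cfg or {
        "target": "src.data.dataloader.objaverse_loader_forTexturePBR.TextureDataset",  # hypothetical target
        "params": {"root": "train_examples"},
    }
    dm = DataModuleFromConfig(batch_size=8, num_workers=4, train=[loader_cfg], validation=[loader_cfg])
    dm.setup("fit")  # instantiates every config listed under train/validation
    # train_dataloader() wraps the datasets in ConcatDataset + DistributedSampler,
    # so torch.distributed must be initialised before it is called.
    return dm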
16 | 17 | import pytorch_lightning as pl 18 | from torch.utils.data import Dataset, ConcatDataset, DataLoader 19 | from torch.utils.data.distributed import DistributedSampler 20 | 21 | 22 | class DataModuleFromConfig(pl.LightningDataModule): 23 | def __init__( 24 | self, 25 | batch_size=8, 26 | num_workers=4, 27 | train=None, 28 | validation=None, 29 | test=None, 30 | **kwargs, 31 | ): 32 | super().__init__() 33 | 34 | self.batch_size = batch_size 35 | self.num_workers = num_workers 36 | 37 | self.dataset_configs = dict() 38 | if train is not None: 39 | self.dataset_configs["train"] = train 40 | if validation is not None: 41 | self.dataset_configs["validation"] = validation 42 | if test is not None: 43 | self.dataset_configs["test"] = test 44 | 45 | def setup(self, stage): 46 | from src.utils.train_util import instantiate_from_config 47 | 48 | if stage in ["fit"]: 49 | dataset_dict = {} 50 | for k in self.dataset_configs: 51 | dataset_dict[k] = [] 52 | for loader in self.dataset_configs[k]: 53 | dataset_dict[k].append(instantiate_from_config(loader)) 54 | self.datasets = dataset_dict 55 | print(self.datasets) 56 | else: 57 | raise NotImplementedError 58 | 59 | def train_dataloader(self): 60 | datasets = ConcatDataset(self.datasets["train"]) 61 | sampler = DistributedSampler(datasets) 62 | return DataLoader( 63 | datasets, 64 | batch_size=self.batch_size, 65 | num_workers=self.num_workers, 66 | shuffle=False, 67 | sampler=sampler, 68 | prefetch_factor=2, 69 | pin_memory=True, 70 | ) 71 | 72 | def val_dataloader(self): 73 | datasets = ConcatDataset(self.datasets["validation"]) 74 | sampler = DistributedSampler(datasets) 75 | return DataLoader(datasets, batch_size=4, num_workers=self.num_workers, shuffle=False, sampler=sampler) 76 | 77 | def test_dataloader(self): 78 | datasets = ConcatDataset(self.datasets["test"]) 79 | return DataLoader(datasets, batch_size=self.batch_size, num_workers=self.num_workers, shuffle=False) 80 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/diffusion/transport/__init__.py: -------------------------------------------------------------------------------- 1 | # This file includes code derived from the SiT project (https://github.com/willisma/SiT), 2 | # which is licensed under the MIT License. 3 | # 4 | # MIT License 5 | # 6 | # Copyright (c) Meta Platforms, Inc. and affiliates. 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
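# Hedged usage sketch mirroring scheduler_cfg in the training YAMLs (Linear path,
# velocity prediction, euler ODE with 50 steps). The `sample_ode` factory follows the
# upstream SiT-style Sampler API and is assumed to be unchanged in transport.py.
def _example_flow_matching_sampler():
    transport = create_transport(path_type="Linear", prediction="velocity")  # eps forced to 0 for this combination
    sampler = Sampler(transport)
    sample_fn = sampler.sample_ode(sampling_method="euler", num_steps=50)
    return sample_fn  # typically invoked as sample_fn(noisy_latents, denoiser, **model_kwargs)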
25 | 26 | from .transport import Transport, ModelType, WeightType, PathType, Sampler 27 | 28 | 29 | def create_transport( 30 | path_type='Linear', 31 | prediction="velocity", 32 | loss_weight=None, 33 | train_eps=None, 34 | sample_eps=None, 35 | train_sample_type="uniform", 36 | mean = 0.0, 37 | std = 1.0, 38 | shift_scale = 1.0, 39 | ): 40 | """function for creating Transport object 41 | **Note**: model prediction defaults to velocity 42 | Args: 43 | - path_type: type of path to use; default to linear 44 | - learn_score: set model prediction to score 45 | - learn_noise: set model prediction to noise 46 | - velocity_weighted: weight loss by velocity weight 47 | - likelihood_weighted: weight loss by likelihood weight 48 | - train_eps: small epsilon for avoiding instability during training 49 | - sample_eps: small epsilon for avoiding instability during sampling 50 | """ 51 | 52 | if prediction == "noise": 53 | model_type = ModelType.NOISE 54 | elif prediction == "score": 55 | model_type = ModelType.SCORE 56 | else: 57 | model_type = ModelType.VELOCITY 58 | 59 | if loss_weight == "velocity": 60 | loss_type = WeightType.VELOCITY 61 | elif loss_weight == "likelihood": 62 | loss_type = WeightType.LIKELIHOOD 63 | else: 64 | loss_type = WeightType.NONE 65 | 66 | path_choice = { 67 | "Linear": PathType.LINEAR, 68 | "GVP": PathType.GVP, 69 | "VP": PathType.VP, 70 | } 71 | 72 | path_type = path_choice[path_type] 73 | 74 | if (path_type in [PathType.VP]): 75 | train_eps = 1e-5 if train_eps is None else train_eps 76 | sample_eps = 1e-3 if train_eps is None else sample_eps 77 | elif (path_type in [PathType.GVP, PathType.LINEAR] and model_type != ModelType.VELOCITY): 78 | train_eps = 1e-3 if train_eps is None else train_eps 79 | sample_eps = 1e-3 if train_eps is None else sample_eps 80 | else: # velocity & [GVP, LINEAR] is stable everywhere 81 | train_eps = 0 82 | sample_eps = 0 83 | 84 | # create flow state 85 | state = Transport( 86 | model_type=model_type, 87 | path_type=path_type, 88 | loss_type=loss_type, 89 | train_eps=train_eps, 90 | sample_eps=sample_eps, 91 | train_sample_type=train_sample_type, 92 | mean=mean, 93 | std=std, 94 | shift_scale =shift_scale, 95 | ) 96 | 97 | return state 98 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/trainings/peft.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 4 | # except for the third-party components listed below. 5 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 6 | # in the repsective licenses of these third-party components. 7 | # Users must comply with all terms and conditions of original licenses of these third-party 8 | # components and must ensure that the usage of the third party components adheres to 9 | # all relevant laws and regulations. 10 | 11 | # For avoidance of doubts, Hunyuan 3D means the large language models and 12 | # their software and algorithms, including trained model weights, parameters (including 13 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 14 | # fine-tuning enabling code and other elements of the foregoing made publicly available 15 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
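# Hedged usage sketch: attaching the PeftSaveCallback defined below to a Lightning
# Trainer. `peft_model`, `lit_module` and `datamodule` are hypothetical objects supplied
# by the caller (the LoRA-wrapped denoiser, the LightningModule and the data module).
def _example_train_with_lora_checkpoints(peft_model, lit_module, datamodule):
    import pytorch_lightning as pl
    callback = PeftSaveCallback(peft_model=peft_model, save_dir="logs/lora", save_every_n_steps=500)
    trainer = pl.Trainer(max_epochs=1, callbacks=[callback])
    trainer.fit(lit_module, datamodule=datamodule)  # LoRA weights land in logs/lora/step_*/ and epoch_*/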
16 | 17 | import os 18 | from pytorch_lightning.callbacks import Callback 19 | from omegaconf import OmegaConf, ListConfig 20 | 21 | class PeftSaveCallback(Callback): 22 | def __init__(self, peft_model, save_dir: str, save_every_n_steps: int = None): 23 | super().__init__() 24 | self.peft_model = peft_model 25 | self.save_dir = save_dir 26 | self.save_every_n_steps = save_every_n_steps 27 | os.makedirs(self.save_dir, exist_ok=True) 28 | 29 | def recursive_convert(self, obj): 30 | from omegaconf import OmegaConf, ListConfig 31 | if isinstance(obj, (OmegaConf, ListConfig)): 32 | return OmegaConf.to_container(obj, resolve=True) 33 | elif isinstance(obj, dict): 34 | return {k: self.recursive_convert(v) for k, v in obj.items()} 35 | elif isinstance(obj, list): 36 | return [self.recursive_convert(i) for i in obj] 37 | elif isinstance(obj, type): 38 | # 避免修改类对象 39 | return obj 40 | elif hasattr(obj, '__dict__'): 41 | for attr_name, attr_value in vars(obj).items(): 42 | setattr(obj, attr_name, self.recursive_convert(attr_value)) 43 | return obj 44 | else: 45 | return obj 46 | 47 | # def recursive_convert(self, obj): 48 | # if isinstance(obj, (OmegaConf, ListConfig)): 49 | # return OmegaConf.to_container(obj, resolve=True) 50 | # elif isinstance(obj, dict): 51 | # return {k: self.recursive_convert(v) for k, v in obj.items()} 52 | # elif isinstance(obj, list): 53 | # return [self.recursive_convert(i) for i in obj] 54 | # elif hasattr(obj, '__dict__'): 55 | # for attr_name, attr_value in vars(obj).items(): 56 | # setattr(obj, attr_name, self.recursive_convert(attr_value)) 57 | # return obj 58 | # else: 59 | # return obj 60 | 61 | def _convert_peft_config(self): 62 | pc = self.peft_model.peft_config 63 | self.peft_model.peft_config = self.recursive_convert(pc) 64 | 65 | def on_train_epoch_end(self, trainer, pl_module): 66 | self._convert_peft_config() 67 | save_path = os.path.join(self.save_dir, f"epoch_{trainer.current_epoch}") 68 | self.peft_model.save_pretrained(save_path) 69 | print(f"[PeftSaveCallback] Saved LoRA weights to {save_path}") 70 | 71 | def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx): 72 | if self.save_every_n_steps is not None: 73 | global_step = trainer.global_step 74 | if global_step % self.save_every_n_steps == 0 and global_step > 0: 75 | self._convert_peft_config() 76 | save_path = os.path.join(self.save_dir, f"step_{global_step}") 77 | self.peft_model.save_pretrained(save_path) 78 | print(f"[PeftSaveCallback] Saved LoRA weights to {save_path}") 79 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/misc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import importlib 4 | from omegaconf import OmegaConf, DictConfig, ListConfig 5 | 6 | import torch 7 | import torch.distributed as dist 8 | from typing import Union 9 | 10 | 11 | def get_config_from_file(config_file: str) -> Union[DictConfig, ListConfig]: 12 | config_file = OmegaConf.load(config_file) 13 | 14 | if 'base_config' in config_file.keys(): 15 | if config_file['base_config'] == "default_base": 16 | base_config = OmegaConf.create() 17 | # base_config = get_default_config() 18 | elif config_file['base_config'].endswith(".yaml"): 19 | base_config = get_config_from_file(config_file['base_config']) 20 | else: 21 | raise ValueError(f"{config_file} must be `.yaml` file or it contains `base_config` key.") 22 | 23 | config_file = {key: value for key, value in config_file if key 
!= "base_config"} 24 | 25 | return OmegaConf.merge(base_config, config_file) 26 | 27 | return config_file 28 | 29 | 30 | def get_obj_from_str(string, reload=False): 31 | module, cls = string.rsplit(".", 1) 32 | if reload: 33 | module_imp = importlib.import_module(module) 34 | importlib.reload(module_imp) 35 | return getattr(importlib.import_module(module, package=None), cls) 36 | 37 | 38 | def get_obj_from_config(config): 39 | if "target" not in config: 40 | raise KeyError("Expected key `target` to instantiate.") 41 | 42 | return get_obj_from_str(config["target"]) 43 | 44 | 45 | def instantiate_from_config(config, **kwargs): 46 | if "target" not in config: 47 | raise KeyError("Expected key `target` to instantiate.") 48 | 49 | cls = get_obj_from_str(config["target"]) 50 | 51 | if config.get("from_pretrained", None): 52 | return cls.from_pretrained( 53 | config["from_pretrained"], 54 | use_safetensors=config.get('use_safetensors', False), 55 | variant=config.get('variant', 'fp16')) 56 | 57 | params = config.get("params", dict()) 58 | # params.update(kwargs) 59 | # instance = cls(**params) 60 | kwargs.update(params) 61 | instance = cls(**kwargs) 62 | 63 | return instance 64 | 65 | 66 | def disabled_train(self, mode=True): 67 | """Overwrite model.train with this function to make sure train/eval mode 68 | does not change anymore.""" 69 | return self 70 | 71 | 72 | def instantiate_non_trainable_model(config): 73 | model = instantiate_from_config(config) 74 | model = model.eval() 75 | model.train = disabled_train 76 | for param in model.parameters(): 77 | param.requires_grad = False 78 | 79 | return model 80 | 81 | 82 | def is_dist_avail_and_initialized(): 83 | if not dist.is_available(): 84 | return False 85 | if not dist.is_initialized(): 86 | return False 87 | return True 88 | 89 | 90 | def get_rank(): 91 | if not is_dist_avail_and_initialized(): 92 | return 0 93 | return dist.get_rank() 94 | 95 | 96 | def get_world_size(): 97 | if not is_dist_avail_and_initialized(): 98 | return 1 99 | return dist.get_world_size() 100 | 101 | 102 | def all_gather_batch(tensors): 103 | """ 104 | Performs all_gather operation on the provided tensors. 105 | """ 106 | # Queue the gathered tensors 107 | world_size = get_world_size() 108 | # There is no need for reduction in the single-proc case 109 | if world_size == 1: 110 | return tensors 111 | tensor_list = [] 112 | output_tensor = [] 113 | for tensor in tensors: 114 | tensor_all = [torch.ones_like(tensor) for _ in range(world_size)] 115 | dist.all_gather( 116 | tensor_all, 117 | tensor, 118 | async_op=False # performance opt 119 | ) 120 | 121 | tensor_list.append(tensor_all) 122 | 123 | for tensor_all in tensor_list: 124 | output_tensor.append(torch.cat(tensor_all, dim=0)) 125 | return output_tensor 126 | -------------------------------------------------------------------------------- /hy3dpaint/DifferentiableRenderer/camera_utils.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 
8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import math 16 | 17 | import numpy as np 18 | import torch 19 | 20 | 21 | def transform_pos(mtx, pos, keepdim=False): 22 | t_mtx = torch.from_numpy(mtx).to(pos.device) if isinstance(mtx, np.ndarray) else mtx 23 | if pos.shape[-1] == 3: 24 | posw = torch.cat([pos, torch.ones([pos.shape[0], 1]).to(pos.device)], axis=1) 25 | else: 26 | posw = pos 27 | 28 | if keepdim: 29 | return torch.matmul(posw, t_mtx.t())[...] 30 | else: 31 | return torch.matmul(posw, t_mtx.t())[None, ...] 32 | 33 | 34 | def get_mv_matrix(elev, azim, camera_distance, center=None): 35 | elev = -elev 36 | azim += 90 37 | 38 | elev_rad = math.radians(elev) 39 | azim_rad = math.radians(azim) 40 | 41 | camera_position = np.array( 42 | [ 43 | camera_distance * math.cos(elev_rad) * math.cos(azim_rad), 44 | camera_distance * math.cos(elev_rad) * math.sin(azim_rad), 45 | camera_distance * math.sin(elev_rad), 46 | ] 47 | ) 48 | 49 | if center is None: 50 | center = np.array([0, 0, 0]) 51 | else: 52 | center = np.array(center) 53 | 54 | lookat = center - camera_position 55 | lookat = lookat / np.linalg.norm(lookat) 56 | 57 | up = np.array([0, 0, 1.0]) 58 | right = np.cross(lookat, up) 59 | right = right / np.linalg.norm(right) 60 | up = np.cross(right, lookat) 61 | up = up / np.linalg.norm(up) 62 | 63 | c2w = np.concatenate([np.stack([right, up, -lookat], axis=-1), camera_position[:, None]], axis=-1) 64 | 65 | w2c = np.zeros((4, 4)) 66 | w2c[:3, :3] = np.transpose(c2w[:3, :3], (1, 0)) 67 | w2c[:3, 3:] = -np.matmul(np.transpose(c2w[:3, :3], (1, 0)), c2w[:3, 3:]) 68 | w2c[3, 3] = 1.0 69 | 70 | return w2c.astype(np.float32) 71 | 72 | 73 | def get_orthographic_projection_matrix(left=-1, right=1, bottom=-1, top=1, near=0, far=2): 74 | """ 75 | 计算正交投影矩阵。 76 | 77 | 参数: 78 | left (float): 投影区域左侧边界。 79 | right (float): 投影区域右侧边界。 80 | bottom (float): 投影区域底部边界。 81 | top (float): 投影区域顶部边界。 82 | near (float): 投影区域近裁剪面距离。 83 | far (float): 投影区域远裁剪面距离。 84 | 85 | 返回: 86 | numpy.ndarray: 正交投影矩阵。 87 | """ 88 | ortho_matrix = np.eye(4, dtype=np.float32) 89 | ortho_matrix[0, 0] = 2 / (right - left) 90 | ortho_matrix[1, 1] = 2 / (top - bottom) 91 | ortho_matrix[2, 2] = -2 / (far - near) 92 | ortho_matrix[0, 3] = -(right + left) / (right - left) 93 | ortho_matrix[1, 3] = -(top + bottom) / (top - bottom) 94 | ortho_matrix[2, 3] = -(far + near) / (far - near) 95 | return ortho_matrix 96 | 97 | 98 | def get_perspective_projection_matrix(fovy, aspect_wh, near, far): 99 | fovy_rad = math.radians(fovy) 100 | return np.array( 101 | [ 102 | [1.0 / (math.tan(fovy_rad / 2.0) * aspect_wh), 0, 0, 0], 103 | [0, 1.0 / math.tan(fovy_rad / 2.0), 0, 0], 104 | [0, 0, -(far + near) / (far - near), -2.0 * far * near / (far - near)], 105 | [0, 0, -1, 0], 106 | ] 107 | ).astype(np.float32) 108 | -------------------------------------------------------------------------------- /hy3dpaint/README.md: -------------------------------------------------------------------------------- 1 | # Hunyuan3D-Paint 2.1 2 | 3 | Hunyuan3D-Paint 2.1 is a high quality PBR texture generation model for 3D meshes, powered by 
[RomanTex](https://github.com/oakshy/RomanTex) and [MaterialMVP](https://github.com/ZebinHe/MaterialMVP/). 4 | 5 | 6 | ## Quick Inference 7 | You need to manually download the RealESRGAN weight to the `ckpt` folder using the following command: 8 | ```bash 9 | wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P ckpt 10 | ``` 11 | 12 | Given a 3D mesh `mesh.glb` and a reference image `image.png`, you can run inference using the following command. The result will be saved as `textured_mesh.glb`. 13 | 14 | ```bash 15 | python3 demo.py 16 | ``` 17 | **Optional arguments in `demo.py`:** 18 | 19 | - `max_num_view` : Maximum number of views, adaptively selected by the model (integer between 6 and 12) 20 | 21 | - `resolution` : Resolution of the generated PBR textures (512 or 768) 22 | 23 | **Memory Recommendation:** For `max_num_view=6` and `resolution=512`, we recommend using a GPU with at least **21GB VRAM**. 24 | 25 | ## Training 26 | 27 | ### Data Preparation 28 | We provide a sample data item in `train_examples` for an overfitting training test. The data structure should be organized as follows: 29 | 30 | ``` 31 | train_examples/ 32 | ├── examples.json 33 | └── 001/ 34 | ├── render_tex/ # Rendered PBR target images 35 | │ ├── 000.png # Rendered views (RGB images) 36 | │ ├── 000_albedo.png # Albedo maps for each view 37 | │ ├── 000_mr.png # Metallic-Roughness maps for each view, R and G channels 38 | │ ├── 000_normal.png # Normal maps 39 | │ ├── 000_normal.jpg # Normal maps (JPEG variant) 40 | │ ├── 000_pos.png # Position maps 41 | │ ├── 000_pos.jpg # Position maps (JPEG variant) 42 | │ ├── 001.png # Additional views... 43 | │ ├── 001_albedo.png 44 | │ ├── 001_mr.png 45 | │ ├── 001_normal.png 46 | │ ├── 001_pos.png 47 | │ └── ... # More views (002, 003, 004, 005, ...) 48 | └── render_cond/ # Rendered reference images (at least two light conditions should be rendered to facilitate the consistency loss) 49 | ├── 000_light_AL.png # Light condition 1 (Area Light) 50 | ├── 000_light_ENVMAP.png # Light condition 2 (Environment map) 51 | ├── 000_light_PL.png # Light condition 3 (Point Light) 52 | ├── 001_light_AL.png 53 | ├── 001_light_ENVMAP.png 54 | ├── 001_light_PL.png 55 | └── ... # More lighting conditions (002-005, ...)
56 | ``` 57 | 58 | Each training example contains: 59 | - **render_tex/**: Multi-view renderings with PBR material properties 60 | - Main RGB images (`XXX.png`) 61 | - Albedo maps (`XXX_albedo.png`) 62 | - Metallic-Roughness maps (`XXX_mr.png`) 63 | - Normal maps (`XXX_normal.png/jpg`) 64 | - Position maps (`XXX_pos.png/jpg`) 65 | - Camera transforms (`transforms.json`) 66 | - **render_cond/**: Lighting condition maps for each view 67 | - Ambient lighting (`XXX_light_AL.png`) 68 | - Environment map lighting (`XXX_light_ENVMAP.png`) 69 | - Point lighting (`XXX_light_PL.png`) 70 | 71 | ### Launch Training 72 | 73 | 74 | ```bash 75 | python3 train.py --base 'cfgs/hunyuan-paint-pbr.yaml' --name overfit --logdir logs/ 76 | ``` 77 | 78 | ## BibTeX 79 | 80 | If you found Hunyuan3D-Paint 2.1 helpful, please cite our papers: 81 | 82 | ```bibtex 83 | @article{feng2025romantex, 84 | title={RomanTex: Decoupling 3D-aware Rotary Positional Embedded Multi-Attention Network for Texture Synthesis}, 85 | author={Feng, Yifei and Yang, Mingxin and Yang, Shuhui and Zhang, Sheng and Yu, Jiaao and Zhao, Zibo and Liu, Yuhong and Jiang, Jie and Guo, Chunchao}, 86 | journal={arXiv preprint arXiv:2503.19011}, 87 | year={2025} 88 | } 89 | 90 | @article{he2025materialmvp, 91 | title={MaterialMVP: Illumination-Invariant Material Generation via Multi-view PBR Diffusion}, 92 | author={He, Zebin and Yang, Mingxin and Yang, Shuhui and Tang, Yixuan and Wang, Tao and Zhang, Kaihao and Chen, Guanying and Liu, Yuhong and Jiang, Jie and Guo, Chunchao and Luo, Wenhan}, 93 | journal={arXiv preprint arXiv:2503.10289}, 94 | year={2025} 95 | } 96 | ``` 97 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/autoencoders/attention_processors.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
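# Hedged sketch: the module below swaps torch.nn.functional.scaled_dot_product_attention
# for SageAttention when the environment variable CA_USE_SAGEATTN is "1" at import time,
# so the flag has to be set before the first import. The dotted module path is assumed
# from the repository layout.
def _example_enable_sageattention():
    import os
    import importlib
    os.environ["CA_USE_SAGEATTN"] = "1"  # requires `pip install sageattention`
    import hy3dshape.models.autoencoders.attention_processors as ap
    importlib.reload(ap)  # re-evaluates the module-level switch with the flag set
    return ap.scaled_dot_product_attention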
14 | 15 | import os 16 | 17 | import torch 18 | import torch.nn.functional as F 19 | 20 | scaled_dot_product_attention = F.scaled_dot_product_attention 21 | if os.environ.get('CA_USE_SAGEATTN', '0') == '1': 22 | try: 23 | from sageattention import sageattn 24 | except ImportError: 25 | raise ImportError('Please install the package "sageattention" to use this USE_SAGEATTN.') 26 | scaled_dot_product_attention = sageattn 27 | 28 | 29 | class CrossAttentionProcessor: 30 | def __call__(self, attn, q, k, v): 31 | out = scaled_dot_product_attention(q, k, v) 32 | return out 33 | 34 | 35 | class FlashVDMCrossAttentionProcessor: 36 | def __init__(self, topk=None): 37 | self.topk = topk 38 | 39 | def __call__(self, attn, q, k, v): 40 | if k.shape[-2] == 3072: 41 | topk = 1024 42 | elif k.shape[-2] == 512: 43 | topk = 256 44 | else: 45 | topk = k.shape[-2] // 3 46 | 47 | if self.topk is True: 48 | q1 = q[:, :, ::100, :] 49 | sim = q1 @ k.transpose(-1, -2) 50 | sim = torch.mean(sim, -2) 51 | topk_ind = torch.topk(sim, dim=-1, k=topk).indices.squeeze(-2).unsqueeze(-1) 52 | topk_ind = topk_ind.expand(-1, -1, -1, v.shape[-1]) 53 | v0 = torch.gather(v, dim=-2, index=topk_ind) 54 | k0 = torch.gather(k, dim=-2, index=topk_ind) 55 | out = scaled_dot_product_attention(q, k0, v0) 56 | elif self.topk is False: 57 | out = scaled_dot_product_attention(q, k, v) 58 | else: 59 | idx, counts = self.topk 60 | start = 0 61 | outs = [] 62 | for grid_coord, count in zip(idx, counts): 63 | end = start + count 64 | q_chunk = q[:, :, start:end, :] 65 | k0, v0 = self.select_topkv(q_chunk, k, v, topk) 66 | out = scaled_dot_product_attention(q_chunk, k0, v0) 67 | outs.append(out) 68 | start += count 69 | out = torch.cat(outs, dim=-2) 70 | self.topk = False 71 | return out 72 | 73 | def select_topkv(self, q_chunk, k, v, topk): 74 | q1 = q_chunk[:, :, ::50, :] 75 | sim = q1 @ k.transpose(-1, -2) 76 | sim = torch.mean(sim, -2) 77 | topk_ind = torch.topk(sim, dim=-1, k=topk).indices.squeeze(-2).unsqueeze(-1) 78 | topk_ind = topk_ind.expand(-1, -1, -1, v.shape[-1]) 79 | v0 = torch.gather(v, dim=-2, index=topk_ind) 80 | k0 = torch.gather(k, dim=-2, index=topk_ind) 81 | return k0, v0 82 | 83 | 84 | class FlashVDMTopMCrossAttentionProcessor(FlashVDMCrossAttentionProcessor): 85 | def select_topkv(self, q_chunk, k, v, topk): 86 | q1 = q_chunk[:, :, ::30, :] 87 | sim = q1 @ k.transpose(-1, -2) 88 | # sim = sim.to(torch.float32) 89 | sim = sim.softmax(-1) 90 | sim = torch.mean(sim, 1) 91 | activated_token = torch.where(sim > 1e-6)[2] 92 | index = torch.unique(activated_token, return_counts=True)[0].unsqueeze(0).unsqueeze(0).unsqueeze(-1) 93 | index = index.expand(-1, v.shape[1], -1, v.shape[-1]) 94 | v0 = torch.gather(v, dim=-2, index=index) 95 | k0 = torch.gather(k, dim=-2, index=index) 96 | return k0, v0 97 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/trainings/mesh.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 4 | # except for the third-party components listed below. 5 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 6 | # in the repsective licenses of these third-party components. 
7 | # Users must comply with all terms and conditions of original licenses of these third-party 8 | # components and must ensure that the usage of the third party components adheres to 9 | # all relevant laws and regulations. 10 | 11 | # For avoidance of doubts, Hunyuan 3D means the large language models and 12 | # their software and algorithms, including trained model weights, parameters (including 13 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 14 | # fine-tuning enabling code and other elements of the foregoing made publicly available 15 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 16 | 17 | import os 18 | import cv2 19 | import numpy as np 20 | import PIL.Image 21 | from typing import Optional 22 | 23 | import trimesh 24 | 25 | 26 | def save_obj(pointnp_px3, facenp_fx3, fname): 27 | fid = open(fname, "w") 28 | write_str = "" 29 | for pidx, p in enumerate(pointnp_px3): 30 | pp = p 31 | write_str += "v %f %f %f\n" % (pp[0], pp[1], pp[2]) 32 | 33 | for i, f in enumerate(facenp_fx3): 34 | f1 = f + 1 35 | write_str += "f %d %d %d\n" % (f1[0], f1[1], f1[2]) 36 | fid.write(write_str) 37 | fid.close() 38 | return 39 | 40 | 41 | def savemeshtes2(pointnp_px3, tcoords_px2, facenp_fx3, facetex_fx3, tex_map, fname): 42 | fol, na = os.path.split(fname) 43 | na, _ = os.path.splitext(na) 44 | 45 | matname = "%s/%s.mtl" % (fol, na) 46 | fid = open(matname, "w") 47 | fid.write("newmtl material_0\n") 48 | fid.write("Kd 1 1 1\n") 49 | fid.write("Ka 0 0 0\n") 50 | fid.write("Ks 0.4 0.4 0.4\n") 51 | fid.write("Ns 10\n") 52 | fid.write("illum 2\n") 53 | fid.write("map_Kd %s.png\n" % na) 54 | fid.close() 55 | #### 56 | 57 | fid = open(fname, "w") 58 | fid.write("mtllib %s.mtl\n" % na) 59 | 60 | for pidx, p3 in enumerate(pointnp_px3): 61 | pp = p3 62 | fid.write("v %f %f %f\n" % (pp[0], pp[1], pp[2])) 63 | 64 | for pidx, p2 in enumerate(tcoords_px2): 65 | pp = p2 66 | fid.write("vt %f %f\n" % (pp[0], pp[1])) 67 | 68 | fid.write("usemtl material_0\n") 69 | for i, f in enumerate(facenp_fx3): 70 | f1 = f + 1 71 | f2 = facetex_fx3[i] + 1 72 | fid.write("f %d/%d %d/%d %d/%d\n" % (f1[0], f2[0], f1[1], f2[1], f1[2], f2[2])) 73 | fid.close() 74 | 75 | PIL.Image.fromarray(np.ascontiguousarray(tex_map), "RGB").save( 76 | os.path.join(fol, "%s.png" % na)) 77 | 78 | return 79 | 80 | 81 | class MeshOutput(object): 82 | 83 | def __init__(self, 84 | mesh_v: np.ndarray, 85 | mesh_f: np.ndarray, 86 | vertex_colors: Optional[np.ndarray] = None, 87 | uvs: Optional[np.ndarray] = None, 88 | mesh_tex_idx: Optional[np.ndarray] = None, 89 | tex_map: Optional[np.ndarray] = None): 90 | 91 | self.mesh_v = mesh_v 92 | self.mesh_f = mesh_f 93 | self.vertex_colors = vertex_colors 94 | self.uvs = uvs 95 | self.mesh_tex_idx = mesh_tex_idx 96 | self.tex_map = tex_map 97 | 98 | def contain_uv_texture(self): 99 | return (self.uvs is not None) and (self.mesh_tex_idx is not None) and (self.tex_map is not None) 100 | 101 | def contain_vertex_colors(self): 102 | return self.vertex_colors is not None 103 | 104 | def export(self, fname): 105 | 106 | if self.contain_uv_texture(): 107 | savemeshtes2( 108 | self.mesh_v, 109 | self.uvs, 110 | self.mesh_f, 111 | self.mesh_tex_idx, 112 | self.tex_map, 113 | fname 114 | ) 115 | 116 | elif self.contain_vertex_colors(): 117 | mesh_obj = trimesh.Trimesh(vertices=self.mesh_v, faces=self.mesh_f, vertex_colors=self.vertex_colors) 118 | mesh_obj.export(fname) 119 | 120 | else: 121 | save_obj( 122 | self.mesh_v, 123 | 
self.mesh_f, 124 | fname 125 | ) 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /hy3dpaint/convert_utils.py: -------------------------------------------------------------------------------- 1 | import trimesh 2 | import pygltflib 3 | import numpy as np 4 | from PIL import Image 5 | import base64 6 | import io 7 | 8 | 9 | def combine_metallic_roughness(metallic_path, roughness_path, output_path): 10 | """ 11 | 将metallic和roughness贴图合并为一张贴图 12 | GLB格式要求metallic在B通道,roughness在G通道 13 | """ 14 | # 加载贴图 15 | metallic_img = Image.open(metallic_path).convert("L") # 转为灰度 16 | roughness_img = Image.open(roughness_path).convert("L") # 转为灰度 17 | 18 | # 确保尺寸一致 19 | if metallic_img.size != roughness_img.size: 20 | roughness_img = roughness_img.resize(metallic_img.size) 21 | 22 | # 创建RGB图像 23 | width, height = metallic_img.size 24 | combined = Image.new("RGB", (width, height)) 25 | 26 | # 转为numpy数组便于操作 27 | metallic_array = np.array(metallic_img) 28 | roughness_array = np.array(roughness_img) 29 | 30 | # 创建合并的数组 (R, G, B) = (AO, Roughness, Metallic) 31 | combined_array = np.zeros((height, width, 3), dtype=np.uint8) 32 | combined_array[:, :, 0] = 255 # R通道:AO (如果没有AO贴图,设为白色) 33 | combined_array[:, :, 1] = roughness_array # G通道:Roughness 34 | combined_array[:, :, 2] = metallic_array # B通道:Metallic 35 | 36 | # 转回PIL图像并保存 37 | combined = Image.fromarray(combined_array) 38 | combined.save(output_path) 39 | return output_path 40 | 41 | 42 | def create_glb_with_pbr_materials(obj_path, textures_dict, output_path): 43 | """ 44 | 使用pygltflib创建包含完整PBR材质的GLB文件 45 | 46 | textures_dict = { 47 | 'albedo': 'path/to/albedo.png', 48 | 'metallic': 'path/to/metallic.png', 49 | 'roughness': 'path/to/roughness.png', 50 | 'normal': 'path/to/normal.png', # 可选 51 | 'ao': 'path/to/ao.png' # 可选 52 | } 53 | """ 54 | # 1. 加载OBJ文件 55 | mesh = trimesh.load(obj_path) 56 | 57 | # 2. 先导出为临时GLB 58 | temp_glb = "temp.glb" 59 | mesh.export(temp_glb) 60 | 61 | # 3. 加载GLB文件进行材质编辑 62 | gltf = pygltflib.GLTF2().load(temp_glb) 63 | 64 | # 4. 准备纹理数据 65 | def image_to_data_uri(image_path): 66 | """将图像转换为data URI""" 67 | with open(image_path, "rb") as f: 68 | image_data = f.read() 69 | encoded = base64.b64encode(image_data).decode() 70 | return f"data:image/png;base64,{encoded}" 71 | 72 | # 5. 合并metallic和roughness 73 | if "metallic" in textures_dict and "roughness" in textures_dict: 74 | mr_combined_path = "mr_combined.png" 75 | combine_metallic_roughness(textures_dict["metallic"], textures_dict["roughness"], mr_combined_path) 76 | textures_dict["metallicRoughness"] = mr_combined_path 77 | 78 | # 6. 添加图像到GLTF 79 | images = [] 80 | textures = [] 81 | 82 | texture_mapping = { 83 | "albedo": "baseColorTexture", 84 | "metallicRoughness": "metallicRoughnessTexture", 85 | "normal": "normalTexture", 86 | "ao": "occlusionTexture", 87 | } 88 | 89 | for tex_type, tex_path in textures_dict.items(): 90 | if tex_type in texture_mapping and tex_path: 91 | # 添加图像 92 | image = pygltflib.Image(uri=image_to_data_uri(tex_path)) 93 | images.append(image) 94 | 95 | # 添加纹理 96 | texture = pygltflib.Texture(source=len(images) - 1) 97 | textures.append(texture) 98 | 99 | # 7. 
创建PBR材质 100 | pbr_metallic_roughness = pygltflib.PbrMetallicRoughness( 101 | baseColorFactor=[1.0, 1.0, 1.0, 1.0], metallicFactor=1.0, roughnessFactor=1.0 102 | ) 103 | 104 | # 设置纹理索引 105 | texture_index = 0 106 | if "albedo" in textures_dict: 107 | pbr_metallic_roughness.baseColorTexture = pygltflib.TextureInfo(index=texture_index) 108 | texture_index += 1 109 | 110 | if "metallicRoughness" in textures_dict: 111 | pbr_metallic_roughness.metallicRoughnessTexture = pygltflib.TextureInfo(index=texture_index) 112 | texture_index += 1 113 | 114 | # 创建材质 115 | material = pygltflib.Material(name="PBR_Material", pbrMetallicRoughness=pbr_metallic_roughness) 116 | 117 | # 添加法线贴图 118 | if "normal" in textures_dict: 119 | material.normalTexture = pygltflib.NormalTextureInfo(index=texture_index) 120 | texture_index += 1 121 | 122 | # 添加AO贴图 123 | if "ao" in textures_dict: 124 | material.occlusionTexture = pygltflib.OcclusionTextureInfo(index=texture_index) 125 | 126 | # 8. 更新GLTF 127 | gltf.images = images 128 | gltf.textures = textures 129 | gltf.materials = [material] 130 | 131 | # 确保mesh使用材质 132 | if gltf.meshes: 133 | for primitive in gltf.meshes[0].primitives: 134 | primitive.material = 0 135 | 136 | # 9. 保存最终GLB 137 | gltf.save(output_path) 138 | print(f"PBR GLB文件已保存: {output_path}") 139 | 140 | 141 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/utils/utils.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import logging 16 | import os 17 | from functools import wraps 18 | 19 | import torch 20 | 21 | 22 | def get_logger(name): 23 | logger = logging.getLogger(name) 24 | logger.setLevel(logging.INFO) 25 | 26 | console_handler = logging.StreamHandler() 27 | console_handler.setLevel(logging.INFO) 28 | 29 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') 30 | console_handler.setFormatter(formatter) 31 | logger.addHandler(console_handler) 32 | return logger 33 | 34 | 35 | logger = get_logger('hy3dgen.shapgen') 36 | 37 | 38 | class synchronize_timer: 39 | """ Synchronized timer to count the inference time of `nn.Module.forward`. 40 | 41 | Supports both context manager and decorator usage. 
42 | 43 | Example as context manager: 44 | ```python 45 | with synchronize_timer('name') as t: 46 | run() 47 | ``` 48 | 49 | Example as decorator: 50 | ```python 51 | @synchronize_timer('Export to trimesh') 52 | def export_to_trimesh(mesh_output): 53 | pass 54 | ``` 55 | """ 56 | 57 | def __init__(self, name=None): 58 | self.name = name 59 | 60 | def __enter__(self): 61 | """Context manager entry: start timing.""" 62 | if os.environ.get('HY3DGEN_DEBUG', '0') == '1': 63 | self.start = torch.cuda.Event(enable_timing=True) 64 | self.end = torch.cuda.Event(enable_timing=True) 65 | self.start.record() 66 | return lambda: self.time 67 | 68 | def __exit__(self, exc_type, exc_value, exc_tb): 69 | """Context manager exit: stop timing and log results.""" 70 | if os.environ.get('HY3DGEN_DEBUG', '0') == '1': 71 | self.end.record() 72 | torch.cuda.synchronize() 73 | self.time = self.start.elapsed_time(self.end) 74 | if self.name is not None: 75 | logger.info(f'{self.name} takes {self.time} ms') 76 | 77 | def __call__(self, func): 78 | """Decorator: wrap the function to time its execution.""" 79 | 80 | @wraps(func) 81 | def wrapper(*args, **kwargs): 82 | with self: 83 | result = func(*args, **kwargs) 84 | return result 85 | 86 | return wrapper 87 | 88 | 89 | def smart_load_model( 90 | model_path, 91 | subfolder, 92 | use_safetensors, 93 | variant, 94 | ): 95 | original_model_path = model_path 96 | # try local path 97 | base_dir = os.environ.get('HY3DGEN_MODELS', '~/.cache/hy3dgen') 98 | model_fld = os.path.expanduser(os.path.join(base_dir, model_path)) 99 | model_path = os.path.expanduser(os.path.join(base_dir, model_path, subfolder)) 100 | logger.info(f'Try to load model from local path: {model_path}') 101 | if not os.path.exists(model_path): 102 | logger.info('Model path not exists, try to download from huggingface') 103 | try: 104 | from huggingface_hub import snapshot_download 105 | # 只下载指定子目录 106 | path = snapshot_download( 107 | repo_id=original_model_path, 108 | allow_patterns=[f"{subfolder}/*"], # 关键修改:模式匹配子文件夹 109 | local_dir=model_fld 110 | ) 111 | model_path = os.path.join(path, subfolder) # 保持路径拼接逻辑不变 112 | except ImportError: 113 | logger.warning( 114 | "You need to install HuggingFace Hub to load models from the hub." 115 | ) 116 | raise RuntimeError(f"Model path {model_path} not found") 117 | except Exception as e: 118 | raise e 119 | 120 | if not os.path.exists(model_path): 121 | raise FileNotFoundError(f"Model path {original_model_path} not found") 122 | 123 | extension = 'ckpt' if not use_safetensors else 'safetensors' 124 | variant = '' if variant is None else f'.{variant}' 125 | ckpt_name = f'model{variant}.{extension}' 126 | config_path = os.path.join(model_path, 'config.yaml') 127 | ckpt_path = os.path.join(model_path, ckpt_name) 128 | return config_path, ckpt_path 129 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/diffusion/transport/integrators.py: -------------------------------------------------------------------------------- 1 | # This file includes code derived from the SiT project (https://github.com/willisma/SiT), 2 | # which is licensed under the MIT License. 3 | # 4 | # MIT License 5 | # 6 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | import numpy as np 27 | import torch as th 28 | import torch.nn as nn 29 | from torchdiffeq import odeint 30 | from functools import partial 31 | from tqdm import tqdm 32 | 33 | class sde: 34 | """SDE solver class""" 35 | def __init__( 36 | self, 37 | drift, 38 | diffusion, 39 | *, 40 | t0, 41 | t1, 42 | num_steps, 43 | sampler_type, 44 | ): 45 | assert t0 < t1, "SDE sampler has to be in forward time" 46 | 47 | self.num_timesteps = num_steps 48 | self.t = th.linspace(t0, t1, num_steps) 49 | self.dt = self.t[1] - self.t[0] 50 | self.drift = drift 51 | self.diffusion = diffusion 52 | self.sampler_type = sampler_type 53 | 54 | def __Euler_Maruyama_step(self, x, mean_x, t, model, **model_kwargs): 55 | w_cur = th.randn(x.size()).to(x) 56 | t = th.ones(x.size(0)).to(x) * t 57 | dw = w_cur * th.sqrt(self.dt) 58 | drift = self.drift(x, t, model, **model_kwargs) 59 | diffusion = self.diffusion(x, t) 60 | mean_x = x + drift * self.dt 61 | x = mean_x + th.sqrt(2 * diffusion) * dw 62 | return x, mean_x 63 | 64 | def __Heun_step(self, x, _, t, model, **model_kwargs): 65 | w_cur = th.randn(x.size()).to(x) 66 | dw = w_cur * th.sqrt(self.dt) 67 | t_cur = th.ones(x.size(0)).to(x) * t 68 | diffusion = self.diffusion(x, t_cur) 69 | xhat = x + th.sqrt(2 * diffusion) * dw 70 | K1 = self.drift(xhat, t_cur, model, **model_kwargs) 71 | xp = xhat + self.dt * K1 72 | K2 = self.drift(xp, t_cur + self.dt, model, **model_kwargs) 73 | return xhat + 0.5 * self.dt * (K1 + K2), xhat # at last time point we do not perform the heun step 74 | 75 | def __forward_fn(self): 76 | """TODO: generalize here by adding all private functions ending with steps to it""" 77 | sampler_dict = { 78 | "Euler": self.__Euler_Maruyama_step, 79 | "Heun": self.__Heun_step, 80 | } 81 | 82 | try: 83 | sampler = sampler_dict[self.sampler_type] 84 | except: 85 | raise NotImplementedError("Smapler type not implemented.") 86 | 87 | return sampler 88 | 89 | def sample(self, init, model, **model_kwargs): 90 | """forward loop of sde""" 91 | x = init 92 | mean_x = init 93 | samples = [] 94 | sampler = self.__forward_fn() 95 | for ti in self.t[:-1]: 96 | with th.no_grad(): 97 | x, mean_x = sampler(x, mean_x, ti, model, **model_kwargs) 98 | samples.append(x) 99 | 100 | return samples 101 | 102 | class ode: 103 | """ODE solver class""" 104 | def __init__( 105 | self, 106 | drift, 107 | *, 108 | t0, 109 | t1, 110 | sampler_type, 
111 | num_steps, 112 | atol, 113 | rtol, 114 | ): 115 | assert t0 < t1, "ODE sampler has to be in forward time" 116 | 117 | self.drift = drift 118 | self.t = th.linspace(t0, t1, num_steps) 119 | self.atol = atol 120 | self.rtol = rtol 121 | self.sampler_type = sampler_type 122 | 123 | def sample(self, x, model, **model_kwargs): 124 | 125 | device = x[0].device if isinstance(x, tuple) else x.device 126 | def _fn(t, x): 127 | t = th.ones(x[0].size(0)).to(device) * t if isinstance(x, tuple) else th.ones(x.size(0)).to(device) * t 128 | model_output = self.drift(x, t, model, **model_kwargs) 129 | return model_output 130 | 131 | t = self.t.to(device) 132 | atol = [self.atol] * len(x) if isinstance(x, tuple) else [self.atol] 133 | rtol = [self.rtol] * len(x) if isinstance(x, tuple) else [self.rtol] 134 | samples = odeint( 135 | _fn, 136 | x, 137 | t, 138 | method=self.sampler_type, 139 | atol=atol, 140 | rtol=rtol 141 | ) 142 | return samples 143 | -------------------------------------------------------------------------------- /hy3dpaint/utils/torchvision_fix.py: -------------------------------------------------------------------------------- 1 | # Torchvision compatibility fix for functional_tensor module 2 | # This file helps resolve compatibility issues between different torchvision versions 3 | 4 | import sys 5 | import torch 6 | import torchvision 7 | 8 | def fix_torchvision_functional_tensor(): 9 | """ 10 | Fix torchvision.transforms.functional_tensor import issue 11 | """ 12 | try: 13 | # Check if the module exists in the expected location 14 | import torchvision.transforms.functional_tensor 15 | print("torchvision.transforms.functional_tensor is available") 16 | return True 17 | except ImportError: 18 | print("torchvision.transforms.functional_tensor not found, applying compatibility fix...") 19 | 20 | try: 21 | # Create a mock functional_tensor module with the required functions 22 | import torchvision.transforms.functional as F 23 | 24 | class FunctionalTensorMock: 25 | """Mock module to replace functional_tensor""" 26 | 27 | @staticmethod 28 | def _get_grayscale_weights(img): 29 | """Helper to create grayscale weights based on image dimensions""" 30 | weights = torch.tensor([0.299, 0.587, 0.114], device=img.device, dtype=img.dtype) 31 | return weights.view(1, 3, 1, 1) if len(img.shape) == 4 else weights.view(3, 1, 1) 32 | 33 | @staticmethod 34 | def _try_import_fallback(module_names, attr_name): 35 | """Helper to try importing from multiple modules""" 36 | for module_name in module_names: 37 | try: 38 | module = __import__(module_name, fromlist=[attr_name]) 39 | if hasattr(module, attr_name): 40 | return getattr(module, attr_name) 41 | except ImportError: 42 | continue 43 | return None 44 | 45 | @staticmethod 46 | def rgb_to_grayscale(img, num_output_channels=1): 47 | """Convert RGB image to grayscale""" 48 | if hasattr(F, 'rgb_to_grayscale'): 49 | return F.rgb_to_grayscale(img, num_output_channels) 50 | 51 | # Fallback implementation 52 | weights = FunctionalTensorMock._get_grayscale_weights(img) 53 | grayscale = torch.sum(img * weights, dim=-3, keepdim=True) 54 | 55 | if num_output_channels == 3: 56 | repeat_dims = (1, 3, 1, 1) if len(img.shape) == 4 else (3, 1, 1) 57 | grayscale = grayscale.repeat(*repeat_dims) 58 | 59 | return grayscale 60 | 61 | @staticmethod 62 | def resize(img, size, interpolation=2, antialias=None): 63 | """Resize function wrapper""" 64 | # Try v2.functional first, then regular functional, then torch.nn.functional 65 | resize_func = 
FunctionalTensorMock._try_import_fallback([ 66 | 'torchvision.transforms.v2.functional', 67 | 'torchvision.transforms.functional' 68 | ], 'resize') 69 | 70 | if resize_func: 71 | try: 72 | return resize_func(img, size, interpolation=interpolation, antialias=antialias) 73 | except TypeError: 74 | # Fallback for older versions without antialias parameter 75 | return resize_func(img, size, interpolation=interpolation) 76 | 77 | # Final fallback using torch.nn.functional 78 | import torch.nn.functional as torch_F 79 | size = (size, size) if isinstance(size, int) else size 80 | img_input = img.unsqueeze(0) if len(img.shape) == 3 else img 81 | return torch_F.interpolate(img_input, size=size, mode='bilinear', align_corners=False) 82 | 83 | def __getattr__(self, name): 84 | """Fallback to regular functional module""" 85 | func = self._try_import_fallback([ 86 | 'torchvision.transforms.functional', 87 | 'torchvision.transforms.v2.functional' 88 | ], name) 89 | 90 | if func: 91 | return func 92 | 93 | raise AttributeError(f"'{name}' not found in functional_tensor mock") 94 | 95 | # Create the mock module instance and monkey patch 96 | sys.modules['torchvision.transforms.functional_tensor'] = FunctionalTensorMock() 97 | print("Applied compatibility fix: created functional_tensor mock module") 98 | return True 99 | 100 | except Exception as e: 101 | print(f"Failed to create functional_tensor mock: {e}") 102 | return False 103 | 104 | def apply_fix(): 105 | """Apply the torchvision compatibility fix""" 106 | print(f"Torchvision version: {torchvision.__version__}") 107 | return fix_torchvision_functional_tensor() 108 | 109 | if __name__ == "__main__": 110 | apply_fix() 111 | -------------------------------------------------------------------------------- /hy3dshape/configs/hunyuan3ddit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-512.yaml: -------------------------------------------------------------------------------- 1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518" 2 | 3 | training: 4 | steps: 10_0000_0000 5 | use_amp: true 6 | amp_type: "bf16" 7 | base_lr: 1e-4 8 | gradient_clip_val: 1.0 9 | gradient_clip_algorithm: "norm" 10 | every_n_train_steps: 2000 # 5000 11 | val_check_interval: 50 # 4096 12 | limit_val_batches: 16 13 | 14 | dataset: 15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule 16 | params: 17 | #! Base setting 18 | batch_size: 2 19 | num_workers: 8 20 | val_num_workers: 4 21 | 22 | # Data 23 | train_data_list: tools/mini_trainset/preprocessed 24 | val_data_list: tools/mini_trainset/preprocessed 25 | 26 | #! Image loading 27 | cond_stage_key: "image" # image / text / image_text 28 | image_size: 518 29 | mean: &mean [0.5, 0.5, 0.5] 30 | std: &std [0.5, 0.5, 0.5] 31 | 32 | #! Point cloud sampling 33 | pc_size: &pc_size 10240 34 | pc_sharpedge_size: &pc_sharpedge_size 10240 35 | sharpedge_label: &sharpedge_label true 36 | return_normal: true 37 | 38 | #! 
Augmentation 39 | padding: true 40 | 41 | model: 42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser 43 | params: 44 | first_stage_key: "surface" 45 | cond_stage_key: "image" 46 | scale_by_std: false 47 | z_scale_factor: &z_scale_factor 0.9990943042622529 # 1 / 1.0009065167661184 48 | torch_compile: false 49 | 50 | # ema_config: 51 | # ema_model: LitEma 52 | # ema_decay: 0.999 53 | # ema_inference: false 54 | 55 | first_stage_config: 56 | target: hy3dshape.models.autoencoders.ShapeVAE 57 | from_pretrained: tencent/Hunyuan3D-2.1 58 | params: 59 | num_latents: &num_latents 512 60 | embed_dim: 64 61 | num_freqs: 8 62 | include_pi: false 63 | heads: 16 64 | width: 1024 65 | point_feats: 4 66 | num_decoder_layers: 16 67 | pc_size: *pc_size 68 | pc_sharpedge_size: *pc_sharpedge_size 69 | qkv_bias: false 70 | qk_norm: true 71 | scale_factor: *z_scale_factor 72 | geo_decoder_mlp_expand_ratio: 4 73 | geo_decoder_downsample_ratio: 1 74 | geo_decoder_ln_post: true 75 | 76 | cond_stage_config: 77 | target: hy3dshape.models.conditioner.SingleImageEncoder 78 | params: 79 | main_image_encoder: 80 | type: DinoImageEncoder # dino giant 81 | kwargs: 82 | config: 83 | attention_probs_dropout_prob: 0.0 84 | drop_path_rate: 0.0 85 | hidden_act: gelu 86 | hidden_dropout_prob: 0.0 87 | hidden_size: 1536 88 | image_size: 518 89 | initializer_range: 0.02 90 | layer_norm_eps: 1.e-6 91 | layerscale_value: 1.0 92 | mlp_ratio: 4 93 | model_type: dinov2 94 | num_attention_heads: 24 95 | num_channels: 3 96 | num_hidden_layers: 40 97 | patch_size: 14 98 | qkv_bias: true 99 | torch_dtype: float32 100 | use_swiglu_ffn: true 101 | image_size: 518 102 | 103 | denoiser_cfg: 104 | target: hy3dshape.models.denoisers.hunyuan3ddit.Hunyuan3DDiT 105 | params: 106 | input_size: *num_latents 107 | context_in_dim: 1536 108 | hidden_size: 1024 109 | mlp_ratio: 4.0 110 | num_heads: 16 111 | depth: 8 112 | depth_single_blocks: 16 113 | axes_dim: [64] 114 | theta: 10000 115 | qkv_bias: true 116 | use_pe: false 117 | force_norm_fp32: true 118 | 119 | scheduler_cfg: 120 | transport: 121 | target: hy3dshape.models.diffusion.transport.create_transport 122 | params: 123 | path_type: Linear 124 | prediction: velocity 125 | sampler: 126 | target: hy3dshape.models.diffusion.transport.Sampler 127 | params: {} 128 | ode_params: 129 | sampling_method: euler # dopri5 ... 
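        # euler integrates on a fixed grid of num_steps points; adaptive solvers such as
        # dopri5 instead rely on the atol/rtol tolerances handled by the ode class in
        # transport/integrators.py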
130 | num_steps: &num_steps 50 131 | 132 | optimizer_cfg: 133 | optimizer: 134 | target: torch.optim.AdamW 135 | params: 136 | betas: [0.9, 0.99] 137 | eps: 1.e-6 138 | weight_decay: 1.e-2 139 | 140 | scheduler: 141 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler 142 | params: 143 | warm_up_steps: 50 # 5000 144 | f_start: 1.e-6 145 | f_min: 1.e-3 146 | f_max: 1.0 147 | 148 | pipeline_cfg: 149 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline 150 | 151 | image_processor_cfg: 152 | target: hy3dshape.preprocessors.ImageProcessorV2 153 | params: {} 154 | 155 | callbacks: 156 | logger: 157 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger 158 | params: 159 | step_frequency: 100 # 10000 160 | num_samples: 1 161 | sample_times: 1 162 | mean: *mean 163 | std: *std 164 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01] 165 | octree_depth: 8 166 | num_chunks: 50000 167 | mc_level: 0.0 168 | 169 | file_loggers: 170 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger 171 | params: 172 | step_frequency: 50 # 5000 173 | test_data_path: "tools/mini_testset/images.json" 174 | -------------------------------------------------------------------------------- /workflow_examples/Batch_Generator.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "5ad9bf67-cabe-4ef4-8e0c-bbeee0fc546f", 3 | "revision": 0, 4 | "last_node_id": 8, 5 | "last_link_id": 3, 6 | "nodes": [ 7 | { 8 | "id": 6, 9 | "type": "Hy3D21CameraConfig", 10 | "pos": [ 11 | -706.4094848632812, 12 | 305.74383544921875 13 | ], 14 | "size": [ 15 | 382.7560729980469, 16 | 133.63636779785156 17 | ], 18 | "flags": {}, 19 | "order": 0, 20 | "mode": 0, 21 | "inputs": [], 22 | "outputs": [ 23 | { 24 | "name": "camera_config", 25 | "type": "HY3D21CAMERA", 26 | "links": [ 27 | 1 28 | ] 29 | } 30 | ], 31 | "properties": { 32 | "aux_id": "visualbruno/ComfyUI-Hunyuan3d-2-1", 33 | "ver": "e439689e4b67fb2af5f487ee26ef3a710be92658", 34 | "Node name for S&R": "Hy3D21CameraConfig", 35 | "widget_ue_connectable": {} 36 | }, 37 | "widgets_values": [ 38 | "0, 90, 180, 270, 0, 180, 45, 315", 39 | "0, 0, 0, 0, 90, -90, 0, 0", 40 | "1, 0.5, 1, 0.5, 1, 1, 0.1, 0.1", 41 | 1.1000000000000003 42 | ] 43 | }, 44 | { 45 | "id": 4, 46 | "type": "Hy3D21MeshGenerationBatch", 47 | "pos": [ 48 | -732.0762939453125, 49 | 516.9437255859375 50 | ], 51 | "size": [ 52 | 427.05511474609375, 53 | 622 54 | ], 55 | "flags": {}, 56 | "order": 1, 57 | "mode": 0, 58 | "inputs": [], 59 | "outputs": [ 60 | { 61 | "name": "input_folder", 62 | "type": "STRING", 63 | "links": [ 64 | 2 65 | ] 66 | }, 67 | { 68 | "name": "output_folder", 69 | "type": "STRING", 70 | "links": [ 71 | 3 72 | ] 73 | }, 74 | { 75 | "name": "processed_input_images", 76 | "type": "STRING", 77 | "links": null 78 | }, 79 | { 80 | "name": "processed_output_meshes", 81 | "type": "STRING", 82 | "links": null 83 | } 84 | ], 85 | "properties": { 86 | "aux_id": "visualbruno/ComfyUI-Hunyuan3d-2-1", 87 | "ver": "a1133c7ff88dd2b8c6a85344ffe7acbaa58ec8d4", 88 | "Node name for S&R": "Hy3D21MeshGenerationBatch", 89 | "widget_ue_connectable": {} 90 | }, 91 | "widgets_values": [ 92 | "C:\\Travaux\\Test", 93 | "C:\\Travaux\\Test\\3D", 94 | "Hunyuan3D-vae-v2-1-fp16.ckpt", 95 | "hunyuan3d-dit-v2-1-fp16.ckpt", 96 | 50, 97 | 7.5, 98 | "sdpa", 99 | 1.01, 100 | 384, 101 | 128000, 102 | 0, 103 | "dmc", 104 | true, 105 | 200000, 106 | 1388, 107 | "randomize", 108 | true, 109 | "obj", 
110 | false, 111 | true, 112 | true, 113 | false 114 | ] 115 | }, 116 | { 117 | "id": 5, 118 | "type": "Hy3D21GenerateMultiViewsBatch", 119 | "pos": [ 120 | -167.3360137939453, 121 | 428.5770568847656 122 | ], 123 | "size": [ 124 | 592.5423583984375, 125 | 464.7333679199219 126 | ], 127 | "flags": {}, 128 | "order": 2, 129 | "mode": 0, 130 | "inputs": [ 131 | { 132 | "name": "camera_config", 133 | "type": "HY3D21CAMERA", 134 | "link": 1 135 | }, 136 | { 137 | "name": "input_images_folder", 138 | "shape": 7, 139 | "type": "STRING", 140 | "widget": { 141 | "name": "input_images_folder" 142 | }, 143 | "link": 2 144 | }, 145 | { 146 | "name": "input_meshes_folder", 147 | "shape": 7, 148 | "type": "STRING", 149 | "widget": { 150 | "name": "input_meshes_folder" 151 | }, 152 | "link": 3 153 | } 154 | ], 155 | "outputs": [ 156 | { 157 | "name": "processed_meshes", 158 | "type": "STRING", 159 | "links": null 160 | } 161 | ], 162 | "properties": { 163 | "aux_id": "visualbruno/ComfyUI-Hunyuan3d-2-1", 164 | "ver": "f966762862e112b35dfe2e846bfb153f0dd6cae4", 165 | "Node name for S&R": "Hy3D21GenerateMultiViewsBatch", 166 | "widget_ue_connectable": {} 167 | }, 168 | "widgets_values": [ 169 | "C:\\Travaux\\Test\\Meshes", 170 | 512, 171 | 10, 172 | 3, 173 | 2048, 174 | true, 175 | 411413629, 176 | "randomize", 177 | true, 178 | false, 179 | true, 180 | "CustomModel", 181 | "003_realSR_BSRGAN_DFO_s64w8_SwinIR-M_x4_GAN.pth", 182 | "", 183 | "", 184 | "", 185 | "" 186 | ] 187 | } 188 | ], 189 | "links": [ 190 | [ 191 | 1, 192 | 6, 193 | 0, 194 | 5, 195 | 0, 196 | "HY3D21CAMERA" 197 | ], 198 | [ 199 | 2, 200 | 4, 201 | 0, 202 | 5, 203 | 1, 204 | "STRING" 205 | ], 206 | [ 207 | 3, 208 | 4, 209 | 1, 210 | 5, 211 | 2, 212 | "STRING" 213 | ] 214 | ], 215 | "groups": [], 216 | "config": {}, 217 | "extra": { 218 | "ue_links": [], 219 | "ds": { 220 | "scale": 0.826446280991736, 221 | "offset": [ 222 | 1020.0376340132016, 223 | -189.85887715515295 224 | ] 225 | }, 226 | "links_added_by_ue": [], 227 | "frontendVersion": "1.23.4" 228 | }, 229 | "version": 0.4 230 | } -------------------------------------------------------------------------------- /hy3dshape/configs/hunyuan3ddit-full-params-finetuning-flowmatching-dinog518-bf16-lr1e5-512.yaml: -------------------------------------------------------------------------------- 1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518" 2 | 3 | training: 4 | steps: 10_0000_0000 5 | use_amp: true 6 | amp_type: "bf16" 7 | base_lr: 1.e-5 8 | gradient_clip_val: 1.0 9 | gradient_clip_algorithm: "norm" 10 | every_n_train_steps: 2000 # 5000 11 | val_check_interval: 50 # 4096 12 | limit_val_batches: 16 13 | 14 | dataset: 15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule 16 | params: 17 | #! Base setting 18 | batch_size: 4 19 | num_workers: 8 20 | val_num_workers: 4 21 | 22 | # Data 23 | train_data_list: tools/mini_trainset/preprocessed 24 | val_data_list: tools/mini_trainset/preprocessed 25 | 26 | #! Image loading 27 | cond_stage_key: "image" # image / text / image_text 28 | image_size: 518 29 | mean: &mean [0.5, 0.5, 0.5] 30 | std: &std [0.5, 0.5, 0.5] 31 | 32 | #! Point cloud sampling 33 | pc_size: &pc_size 30720 34 | pc_sharpedge_size: &pc_sharpedge_size 30720 35 | sharpedge_label: &sharpedge_label true 36 | return_normal: true 37 | 38 | #! 
Augmentation 39 | padding: true 40 | 41 | model: 42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser 43 | params: 44 | first_stage_key: "surface" 45 | cond_stage_key: "image" 46 | scale_by_std: false 47 | z_scale_factor: &z_scale_factor 0.9990943042622529 # 1 / 1.0009065167661184 48 | torch_compile: false 49 | 50 | # ema_config: 51 | # ema_model: LitEma 52 | # ema_decay: 0.999 53 | # ema_inference: false 54 | 55 | first_stage_config: 56 | target: hy3dshape.models.autoencoders.ShapeVAE 57 | from_pretrained: tencent/Hunyuan3D-2.1 58 | params: 59 | num_latents: &num_latents 512 60 | embed_dim: 64 61 | num_freqs: 8 62 | include_pi: false 63 | heads: 16 64 | width: 1024 65 | point_feats: 4 66 | num_decoder_layers: 16 67 | pc_size: *pc_size 68 | pc_sharpedge_size: *pc_sharpedge_size 69 | qkv_bias: false 70 | qk_norm: true 71 | scale_factor: *z_scale_factor 72 | geo_decoder_mlp_expand_ratio: 4 73 | geo_decoder_downsample_ratio: 1 74 | geo_decoder_ln_post: true 75 | 76 | cond_stage_config: 77 | target: hy3dshape.models.conditioner.SingleImageEncoder 78 | params: 79 | main_image_encoder: 80 | type: DinoImageEncoder # dino giant 81 | kwargs: 82 | config: 83 | attention_probs_dropout_prob: 0.0 84 | drop_path_rate: 0.0 85 | hidden_act: gelu 86 | hidden_dropout_prob: 0.0 87 | hidden_size: 1536 88 | image_size: 518 89 | initializer_range: 0.02 90 | layer_norm_eps: 1.e-6 91 | layerscale_value: 1.0 92 | mlp_ratio: 4 93 | model_type: dinov2 94 | num_attention_heads: 24 95 | num_channels: 3 96 | num_hidden_layers: 40 97 | patch_size: 14 98 | qkv_bias: true 99 | torch_dtype: float32 100 | use_swiglu_ffn: true 101 | image_size: 518 102 | 103 | denoiser_cfg: 104 | target: hy3dshape.models.denoisers.hunyuan3ddit.Hunyuan3DDiT 105 | params: 106 | ckpt_path: ~/.cache/hy3dgen/tencent/Hunyuan3D-2-1-Shape/dit/model.fp16.ckpt 107 | input_size: *num_latents 108 | context_in_dim: 1536 109 | hidden_size: 1024 110 | mlp_ratio: 4.0 111 | num_heads: 16 112 | depth: 16 113 | depth_single_blocks: 32 114 | axes_dim: [64] 115 | theta: 10000 116 | qkv_bias: true 117 | use_pe: false 118 | force_norm_fp32: true 119 | 120 | scheduler_cfg: 121 | transport: 122 | target: hy3dshape.models.diffusion.transport.create_transport 123 | params: 124 | path_type: Linear 125 | prediction: velocity 126 | sampler: 127 | target: hy3dshape.models.diffusion.transport.Sampler 128 | params: {} 129 | ode_params: 130 | sampling_method: euler # dopri5 ... 
131 | num_steps: &num_steps 50 132 | 133 | optimizer_cfg: 134 | optimizer: 135 | target: torch.optim.AdamW 136 | params: 137 | betas: [0.9, 0.99] 138 | eps: 1.e-6 139 | weight_decay: 1.e-2 140 | 141 | scheduler: 142 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler 143 | params: 144 | warm_up_steps: 50 # 5000 145 | f_start: 1.e-6 146 | f_min: 1.e-3 147 | f_max: 1.0 148 | 149 | pipeline_cfg: 150 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline 151 | 152 | image_processor_cfg: 153 | target: hy3dshape.preprocessors.ImageProcessorV2 154 | params: {} 155 | 156 | callbacks: 157 | logger: 158 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger 159 | params: 160 | step_frequency: 100 # 10000 161 | num_samples: 1 162 | sample_times: 1 163 | mean: *mean 164 | std: *std 165 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01] 166 | octree_depth: 8 167 | num_chunks: 50000 168 | mc_level: 0.0 169 | 170 | file_loggers: 171 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger 172 | params: 173 | step_frequency: 50 # 5000 174 | test_data_path: "tools/mini_testset/images.json" 175 | -------------------------------------------------------------------------------- /hy3dshape/configs/hunyuandit-finetuning-flowmatching-dinog518-bf16-lr1e5-4096.yaml: -------------------------------------------------------------------------------- 1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518" 2 | 3 | training: 4 | steps: 10_0000_0000 5 | use_amp: true 6 | amp_type: "bf16" 7 | base_lr: 1e-5 8 | gradient_clip_val: 1.0 9 | gradient_clip_algorithm: "norm" 10 | every_n_train_steps: 2000 # 5000 11 | val_check_interval: 50 # 4096 12 | limit_val_batches: 16 13 | 14 | dataset: 15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule 16 | params: 17 | #! Base setting 18 | batch_size: 4 19 | num_workers: 8 20 | val_num_workers: 4 21 | 22 | # Data 23 | train_data_list: tools/mini_trainset/preprocessed 24 | val_data_list: tools/mini_trainset/preprocessed 25 | 26 | #! Image loading 27 | cond_stage_key: "image" # image / text / image_text 28 | image_size: 518 29 | mean: &mean [0.5, 0.5, 0.5] 30 | std: &std [0.5, 0.5, 0.5] 31 | 32 | #! Point cloud sampling 33 | pc_size: &pc_size 81920 34 | pc_sharpedge_size: &pc_sharpedge_size 0 35 | sharpedge_label: &sharpedge_label true 36 | return_normal: true 37 | 38 | #! 
Augmentation 39 | padding: true 40 | 41 | model: 42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser 43 | params: 44 | first_stage_key: "surface" 45 | cond_stage_key: "image" 46 | scale_by_std: false 47 | z_scale_factor: &z_scale_factor 1.0039506158752403 48 | torch_compile: false 49 | 50 | # ema_config: 51 | # ema_model: LitEma 52 | # ema_decay: 0.999 53 | # ema_inference: false 54 | 55 | first_stage_config: 56 | target: hy3dshape.models.autoencoders.ShapeVAE 57 | from_pretrained: tencent/Hunyuan3D-2.1 58 | params: 59 | num_latents: &num_latents 4096 60 | embed_dim: 64 61 | num_freqs: 8 62 | include_pi: false 63 | heads: 16 64 | width: 1024 65 | num_encoder_layers: 8 66 | num_decoder_layers: 16 67 | qkv_bias: false 68 | qk_norm: true 69 | scale_factor: *z_scale_factor 70 | geo_decoder_mlp_expand_ratio: 4 71 | geo_decoder_downsample_ratio: 1 72 | geo_decoder_ln_post: true 73 | point_feats: 4 74 | pc_size: *pc_size 75 | pc_sharpedge_size: *pc_sharpedge_size 76 | 77 | cond_stage_config: 78 | target: hy3dshape.models.conditioner.SingleImageEncoder 79 | params: 80 | main_image_encoder: 81 | type: DinoImageEncoder # dino large 82 | kwargs: 83 | config: 84 | attention_probs_dropout_prob: 0.0 85 | drop_path_rate: 0.0 86 | hidden_act: gelu 87 | hidden_dropout_prob: 0.0 88 | hidden_size: 1024 89 | image_size: 518 90 | initializer_range: 0.02 91 | layer_norm_eps: 1.e-6 92 | layerscale_value: 1.0 93 | mlp_ratio: 4 94 | model_type: dinov2 95 | num_attention_heads: 16 96 | num_channels: 3 97 | num_hidden_layers: 24 98 | patch_size: 14 99 | qkv_bias: true 100 | torch_dtype: float32 101 | use_swiglu_ffn: false 102 | image_size: 518 103 | use_cls_token: true 104 | 105 | 106 | denoiser_cfg: 107 | target: hy3dshape.models.denoisers.hunyuandit.HunYuanDiTPlain 108 | params: 109 | input_size: *num_latents 110 | in_channels: 64 111 | hidden_size: 2048 112 | context_dim: 1024 113 | depth: 21 114 | num_heads: 16 115 | qk_norm: true 116 | text_len: 1370 117 | with_decoupled_ca: false 118 | use_attention_pooling: false 119 | qk_norm_type: 'rms' 120 | qkv_bias: false 121 | use_pos_emb: false 122 | num_moe_layers: 6 123 | num_experts: 8 124 | moe_top_k: 2 125 | 126 | scheduler_cfg: 127 | transport: 128 | target: hy3dshape.models.diffusion.transport.create_transport 129 | params: 130 | path_type: Linear 131 | prediction: velocity 132 | sampler: 133 | target: hy3dshape.models.diffusion.transport.Sampler 134 | params: {} 135 | ode_params: 136 | sampling_method: euler # dopri5 ... 
137 | num_steps: &num_steps 50 138 | 139 | optimizer_cfg: 140 | optimizer: 141 | target: torch.optim.AdamW 142 | params: 143 | betas: [0.9, 0.99] 144 | eps: 1.e-6 145 | weight_decay: 1.e-2 146 | 147 | scheduler: 148 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler 149 | params: 150 | warm_up_steps: 50 # 5000 151 | f_start: 1.e-6 152 | f_min: 1.e-3 153 | f_max: 1.0 154 | 155 | pipeline_cfg: 156 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline 157 | 158 | image_processor_cfg: 159 | target: hy3dshape.preprocessors.ImageProcessorV2 160 | params: {} 161 | 162 | callbacks: 163 | logger: 164 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger 165 | params: 166 | step_frequency: 100 # 10000 167 | num_samples: 1 168 | sample_times: 1 169 | mean: *mean 170 | std: *std 171 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01] 172 | octree_depth: 8 173 | num_chunks: 50000 174 | mc_level: 0.0 175 | 176 | file_loggers: 177 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger 178 | params: 179 | step_frequency: 50 # 5000 180 | test_data_path: "tools/mini_testset/images.json" 181 | -------------------------------------------------------------------------------- /hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-4096.yaml: -------------------------------------------------------------------------------- 1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518" 2 | 3 | training: 4 | steps: 10_0000_0000 5 | use_amp: true 6 | amp_type: "bf16" 7 | base_lr: 1e-4 8 | gradient_clip_val: 1.0 9 | gradient_clip_algorithm: "norm" 10 | every_n_train_steps: 2000 # 5000 11 | val_check_interval: 50 # 4096 12 | limit_val_batches: 16 13 | 14 | dataset: 15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule 16 | params: 17 | #! Base setting 18 | batch_size: 2 19 | num_workers: 8 20 | val_num_workers: 4 21 | 22 | # Data 23 | train_data_list: tools/mini_trainset/preprocessed 24 | val_data_list: tools/mini_trainset/preprocessed 25 | 26 | #! Image loading 27 | cond_stage_key: "image" # image / text / image_text 28 | image_size: 518 29 | mean: &mean [0.5, 0.5, 0.5] 30 | std: &std [0.5, 0.5, 0.5] 31 | 32 | #! Point cloud sampling 33 | pc_size: &pc_size 81920 34 | pc_sharpedge_size: &pc_sharpedge_size 0 35 | sharpedge_label: &sharpedge_label true 36 | return_normal: true 37 | 38 | #! 
Augmentation 39 | padding: true 40 | 41 | model: 42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser 43 | params: 44 | first_stage_key: "surface" 45 | cond_stage_key: "image" 46 | scale_by_std: false 47 | z_scale_factor: &z_scale_factor 1.0039506158752403 48 | torch_compile: false 49 | 50 | # ema_config: 51 | # ema_model: LitEma 52 | # ema_decay: 0.999 53 | # ema_inference: false 54 | 55 | first_stage_config: 56 | target: hy3dshape.models.autoencoders.ShapeVAE 57 | from_pretrained: tencent/Hunyuan3D-2.1 58 | params: 59 | num_latents: &num_latents 4096 60 | embed_dim: 64 61 | num_freqs: 8 62 | include_pi: false 63 | heads: 16 64 | width: 1024 65 | num_encoder_layers: 8 66 | num_decoder_layers: 16 67 | qkv_bias: false 68 | qk_norm: true 69 | scale_factor: *z_scale_factor 70 | geo_decoder_mlp_expand_ratio: 4 71 | geo_decoder_downsample_ratio: 1 72 | geo_decoder_ln_post: true 73 | point_feats: 4 74 | pc_size: *pc_size 75 | pc_sharpedge_size: *pc_sharpedge_size 76 | 77 | cond_stage_config: 78 | target: hy3dshape.models.conditioner.SingleImageEncoder 79 | params: 80 | main_image_encoder: 81 | type: DinoImageEncoder # dino large 82 | kwargs: 83 | config: 84 | attention_probs_dropout_prob: 0.0 85 | drop_path_rate: 0.0 86 | hidden_act: gelu 87 | hidden_dropout_prob: 0.0 88 | hidden_size: 1024 89 | image_size: 518 90 | initializer_range: 0.02 91 | layer_norm_eps: 1.e-6 92 | layerscale_value: 1.0 93 | mlp_ratio: 4 94 | model_type: dinov2 95 | num_attention_heads: 16 96 | num_channels: 3 97 | num_hidden_layers: 24 98 | patch_size: 14 99 | qkv_bias: true 100 | torch_dtype: float32 101 | use_swiglu_ffn: false 102 | image_size: 518 103 | use_cls_token: true 104 | 105 | 106 | denoiser_cfg: 107 | target: hy3dshape.models.denoisers.hunyuandit.HunYuanDiTPlain 108 | params: 109 | input_size: *num_latents 110 | in_channels: 64 111 | hidden_size: 2048 112 | context_dim: 1024 113 | depth: 11 114 | num_heads: 16 115 | qk_norm: true 116 | text_len: 1370 117 | with_decoupled_ca: false 118 | use_attention_pooling: false 119 | qk_norm_type: 'rms' 120 | qkv_bias: false 121 | use_pos_emb: false 122 | num_moe_layers: 6 123 | num_experts: 8 124 | moe_top_k: 2 125 | 126 | scheduler_cfg: 127 | transport: 128 | target: hy3dshape.models.diffusion.transport.create_transport 129 | params: 130 | path_type: Linear 131 | prediction: velocity 132 | sampler: 133 | target: hy3dshape.models.diffusion.transport.Sampler 134 | params: {} 135 | ode_params: 136 | sampling_method: euler # dopri5 ... 
137 | num_steps: &num_steps 50 138 | 139 | optimizer_cfg: 140 | optimizer: 141 | target: torch.optim.AdamW 142 | params: 143 | betas: [0.9, 0.99] 144 | eps: 1.e-6 145 | weight_decay: 1.e-2 146 | 147 | scheduler: 148 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler 149 | params: 150 | warm_up_steps: 50 # 5000 151 | f_start: 1.e-6 152 | f_min: 1.e-3 153 | f_max: 1.0 154 | 155 | pipeline_cfg: 156 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline 157 | 158 | image_processor_cfg: 159 | target: hy3dshape.preprocessors.ImageProcessorV2 160 | params: {} 161 | 162 | callbacks: 163 | logger: 164 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger 165 | params: 166 | step_frequency: 100 # 10000 167 | num_samples: 1 168 | sample_times: 1 169 | mean: *mean 170 | std: *std 171 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01] 172 | octree_depth: 8 173 | num_chunks: 50000 174 | mc_level: 0.0 175 | 176 | file_loggers: 177 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger 178 | params: 179 | step_frequency: 50 # 5000 180 | test_data_path: "tools/mini_testset/images.json" 181 | -------------------------------------------------------------------------------- /hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-512.yaml: -------------------------------------------------------------------------------- 1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518" 2 | 3 | training: 4 | steps: 10_0000_0000 5 | use_amp: true 6 | amp_type: "bf16" 7 | base_lr: 1e-4 8 | gradient_clip_val: 1.0 9 | gradient_clip_algorithm: "norm" 10 | every_n_train_steps: 2000 # 5000 11 | val_check_interval: 50 # 4096 12 | limit_val_batches: 16 13 | 14 | dataset: 15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule 16 | params: 17 | #! Base setting 18 | batch_size: 2 19 | num_workers: 8 20 | val_num_workers: 4 21 | 22 | # Data 23 | train_data_list: tools/mini_trainset/preprocessed 24 | val_data_list: tools/mini_trainset/preprocessed 25 | 26 | #! Image loading 27 | cond_stage_key: "image" # image / text / image_text 28 | image_size: 518 29 | mean: &mean [0.5, 0.5, 0.5] 30 | std: &std [0.5, 0.5, 0.5] 31 | 32 | #! Point cloud sampling 33 | pc_size: &pc_size 81920 34 | pc_sharpedge_size: &pc_sharpedge_size 0 35 | sharpedge_label: &sharpedge_label true 36 | return_normal: true 37 | 38 | #! 
Augmentation 39 | padding: true 40 | 41 | model: 42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser 43 | params: 44 | first_stage_key: "surface" 45 | cond_stage_key: "image" 46 | scale_by_std: false 47 | z_scale_factor: &z_scale_factor 1.0039506158752403 48 | torch_compile: false 49 | 50 | # ema_config: 51 | # ema_model: LitEma 52 | # ema_decay: 0.999 53 | # ema_inference: false 54 | 55 | first_stage_config: 56 | target: hy3dshape.models.autoencoders.ShapeVAE 57 | from_pretrained: tencent/Hunyuan3D-2.1 58 | params: 59 | num_latents: &num_latents 512 60 | embed_dim: 64 61 | num_freqs: 8 62 | include_pi: false 63 | heads: 16 64 | width: 1024 65 | num_encoder_layers: 8 66 | num_decoder_layers: 16 67 | qkv_bias: false 68 | qk_norm: true 69 | scale_factor: *z_scale_factor 70 | geo_decoder_mlp_expand_ratio: 4 71 | geo_decoder_downsample_ratio: 1 72 | geo_decoder_ln_post: true 73 | point_feats: 4 74 | pc_size: *pc_size 75 | pc_sharpedge_size: *pc_sharpedge_size 76 | 77 | cond_stage_config: 78 | target: hy3dshape.models.conditioner.SingleImageEncoder 79 | params: 80 | main_image_encoder: 81 | type: DinoImageEncoder # dino large 82 | kwargs: 83 | config: 84 | attention_probs_dropout_prob: 0.0 85 | drop_path_rate: 0.0 86 | hidden_act: gelu 87 | hidden_dropout_prob: 0.0 88 | hidden_size: 1024 89 | image_size: 518 90 | initializer_range: 0.02 91 | layer_norm_eps: 1.e-6 92 | layerscale_value: 1.0 93 | mlp_ratio: 4 94 | model_type: dinov2 95 | num_attention_heads: 16 96 | num_channels: 3 97 | num_hidden_layers: 24 98 | patch_size: 14 99 | qkv_bias: true 100 | torch_dtype: float32 101 | use_swiglu_ffn: false 102 | image_size: 518 103 | use_cls_token: true 104 | 105 | 106 | denoiser_cfg: 107 | target: hy3dshape.models.denoisers.hunyuandit.HunYuanDiTPlain 108 | params: 109 | input_size: *num_latents 110 | in_channels: 64 111 | hidden_size: 768 112 | context_dim: 1024 113 | depth: 6 114 | num_heads: 12 115 | qk_norm: true 116 | text_len: 1370 117 | with_decoupled_ca: false 118 | use_attention_pooling: false 119 | qk_norm_type: 'rms' 120 | qkv_bias: false 121 | use_pos_emb: false 122 | num_moe_layers: 3 123 | num_experts: 4 124 | moe_top_k: 2 125 | 126 | scheduler_cfg: 127 | transport: 128 | target: hy3dshape.models.diffusion.transport.create_transport 129 | params: 130 | path_type: Linear 131 | prediction: velocity 132 | sampler: 133 | target: hy3dshape.models.diffusion.transport.Sampler 134 | params: {} 135 | ode_params: 136 | sampling_method: euler # dopri5 ... 
137 | num_steps: &num_steps 50 138 | 139 | optimizer_cfg: 140 | optimizer: 141 | target: torch.optim.AdamW 142 | params: 143 | betas: [0.9, 0.99] 144 | eps: 1.e-6 145 | weight_decay: 1.e-2 146 | 147 | scheduler: 148 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler 149 | params: 150 | warm_up_steps: 50 # 5000 151 | f_start: 1.e-6 152 | f_min: 1.e-3 153 | f_max: 1.0 154 | 155 | pipeline_cfg: 156 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline 157 | 158 | image_processor_cfg: 159 | target: hy3dshape.preprocessors.ImageProcessorV2 160 | params: {} 161 | 162 | callbacks: 163 | logger: 164 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger 165 | params: 166 | step_frequency: 100 # 10000 167 | num_samples: 1 168 | sample_times: 1 169 | mean: *mean 170 | std: *std 171 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01] 172 | octree_depth: 8 173 | num_chunks: 50000 174 | mc_level: 0.0 175 | 176 | file_loggers: 177 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger 178 | params: 179 | step_frequency: 50 # 5000 180 | test_data_path: "tools/mini_testset/images.json" 181 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🌀 ComfyUI Wrapper for [Hunyuan3D-2.1](https://github.com/Tencent-Hunyuan/Hunyuan3D-2.1) 2 | 3 | > **ComfyUI integration** for Tencent's powerful **Hunyuan3D-2.1** model. Supports textured 3D generation with optional high-quality UV mapping. 4 | 5 | --- 6 | 7 | ## 📦 Repository & Models 8 | 9 | * **GitHub:** [Tencent-Hunyuan/Hunyuan3D-2.1](https://github.com/Tencent-Hunyuan/Hunyuan3D-2.1) 10 | * **Model Weights (HuggingFace):** 11 | 👉 [Main page](https://huggingface.co/tencent/Hunyuan3D-2.1/tree/main) 12 | 13 | ### 🔧 Required Checkpoints 14 | 15 | Place the following checkpoints into the corresponding folders under your `ComfyUI` directory: 16 | 17 | ``` 18 | ComfyUI/ 19 | ├── models/ 20 | │ ├── diffusion_models/ 21 | │ │ └── hunyuan3d-dit-v2-1.ckpt 22 | │ ├── vae/ 23 | │ │ └── hunyuan3d-vae-v2-1.ckpt 24 | ``` 25 | 26 | --- 27 | 28 | ## ⚙️ Installation Guide 29 | 30 | > Tested on **Windows 11** with **Python 3.12** and **Torch >= 2.6.0 + cu126**. Compatible with the latest ComfyUI Portable release. 31 | 32 | ### 1. Install Python Dependencies 33 | 34 | For a standard Python environment: 35 | 36 | ```bash 37 | python -m pip install -r ComfyUI/custom_nodes/ComfyUI-Hunyuan3d-2-1/requirements.txt 38 | ``` 39 | 40 | For **ComfyUI Portable**: 41 | 42 | ```bash 43 | python_embeded\python.exe -m pip install -r ComfyUI\custom_nodes\ComfyUI-Hunyuan3d-2-1\requirements.txt 44 | ``` 45 | 46 | --- 47 | 48 | ### 2. Install or Compile Texture Generation Modules 49 | 50 | Two critical C++ extensions need to be installed: the **custom rasterizer** and the **differentiable renderer**.
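Whichever option you choose below, you can afterwards verify that both extensions import correctly from the Python environment ComfyUI runs on. This is only a minimal sanity check; the module names `custom_rasterizer` and `mesh_inpaint_processor` are assumed from the wheel filenames and the compile script, so adjust them if your build differs.

```bash
# Minimal import check (ComfyUI Portable shown; use plain `python` for a standard environment)
python_embeded\python.exe -c "import custom_rasterizer, mesh_inpaint_processor; print('texture modules OK')"
```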
51 | 52 | #### Option A: Use Precompiled Wheels (Recommended) 53 | 54 | #### Custom Rasterizer 55 | 56 | You will find precompiled wheels in the `hy3dpaint\custom_rasterizer\dist` folder 57 | 58 | For standard Python: 59 | 60 | For example, if you are on Python 3.12: 61 | 62 | ```bash 63 | pip install custom_rasterizer-0.1-cp312-cp312-win_amd64.whl 64 | ``` 65 | 66 | For ComfyUI Portable: 67 | 68 | ```bash 69 | python_embeded\python.exe -m pip install ComfyUI\custom_nodes\ComfyUI-Hunyuan3d-2-1\hy3dpaint\custom_rasterizer\dist\custom_rasterizer-0.1-cp312-cp312-win_amd64.whl 70 | ``` 71 | 72 | #### Differentiable Renderer 73 | 74 | You will find precompiled wheels in the `hy3dpaint\DifferentiableRenderer\dist` folder 75 | 76 | For standard Python: 77 | 78 | For example, if you are on Python 3.12: 79 | 80 | ```bash 81 | pip install mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl 82 | ``` 83 | 84 | For ComfyUI Portable: 85 | 86 | ```bash 87 | python_embeded\python.exe -m pip install ComfyUI\custom_nodes\ComfyUI-Hunyuan3d-2-1\hy3dpaint\DifferentiableRenderer\dist\mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl 88 | ``` 89 | 90 | --- 91 | 92 | #### Option B: Manual Compilation (for advanced users) 93 | 94 | ```bash 95 | # Compile custom rasterizer 96 | cd ComfyUI/custom_nodes/ComfyUI-Hunyuan3d-2-1/hy3dpaint/custom_rasterizer 97 | python setup.py install 98 | 99 | # Compile differentiable renderer 100 | cd ../DifferentiableRenderer 101 | python setup.py install 102 | ``` 103 | 104 | --- 105 | 106 | ## 🩻 Optional: Fix UV Wrapping for High Poly Meshes (Patched Xatlas) 107 | 108 | This upgrade improves UV unwrapping stability for complex meshes. 109 | 110 | ```bash 111 | # Step 1: Uninstall existing xatlas 112 | python_embeded\python.exe -m pip uninstall xatlas 113 | 114 | # Step 2: Clone updated xatlas-python wrapper 115 | cd ComfyUI_windows_portable 116 | git clone --recursive https://github.com/mworchel/xatlas-python.git 117 | 118 | # Step 3: Replace internal xatlas source 119 | cd xatlas-python\extern 120 | del /s /q xatlas 121 | git clone --recursive https://github.com/jpcy/xatlas 122 | 123 | # Step 4: Patch source file 124 | # In xatlas-python/extern/xatlas/source/xatlas/xatlas.cpp: 125 | # Line 6774: change `#if 0` → `//#if 0` 126 | # Line 6778: change `#endif` → `//#endif` 127 | 128 | # Step 5: Install patched xatlas wrapper 129 | cd ../../..
130 | python_embeded\python.exe -m pip install .\xatlas-python\ 131 | ``` 132 | Alternatively, the same procedure as a single PowerShell command: 133 | ```powershell 134 | python_embeded\python.exe -m pip uninstall -y xatlas; ` 135 | cd ComfyUI_windows_portable; ` 136 | if (Test-Path xatlas-python) { Remove-Item xatlas-python -Recurse -Force }; ` 137 | git clone --recursive https://github.com/mworchel/xatlas-python.git; ` 138 | cd xatlas-python\extern; ` 139 | if (Test-Path xatlas) { Remove-Item xatlas -Recurse -Force }; ` 140 | git clone --recursive https://github.com/jpcy/xatlas; ` 141 | (Get-Content .\xatlas\source\xatlas\xatlas.cpp) -replace '#if 0', '//#if 0' -replace '#endif', '//#endif' | Set-Content .\xatlas\source\xatlas\xatlas.cpp; ` 142 | cd ..\..\..; ` 143 | python_embeded\python.exe -m pip install .\xatlas-python\ 144 | ``` 145 | 146 | --- 147 | 148 | ## 📂 Directory Overview 149 | 150 | ``` 151 | ComfyUI/ 152 | ├── custom_nodes/ 153 | │ └── ComfyUI-Hunyuan3d-2-1/ 154 | │ ├── hy3dpaint/ 155 | │ │ ├── custom_rasterizer/ # Custom rasterizer module 156 | │ │ │ ├── setup.py 157 | │ │ │ └── dist/ # Precompiled wheels 158 | │ │ ├── DifferentiableRenderer/ # Differentiable renderer 159 | │ │ │ ├── setup.py 160 | │ │ │ └── dist/ # Precompiled wheels 161 | ├── models/ 162 | │ ├── diffusion_models/ 163 | │ │ └── [hunyuan3d-dit-v2-1.ckpt](https://huggingface.co/tencent/Hunyuan3D-2.1/tree/main/hunyuan3d-dit-v2-1) 164 | │ └── vae/ 165 | │ └── [hunyuan3d-vae-v2-1.ckpt](https://huggingface.co/tencent/Hunyuan3D-2.1/tree/main/hunyuan3d-vae-v2-1) 166 | ├── xatlas-python/ # Patched UV unwrapper (optional) 167 | │ └── extern/ 168 | │ └── xatlas/ 169 | ``` 170 | 171 | --- 172 | 173 | ## 🙏 Acknowledgements 174 | 175 | * **[kijai](https://github.com/kijai/ComfyUI-Hunyuan3DWrapper)** — Original wrapper developer for Hunyuan3D v2.0 176 | * TrueMike, Agee, Palindar, and the vibrant Discord community 177 | * Tencent team for the incredible [Hunyuan3D-2.1](https://github.com/Tencent-Hunyuan/Hunyuan3D-2.1) model 178 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/data/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved. 4 | # This file is part of the WebDataset library. 5 | # See the LICENSE file for licensing terms (BSD-style).
6 | 7 | 8 | """Miscellaneous utility functions.""" 9 | 10 | import importlib 11 | import itertools as itt 12 | import os 13 | import re 14 | import sys 15 | from typing import Any, Callable, Iterator, Union 16 | import torch 17 | import numpy as np 18 | 19 | 20 | def make_seed(*args): 21 | seed = 0 22 | for arg in args: 23 | seed = (seed * 31 + hash(arg)) & 0x7FFFFFFF 24 | return seed 25 | 26 | 27 | class PipelineStage: 28 | def invoke(self, *args, **kw): 29 | raise NotImplementedError 30 | 31 | 32 | def identity(x: Any) -> Any: 33 | """Return the argument as is.""" 34 | return x 35 | 36 | 37 | def safe_eval(s: str, expr: str = "{}"): 38 | """Evaluate the given expression more safely.""" 39 | if re.sub("[^A-Za-z0-9_]", "", s) != s: 40 | raise ValueError(f"safe_eval: illegal characters in: '{s}'") 41 | return eval(expr.format(s)) 42 | 43 | 44 | def lookup_sym(sym: str, modules: list): 45 | """Look up a symbol in a list of modules.""" 46 | for mname in modules: 47 | module = importlib.import_module(mname, package="webdataset") 48 | result = getattr(module, sym, None) 49 | if result is not None: 50 | return result 51 | return None 52 | 53 | 54 | def repeatedly0( 55 | loader: Iterator, nepochs: int = sys.maxsize, nbatches: int = sys.maxsize 56 | ): 57 | """Repeatedly returns batches from a DataLoader.""" 58 | for _ in range(nepochs): 59 | yield from itt.islice(loader, nbatches) 60 | 61 | 62 | def guess_batchsize(batch: Union[tuple, list]): 63 | """Guess the batch size by looking at the length of the first element in a tuple.""" 64 | return len(batch[0]) 65 | 66 | 67 | def repeatedly( 68 | source: Iterator, 69 | nepochs: int = None, 70 | nbatches: int = None, 71 | nsamples: int = None, 72 | batchsize: Callable[..., int] = guess_batchsize, 73 | ): 74 | """Repeatedly yield samples from an iterator.""" 75 | epoch = 0 76 | batch = 0 77 | total = 0 78 | while True: 79 | for sample in source: 80 | yield sample 81 | batch += 1 82 | if nbatches is not None and batch >= nbatches: 83 | return 84 | if nsamples is not None: 85 | total += guess_batchsize(sample) 86 | if total >= nsamples: 87 | return 88 | epoch += 1 89 | if nepochs is not None and epoch >= nepochs: 90 | return 91 | 92 | 93 | def pytorch_worker_info(group=None): # sourcery skip: use-contextlib-suppress 94 | """Return node and worker info for PyTorch and some distributed environments.""" 95 | rank = 0 96 | world_size = 1 97 | worker = 0 98 | num_workers = 1 99 | if "RANK" in os.environ and "WORLD_SIZE" in os.environ: 100 | rank = int(os.environ["RANK"]) 101 | world_size = int(os.environ["WORLD_SIZE"]) 102 | else: 103 | try: 104 | import torch.distributed 105 | 106 | if torch.distributed.is_available() and torch.distributed.is_initialized(): 107 | group = group or torch.distributed.group.WORLD 108 | rank = torch.distributed.get_rank(group=group) 109 | world_size = torch.distributed.get_world_size(group=group) 110 | except ModuleNotFoundError: 111 | pass 112 | if "WORKER" in os.environ and "NUM_WORKERS" in os.environ: 113 | worker = int(os.environ["WORKER"]) 114 | num_workers = int(os.environ["NUM_WORKERS"]) 115 | else: 116 | try: 117 | import torch.utils.data 118 | 119 | worker_info = torch.utils.data.get_worker_info() 120 | if worker_info is not None: 121 | worker = worker_info.id 122 | num_workers = worker_info.num_workers 123 | except ModuleNotFoundError: 124 | pass 125 | 126 | return rank, world_size, worker, num_workers 127 | 128 | 129 | def pytorch_worker_seed(group=None): 130 | """Compute a distinct, deterministic RNG seed for each 
worker and node.""" 131 | rank, world_size, worker, num_workers = pytorch_worker_info(group=group) 132 | return rank * 1000 + worker 133 | 134 | def worker_init_fn(_): 135 | worker_info = torch.utils.data.get_worker_info() 136 | worker_id = worker_info.id 137 | 138 | # dataset = worker_info.dataset 139 | # split_size = dataset.num_records // worker_info.num_workers 140 | # # reset num_records to the true number to retain reliable length information 141 | # dataset.sample_ids = dataset.valid_ids[worker_id * split_size:(worker_id + 1) * split_size] 142 | # current_id = np.random.choice(len(np.random.get_state()[1]), 1) 143 | # return np.random.seed(np.random.get_state()[1][current_id] + worker_id) 144 | 145 | return np.random.seed(np.random.get_state()[1][0] + worker_id) 146 | 147 | 148 | def collation_fn(samples, combine_tensors=True, combine_scalars=True): 149 | """ 150 | 151 | Args: 152 | samples (list[dict]): 153 | combine_tensors: 154 | combine_scalars: 155 | 156 | Returns: 157 | 158 | """ 159 | 160 | result = {} 161 | 162 | keys = samples[0].keys() 163 | 164 | for key in keys: 165 | result[key] = [] 166 | 167 | for sample in samples: 168 | for key in keys: 169 | val = sample[key] 170 | result[key].append(val) 171 | 172 | for key in keys: 173 | val_list = result[key] 174 | if isinstance(val_list[0], (int, float)): 175 | if combine_scalars: 176 | result[key] = np.array(result[key]) 177 | 178 | elif isinstance(val_list[0], torch.Tensor): 179 | if combine_tensors: 180 | result[key] = torch.stack(val_list) 181 | 182 | elif isinstance(val_list[0], np.ndarray): 183 | if combine_tensors: 184 | result[key] = np.stack(val_list) 185 | 186 | return result 187 | -------------------------------------------------------------------------------- /hy3dpaint/utils/multiview_utils.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
14 | 15 | import os 16 | import torch 17 | import random 18 | import numpy as np 19 | from PIL import Image 20 | from typing import List 21 | import huggingface_hub 22 | from omegaconf import OmegaConf 23 | from diffusers import DiffusionPipeline 24 | from diffusers import EulerAncestralDiscreteScheduler, DDIMScheduler, UniPCMultistepScheduler 25 | from ..hunyuanpaintpbr.pipeline import HunyuanPaintPipeline 26 | 27 | 28 | class multiviewDiffusionNet: 29 | def __init__(self, config) -> None: 30 | self.device = config.device 31 | 32 | cfg_path = config.multiview_cfg_path 33 | custom_pipeline = config.custom_pipeline 34 | cfg = OmegaConf.load(cfg_path) 35 | self.cfg = cfg 36 | self.mode = self.cfg.model.params.stable_diffusion_config.custom_pipeline[2:] 37 | 38 | model_path = huggingface_hub.snapshot_download( 39 | repo_id=config.multiview_pretrained_path, 40 | allow_patterns=["hunyuan3d-paintpbr-v2-1/*"], 41 | ) 42 | 43 | model_path = os.path.join(model_path, "hunyuan3d-paintpbr-v2-1") 44 | 45 | pipeline = HunyuanPaintPipeline.from_pretrained( 46 | model_path, 47 | torch_dtype=torch.float16 48 | ) 49 | 50 | pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config, timestep_spacing="trailing") 51 | pipeline.set_progress_bar_config(disable=False) 52 | pipeline.eval() 53 | setattr(pipeline, "view_size", cfg.model.params.get("view_size", 320)) 54 | pipeline.enable_model_cpu_offload() 55 | self.pipeline = pipeline.to(self.device) 56 | self.pipeline.enable_vae_slicing() 57 | self.pipeline.enable_vae_tiling() 58 | 59 | if hasattr(self.pipeline.unet, "use_dino") and self.pipeline.unet.use_dino: 60 | from ..hunyuanpaintpbr.unet.modules import Dino_v2 61 | self.dino_v2 = Dino_v2(config.dino_ckpt_path).to(torch.float16) 62 | self.dino_v2 = self.dino_v2.to(self.device) 63 | 64 | def seed_everything(self, seed): 65 | random.seed(seed) 66 | np.random.seed(seed) 67 | torch.manual_seed(seed) 68 | os.environ["PL_GLOBAL_SEED"] = str(seed) 69 | 70 | @torch.no_grad() 71 | def __call__(self, images, conditions, prompt=None, custom_view_size=None, resize_input=False, num_steps=10, guidance_scale=3.0, seed=0): 72 | pils = self.forward_one( 73 | images, conditions, prompt=prompt, custom_view_size=custom_view_size, resize_input=resize_input, num_steps=num_steps, guidance_scale=guidance_scale, seed=seed 74 | ) 75 | return pils 76 | 77 | def forward_one(self, input_images, control_images, prompt=None, custom_view_size=None, resize_input=False, num_steps=10, guidance_scale=3.0, seed=0): 78 | self.seed_everything(seed) 79 | custom_view_size = custom_view_size if custom_view_size is not None else self.pipeline.view_size 80 | 81 | if not isinstance(input_images, List): 82 | input_images = [input_images] 83 | 84 | if not resize_input: 85 | input_images = [ 86 | input_image.resize((self.pipeline.view_size, self.pipeline.view_size)) for input_image in input_images 87 | ] 88 | else: 89 | input_images = [input_image.resize((custom_view_size, custom_view_size)) for input_image in input_images] 90 | 91 | for i in range(len(control_images)): 92 | control_images[i] = control_images[i].resize((custom_view_size, custom_view_size)) 93 | if control_images[i].mode == "L": 94 | control_images[i] = control_images[i].point(lambda x: 255 if x > 1 else 0, mode="1") 95 | kwargs = dict(generator=torch.Generator(device=self.pipeline.device).manual_seed(0)) 96 | 97 | num_view = len(control_images) // 2 98 | normal_image = [[control_images[i] for i in range(num_view)]] 99 | position_image = [[control_images[i + 
num_view] for i in range(num_view)]] 100 | 101 | kwargs["width"] = custom_view_size 102 | kwargs["height"] = custom_view_size 103 | kwargs["num_in_batch"] = num_view 104 | kwargs["images_normal"] = normal_image 105 | kwargs["images_position"] = position_image 106 | 107 | if hasattr(self.pipeline.unet, "use_dino") and self.pipeline.unet.use_dino: 108 | dino_hidden_states = self.dino_v2(input_images[0]) 109 | kwargs["dino_hidden_states"] = dino_hidden_states 110 | 111 | sync_condition = None 112 | 113 | infer_steps_dict = { 114 | "EulerAncestralDiscreteScheduler": 10, 115 | "UniPCMultistepScheduler": 10, 116 | "DDIMScheduler": 10, 117 | "ShiftSNRScheduler": 10, 118 | } 119 | 120 | mvd_image = self.pipeline( 121 | input_images[0:1], 122 | num_inference_steps=num_steps, 123 | prompt=prompt, 124 | sync_condition=sync_condition, 125 | guidance_scale=guidance_scale, 126 | **kwargs, 127 | ).images 128 | 129 | if "pbr" in self.mode: 130 | mvd_image = {"albedo": mvd_image[:num_view], "mr": mvd_image[num_view:]} 131 | # mvd_image = {'albedo':mvd_image[:num_view]} 132 | else: 133 | mvd_image = {"hdr": mvd_image} 134 | 135 | return mvd_image 136 | -------------------------------------------------------------------------------- /hy3dpaint/utils/pipeline_utils.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
14 | 15 | import torch 16 | import numpy as np 17 | 18 | 19 | class ViewProcessor: 20 | def __init__(self, config, render): 21 | self.config = config 22 | self.render = render 23 | 24 | def render_normal_multiview(self, camera_elevs, camera_azims, use_abs_coor=True): 25 | normal_maps = [] 26 | for elev, azim in zip(camera_elevs, camera_azims): 27 | normal_map = self.render.render_normal(elev, azim, use_abs_coor=use_abs_coor, return_type="pl") 28 | normal_maps.append(normal_map) 29 | 30 | return normal_maps 31 | 32 | def render_position_multiview(self, camera_elevs, camera_azims): 33 | position_maps = [] 34 | for elev, azim in zip(camera_elevs, camera_azims): 35 | position_map = self.render.render_position(elev, azim, return_type="pl") 36 | position_maps.append(position_map) 37 | 38 | return position_maps 39 | 40 | def bake_view_selection( 41 | self, candidate_camera_elevs, candidate_camera_azims, candidate_view_weights, max_selected_view_num 42 | ): 43 | 44 | original_resolution = self.render.default_resolution 45 | self.render.set_default_render_resolution(1024) 46 | 47 | selected_camera_elevs = [] 48 | selected_camera_azims = [] 49 | selected_view_weights = [] 50 | selected_alpha_maps = [] 51 | viewed_tri_idxs = [] 52 | viewed_masks = [] 53 | 54 | # 计算每个三角片的面积 55 | face_areas = self.render.get_face_areas(from_one_index=True) 56 | total_area = face_areas.sum() 57 | face_area_ratios = face_areas / total_area 58 | 59 | candidate_view_num = len(candidate_camera_elevs) 60 | self.render.set_boundary_unreliable_scale(2) 61 | 62 | for elev, azim in zip(candidate_camera_elevs, candidate_camera_azims): 63 | viewed_tri_idx = self.render.render_alpha(elev, azim, return_type="np") 64 | viewed_tri_idxs.append(set(np.unique(viewed_tri_idx.flatten()))) 65 | viewed_masks.append(viewed_tri_idx[0, :, :, 0] > 0) 66 | 67 | is_selected = [False for _ in range(candidate_view_num)] 68 | total_viewed_tri_idxs = set() 69 | total_viewed_area = 0.0 70 | 71 | for idx in range(6): 72 | selected_camera_elevs.append(candidate_camera_elevs[idx]) 73 | selected_camera_azims.append(candidate_camera_azims[idx]) 74 | selected_view_weights.append(candidate_view_weights[idx]) 75 | selected_alpha_maps.append(viewed_masks[idx]) 76 | is_selected[idx] = True 77 | total_viewed_tri_idxs.update(viewed_tri_idxs[idx]) 78 | 79 | total_viewed_area = face_area_ratios[list(total_viewed_tri_idxs)].sum() 80 | for iter in range(max_selected_view_num - len(selected_view_weights)): 81 | max_inc = 0 82 | max_idx = -1 83 | 84 | for idx, (elev, azim, weight) in enumerate( 85 | zip(candidate_camera_elevs, candidate_camera_azims, candidate_view_weights) 86 | ): 87 | if is_selected[idx]: 88 | continue 89 | new_tri_idxs = viewed_tri_idxs[idx] - total_viewed_tri_idxs 90 | new_inc_area = face_area_ratios[list(new_tri_idxs)].sum() 91 | 92 | if new_inc_area > max_inc: 93 | max_inc = new_inc_area 94 | max_idx = idx 95 | 96 | if max_inc > 0.0001: 97 | is_selected[max_idx] = True 98 | selected_camera_elevs.append(candidate_camera_elevs[max_idx]) 99 | selected_camera_azims.append(candidate_camera_azims[max_idx]) 100 | selected_view_weights.append(candidate_view_weights[max_idx]) 101 | selected_alpha_maps.append(viewed_masks[max_idx]) 102 | total_viewed_tri_idxs = total_viewed_tri_idxs.union(viewed_tri_idxs[max_idx]) 103 | total_viewed_area += max_inc 104 | else: 105 | break 106 | 107 | self.render.set_default_render_resolution(original_resolution) 108 | 109 | return selected_camera_elevs, selected_camera_azims, selected_view_weights 110 | 111 | def 
bake_from_multiview(self, views, camera_elevs, camera_azims, view_weights): 112 | project_textures, project_weighted_cos_maps = [], [] 113 | project_boundary_maps = [] 114 | 115 | for view, camera_elev, camera_azim, weight in zip(views, camera_elevs, camera_azims, view_weights): 116 | project_texture, project_cos_map, project_boundary_map = self.render.back_project( 117 | view, camera_elev, camera_azim 118 | ) 119 | project_cos_map = weight * (project_cos_map**self.config.bake_exp) 120 | project_textures.append(project_texture) 121 | project_weighted_cos_maps.append(project_cos_map) 122 | project_boundary_maps.append(project_boundary_map) 123 | texture, ori_trust_map = self.render.fast_bake_texture(project_textures, project_weighted_cos_maps) 124 | return texture, ori_trust_map > 1e-8 125 | 126 | def texture_inpaint(self, texture, mask, vertex_inpaint=True, method="NS", default=None, ): 127 | if default is not None: 128 | mask = mask.astype(bool) 129 | inpaint_value = torch.tensor(default, dtype=texture.dtype, device=texture.device) 130 | texture[~mask] = inpaint_value 131 | else: 132 | texture_np = self.render.uv_inpaint(texture, mask, vertex_inpaint, method) 133 | texture = torch.tensor(texture_np / 255).float().to(texture.device) 134 | 135 | return texture 136 | -------------------------------------------------------------------------------- /hy3dpaint/src/data/dataloader/objaverse_loader_forTexturePBR.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
14 | 15 | import os 16 | import time 17 | import glob 18 | import json 19 | import random 20 | import numpy as np 21 | import torch 22 | from .loader_util import BaseDataset 23 | 24 | 25 | class TextureDataset(BaseDataset): 26 | 27 | def __init__( 28 | self, json_path, num_view=6, image_size=512, lighting_suffix_pool=["light_PL", "light_AL", "light_ENVMAP"] 29 | ): 30 | self.data = list() 31 | self.num_view = num_view 32 | self.image_size = image_size 33 | self.lighting_suffix_pool = lighting_suffix_pool 34 | if isinstance(json_path, str): 35 | json_path = [json_path] 36 | for jp in json_path: 37 | with open(jp) as f: 38 | self.data.extend(json.load(f)) 39 | print("============= length of dataset %d =============" % len(self.data)) 40 | 41 | def __getitem__(self, index): 42 | try_sleep_interval = 20 43 | total_try_num = 100 44 | cnt = try_sleep_interval * total_try_num 45 | # try: 46 | images_ref = list() 47 | images_albedo = list() 48 | images_mr = list() 49 | images_normal = list() 50 | images_position = list() 51 | bg_white = [1.0, 1.0, 1.0] 52 | bg_black = [0.0, 0.0, 0.0] 53 | bg_gray = [127 / 255.0, 127 / 255.0, 127 / 255.0] 54 | dirx = self.data[index] 55 | 56 | condition_dict = {} 57 | 58 | # 6view 59 | fix_num_view = self.num_view 60 | available_views = [] 61 | for ext in ["*_albedo.png", "*_albedo.jpg", "*_albedo.jpeg"]: 62 | available_views.extend(glob.glob(os.path.join(dirx, "render_tex", ext))) 63 | cond_images = ( 64 | glob.glob(os.path.join(dirx, "render_cond", "*.png")) 65 | + glob.glob(os.path.join(dirx, "render_cond", "*.jpg")) 66 | + glob.glob(os.path.join(dirx, "render_cond", "*.jpeg")) 67 | ) 68 | 69 | # 确保有足够的样本 70 | if len(available_views) < fix_num_view: 71 | print( 72 | f"Warning: Only {len(available_views)} views available, but {fix_num_view} requested." 73 | "Using all available views." 
74 | ) 75 | images_gen = available_views 76 | else: 77 | images_gen = random.sample(available_views, fix_num_view) 78 | 79 | if not cond_images: 80 | raise ValueError(f"No condition images found in {os.path.join(dirx, 'render_cond')}") 81 | ref_image_path = random.choice(cond_images) 82 | light_suffix = None 83 | for suffix in self.lighting_suffix_pool: 84 | if suffix in ref_image_path: 85 | light_suffix = suffix 86 | break 87 | if light_suffix is None: 88 | raise ValueError(f"light suffix not found in {ref_image_path}") 89 | ref_image_diff_light_path = random.choice( 90 | [ 91 | ref_image_path.replace(light_suffix, tar_suffix) 92 | for tar_suffix in self.lighting_suffix_pool 93 | if tar_suffix != light_suffix 94 | ] 95 | ) 96 | images_ref_paths = [ref_image_path, ref_image_diff_light_path] 97 | 98 | # Data aug 99 | bg_c_record = None 100 | for i, image_ref in enumerate(images_ref_paths): 101 | if random.random() < 0.6: 102 | bg_c = bg_gray 103 | else: 104 | if random.random() < 0.5: 105 | bg_c = bg_black 106 | else: 107 | bg_c = bg_white 108 | if i == 0: 109 | bg_c_record = bg_c 110 | image, alpha = self.load_image(image_ref, bg_c_record) 111 | image = self.augment_image(image, bg_c_record).float() 112 | images_ref.append(image) 113 | condition_dict["images_cond"] = torch.stack(images_ref, dim=0).float() 114 | 115 | for i, image_gen in enumerate(images_gen): 116 | images_albedo.append(self.augment_image(self.load_image(image_gen, bg_gray)[0], bg_gray)) 117 | images_mr.append( 118 | self.augment_image(self.load_image(image_gen.replace("_albedo", "_mr"), bg_gray)[0], bg_gray) 119 | ) 120 | images_normal.append( 121 | self.augment_image(self.load_image(image_gen.replace("_albedo", "_normal"), bg_gray)[0], bg_gray) 122 | ) 123 | images_position.append( 124 | self.augment_image(self.load_image(image_gen.replace("_albedo", "_pos"), bg_gray)[0], bg_gray) 125 | ) 126 | 127 | condition_dict["images_albedo"] = torch.stack(images_albedo, dim=0).float() 128 | condition_dict["images_mr"] = torch.stack(images_mr, dim=0).float() 129 | condition_dict["images_normal"] = torch.stack(images_normal, dim=0).float() 130 | condition_dict["images_position"] = torch.stack(images_position, dim=0).float() 131 | condition_dict["name"] = dirx # .replace('/', '_') 132 | return condition_dict # (N, 3, H, W) 133 | 134 | # except Exception as e: 135 | # print(e, self.data[index]) 136 | # # exit() 137 | 138 | 139 | if __name__ == "__main__": 140 | dataset = TextureDataset(json_path=["../../../train_examples/examples.json"]) 141 | print("images_cond", dataset[0]["images_cond"].shape) 142 | print("images_albedo", dataset[0]["images_albedo"].shape) 143 | print("images_mr", dataset[0]["images_mr"].shape) 144 | print("images_normal", dataset[0]["images_normal"].shape) 145 | print("images_position", dataset[0]["images_position"].shape) 146 | print("name", dataset[0]["name"]) 147 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/preprocessors.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 
5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import cv2 16 | import numpy as np 17 | import torch 18 | from PIL import Image 19 | from einops import repeat, rearrange 20 | 21 | 22 | def array_to_tensor(np_array): 23 | image_pt = torch.tensor(np_array).float() 24 | image_pt = image_pt / 255 * 2 - 1 25 | image_pt = rearrange(image_pt, "h w c -> c h w") 26 | image_pts = repeat(image_pt, "c h w -> b c h w", b=1) 27 | return image_pts 28 | 29 | 30 | class ImageProcessorV2: 31 | def __init__(self, size=512, border_ratio=None): 32 | self.size = size 33 | self.border_ratio = border_ratio 34 | 35 | @staticmethod 36 | def recenter(image, border_ratio: float = 0.2): 37 | """ recenter an image to leave some empty space at the image border. 38 | 39 | Args: 40 | image (ndarray): input image, float/uint8 [H, W, 3/4] 41 | mask (ndarray): alpha mask, bool [H, W] 42 | border_ratio (float, optional): border ratio, image will be resized to (1 - border_ratio). Defaults to 0.2. 43 | 44 | Returns: 45 | ndarray: output image, float/uint8 [H, W, 3/4] 46 | """ 47 | 48 | if image.shape[-1] == 4: 49 | mask = image[..., 3] 50 | else: 51 | mask = np.ones_like(image[..., 0:1]) * 255 52 | image = np.concatenate([image, mask], axis=-1) 53 | mask = mask[..., 0] 54 | 55 | H, W, C = image.shape 56 | 57 | size = max(H, W) 58 | result = np.zeros((size, size, C), dtype=np.uint8) 59 | 60 | coords = np.nonzero(mask) 61 | x_min, x_max = coords[0].min(), coords[0].max() 62 | y_min, y_max = coords[1].min(), coords[1].max() 63 | h = x_max - x_min 64 | w = y_max - y_min 65 | if h == 0 or w == 0: 66 | raise ValueError('input image is empty') 67 | desired_size = int(size * (1 - border_ratio)) 68 | scale = desired_size / max(h, w) 69 | h2 = int(h * scale) 70 | w2 = int(w * scale) 71 | x2_min = (size - h2) // 2 72 | x2_max = x2_min + h2 73 | 74 | y2_min = (size - w2) // 2 75 | y2_max = y2_min + w2 76 | 77 | result[x2_min:x2_max, y2_min:y2_max] = cv2.resize(image[x_min:x_max, y_min:y_max], (w2, h2), 78 | interpolation=cv2.INTER_AREA) 79 | 80 | bg = np.ones((result.shape[0], result.shape[1], 3), dtype=np.uint8) * 255 81 | 82 | mask = result[..., 3:].astype(np.float32) / 255 83 | result = result[..., :3] * mask + bg * (1 - mask) 84 | 85 | mask = mask * 255 86 | result = result.clip(0, 255).astype(np.uint8) 87 | mask = mask.clip(0, 255).astype(np.uint8) 88 | return result, mask 89 | 90 | def load_image(self, image, border_ratio=0.15, to_tensor=True): 91 | if isinstance(image, str): 92 | image = cv2.imread(image, cv2.IMREAD_UNCHANGED) 93 | image, mask = self.recenter(image, border_ratio=border_ratio) 94 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 95 | elif isinstance(image, Image.Image): 96 | image = image.convert("RGBA") 97 | image = np.asarray(image) 98 | image, mask = self.recenter(image, border_ratio=border_ratio) 99 | 100 | image = cv2.resize(image, (self.size, self.size), 
interpolation=cv2.INTER_CUBIC) 101 | mask = cv2.resize(mask, (self.size, self.size), interpolation=cv2.INTER_NEAREST) 102 | mask = mask[..., np.newaxis] 103 | 104 | if to_tensor: 105 | image = array_to_tensor(image) 106 | mask = array_to_tensor(mask) 107 | return image, mask 108 | 109 | def __call__(self, image, border_ratio=0.15, to_tensor=True, **kwargs): 110 | if self.border_ratio is not None: 111 | border_ratio = self.border_ratio 112 | image, mask = self.load_image(image, border_ratio=border_ratio, to_tensor=to_tensor) 113 | outputs = { 114 | 'image': image, 115 | 'mask': mask 116 | } 117 | return outputs 118 | 119 | 120 | class MVImageProcessorV2(ImageProcessorV2): 121 | """ 122 | view order: front, front clockwise 90, back, front clockwise 270 123 | """ 124 | return_view_idx = True 125 | 126 | def __init__(self, size=512, border_ratio=None): 127 | super().__init__(size, border_ratio) 128 | self.view2idx = { 129 | 'front': 0, 130 | 'left': 1, 131 | 'back': 2, 132 | 'right': 3 133 | } 134 | 135 | def __call__(self, image_dict, border_ratio=0.15, to_tensor=True, **kwargs): 136 | if self.border_ratio is not None: 137 | border_ratio = self.border_ratio 138 | 139 | images = [] 140 | masks = [] 141 | view_idxs = [] 142 | for idx, (view_tag, image) in enumerate(image_dict.items()): 143 | view_idxs.append(self.view2idx[view_tag]) 144 | image, mask = self.load_image(image, border_ratio=border_ratio, to_tensor=to_tensor) 145 | images.append(image) 146 | masks.append(mask) 147 | 148 | zipped_lists = zip(view_idxs, images, masks) 149 | sorted_zipped_lists = sorted(zipped_lists) 150 | view_idxs, images, masks = zip(*sorted_zipped_lists) 151 | 152 | image = torch.cat(images, 0).unsqueeze(0) 153 | mask = torch.cat(masks, 0).unsqueeze(0) 154 | outputs = { 155 | 'image': image, 156 | 'mask': mask, 157 | 'view_idxs': view_idxs 158 | } 159 | return outputs 160 | 161 | 162 | IMAGE_PROCESSORS = { 163 | "v2": ImageProcessorV2, 164 | 'mv_v2': MVImageProcessorV2, 165 | } 166 | 167 | DEFAULT_IMAGEPROCESSOR = 'v2' 168 | -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu: -------------------------------------------------------------------------------- 1 | #include "rasterizer.h" 2 | 3 | __device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) { 4 | float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0])); 5 | float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0])); 6 | float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1])); 7 | float y_max = std::max(vt0[1], std::max(vt1[1],vt2[1])); 8 | 9 | for (int px = x_min; px < x_max + 1; ++px) { 10 | if (px < 0 || px >= width) 11 | continue; 12 | for (int py = y_min; py < y_max + 1; ++py) { 13 | if (py < 0 || py >= height) 14 | continue; 15 | float vt[2] = {px + 0.5f, py + 0.5f}; 16 | float baryCentricCoordinate[3]; 17 | calculateBarycentricCoordinate(vt0, vt1, vt2, vt, baryCentricCoordinate); 18 | if (isBarycentricCoordInBounds(baryCentricCoordinate)) { 19 | int pixel = py * width + px; 20 | if (zbuffer == 0) { 21 | atomicExch(reinterpret_cast(&zbuffer[pixel]),static_cast(idx + 1)); 22 | continue; 23 | } 24 | float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2]; 25 | float depth_thres = 0; 26 | if (d) { 27 | depth_thres = d[pixel] * 0.49999f + 0.5f + occlusion_truncation; 28 | } 29 | 30 | int 
z_quantize = depth * (2<<17); 31 | INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1); 32 | if (depth < depth_thres) 33 | continue; 34 | atomicMin(reinterpret_cast(&zbuffer[pixel]),static_cast(token)); 35 | } 36 | } 37 | } 38 | } 39 | 40 | __global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces, 41 | float* barycentric_map) 42 | { 43 | int pix = blockIdx.x * blockDim.x + threadIdx.x; 44 | if (pix >= width * height) 45 | return; 46 | INT64 f = zbuffer[pix] % MAXINT; 47 | if (f == (MAXINT-1)) { 48 | findices[pix] = 0; 49 | barycentric_map[pix * 3] = 0; 50 | barycentric_map[pix * 3 + 1] = 0; 51 | barycentric_map[pix * 3 + 2] = 0; 52 | return; 53 | } 54 | findices[pix] = f; 55 | f -= 1; 56 | float barycentric[3] = {0, 0, 0}; 57 | if (f >= 0) { 58 | float vt[2] = {float(pix % width) + 0.5f, float(pix / width) + 0.5f}; 59 | float* vt0_ptr = V + (F[f * 3] * 4); 60 | float* vt1_ptr = V + (F[f * 3 + 1] * 4); 61 | float* vt2_ptr = V + (F[f * 3 + 2] * 4); 62 | 63 | float vt0[2] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f}; 64 | float vt1[2] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f}; 65 | float vt2[2] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f}; 66 | 67 | calculateBarycentricCoordinate(vt0, vt1, vt2, vt, barycentric); 68 | 69 | barycentric[0] = barycentric[0] / vt0_ptr[3]; 70 | barycentric[1] = barycentric[1] / vt1_ptr[3]; 71 | barycentric[2] = barycentric[2] / vt2_ptr[3]; 72 | float w = 1.0f / (barycentric[0] + barycentric[1] + barycentric[2]); 73 | barycentric[0] *= w; 74 | barycentric[1] *= w; 75 | barycentric[2] *= w; 76 | 77 | } 78 | barycentric_map[pix * 3] = barycentric[0]; 79 | barycentric_map[pix * 3 + 1] = barycentric[1]; 80 | barycentric_map[pix * 3 + 2] = barycentric[2]; 81 | } 82 | 83 | __global__ void rasterizeImagecoordsKernelGPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces) 84 | { 85 | int f = blockIdx.x * blockDim.x + threadIdx.x; 86 | if (f >= num_faces) 87 | return; 88 | 89 | float* vt0_ptr = V + (F[f * 3] * 4); 90 | float* vt1_ptr = V + (F[f * 3 + 1] * 4); 91 | float* vt2_ptr = V + (F[f * 3 + 2] * 4); 92 | 93 | float vt0[3] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f, vt0_ptr[2] / vt0_ptr[3] * 0.49999f + 0.5f}; 94 | float vt1[3] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f, vt1_ptr[2] / vt1_ptr[3] * 0.49999f + 0.5f}; 95 | float vt2[3] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f, vt2_ptr[2] / vt2_ptr[3] * 0.49999f + 0.5f}; 96 | 97 | rasterizeTriangleGPU(f, vt0, vt1, vt2, width, height, zbuffer, d, occlusion_trunc); 98 | } 99 | 100 | std::vector rasterize_image_gpu(torch::Tensor V, torch::Tensor F, torch::Tensor D, 101 | int width, int height, float occlusion_truncation, int use_depth_prior) 102 | { 103 | int device_id = V.get_device(); 104 | cudaSetDevice(device_id); 105 | int num_faces = F.size(0); 106 | int num_vertices = V.size(0); 107 | auto options = 
torch::TensorOptions().dtype(torch::kInt32).device(torch::kCUDA, device_id).requires_grad(false); 108 | auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA, device_id).requires_grad(false); 109 | auto findices = torch::zeros({height, width}, options); 110 | INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1); 111 | auto z_min = torch::ones({height, width}, INT64_options) * (int64_t)maxint; 112 | 113 | if (!use_depth_prior) { 114 | rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr(), F.data_ptr(), 0, 115 | (INT64*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces); 116 | } else { 117 | rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr(), F.data_ptr(), D.data_ptr(), 118 | (INT64*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces); 119 | } 120 | 121 | auto float_options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, device_id).requires_grad(false); 122 | auto barycentric = torch::zeros({height, width, 3}, float_options); 123 | barycentricFromImgcoordGPU<<<(width * height + 255)/256, 256>>>(V.data_ptr(), F.data_ptr(), 124 | findices.data_ptr(), (INT64*)z_min.data_ptr(), width, height, num_vertices, num_faces, barycentric.data_ptr()); 125 | 126 | return {findices, barycentric}; 127 | } 128 | -------------------------------------------------------------------------------- /hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp: -------------------------------------------------------------------------------- 1 | #include "rasterizer.h" 2 | 3 | void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) { 4 | float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0])); 5 | float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0])); 6 | float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1])); 7 | float y_max = std::max(vt0[1], std::max(vt1[1],vt2[1])); 8 | 9 | for (int px = x_min; px < x_max + 1; ++px) { 10 | if (px < 0 || px >= width) 11 | continue; 12 | for (int py = y_min; py < y_max + 1; ++py) { 13 | if (py < 0 || py >= height) 14 | continue; 15 | float vt[2] = {px + 0.5f, py + 0.5f}; 16 | float baryCentricCoordinate[3]; 17 | calculateBarycentricCoordinate(vt0, vt1, vt2, vt, baryCentricCoordinate); 18 | if (isBarycentricCoordInBounds(baryCentricCoordinate)) { 19 | int pixel = py * width + px; 20 | if (zbuffer == 0) { 21 | zbuffer[pixel] = (INT64)(idx + 1); 22 | continue; 23 | } 24 | 25 | float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2]; 26 | float depth_thres = 0; 27 | if (d) { 28 | depth_thres = d[pixel] * 0.49999f + 0.5f + occlusion_truncation; 29 | } 30 | 31 | int z_quantize = depth * (2<<17); 32 | INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1); 33 | if (depth < depth_thres) 34 | continue; 35 | zbuffer[pixel] = std::min(zbuffer[pixel], token); 36 | } 37 | } 38 | } 39 | } 40 | 41 | void barycentricFromImgcoordCPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces, 42 | float* barycentric_map, int pix) 43 | { 44 | INT64 f = zbuffer[pix] % MAXINT; 45 | if (f == (MAXINT-1)) { 46 | findices[pix] = 0; 47 | barycentric_map[pix * 3] = 0; 48 | barycentric_map[pix * 3 + 1] = 0; 49 | barycentric_map[pix * 3 + 2] = 0; 50 | return; 51 | } 52 | 
findices[pix] = f; 53 | f -= 1; 54 | float barycentric[3] = {0, 0, 0}; 55 | if (f >= 0) { 56 | float vt[2] = {float(pix % width) + 0.5f, float(pix / width) + 0.5f}; 57 | float* vt0_ptr = V + (F[f * 3] * 4); 58 | float* vt1_ptr = V + (F[f * 3 + 1] * 4); 59 | float* vt2_ptr = V + (F[f * 3 + 2] * 4); 60 | 61 | float vt0[2] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f}; 62 | float vt1[2] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f}; 63 | float vt2[2] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f}; 64 | 65 | calculateBarycentricCoordinate(vt0, vt1, vt2, vt, barycentric); 66 | 67 | barycentric[0] = barycentric[0] / vt0_ptr[3]; 68 | barycentric[1] = barycentric[1] / vt1_ptr[3]; 69 | barycentric[2] = barycentric[2] / vt2_ptr[3]; 70 | float w = 1.0f / (barycentric[0] + barycentric[1] + barycentric[2]); 71 | barycentric[0] *= w; 72 | barycentric[1] *= w; 73 | barycentric[2] *= w; 74 | 75 | } 76 | barycentric_map[pix * 3] = barycentric[0]; 77 | barycentric_map[pix * 3 + 1] = barycentric[1]; 78 | barycentric_map[pix * 3 + 2] = barycentric[2]; 79 | } 80 | 81 | void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces, int f) 82 | { 83 | float* vt0_ptr = V + (F[f * 3] * 4); 84 | float* vt1_ptr = V + (F[f * 3 + 1] * 4); 85 | float* vt2_ptr = V + (F[f * 3 + 2] * 4); 86 | 87 | float vt0[3] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f, vt0_ptr[2] / vt0_ptr[3] * 0.49999f + 0.5f}; 88 | float vt1[3] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f, vt1_ptr[2] / vt1_ptr[3] * 0.49999f + 0.5f}; 89 | float vt2[3] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f, vt2_ptr[2] / vt2_ptr[3] * 0.49999f + 0.5f}; 90 | 91 | rasterizeTriangleCPU(f, vt0, vt1, vt2, width, height, zbuffer, d, occlusion_trunc); 92 | } 93 | 94 | std::vector rasterize_image_cpu(torch::Tensor V, torch::Tensor F, torch::Tensor D, 95 | int width, int height, float occlusion_truncation, int use_depth_prior) 96 | { 97 | int num_faces = F.size(0); 98 | int num_vertices = V.size(0); 99 | auto options = torch::TensorOptions().dtype(torch::kInt32).requires_grad(false); 100 | auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false); 101 | auto findices = torch::zeros({height, width}, options); 102 | INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1); 103 | auto z_min = torch::ones({height, width}, INT64_options) * (int64_t)maxint; 104 | 105 | if (!use_depth_prior) { 106 | for (int i = 0; i < num_faces; ++i) { 107 | rasterizeImagecoordsKernelCPU(V.data_ptr(), F.data_ptr(), 0, 108 | (INT64*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces, i); 109 | } 110 | } else { 111 | for (int i = 0; i < num_faces; ++i) 112 | rasterizeImagecoordsKernelCPU(V.data_ptr(), F.data_ptr(), D.data_ptr(), 113 | (INT64*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces, i); 114 | } 115 | 116 | auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false); 117 | auto barycentric = 
torch::zeros({height, width, 3}, float_options); 118 | for (int i = 0; i < width * height; ++i) 119 | barycentricFromImgcoordCPU(V.data_ptr(), F.data_ptr(), 120 | findices.data_ptr(), (INT64*)z_min.data_ptr(), width, height, num_vertices, num_faces, barycentric.data_ptr(), i); 121 | 122 | return {findices, barycentric}; 123 | } 124 | 125 | std::vector rasterize_image(torch::Tensor V, torch::Tensor F, torch::Tensor D, 126 | int width, int height, float occlusion_truncation, int use_depth_prior) 127 | { 128 | int device_id = V.get_device(); 129 | if (device_id == -1) 130 | return rasterize_image_cpu(V, F, D, width, height, occlusion_truncation, use_depth_prior); 131 | else 132 | return rasterize_image_gpu(V, F, D, width, height, occlusion_truncation, use_depth_prior); 133 | } 134 | 135 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 136 | m.def("rasterize_image", &rasterize_image, "Custom image rasterization"); 137 | m.def("build_hierarchy", &build_hierarchy, "Custom image rasterization"); 138 | m.def("build_hierarchy_with_feat", &build_hierarchy_with_feat, "Custom image rasterization"); 139 | } 140 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/autoencoders/surface_extractors.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | from typing import Union, Tuple, List 16 | 17 | import numpy as np 18 | import torch 19 | from skimage import measure 20 | 21 | 22 | class Latent2MeshOutput: 23 | def __init__(self, mesh_v=None, mesh_f=None): 24 | self.mesh_v = mesh_v 25 | self.mesh_f = mesh_f 26 | 27 | 28 | def center_vertices(vertices): 29 | """Translate the vertices so that bounding box is centered at zero.""" 30 | vert_min = vertices.min(dim=0)[0] 31 | vert_max = vertices.max(dim=0)[0] 32 | vert_center = 0.5 * (vert_min + vert_max) 33 | return vertices - vert_center 34 | 35 | 36 | class SurfaceExtractor: 37 | def _compute_box_stat(self, bounds: Union[Tuple[float], List[float], float], octree_resolution: int): 38 | """ 39 | Compute grid size, bounding box minimum coordinates, and bounding box size based on input 40 | bounds and resolution. 41 | 42 | Args: 43 | bounds (Union[Tuple[float], List[float], float]): Bounding box coordinates or a single 44 | float representing half side length. 45 | If float, bounds are assumed symmetric around zero in all axes. 46 | Expected format if list/tuple: [xmin, ymin, zmin, xmax, ymax, zmax]. 47 | octree_resolution (int): Resolution of the octree grid. 
48 | 49 | Returns: 50 | grid_size (List[int]): Grid size along each axis (x, y, z), each equal to octree_resolution + 1. 51 | bbox_min (np.ndarray): Minimum coordinates of the bounding box (xmin, ymin, zmin). 52 | bbox_size (np.ndarray): Size of the bounding box along each axis (xmax - xmin, etc.). 53 | """ 54 | if isinstance(bounds, float): 55 | bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds] 56 | 57 | bbox_min, bbox_max = np.array(bounds[0:3]), np.array(bounds[3:6]) 58 | bbox_size = bbox_max - bbox_min 59 | grid_size = [int(octree_resolution) + 1, int(octree_resolution) + 1, int(octree_resolution) + 1] 60 | return grid_size, bbox_min, bbox_size 61 | 62 | def run(self, *args, **kwargs): 63 | """ 64 | Abstract method to extract surface mesh from grid logits. 65 | 66 | This method should be implemented by subclasses. 67 | 68 | Raises: 69 | NotImplementedError: Always, since this is an abstract method. 70 | """ 71 | return NotImplementedError 72 | 73 | def __call__(self, grid_logits, **kwargs): 74 | """ 75 | Process a batch of grid logits to extract surface meshes. 76 | 77 | Args: 78 | grid_logits (torch.Tensor): Batch of grid logits with shape (batch_size, ...). 79 | **kwargs: Additional keyword arguments passed to the `run` method. 80 | 81 | Returns: 82 | List[Optional[Latent2MeshOutput]]: List of mesh outputs for each grid in the batch. 83 | If extraction fails for a grid, None is appended at that position. 84 | """ 85 | outputs = [] 86 | for i in range(grid_logits.shape[0]): 87 | try: 88 | vertices, faces = self.run(grid_logits[i], **kwargs) 89 | vertices = vertices.astype(np.float32) 90 | faces = np.ascontiguousarray(faces) 91 | outputs.append(Latent2MeshOutput(mesh_v=vertices, mesh_f=faces)) 92 | 93 | except Exception: 94 | import traceback 95 | traceback.print_exc() 96 | outputs.append(None) 97 | 98 | return outputs 99 | 100 | 101 | class MCSurfaceExtractor(SurfaceExtractor): 102 | def run(self, grid_logit, *, mc_level, bounds, octree_resolution, **kwargs): 103 | """ 104 | Extract surface mesh using the Marching Cubes algorithm. 105 | 106 | Args: 107 | grid_logit (torch.Tensor): 3D grid logits tensor representing the scalar field. 108 | mc_level (float): The level (iso-value) at which to extract the surface. 109 | bounds (Union[Tuple[float], List[float], float]): Bounding box coordinates or half side length. 110 | octree_resolution (int): Resolution of the octree grid. 111 | **kwargs: Additional keyword arguments (ignored). 112 | 113 | Returns: 114 | Tuple[np.ndarray, np.ndarray]: Tuple containing: 115 | - vertices (np.ndarray): Extracted mesh vertices, scaled and translated to bounding 116 | box coordinates. 117 | - faces (np.ndarray): Extracted mesh faces (triangles). 118 | """ 119 | vertices, faces, normals, _ = measure.marching_cubes(grid_logit.cpu().numpy(), 120 | mc_level, 121 | method="lewiner") 122 | grid_size, bbox_min, bbox_size = self._compute_box_stat(bounds, octree_resolution) 123 | vertices = vertices / grid_size * bbox_size + bbox_min 124 | return vertices, faces 125 | 126 | 127 | class DMCSurfaceExtractor(SurfaceExtractor): 128 | def run(self, grid_logit, *, octree_resolution, **kwargs): 129 | """ 130 | Extract surface mesh using Differentiable Marching Cubes (DMC) algorithm. 131 | 132 | Args: 133 | grid_logit (torch.Tensor): 3D grid logits tensor representing the scalar field. 134 | octree_resolution (int): Resolution of the octree grid. 135 | **kwargs: Additional keyword arguments (ignored). 
136 | 137 | Returns: 138 | Tuple[np.ndarray, np.ndarray]: Tuple containing: 139 | - vertices (np.ndarray): Extracted mesh vertices, centered and converted to numpy. 140 | - faces (np.ndarray): Extracted mesh faces (triangles), with reversed vertex order. 141 | 142 | Raises: 143 | ImportError: If the 'diso' package is not installed. 144 | """ 145 | device = grid_logit.device 146 | if not hasattr(self, 'dmc'): 147 | try: 148 | from diso import DiffDMC 149 | self.dmc = DiffDMC(dtype=torch.float32).to(device) 150 | except: 151 | raise ImportError("Please install diso via `pip install diso`, or set mc_algo to 'mc'") 152 | sdf = -grid_logit / octree_resolution 153 | sdf = sdf.to(torch.float32).contiguous() 154 | verts, faces = self.dmc(sdf, deform=None, return_quads=False, normalize=True) 155 | verts = center_vertices(verts) 156 | vertices = verts.detach().cpu().numpy() 157 | faces = faces.detach().cpu().numpy()[:, ::-1] 158 | return vertices, faces 159 | 160 | 161 | SurfaceExtractors = { 162 | 'mc': MCSurfaceExtractor, 163 | 'dmc': DMCSurfaceExtractor, 164 | } 165 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/postprocessors.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
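# ---------------------------------------------------------------------------
# Usage sketch (hedged) for the surface extractors defined above in
# surface_extractors.py. The import path follows this repository's layout,
# and the random grid, mc_level, bounds and octree_resolution values are
# illustrative assumptions rather than defaults taken from this code.
#
#   import torch
#   from hy3dshape.models.autoencoders.surface_extractors import SurfaceExtractors
#
#   octree_resolution = 256
#   grid_logits = torch.randn(1, octree_resolution + 1,
#                             octree_resolution + 1, octree_resolution + 1)
#   extractor = SurfaceExtractors['mc']()      # or 'dmc' if the optional diso package is available
#   outputs = extractor(grid_logits,
#                       mc_level=0.0,          # marching-cubes iso-value (assumed)
#                       bounds=1.01,           # half side length of the bounding box (assumed)
#                       octree_resolution=octree_resolution)
#   mesh = outputs[0]                          # Latent2MeshOutput, or None if extraction failed
#   if mesh is not None:
#       print(mesh.mesh_v.shape, mesh.mesh_f.shape)
# ---------------------------------------------------------------------------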
14 | 15 | import os 16 | import tempfile 17 | from typing import Union 18 | 19 | import numpy as np 20 | import pymeshlab 21 | import torch 22 | import trimesh 23 | 24 | from .models.autoencoders import Latent2MeshOutput 25 | from .utils import synchronize_timer 26 | 27 | 28 | def load_mesh(path): 29 | if path.endswith(".glb"): 30 | mesh = trimesh.load(path) 31 | else: 32 | mesh = pymeshlab.MeshSet() 33 | mesh.load_new_mesh(path) 34 | return mesh 35 | 36 | 37 | def reduce_face(mesh: pymeshlab.MeshSet, max_facenum: int = 200000): 38 | if max_facenum > mesh.current_mesh().face_number(): 39 | return mesh 40 | 41 | mesh.apply_filter( 42 | "meshing_decimation_quadric_edge_collapse", 43 | targetfacenum=max_facenum, 44 | qualitythr=1.0, 45 | preserveboundary=True, 46 | boundaryweight=3, 47 | preservenormal=True, 48 | preservetopology=True, 49 | autoclean=True 50 | ) 51 | return mesh 52 | 53 | 54 | def remove_floater(mesh: pymeshlab.MeshSet): 55 | mesh.apply_filter("compute_selection_by_small_disconnected_components_per_face", 56 | nbfaceratio=0.005) 57 | mesh.apply_filter("compute_selection_transfer_face_to_vertex", inclusive=False) 58 | mesh.apply_filter("meshing_remove_selected_vertices_and_faces") 59 | return mesh 60 | 61 | 62 | def pymeshlab2trimesh(mesh: pymeshlab.MeshSet): 63 | with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file: 64 | mesh.save_current_mesh(temp_file.name) 65 | mesh = trimesh.load(temp_file.name) 66 | # 检查加载的对象类型 67 | if isinstance(mesh, trimesh.Scene): 68 | combined_mesh = trimesh.Trimesh() 69 | # 如果是Scene,遍历所有的geometry并合并 70 | for geom in mesh.geometry.values(): 71 | combined_mesh = trimesh.util.concatenate([combined_mesh, geom]) 72 | mesh = combined_mesh 73 | return mesh 74 | 75 | 76 | def trimesh2pymeshlab(mesh: trimesh.Trimesh): 77 | with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file: 78 | if isinstance(mesh, trimesh.scene.Scene): 79 | for idx, obj in enumerate(mesh.geometry.values()): 80 | if idx == 0: 81 | temp_mesh = obj 82 | else: 83 | temp_mesh = temp_mesh + obj 84 | mesh = temp_mesh 85 | mesh.export(temp_file.name) 86 | mesh = pymeshlab.MeshSet() 87 | mesh.load_new_mesh(temp_file.name) 88 | return mesh 89 | 90 | 91 | def export_mesh(input, output): 92 | if isinstance(input, pymeshlab.MeshSet): 93 | mesh = output 94 | elif isinstance(input, Latent2MeshOutput): 95 | output = Latent2MeshOutput() 96 | output.mesh_v = output.current_mesh().vertex_matrix() 97 | output.mesh_f = output.current_mesh().face_matrix() 98 | mesh = output 99 | else: 100 | mesh = pymeshlab2trimesh(output) 101 | return mesh 102 | 103 | 104 | def import_mesh(mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str]) -> pymeshlab.MeshSet: 105 | if isinstance(mesh, str): 106 | mesh = load_mesh(mesh) 107 | elif isinstance(mesh, Latent2MeshOutput): 108 | mesh = pymeshlab.MeshSet() 109 | mesh_pymeshlab = pymeshlab.Mesh(vertex_matrix=mesh.mesh_v, face_matrix=mesh.mesh_f) 110 | mesh.add_mesh(mesh_pymeshlab, "converted_mesh") 111 | 112 | if isinstance(mesh, (trimesh.Trimesh, trimesh.scene.Scene)): 113 | mesh = trimesh2pymeshlab(mesh) 114 | 115 | return mesh 116 | 117 | 118 | class FaceReducer: 119 | @synchronize_timer('FaceReducer') 120 | def __call__( 121 | self, 122 | mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str], 123 | max_facenum: int = 40000 124 | ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh]: 125 | ms = import_mesh(mesh) 126 | ms = reduce_face(ms, max_facenum=max_facenum) 127 | mesh = export_mesh(mesh, ms) 128 | 
return mesh 129 | 130 | 131 | class FloaterRemover: 132 | @synchronize_timer('FloaterRemover') 133 | def __call__( 134 | self, 135 | mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str], 136 | ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput]: 137 | ms = import_mesh(mesh) 138 | ms = remove_floater(ms) 139 | mesh = export_mesh(mesh, ms) 140 | return mesh 141 | 142 | 143 | class DegenerateFaceRemover: 144 | @synchronize_timer('DegenerateFaceRemover') 145 | def __call__( 146 | self, 147 | mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str], 148 | ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput]: 149 | ms = import_mesh(mesh) 150 | 151 | with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file: 152 | ms.save_current_mesh(temp_file.name) 153 | ms = pymeshlab.MeshSet() 154 | ms.load_new_mesh(temp_file.name) 155 | 156 | mesh = export_mesh(mesh, ms) 157 | return mesh 158 | 159 | 160 | def mesh_normalize(mesh): 161 | """ 162 | Normalize mesh vertices to sphere 163 | """ 164 | scale_factor = 1.2 165 | vtx_pos = np.asarray(mesh.vertices) 166 | max_bb = (vtx_pos - 0).max(0)[0] 167 | min_bb = (vtx_pos - 0).min(0)[0] 168 | 169 | center = (max_bb + min_bb) / 2 170 | 171 | scale = torch.norm(torch.tensor(vtx_pos - center, dtype=torch.float32), dim=1).max() * 2.0 172 | 173 | vtx_pos = (vtx_pos - center) * (scale_factor / float(scale)) 174 | mesh.vertices = vtx_pos 175 | 176 | return mesh 177 | 178 | 179 | class MeshSimplifier: 180 | def __init__(self, executable: str = None): 181 | if executable is None: 182 | CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) 183 | executable = os.path.join(CURRENT_DIR, "mesh_simplifier.bin") 184 | self.executable = executable 185 | 186 | @synchronize_timer('MeshSimplifier') 187 | def __call__( 188 | self, 189 | mesh: Union[trimesh.Trimesh], 190 | ) -> Union[trimesh.Trimesh]: 191 | with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as temp_input: 192 | with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as temp_output: 193 | mesh.export(temp_input.name) 194 | os.system(f'{self.executable} {temp_input.name} {temp_output.name}') 195 | ms = trimesh.load(temp_output.name, process=False) 196 | if isinstance(ms, trimesh.Scene): 197 | combined_mesh = trimesh.Trimesh() 198 | for geom in ms.geometry.values(): 199 | combined_mesh = trimesh.util.concatenate([combined_mesh, geom]) 200 | ms = combined_mesh 201 | ms = mesh_normalize(ms) 202 | return ms 203 | -------------------------------------------------------------------------------- /hy3dshape/hy3dshape/models/denoisers/moe_layers.py: -------------------------------------------------------------------------------- 1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT 2 | # except for the third-party components listed below. 3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined 4 | # in the repsective licenses of these third-party components. 5 | # Users must comply with all terms and conditions of original licenses of these third-party 6 | # components and must ensure that the usage of the third party components adheres to 7 | # all relevant laws and regulations. 
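# ---------------------------------------------------------------------------
# Usage sketch (hedged) for the mesh post-processing classes defined above in
# postprocessors.py. The import path follows this repository's layout; the
# file names are placeholders and the face budget is only illustrative.
#
#   import trimesh
#   from hy3dshape.postprocessors import FloaterRemover, DegenerateFaceRemover, FaceReducer
#
#   mesh = trimesh.load("raw_shape.glb", force="mesh")   # placeholder input path
#   mesh = FloaterRemover()(mesh)                  # drop small disconnected components
#   mesh = DegenerateFaceRemover()(mesh)           # round-trip the mesh through pymeshlab
#   mesh = FaceReducer()(mesh, max_facenum=40000)  # quadric edge-collapse decimation
#   mesh.export("clean_shape.glb")                 # placeholder output path
# ---------------------------------------------------------------------------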
8 | 9 | # For avoidance of doubts, Hunyuan 3D means the large language models and 10 | # their software and algorithms, including trained model weights, parameters (including 11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, 12 | # fine-tuning enabling code and other elements of the foregoing made publicly available 13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 14 | 15 | import torch 16 | import torch.nn as nn 17 | import numpy as np 18 | import math 19 | from timm.models.vision_transformer import PatchEmbed, Attention, Mlp 20 | 21 | import torch.nn.functional as F 22 | from diffusers.models.attention import FeedForward 23 | 24 | class AddAuxiliaryLoss(torch.autograd.Function): 25 | """ 26 | The trick function of adding auxiliary (aux) loss, 27 | which includes the gradient of the aux loss during backpropagation. 28 | """ 29 | @staticmethod 30 | def forward(ctx, x, loss): 31 | assert loss.numel() == 1 32 | ctx.dtype = loss.dtype 33 | ctx.required_aux_loss = loss.requires_grad 34 | return x 35 | 36 | @staticmethod 37 | def backward(ctx, grad_output): 38 | grad_loss = None 39 | if ctx.required_aux_loss: 40 | grad_loss = torch.ones(1, dtype=ctx.dtype, device=grad_output.device) 41 | return grad_output, grad_loss 42 | 43 | class MoEGate(nn.Module): 44 | def __init__(self, embed_dim, num_experts=16, num_experts_per_tok=2, aux_loss_alpha=0.01): 45 | super().__init__() 46 | self.top_k = num_experts_per_tok 47 | self.n_routed_experts = num_experts 48 | 49 | self.scoring_func = 'softmax' 50 | self.alpha = aux_loss_alpha 51 | self.seq_aux = False 52 | 53 | # topk selection algorithm 54 | self.norm_topk_prob = False 55 | self.gating_dim = embed_dim 56 | self.weight = nn.Parameter(torch.empty((self.n_routed_experts, self.gating_dim))) 57 | self.reset_parameters() 58 | 59 | def reset_parameters(self) -> None: 60 | import torch.nn.init as init 61 | init.kaiming_uniform_(self.weight, a=math.sqrt(5)) 62 | 63 | def forward(self, hidden_states): 64 | bsz, seq_len, h = hidden_states.shape 65 | # print(bsz, seq_len, h) 66 | ### compute gating score 67 | hidden_states = hidden_states.view(-1, h) 68 | logits = F.linear(hidden_states, self.weight, None) 69 | if self.scoring_func == 'softmax': 70 | scores = logits.softmax(dim=-1) 71 | else: 72 | raise NotImplementedError(f'insupportable scoring function for MoE gating: {self.scoring_func}') 73 | 74 | ### select top-k experts 75 | topk_weight, topk_idx = torch.topk(scores, k=self.top_k, dim=-1, sorted=False) 76 | 77 | ### norm gate to sum 1 78 | if self.top_k > 1 and self.norm_topk_prob: 79 | denominator = topk_weight.sum(dim=-1, keepdim=True) + 1e-20 80 | topk_weight = topk_weight / denominator 81 | 82 | ### expert-level computation auxiliary loss 83 | if self.training and self.alpha > 0.0: 84 | scores_for_aux = scores 85 | aux_topk = self.top_k 86 | # always compute aux loss based on the naive greedy topk method 87 | topk_idx_for_aux_loss = topk_idx.view(bsz, -1) 88 | if self.seq_aux: 89 | scores_for_seq_aux = scores_for_aux.view(bsz, seq_len, -1) 90 | ce = torch.zeros(bsz, self.n_routed_experts, device=hidden_states.device) 91 | ce.scatter_add_( 92 | 1, 93 | topk_idx_for_aux_loss, 94 | torch.ones( 95 | bsz, seq_len * aux_topk, 96 | device=hidden_states.device 97 | ) 98 | ).div_(seq_len * aux_topk / self.n_routed_experts) 99 | aux_loss = (ce * scores_for_seq_aux.mean(dim = 1)).sum(dim = 1).mean() 100 | aux_loss = aux_loss * self.alpha 101 | else: 102 | mask_ce = 
F.one_hot(topk_idx_for_aux_loss.view(-1), 103 | num_classes=self.n_routed_experts) 104 | ce = mask_ce.float().mean(0) 105 | Pi = scores_for_aux.mean(0) 106 | fi = ce * self.n_routed_experts 107 | aux_loss = (Pi * fi).sum() * self.alpha 108 | else: 109 | aux_loss = None 110 | return topk_idx, topk_weight, aux_loss 111 | 112 | class MoEBlock(nn.Module): 113 | def __init__(self, dim, num_experts=8, moe_top_k=2, 114 | activation_fn = "gelu", dropout=0.0, final_dropout = False, 115 | ff_inner_dim = None, ff_bias = True): 116 | super().__init__() 117 | self.moe_top_k = moe_top_k 118 | self.experts = nn.ModuleList([ 119 | FeedForward(dim,dropout=dropout, 120 | activation_fn=activation_fn, 121 | final_dropout=final_dropout, 122 | inner_dim=ff_inner_dim, 123 | bias=ff_bias) 124 | for i in range(num_experts)]) 125 | self.gate = MoEGate(embed_dim=dim, num_experts=num_experts, num_experts_per_tok=moe_top_k) 126 | 127 | self.shared_experts = FeedForward(dim,dropout=dropout, activation_fn=activation_fn, 128 | final_dropout=final_dropout, inner_dim=ff_inner_dim, 129 | bias=ff_bias) 130 | 131 | def initialize_weight(self): 132 | pass 133 | 134 | def forward(self, hidden_states): 135 | identity = hidden_states 136 | orig_shape = hidden_states.shape 137 | topk_idx, topk_weight, aux_loss = self.gate(hidden_states) 138 | 139 | hidden_states = hidden_states.view(-1, hidden_states.shape[-1]) 140 | flat_topk_idx = topk_idx.view(-1) 141 | if self.training: 142 | hidden_states = hidden_states.repeat_interleave(self.moe_top_k, dim=0) 143 | y = torch.empty_like(hidden_states, dtype=hidden_states.dtype) 144 | for i, expert in enumerate(self.experts): 145 | tmp = expert(hidden_states[flat_topk_idx == i]) 146 | y[flat_topk_idx == i] = tmp.to(hidden_states.dtype) 147 | y = (y.view(*topk_weight.shape, -1) * topk_weight.unsqueeze(-1)).sum(dim=1) 148 | y = y.view(*orig_shape) 149 | y = AddAuxiliaryLoss.apply(y, aux_loss) 150 | else: 151 | y = self.moe_infer(hidden_states, flat_topk_idx, topk_weight.view(-1, 1)).view(*orig_shape) 152 | y = y + self.shared_experts(identity) 153 | return y 154 | 155 | 156 | @torch.no_grad() 157 | def moe_infer(self, x, flat_expert_indices, flat_expert_weights): 158 | expert_cache = torch.zeros_like(x) 159 | idxs = flat_expert_indices.argsort() 160 | tokens_per_expert = flat_expert_indices.bincount().cpu().numpy().cumsum(0) 161 | token_idxs = idxs // self.moe_top_k 162 | for i, end_idx in enumerate(tokens_per_expert): 163 | start_idx = 0 if i == 0 else tokens_per_expert[i-1] 164 | if start_idx == end_idx: 165 | continue 166 | expert = self.experts[i] 167 | exp_token_idx = token_idxs[start_idx:end_idx] 168 | expert_tokens = x[exp_token_idx] 169 | expert_out = expert(expert_tokens) 170 | expert_out.mul_(flat_expert_weights[idxs[start_idx:end_idx]]) 171 | 172 | # for fp16 and other dtype 173 | expert_cache = expert_cache.to(expert_out.dtype) 174 | expert_cache.scatter_reduce_(0, exp_token_idx.view(-1, 1).repeat(1, x.shape[-1]), 175 | expert_out, 176 | reduce='sum') 177 | return expert_cache 178 | --------------------------------------------------------------------------------
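A minimal, hedged sketch of how the MoE layers above are exercised: MoEBlock routes each token to its top-k expert feed-forward networks and adds a shared expert on top. The import path follows this repository's layout, and the dimensions and batch shape below are illustrative assumptions rather than values used by the Hunyuan 3D denoiser.

import torch
from hy3dshape.models.denoisers.moe_layers import MoEBlock

# Illustrative sizes; the real denoiser configures dim / num_experts elsewhere.
block = MoEBlock(dim=1024, num_experts=8, moe_top_k=2)
block.eval()                          # eval mode takes the moe_infer() path

tokens = torch.randn(2, 16, 1024)     # (batch, sequence length, hidden dim)
with torch.no_grad():
    out = block(tokens)               # top-2 routed experts + shared expert
print(out.shape)                      # torch.Size([2, 16, 1024])

# During training, MoEGate additionally returns a load-balancing auxiliary loss
# that AddAuxiliaryLoss splices into the backward pass of the block's output.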