├── .gitignore
├── hy3dpaint
├── DifferentiableRenderer
│ ├── __init__.py
│ ├── compile_mesh_painter.sh
│ ├── dist
│ │ ├── mesh_inpaint_processor-0.0.0-cp310-cp310-win_amd64.whl
│ │ ├── mesh_inpaint_processor-0.0.0-cp311-cp311-win_amd64.whl
│ │ ├── mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl
│ │ └── mesh_inpaint_processor-0.0.0-cp311-cp311-linux_x86_64.whl
│ ├── setup.py
│ └── camera_utils.py
├── custom_rasterizer
│ ├── lib
│ │ └── custom_rasterizer_kernel
│ │ │ ├── __init__.py
│ │ │ ├── rasterizer.h
│ │ │ ├── rasterizer_gpu.cu
│ │ │ └── rasterizer.cpp
│ ├── custom_rasterizer
│ │ ├── __init__.py
│ │ └── render.py
│ ├── dist
│ │ ├── custom_rasterizer-0.1-cp310-cp310-win_amd64.whl
│ │ ├── custom_rasterizer-0.1-cp311-cp311-win_amd64.whl
│ │ ├── custom_rasterizer-0.1-cp312-cp312-win_amd64.whl
│ │ └── custom_rasterizer-0.1-cp311-cp311-linux_x86_64.whl
│ └── setup.py
├── 4.0
│ └── python
│ │ └── lib
│ │ └── site-packages
│ │ └── extern_draco.dll
├── src
│ ├── data
│ │ ├── dataloader
│ │ │ ├── pbr_data_format.txt
│ │ │ └── objaverse_loader_forTexturePBR.py
│ │ ├── __init__.py
│ │ └── objaverse_hunyuan.py
│ ├── __init__.py
│ └── utils
│ │ ├── __init__.py
│ │ └── train_util.py
├── utils
│ ├── __init__.py
│ ├── uvwrap_utils.py
│ ├── simplify_mesh_utils.py
│ ├── image_super_utils.py
│ ├── torchvision_fix.py
│ ├── multiview_utils.py
│ └── pipeline_utils.py
├── cfgs
│ └── hunyuan-paint-pbr.yaml
├── hunyuanpaintpbr
│ └── __init__.py
├── demo.py
├── README.md
└── convert_utils.py
├── hy3dshape
├── hy3dshape
│ ├── utils
│ │ ├── trainings
│ │ │ ├── __init__.py
│ │ │ ├── lr_scheduler.py
│ │ │ ├── peft.py
│ │ │ └── mesh.py
│ │ ├── visualizers
│ │ │ ├── __init__.py
│ │ │ ├── html_util.py
│ │ │ └── color_util.py
│ │ ├── __init__.py
│ │ ├── ema.py
│ │ ├── misc.py
│ │ └── utils.py
│ ├── models
│ │ ├── denoisers
│ │ │ ├── __init__.py
│ │ │ └── moe_layers.py
│ │ ├── autoencoders
│ │ │ ├── __init__.py
│ │ │ ├── attention_processors.py
│ │ │ └── surface_extractors.py
│ │ ├── __init__.py
│ │ └── diffusion
│ │ │ └── transport
│ │ │ ├── utils.py
│ │ │ ├── __init__.py
│ │ │ └── integrators.py
│ ├── __init__.py
│ ├── rembg.py
│ ├── meshlib.py
│ ├── data
│ │ └── utils.py
│ ├── preprocessors.py
│ └── postprocessors.py
├── minimal_demo.py
├── minimal_vae_demo.py
└── configs
│ ├── hunyuan3ddit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-512.yaml
│ ├── hunyuan3ddit-full-params-finetuning-flowmatching-dinog518-bf16-lr1e5-512.yaml
│ ├── hunyuandit-finetuning-flowmatching-dinog518-bf16-lr1e5-4096.yaml
│ ├── hunyuandit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-4096.yaml
│ └── hunyuandit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-512.yaml
├── __init__.py
├── requirements.txt
├── configs
├── dit_config.yaml
├── dit_config_mini.yaml
└── dit_config_2_1.yaml
├── workflow_examples
└── Batch_Generator.json
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
--------------------------------------------------------------------------------
/hy3dpaint/DifferentiableRenderer/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/utils/trainings/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/utils/visualizers/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/hy3dpaint/custom_rasterizer/custom_rasterizer/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | from .render import rasterize, interpolate
3 | """
4 | from .render import *
5 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS
2 |
3 | __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
--------------------------------------------------------------------------------
/hy3dpaint/4.0/python/lib/site-packages/extern_draco.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/4.0/python/lib/site-packages/extern_draco.dll
--------------------------------------------------------------------------------
/hy3dpaint/DifferentiableRenderer/compile_mesh_painter.sh:
--------------------------------------------------------------------------------
1 | c++ -O3 -Wall -shared -std=c++11 -fPIC `python -m pybind11 --includes` mesh_inpaint_processor.cpp -o mesh_inpaint_processor`python3-config --extension-suffix`
--------------------------------------------------------------------------------
/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp310-cp310-win_amd64.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp310-cp310-win_amd64.whl
--------------------------------------------------------------------------------
/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp311-cp311-win_amd64.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp311-cp311-win_amd64.whl
--------------------------------------------------------------------------------
/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp312-cp312-win_amd64.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp312-cp312-win_amd64.whl
--------------------------------------------------------------------------------
/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp311-cp311-linux_x86_64.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/custom_rasterizer/dist/custom_rasterizer-0.1-cp311-cp311-linux_x86_64.whl
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from .misc import get_config_from_file
4 | from .misc import instantiate_from_config
5 | from .utils import get_logger, logger, synchronize_timer, smart_load_model
6 |
--------------------------------------------------------------------------------
/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp310-cp310-win_amd64.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp310-cp310-win_amd64.whl
--------------------------------------------------------------------------------
/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp311-cp311-win_amd64.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp311-cp311-win_amd64.whl
--------------------------------------------------------------------------------
/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl
--------------------------------------------------------------------------------
/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp311-cp311-linux_x86_64.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/visualbruno/ComfyUI-Hunyuan3d-2-1/HEAD/hy3dpaint/DifferentiableRenderer/dist/mesh_inpaint_processor-0.0.0-cp311-cp311-linux_x86_64.whl
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | trimesh
2 | pymeshlab
3 | pygltflib
4 | xatlas
5 | open3d
6 | omegaconf
7 | pyyaml
8 | configargparse
9 | transformers
10 | diffusers
11 | accelerate
12 | pytorch-lightning
13 | opencv-python
14 | huggingface-hub
15 | safetensors
16 | scikit-image
17 | pybind11
18 | timm
19 |
20 | meshlib
21 |
--------------------------------------------------------------------------------
/hy3dpaint/src/data/dataloader/pbr_data_format.txt:
--------------------------------------------------------------------------------
1 | +-----------------+----------------------------------+
2 | | Key | Value |
3 | +-----------------+----------------------------------+
4 | | images_cond | torch.Size([2, 2, 3, 512, 512]) |
5 | | images_albedo | torch.Size([2, 6, 3, 512, 512]) |
6 | | images_mr | torch.Size([2, 6, 3, 512, 512]) |
7 | | images_normal | torch.Size([2, 6, 3, 512, 512]) |
8 | | images_position | torch.Size([2, 6, 3, 512, 512]) |
9 | | caption | ['high quality', 'high quality'] |
10 | +-----------------+----------------------------------+
--------------------------------------------------------------------------------
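
The table above describes one training batch. A throwaway sketch that fabricates a batch with exactly these shapes (useful for smoke-testing a dataloader or a model forward pass; the key names come from the table, everything else is dummy data):

import torch

# Dummy batch matching pbr_data_format.txt: batch size 2, 2 conditioning
# views, 6 target views, 512x512 renders for albedo / metallic-roughness /
# normal / position, plus one caption string per sample.
batch = {
    "images_cond": torch.zeros(2, 2, 3, 512, 512),
    "images_albedo": torch.zeros(2, 6, 3, 512, 512),
    "images_mr": torch.zeros(2, 6, 3, 512, 512),
    "images_normal": torch.zeros(2, 6, 3, 512, 512),
    "images_position": torch.zeros(2, 6, 3, 512, 512),
    "caption": ["high quality", "high quality"],
}

for key, value in batch.items():
    print(f"{key:>16}: {value.shape if torch.is_tensor(value) else value}")
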
/hy3dpaint/src/__init__.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
--------------------------------------------------------------------------------
/hy3dpaint/src/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
--------------------------------------------------------------------------------
/hy3dpaint/src/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
--------------------------------------------------------------------------------
/hy3dpaint/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/models/denoisers/__init__.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | from .hunyuan3ddit import Hunyuan3DDiT
16 |
--------------------------------------------------------------------------------
/hy3dpaint/DifferentiableRenderer/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, Extension
2 | from setuptools.command.build_ext import build_ext
3 | import sys
4 | import os
5 | import pybind11
6 | class BuildExt(build_ext):
7 | def build_extensions(self):
8 | if sys.platform == 'win32':
9 | # Windows-specific compiler flags
10 | for ext in self.extensions:
11 | ext.extra_compile_args = ['/O2', '/Wall']
12 | else:
13 | # Linux/Mac flags
14 | for ext in self.extensions:
15 | ext.extra_compile_args = ['-O3', '-Wall', '-fPIC']
16 | build_ext.build_extensions(self)
17 |
18 | setup(
19 | name="mesh_inpaint_processor",
20 | ext_modules=[
21 | Extension(
22 | "mesh_inpaint_processor",
23 | ["mesh_inpaint_processor.cpp"],
24 | include_dirs=[
25 | pybind11.get_include(),
26 | pybind11.get_include(user=True)
27 | ],
28 | language='c++'
29 | ),
30 | ],
31 | cmdclass={'build_ext': BuildExt},
32 | )
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/__init__.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | from .pipelines import Hunyuan3DDiTPipeline, Hunyuan3DDiTFlowMatchingPipeline
16 | from .postprocessors import FaceReducer, FloaterRemover, DegenerateFaceRemover, MeshSimplifier
17 | from .preprocessors import ImageProcessorV2, IMAGE_PROCESSORS, DEFAULT_IMAGEPROCESSOR
18 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/rembg.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | from PIL import Image
16 | from rembg import remove, new_session
17 |
18 |
19 | class BackgroundRemover():
20 | def __init__(self):
21 | self.session = new_session()
22 |
23 | def __call__(self, image: Image.Image):
24 | output = remove(image, session=self.session, bgcolor=[255, 255, 255, 0])
25 | return output
26 |
--------------------------------------------------------------------------------
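
A minimal usage sketch for BackgroundRemover (assumes the rembg package and its default model are available, and that the outer hy3dshape/ directory is on sys.path; 'object_photo.png' is a hypothetical input):

from PIL import Image
from hy3dshape.rembg import BackgroundRemover

remover = BackgroundRemover()
image = Image.open("object_photo.png").convert("RGB")
image_rgba = remover(image)          # background removed, returned as RGBA
image_rgba.save("object_rgba.png")
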
/hy3dshape/hy3dshape/models/autoencoders/__init__.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | from .attention_blocks import CrossAttentionDecoder
16 | from .attention_processors import FlashVDMCrossAttentionProcessor, CrossAttentionProcessor, \
17 | FlashVDMTopMCrossAttentionProcessor
18 | from .model import ShapeVAE, VectsetVAE
19 | from .surface_extractors import SurfaceExtractors, MCSurfaceExtractor, DMCSurfaceExtractor, Latent2MeshOutput
20 | from .volume_decoders import HierarchicalVolumeDecoding, FlashVDMVolumeDecoding, VanillaVolumeDecoder
21 |
--------------------------------------------------------------------------------
/hy3dpaint/cfgs/hunyuan-paint-pbr.yaml:
--------------------------------------------------------------------------------
1 | model:
2 | base_learning_rate: 5.0e-05
3 | target: hunyuanpaintpbr.model.HunyuanPaint
4 | params:
5 | num_view: 6
6 | view_size: 512
7 | drop_cond_prob: 0.1
8 |
9 | noise_in_channels: 12
10 |
11 | stable_diffusion_config:
12 | pretrained_model_name_or_path: stabilityai/stable-diffusion-2-1
13 | custom_pipeline: ./hunyuanpaintpbr
14 |
15 |
16 | data:
17 | target: src.data.objaverse_hunyuan.DataModuleFromConfig
18 | params:
19 | batch_size: 1
20 | num_workers: 4
21 | train:
22 | -
23 | target: src.data.dataloader.objaverse_loader_forTexturePBR.TextureDataset
24 | params:
25 | num_view: 6
26 | json_path: train_examples/examples.json
27 | validation:
28 | -
29 | target: src.data.dataloader.objaverse_loader_forTexturePBR.TextureDataset
30 | params:
31 | num_view: 6
32 | json_path: train_examples/examples.json
33 |
34 | lightning:
35 | modelcheckpoint:
36 | params:
37 | every_n_train_steps: 10000
38 | save_top_k: -1
39 | save_last: true
40 | callbacks: {}
41 |
42 | trainer:
43 | benchmark: true
44 | max_epochs: -1
45 | gradient_clip_val: 1.0
46 | val_check_interval: 1000
47 | num_sanity_val_steps: 0
48 | accumulate_grad_batches: 1
49 | check_val_every_n_epoch: null # if this is not set, validation does not run
50 |
51 | init_control_from:
52 | resume_from:
53 |
--------------------------------------------------------------------------------
/hy3dpaint/utils/uvwrap_utils.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import trimesh
16 | import xatlas
17 |
18 |
19 | def mesh_uv_wrap(mesh):
20 | if isinstance(mesh, trimesh.Scene):
21 | mesh = mesh.dump(concatenate=True)
22 |
23 | if len(mesh.faces) > 500000000:
24 | raise ValueError("The mesh has more than 500,000,000 faces, which is not supported.")
25 |
26 | vmapping, indices, uvs = xatlas.parametrize(mesh.vertices, mesh.faces)
27 |
28 | mesh.vertices = mesh.vertices[vmapping]
29 | mesh.faces = indices
30 | mesh.visual.uv = uvs
31 |
32 | return mesh
33 |
--------------------------------------------------------------------------------
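
A usage sketch for mesh_uv_wrap (assumes trimesh and xatlas are installed and that hy3dpaint/ is on sys.path; 'input.obj' is a hypothetical file):

import trimesh
from utils.uvwrap_utils import mesh_uv_wrap

mesh = trimesh.load("input.obj", force="mesh")
mesh = mesh_uv_wrap(mesh)        # re-indexes vertices/faces and attaches xatlas UVs
print(mesh.visual.uv.shape)      # (num_vertices, 2)
mesh.export("input_uv.obj")
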
/hy3dshape/minimal_demo.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | from PIL import Image
16 |
17 | from hy3dshape.rembg import BackgroundRemover
18 | from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline
19 |
20 | model_path = 'tencent/Hunyuan3D-2.1'
21 | pipeline_shapegen = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(model_path)
22 |
23 | image_path = 'demos/demo.png'
24 | image = Image.open(image_path)
25 | if image.mode == 'RGB':
26 | rembg = BackgroundRemover()
27 | image = rembg(image)
28 |
29 | mesh = pipeline_shapegen(image=image)[0]
30 | mesh.export('demo.glb')
31 |
--------------------------------------------------------------------------------
/hy3dpaint/hunyuanpaintpbr/__init__.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | from .pipeline import HunyuanPaintPipeline
16 | from .unet.model import HunyuanPaint
17 | from .unet.modules import (
18 | Dino_v2,
19 | Basic2p5DTransformerBlock,
20 | ImageProjModel,
21 | UNet2p5DConditionModel,
22 | )
23 | from .unet.attn_processor import (
24 | PoseRoPEAttnProcessor2_0,
25 | SelfAttnProcessor2_0,
26 | RefAttnProcessor2_0,
27 | )
28 |
29 | __all__ = [
30 | 'HunyuanPaintPipeline',
31 | 'HunyuanPaint',
32 | 'Dino_v2',
33 | 'Basic2p5DTransformerBlock',
34 | 'ImageProjModel',
35 | 'UNet2p5DConditionModel',
36 | 'PoseRoPEAttnProcessor2_0',
37 | 'SelfAttnProcessor2_0',
38 | 'RefAttnProcessor2_0',
39 | ]
40 |
--------------------------------------------------------------------------------
/hy3dpaint/custom_rasterizer/custom_rasterizer/render.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import custom_rasterizer_kernel
16 | import torch
17 |
18 |
19 | def rasterize(pos, tri, resolution, clamp_depth=torch.zeros(0), use_depth_prior=0):
20 | assert pos.device == tri.device
21 | findices, barycentric = custom_rasterizer_kernel.rasterize_image(
22 | pos[0], tri, clamp_depth, resolution[1], resolution[0], 1e-6, use_depth_prior
23 | )
24 | return findices, barycentric
25 |
26 |
27 | def interpolate(col, findices, barycentric, tri):
28 | f = findices - 1 + (findices == 0)
29 | vcol = col[0, tri.long()[f.long()]]
30 | result = barycentric.view(*barycentric.shape, 1) * vcol
31 | result = torch.sum(result, axis=-2)
32 | return result.view(1, *result.shape)
33 |
--------------------------------------------------------------------------------
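
A rough sketch of how rasterize and interpolate combine into a tiny attribute renderer. It assumes the custom_rasterizer_kernel extension is built and a CUDA device is available; the single-triangle inputs are made up, and the exact dtype and coordinate conventions expected by the kernel should be checked against the texture pipeline that calls these functions:

import torch
from custom_rasterizer import rasterize, interpolate

# One triangle in clip space (x, y, z, w) and a per-vertex RGB attribute.
pos = torch.tensor([[[-0.5, -0.5, 0.5, 1.0],
                     [ 0.5, -0.5, 0.5, 1.0],
                     [ 0.0,  0.5, 0.5, 1.0]]], device="cuda")          # (1, V, 4)
tri = torch.tensor([[0, 1, 2]], dtype=torch.int32, device="cuda")      # (F, 3)
col = torch.rand(1, 3, 3, device="cuda")                               # (1, V, C)

findices, barycentric = rasterize(pos, tri, (512, 512))  # per-pixel face index + weights
image = interpolate(col, findices, barycentric, tri)     # (1, H, W, C) per the code above
print(image.shape)
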
/hy3dpaint/custom_rasterizer/setup.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | from setuptools import setup, find_packages
16 | import torch
17 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension
18 |
19 | # build custom rasterizer
20 |
21 | custom_rasterizer_module = CUDAExtension(
22 | "custom_rasterizer_kernel",
23 | [
24 | "lib/custom_rasterizer_kernel/rasterizer.cpp",
25 | "lib/custom_rasterizer_kernel/grid_neighbor.cpp",
26 | "lib/custom_rasterizer_kernel/rasterizer_gpu.cu",
27 | ],
28 | )
29 |
30 | setup(
31 | packages=find_packages(),
32 | version="0.1",
33 | name="custom_rasterizer",
34 | include_package_data=True,
35 | package_dir={"": "."},
36 | ext_modules=[
37 | custom_rasterizer_module,
38 | ],
39 | cmdclass={"build_ext": BuildExtension},
40 | )
41 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/models/__init__.py:
--------------------------------------------------------------------------------
1 | # Open Source Model Licensed under the Apache License Version 2.0
2 | # and Other Licenses of the Third-Party Components therein:
3 | # The below Model in this distribution may have been modified by THL A29 Limited
4 | # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5 |
6 | # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7 | # The below software and/or models in this distribution may have been
8 | # modified by THL A29 Limited ("Tencent Modifications").
9 | # All Tencent Modifications are Copyright (C) THL A29 Limited.
10 |
11 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12 | # except for the third-party components listed below.
13 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14 | # in the respective licenses of these third-party components.
15 | # Users must comply with all terms and conditions of original licenses of these third-party
16 | # components and must ensure that the usage of the third party components adheres to
17 | # all relevant laws and regulations.
18 |
19 | # For avoidance of doubts, Hunyuan 3D means the large language models and
20 | # their software and algorithms, including trained model weights, parameters (including
21 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22 | # fine-tuning enabling code and other elements of the foregoing made publicly available
23 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24 |
25 |
26 | from .autoencoders import ShapeVAE
27 | from .conditioner import DualImageEncoder, SingleImageEncoder, DinoImageEncoder, CLIPImageEncoder
28 | from .denoisers import Hunyuan3DDiT
29 |
--------------------------------------------------------------------------------
/hy3dpaint/demo.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | from textureGenPipeline import Hunyuan3DPaintPipeline, Hunyuan3DPaintConfig
16 |
17 | try:
18 | from utils.torchvision_fix import apply_fix
19 |
20 | apply_fix()
21 | except ImportError:
22 | print("Warning: torchvision_fix module not found, proceeding without compatibility fix")
23 | except Exception as e:
24 | print(f"Warning: Failed to apply torchvision fix: {e}")
25 |
26 |
27 | if __name__ == "__main__":
28 |
29 | max_num_view = 6 # can be 6 to 9
30 | resolution = 768 # can be 768 or 512
31 |
32 | conf = Hunyuan3DPaintConfig(max_num_view, resolution)
33 | paint_pipeline = Hunyuan3DPaintPipeline(conf)
34 | output_mesh_path = paint_pipeline(mesh_path="./assets/FireElementalMonster.obj", image_path="./assets/FireElementalMonster.png")
35 | print(f"Output mesh path: {output_mesh_path}")
36 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/meshlib.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import numpy as np
16 | import meshlib.mrmeshnumpy as mrmeshnumpy
17 | import meshlib.mrmeshpy as mrmeshpy
18 | import trimesh
19 |
20 | def postprocessmesh(vertices: np.ndarray, faces: np.ndarray, settings):
21 | print('Generating Meshlib Mesh ...')
22 | mesh = mrmeshnumpy.meshFromFacesVerts(faces, vertices)
23 | print('Packing Optimally ...')
24 | mesh.packOptimally()
25 | print('Decimating ...')
26 | mrmeshpy.decimateMesh(mesh, settings)
27 |
28 | out_verts = mrmeshnumpy.getNumpyVerts(mesh)
29 | out_faces = mrmeshnumpy.getNumpyFaces(mesh.topology)
30 |
31 | mesh = trimesh.Trimesh(vertices=out_verts, faces=out_faces)
32 | print(f"Reduced faces, resulting in {mesh.vertices.shape[0]} vertices and {mesh.faces.shape[0]} faces")
33 |
34 | return mesh
35 |
36 |
37 |
--------------------------------------------------------------------------------
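
A usage sketch for postprocessmesh (assumes the meshlib and trimesh packages are installed and that the hy3dshape package imports cleanly; the input path and the maxError value are made-up examples, and the DecimateSettings fields available depend on the installed meshlib version):

import trimesh
import meshlib.mrmeshpy as mrmeshpy
from hy3dshape.meshlib import postprocessmesh

src = trimesh.load("raw_mesh.glb", force="mesh")

settings = mrmeshpy.DecimateSettings()
settings.maxError = 0.001   # assumed field; tune to trade fidelity vs. face count

simplified = postprocessmesh(src.vertices, src.faces, settings)
simplified.export("simplified.obj")
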
/hy3dpaint/utils/simplify_mesh_utils.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import trimesh
16 | import pymeshlab
17 |
18 |
19 | def remesh_mesh(mesh_path, remesh_path):
20 | mesh = mesh_simplify_trimesh(mesh_path, remesh_path)
21 |
22 |
23 | def mesh_simplify_trimesh(inputpath, outputpath, target_count=50000):
24 | # First, remove isolated faces
25 | ms = pymeshlab.MeshSet()
26 | if inputpath.endswith(".glb"):
27 | ms.load_new_mesh(inputpath, load_in_a_single_layer=True)
28 | else:
29 | ms.load_new_mesh(inputpath)
30 | ms.save_current_mesh(outputpath.replace(".glb", ".obj"), save_textures=False)
31 | # Call the face-reduction (decimation) function
32 | courent = trimesh.load(outputpath.replace(".glb", ".obj"), force="mesh")
33 | face_num = courent.faces.shape[0]
34 |
35 | if face_num > target_count:
36 | courent = courent.simplify_quadric_decimation(target_count)
37 | courent.export(outputpath)
38 |
--------------------------------------------------------------------------------
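
A usage sketch for remesh_mesh (assumes pymeshlab and trimesh are installed and hy3dpaint/ is on sys.path; both paths are hypothetical). The helper first re-saves the input without textures via pymeshlab, then quadric-decimates it with trimesh when it exceeds the 50,000-face target:

from utils.simplify_mesh_utils import remesh_mesh

remesh_mesh("textured.glb", "simplified.obj")
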
/hy3dpaint/utils/image_super_utils.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import numpy as np
16 | from PIL import Image
17 |
18 |
19 | class imageSuperNet:
20 | def __init__(self, config) -> None:
21 | from realesrgan import RealESRGANer
22 | from basicsr.archs.rrdbnet_arch import RRDBNet
23 |
24 | model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
25 | upsampler = RealESRGANer(
26 | scale=4,
27 | model_path=config.realesrgan_ckpt_path,
28 | dni_weight=None,
29 | model=model,
30 | tile=0,
31 | tile_pad=10,
32 | pre_pad=0,
33 | half=True,
34 | gpu_id=None,
35 | )
36 | self.upsampler = upsampler
37 |
38 | def __call__(self, image):
39 | output, _ = self.upsampler.enhance(np.array(image))
40 | output = Image.fromarray(output)
41 | return output
42 |
--------------------------------------------------------------------------------
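
A hedged usage sketch for imageSuperNet (requires realesrgan and basicsr, plus a CUDA GPU since half=True is set above; the config only needs a realesrgan_ckpt_path attribute, and both paths here are hypothetical):

from types import SimpleNamespace
from PIL import Image
from utils.image_super_utils import imageSuperNet  # assumes hy3dpaint/ is on sys.path

config = SimpleNamespace(realesrgan_ckpt_path="ckpt/RealESRGAN_x4plus.pth")
upscaler = imageSuperNet(config)

image = Image.open("albedo_512.png")
image_4x = upscaler(image)     # RealESRGAN 4x upscale
image_4x.save("albedo_2048.png")
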
/configs/dit_config.yaml:
--------------------------------------------------------------------------------
1 | model:
2 | target: .hy3dgen.shapegen.models.Hunyuan3DDiT
3 | params:
4 | in_channels: 64
5 | context_in_dim: 1536
6 | hidden_size: 1024
7 | mlp_ratio: 4.0
8 | num_heads: 16
9 | depth: 16
10 | depth_single_blocks: 32
11 | axes_dim: [ 64 ]
12 | theta: 10000
13 | qkv_bias: True
14 | guidance_embed: False
15 |
16 | vae:
17 | target: .hy3dgen.shapegen.models.ShapeVAE
18 | params:
19 | num_latents: 3072
20 | embed_dim: 64
21 | num_freqs: 8
22 | include_pi: false
23 | heads: 16
24 | width: 1024
25 | num_decoder_layers: 16
26 | qkv_bias: false
27 | qk_norm: true
28 | scale_factor: 0.9990943042622529
29 |
30 | conditioner:
31 | target: .hy3dgen.shapegen.models.SingleImageEncoder
32 | params:
33 | main_image_encoder:
34 | type: DinoImageEncoder # dino giant
35 | kwargs:
36 | config:
37 | attention_probs_dropout_prob: 0.0
38 | drop_path_rate: 0.0
39 | hidden_act: gelu
40 | hidden_dropout_prob: 0.0
41 | hidden_size: 1536
42 | image_size: 518
43 | initializer_range: 0.02
44 | layer_norm_eps: 1.e-6
45 | layerscale_value: 1.0
46 | mlp_ratio: 4
47 | model_type: dinov2
48 | num_attention_heads: 24
49 | num_channels: 3
50 | num_hidden_layers: 40
51 | patch_size: 14
52 | qkv_bias: true
53 | torch_dtype: float32
54 | use_swiglu_ffn: true
55 | image_size: 518
56 |
57 | scheduler:
58 | target: .hy3dgen.shapegen.schedulers.FlowMatchEulerDiscreteScheduler
59 | params:
60 | num_train_timesteps: 1000
61 |
62 | image_processor:
63 | target: .hy3dgen.shapegen.preprocessors.ImageProcessorV2
64 | params:
65 | size: 512
66 | border_ratio: 0.15
67 |
68 | pipeline:
69 | target: .hy3dgen.shapegen.pipelines.Hunyuan3DDiTFlowMatchingPipeline
70 |
--------------------------------------------------------------------------------
/configs/dit_config_mini.yaml:
--------------------------------------------------------------------------------
1 | model:
2 | target: .hy3dgen.shapegen.models.Hunyuan3DDiT
3 | params:
4 | in_channels: 64
5 | context_in_dim: 1536
6 | hidden_size: 1024
7 | mlp_ratio: 4.0
8 | num_heads: 16
9 | depth: 8
10 | depth_single_blocks: 16
11 | axes_dim: [ 64 ]
12 | theta: 10000
13 | qkv_bias: True
14 | guidance_embed: False
15 |
16 | vae:
17 | target: .hy3dgen.shapegen.models.ShapeVAE
18 | params:
19 | num_latents: 512
20 | embed_dim: 64
21 | num_freqs: 8
22 | include_pi: false
23 | heads: 16
24 | width: 1024
25 | num_decoder_layers: 16
26 | qkv_bias: false
27 | qk_norm: true
28 | scale_factor: 1.0188137142395404
29 |
30 | conditioner:
31 | target: .hy3dgen.shapegen.models.SingleImageEncoder
32 | params:
33 | main_image_encoder:
34 | type: DinoImageEncoder # dino giant
35 | kwargs:
36 | config:
37 | attention_probs_dropout_prob: 0.0
38 | drop_path_rate: 0.0
39 | hidden_act: gelu
40 | hidden_dropout_prob: 0.0
41 | hidden_size: 1536
42 | image_size: 518
43 | initializer_range: 0.02
44 | layer_norm_eps: 1.e-6
45 | layerscale_value: 1.0
46 | mlp_ratio: 4
47 | model_type: dinov2
48 | num_attention_heads: 24
49 | num_channels: 3
50 | num_hidden_layers: 40
51 | patch_size: 14
52 | qkv_bias: true
53 | torch_dtype: float32
54 | use_swiglu_ffn: true
55 | image_size: 518
56 |
57 | scheduler:
58 | target: .hy3dgen.shapegen.schedulers.FlowMatchEulerDiscreteScheduler
59 | params:
60 | num_train_timesteps: 1000
61 |
62 | image_processor:
63 | target: .hy3dgen.shapegen.preprocessors.ImageProcessorV2
64 | params:
65 | size: 512
66 | border_ratio: 0.15
67 |
68 | pipeline:
69 | target: .hy3dgen.shapegen.pipelines.Hunyuan3DDiTFlowMatchingPipeline
70 |
--------------------------------------------------------------------------------
/hy3dshape/minimal_vae_demo.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import torch
16 |
17 | from hy3dshape.surface_loaders import SharpEdgeSurfaceLoader
18 | from hy3dshape.models.autoencoders import ShapeVAE
19 | from hy3dshape.pipelines import export_to_trimesh
20 |
21 |
22 | vae = ShapeVAE.from_pretrained(
23 | 'tencent/Hunyuan3D-2.1',
24 | use_safetensors=False,
25 | variant='fp16',
26 | )
27 |
28 |
29 | loader = SharpEdgeSurfaceLoader(
30 | num_sharp_points=0,
31 | num_uniform_points=81920,
32 | )
33 | mesh_demo = 'demos/demo.glb'
34 | surface = loader(mesh_demo).to('cuda', dtype=torch.float16)
35 | print(surface.shape)
36 |
37 | latents = vae.encode(surface)
38 | latents = vae.decode(latents)
39 | mesh = vae.latents2mesh(
40 | latents,
41 | output_type='trimesh',
42 | bounds=1.01,
43 | mc_level=0.0,
44 | num_chunks=20000,
45 | octree_resolution=256,
46 | mc_algo='mc',
47 | enable_pbar=True
48 | )
49 |
50 | mesh = export_to_trimesh(mesh)[0]
51 | mesh.export('output.obj')
52 |
--------------------------------------------------------------------------------
/hy3dpaint/src/utils/train_util.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import importlib
16 |
17 |
18 | def count_params(model, verbose=False):
19 | total_params = sum(p.numel() for p in model.parameters())
20 | if verbose:
21 | print(f"{model.__class__.__name__} has {total_params*1.e-6:.2f} M params.")
22 | return total_params
23 |
24 |
25 | def instantiate_from_config(config):
26 | if "target" not in config:
27 | if config == "__is_first_stage__":
28 | return None
29 | elif config == "__is_unconditional__":
30 | return None
31 | raise KeyError("Expected key `target` to instantiate.")
32 | return get_obj_from_str(config["target"])(**config.get("params", dict()))
33 |
34 |
35 | def get_obj_from_str(string, reload=False):
36 | module, cls = string.rsplit(".", 1)
37 | if reload:
38 | module_imp = importlib.import_module(module)
39 | importlib.reload(module_imp)
40 | return getattr(importlib.import_module(module, package=None), cls)
41 |
--------------------------------------------------------------------------------
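
instantiate_from_config is the mechanism behind the target/params pairs used in the YAML configs in this repo; a self-contained sketch using a standard-library target instead of a Hunyuan class (import path assumes hy3dpaint/ is on sys.path):

from src.utils.train_util import instantiate_from_config

config = {
    "target": "datetime.timedelta",      # any importable "module.attribute" path
    "params": {"days": 2, "hours": 3},   # forwarded as keyword arguments
}
obj = instantiate_from_config(config)
print(obj)  # 2 days, 3:00:00
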
/hy3dshape/hy3dshape/utils/visualizers/html_util.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
4 | # except for the third-party components listed below.
5 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
6 | # in the respective licenses of these third-party components.
7 | # Users must comply with all terms and conditions of original licenses of these third-party
8 | # components and must ensure that the usage of the third party components adheres to
9 | # all relevant laws and regulations.
10 |
11 | # For avoidance of doubts, Hunyuan 3D means the large language models and
12 | # their software and algorithms, including trained model weights, parameters (including
13 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
14 | # fine-tuning enabling code and other elements of the foregoing made publicly available
15 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
16 |
17 | import io
18 | import base64
19 | import numpy as np
20 | from PIL import Image
21 |
22 |
23 | def to_html_frame(content):
24 |
25 | html_frame = f"""
26 | <html>
27 | <body>
28 | {content}
29 | </body>
30 | </html>
31 | """
32 |
33 | return html_frame
34 |
35 |
36 | def to_single_row_table(caption: str, content: str):
37 |
38 | table_html = f"""
39 | <table border="1">
40 | <caption>{caption}</caption>
41 | <tr>
42 | <td>{content}</td>
43 | </tr>
44 | </table>
45 | """
46 |
47 | return table_html
48 |
49 |
50 | def to_image_embed_tag(image: np.ndarray):
51 |
52 | # Convert np.ndarray to bytes
53 | img = Image.fromarray(image)
54 | raw_bytes = io.BytesIO()
55 | img.save(raw_bytes, "PNG")
56 |
57 | # Encode bytes to base64
58 | image_base64 = base64.b64encode(raw_bytes.getvalue()).decode("utf-8")
59 |
60 | image_tag = f"""
61 | <img src="data:image/png;base64,{image_base64}" alt="Embedded Image">
62 | """
63 |
64 | return image_tag
65 |
--------------------------------------------------------------------------------
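
A small sketch chaining the three helpers above into a standalone HTML preview (assumes an RGB uint8 numpy image and that the outer hy3dshape/ directory is on sys.path; 'preview.html' is a hypothetical output path):

import numpy as np
from hy3dshape.utils.visualizers.html_util import (
    to_html_frame, to_single_row_table, to_image_embed_tag,
)

image = (np.random.rand(64, 64, 3) * 255).astype(np.uint8)      # dummy RGB image
table = to_single_row_table("random noise", to_image_embed_tag(image))
with open("preview.html", "w") as f:
    f.write(to_html_frame(table))
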
/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h:
--------------------------------------------------------------------------------
1 | #ifndef RASTERIZER_H_
2 | #define RASTERIZER_H_
3 |
4 | #include <torch/extension.h>
5 | #include <cuda.h>
6 | #include <cuda_runtime.h>
7 | #include <ATen/cuda/CUDAContext.h> // For CUDA context
8 | #include <vector>
9 | #define INT64 uint64_t
10 | #define MAXINT 2147483647
11 |
12 | __host__ __device__ inline float calculateSignedArea2(float* a, float* b, float* c) {
13 | return ((c[0] - a[0]) * (b[1] - a[1]) - (b[0] - a[0]) * (c[1] - a[1]));
14 | }
15 |
16 | __host__ __device__ inline void calculateBarycentricCoordinate(float* a, float* b, float* c, float* p,
17 | float* barycentric)
18 | {
19 | float beta_tri = calculateSignedArea2(a, p, c);
20 | float gamma_tri = calculateSignedArea2(a, b, p);
21 | float area = calculateSignedArea2(a, b, c);
22 | if (area == 0) {
23 | barycentric[0] = -1.0;
24 | barycentric[1] = -1.0;
25 | barycentric[2] = -1.0;
26 | return;
27 | }
28 | float tri_inv = 1.0 / area;
29 | float beta = beta_tri * tri_inv;
30 | float gamma = gamma_tri * tri_inv;
31 | float alpha = 1.0 - beta - gamma;
32 | barycentric[0] = alpha;
33 | barycentric[1] = beta;
34 | barycentric[2] = gamma;
35 | }
36 |
37 | __host__ __device__ inline bool isBarycentricCoordInBounds(float* barycentricCoord) {
38 | return barycentricCoord[0] >= 0.0 && barycentricCoord[0] <= 1.0 &&
39 | barycentricCoord[1] >= 0.0 && barycentricCoord[1] <= 1.0 &&
40 | barycentricCoord[2] >= 0.0 && barycentricCoord[2] <= 1.0;
41 | }
42 |
43 | std::vector<torch::Tensor> rasterize_image_gpu(torch::Tensor V, torch::Tensor F, torch::Tensor D,
44 | int width, int height, float occlusion_truncation, int use_depth_prior);
45 |
46 | std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tensor> view_layer_positions, std::vector<torch::Tensor> view_layer_normals, int num_level, int resolution);
47 |
48 | std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat(
49 | std::vector<torch::Tensor> view_layer_positions,
50 | std::vector<torch::Tensor> view_layer_normals,
51 | std::vector<torch::Tensor> view_layer_feats,
52 | int num_level, int resolution);
53 |
54 | #endif
--------------------------------------------------------------------------------
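
The barycentric helpers in this header can be sanity-checked from Python; a NumPy sketch mirroring calculateSignedArea2 and calculateBarycentricCoordinate for a single 2D point:

import numpy as np

def signed_area2(a, b, c):
    # Twice the signed area of triangle (a, b, c), as in calculateSignedArea2.
    return (c[0] - a[0]) * (b[1] - a[1]) - (b[0] - a[0]) * (c[1] - a[1])

def barycentric(a, b, c, p):
    area = signed_area2(a, b, c)
    if area == 0:
        return np.array([-1.0, -1.0, -1.0])   # degenerate-triangle sentinel
    beta = signed_area2(a, p, c) / area
    gamma = signed_area2(a, b, p) / area
    return np.array([1.0 - beta - gamma, beta, gamma])

a, b, c = np.array([0.0, 0.0]), np.array([1.0, 0.0]), np.array([0.0, 1.0])
w = barycentric(a, b, c, np.array([0.25, 0.25]))
print(w, w.sum())   # [0.5, 0.25, 0.25], weights sum to 1 inside the triangle
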
/hy3dshape/hy3dshape/models/diffusion/transport/utils.py:
--------------------------------------------------------------------------------
1 | # This file includes code derived from the SiT project (https://github.com/willisma/SiT),
2 | # which is licensed under the MIT License.
3 | #
4 | # MIT License
5 | #
6 | # Copyright (c) Meta Platforms, Inc. and affiliates.
7 | #
8 | # Permission is hereby granted, free of charge, to any person obtaining a copy
9 | # of this software and associated documentation files (the "Software"), to deal
10 | # in the Software without restriction, including without limitation the rights
11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | # copies of the Software, and to permit persons to whom the Software is
13 | # furnished to do so, subject to the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be included in all
16 | # copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 | # SOFTWARE.
25 |
26 | import torch as th
27 |
28 | class EasyDict:
29 |
30 | def __init__(self, sub_dict):
31 | for k, v in sub_dict.items():
32 | setattr(self, k, v)
33 |
34 | def __getitem__(self, key):
35 | return getattr(self, key)
36 |
37 | def mean_flat(x):
38 | """
39 | Take the mean over all non-batch dimensions.
40 | """
41 | return th.mean(x, dim=list(range(1, len(x.size()))))
42 |
43 | def log_state(state):
44 | result = []
45 |
46 | sorted_state = dict(sorted(state.items()))
47 | for key, value in sorted_state.items():
48 | # Check if the value is an instance of a class
49 | if " 0:
39 | if n % self.verbosity_interval == 0:
40 | print(f"current step: {n}, recent lr-multiplier: {self.f_start}")
41 | if n < self.lr_warm_up_steps:
42 | f = (self.f_max - self.f_start) / self.lr_warm_up_steps * n + self.f_start
43 | self.last_f = f
44 | return f
45 | else:
46 | t = (n - self.lr_warm_up_steps) / (self.lr_max_decay_steps - self.lr_warm_up_steps)
47 | t = min(t, 1.0)
48 | f = self.f_min + 0.5 * (self.f_max - self.f_min) * (1 + np.cos(t * np.pi))
49 | self.last_f = f
50 | return f
51 |
52 | def __call__(self, n, **kwargs):
53 | return self.schedule(n, **kwargs)
54 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/utils/ema.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 |
5 | class LitEma(nn.Module):
6 | def __init__(self, model, decay=0.9999, use_num_updates=True):
7 | super().__init__()
8 | if decay < 0.0 or decay > 1.0:
9 | raise ValueError('Decay must be between 0 and 1')
10 |
11 | self.m_name2s_name = {}
12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32))
13 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_updates
14 | else torch.tensor(-1, dtype=torch.int))
15 |
16 | for name, p in model.named_parameters():
17 | if p.requires_grad:
18 | # remove as '.'-character is not allowed in buffers
19 | s_name = name.replace('.', '_____')
20 | self.m_name2s_name.update({name: s_name})
21 | self.register_buffer(s_name, p.clone().detach().data)
22 |
23 | self.collected_params = []
24 |
25 | def forward(self, model):
26 | decay = self.decay
27 |
28 | if self.num_updates >= 0:
29 | self.num_updates += 1
30 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates))
31 |
32 | one_minus_decay = 1.0 - decay
33 |
34 | with torch.no_grad():
35 | m_param = dict(model.named_parameters())
36 | shadow_params = dict(self.named_buffers())
37 |
38 | for key in m_param:
39 | if m_param[key].requires_grad:
40 | sname = self.m_name2s_name[key]
41 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key])
42 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key]))
43 | else:
44 | assert not key in self.m_name2s_name
45 |
46 | def copy_to(self, model):
47 | m_param = dict(model.named_parameters())
48 | shadow_params = dict(self.named_buffers())
49 | for key in m_param:
50 | if m_param[key].requires_grad:
51 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data)
52 | else:
53 | assert not key in self.m_name2s_name
54 |
55 | def store(self, model):
56 | """
57 | Save the current parameters for restoring later.
58 | Args:
59 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be
60 | temporarily stored.
61 | """
62 | self.collected_params = [param.clone() for param in model.parameters()]
63 |
64 | def restore(self, model):
65 | """
66 | Restore the parameters stored with the `store` method.
67 | Useful to validate the model with EMA parameters without affecting the
68 | original optimization process. Store the parameters before the
69 | `copy_to` method. After validation (or model saving), use this to
70 | restore the former parameters.
71 | Args:
72 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be
73 | updated with the stored parameters.
74 | """
75 | for c_param, param in zip(self.collected_params, model.parameters()):
76 | param.data.copy_(c_param.data)
77 |
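78 | # Typical usage sketch (the names below are illustrative, not part of this module):
79 | #
80 | #   ema = LitEma(model)
81 | #   ema(model)                             # after each optimizer step: update shadow weights
82 | #   ema.store(model); ema.copy_to(model)   # swap EMA weights in before validation
83 | #   ...                                    # run validation / save checkpoints
84 | #   ema.restore(model)                     # swap the original training weights back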
--------------------------------------------------------------------------------
/hy3dpaint/src/data/objaverse_hunyuan.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
4 | # except for the third-party components listed below.
5 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
6 | # in the respective licenses of these third-party components.
7 | # Users must comply with all terms and conditions of original licenses of these third-party
8 | # components and must ensure that the usage of the third party components adheres to
9 | # all relevant laws and regulations.
10 |
11 | # For avoidance of doubts, Hunyuan 3D means the large language models and
12 | # their software and algorithms, including trained model weights, parameters (including
13 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
14 | # fine-tuning enabling code and other elements of the foregoing made publicly available
15 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
16 |
17 | import pytorch_lightning as pl
18 | from torch.utils.data import Dataset, ConcatDataset, DataLoader
19 | from torch.utils.data.distributed import DistributedSampler
20 |
21 |
22 | class DataModuleFromConfig(pl.LightningDataModule):
23 | def __init__(
24 | self,
25 | batch_size=8,
26 | num_workers=4,
27 | train=None,
28 | validation=None,
29 | test=None,
30 | **kwargs,
31 | ):
32 | super().__init__()
33 |
34 | self.batch_size = batch_size
35 | self.num_workers = num_workers
36 |
37 | self.dataset_configs = dict()
38 | if train is not None:
39 | self.dataset_configs["train"] = train
40 | if validation is not None:
41 | self.dataset_configs["validation"] = validation
42 | if test is not None:
43 | self.dataset_configs["test"] = test
44 |
45 | def setup(self, stage):
46 | from src.utils.train_util import instantiate_from_config
47 |
48 | if stage in ["fit"]:
49 | dataset_dict = {}
50 | for k in self.dataset_configs:
51 | dataset_dict[k] = []
52 | for loader in self.dataset_configs[k]:
53 | dataset_dict[k].append(instantiate_from_config(loader))
54 | self.datasets = dataset_dict
55 | print(self.datasets)
56 | else:
57 | raise NotImplementedError
58 |
59 | def train_dataloader(self):
60 | datasets = ConcatDataset(self.datasets["train"])
61 | sampler = DistributedSampler(datasets)
62 | return DataLoader(
63 | datasets,
64 | batch_size=self.batch_size,
65 | num_workers=self.num_workers,
66 | shuffle=False,
67 | sampler=sampler,
68 | prefetch_factor=2,
69 | pin_memory=True,
70 | )
71 |
72 | def val_dataloader(self):
73 | datasets = ConcatDataset(self.datasets["validation"])
74 | sampler = DistributedSampler(datasets)
75 | return DataLoader(datasets, batch_size=4, num_workers=self.num_workers, shuffle=False, sampler=sampler)
76 |
77 | def test_dataloader(self):
78 | datasets = ConcatDataset(self.datasets["test"])
79 | return DataLoader(datasets, batch_size=self.batch_size, num_workers=self.num_workers, shuffle=False)
80 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/models/diffusion/transport/__init__.py:
--------------------------------------------------------------------------------
1 | # This file includes code derived from the SiT project (https://github.com/willisma/SiT),
2 | # which is licensed under the MIT License.
3 | #
4 | # MIT License
5 | #
6 | # Copyright (c) Meta Platforms, Inc. and affiliates.
7 | #
8 | # Permission is hereby granted, free of charge, to any person obtaining a copy
9 | # of this software and associated documentation files (the "Software"), to deal
10 | # in the Software without restriction, including without limitation the rights
11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | # copies of the Software, and to permit persons to whom the Software is
13 | # furnished to do so, subject to the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be included in all
16 | # copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 | # SOFTWARE.
25 |
26 | from .transport import Transport, ModelType, WeightType, PathType, Sampler
27 |
28 |
29 | def create_transport(
30 | path_type='Linear',
31 | prediction="velocity",
32 | loss_weight=None,
33 | train_eps=None,
34 | sample_eps=None,
35 | train_sample_type="uniform",
36 | mean = 0.0,
37 | std = 1.0,
38 | shift_scale = 1.0,
39 | ):
40 | """function for creating Transport object
41 | **Note**: model prediction defaults to velocity
42 | Args:
43 |     - path_type: type of path to use; defaults to linear
44 |     - prediction: quantity the model predicts ("velocity", "noise", or "score")
45 |     - loss_weight: loss weighting scheme ("velocity", "likelihood", or None)
46 |     - train_eps: small epsilon for avoiding instability during training
47 |     - sample_eps: small epsilon for avoiding instability during sampling
48 |     - train_sample_type: how training timesteps are drawn (default "uniform")
49 |     - mean, std, shift_scale: additional sampling parameters forwarded to Transport
50 |     """
51 |
52 | if prediction == "noise":
53 | model_type = ModelType.NOISE
54 | elif prediction == "score":
55 | model_type = ModelType.SCORE
56 | else:
57 | model_type = ModelType.VELOCITY
58 |
59 | if loss_weight == "velocity":
60 | loss_type = WeightType.VELOCITY
61 | elif loss_weight == "likelihood":
62 | loss_type = WeightType.LIKELIHOOD
63 | else:
64 | loss_type = WeightType.NONE
65 |
66 | path_choice = {
67 | "Linear": PathType.LINEAR,
68 | "GVP": PathType.GVP,
69 | "VP": PathType.VP,
70 | }
71 |
72 | path_type = path_choice[path_type]
73 |
74 | if (path_type in [PathType.VP]):
75 | train_eps = 1e-5 if train_eps is None else train_eps
76 |         sample_eps = 1e-3 if sample_eps is None else sample_eps
77 | elif (path_type in [PathType.GVP, PathType.LINEAR] and model_type != ModelType.VELOCITY):
78 | train_eps = 1e-3 if train_eps is None else train_eps
79 |         sample_eps = 1e-3 if sample_eps is None else sample_eps
80 | else: # velocity & [GVP, LINEAR] is stable everywhere
81 | train_eps = 0
82 | sample_eps = 0
83 |
84 | # create flow state
85 | state = Transport(
86 | model_type=model_type,
87 | path_type=path_type,
88 | loss_type=loss_type,
89 | train_eps=train_eps,
90 | sample_eps=sample_eps,
91 | train_sample_type=train_sample_type,
92 | mean=mean,
93 | std=std,
94 | shift_scale =shift_scale,
95 | )
96 |
97 | return state
98 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/utils/trainings/peft.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
4 | # except for the third-party components listed below.
5 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
6 | # in the respective licenses of these third-party components.
7 | # Users must comply with all terms and conditions of original licenses of these third-party
8 | # components and must ensure that the usage of the third party components adheres to
9 | # all relevant laws and regulations.
10 |
11 | # For avoidance of doubts, Hunyuan 3D means the large language models and
12 | # their software and algorithms, including trained model weights, parameters (including
13 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
14 | # fine-tuning enabling code and other elements of the foregoing made publicly available
15 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
16 |
17 | import os
18 | from pytorch_lightning.callbacks import Callback
19 | from omegaconf import OmegaConf, ListConfig
20 |
21 | class PeftSaveCallback(Callback):
22 | def __init__(self, peft_model, save_dir: str, save_every_n_steps: int = None):
23 | super().__init__()
24 | self.peft_model = peft_model
25 | self.save_dir = save_dir
26 | self.save_every_n_steps = save_every_n_steps
27 | os.makedirs(self.save_dir, exist_ok=True)
28 |
29 | def recursive_convert(self, obj):
30 | from omegaconf import OmegaConf, ListConfig
31 | if isinstance(obj, (OmegaConf, ListConfig)):
32 | return OmegaConf.to_container(obj, resolve=True)
33 | elif isinstance(obj, dict):
34 | return {k: self.recursive_convert(v) for k, v in obj.items()}
35 | elif isinstance(obj, list):
36 | return [self.recursive_convert(i) for i in obj]
37 | elif isinstance(obj, type):
38 |             # Avoid modifying class objects
39 | return obj
40 | elif hasattr(obj, '__dict__'):
41 | for attr_name, attr_value in vars(obj).items():
42 | setattr(obj, attr_name, self.recursive_convert(attr_value))
43 | return obj
44 | else:
45 | return obj
46 |
47 | # def recursive_convert(self, obj):
48 | # if isinstance(obj, (OmegaConf, ListConfig)):
49 | # return OmegaConf.to_container(obj, resolve=True)
50 | # elif isinstance(obj, dict):
51 | # return {k: self.recursive_convert(v) for k, v in obj.items()}
52 | # elif isinstance(obj, list):
53 | # return [self.recursive_convert(i) for i in obj]
54 | # elif hasattr(obj, '__dict__'):
55 | # for attr_name, attr_value in vars(obj).items():
56 | # setattr(obj, attr_name, self.recursive_convert(attr_value))
57 | # return obj
58 | # else:
59 | # return obj
60 |
61 | def _convert_peft_config(self):
62 | pc = self.peft_model.peft_config
63 | self.peft_model.peft_config = self.recursive_convert(pc)
64 |
65 | def on_train_epoch_end(self, trainer, pl_module):
66 | self._convert_peft_config()
67 | save_path = os.path.join(self.save_dir, f"epoch_{trainer.current_epoch}")
68 | self.peft_model.save_pretrained(save_path)
69 | print(f"[PeftSaveCallback] Saved LoRA weights to {save_path}")
70 |
71 | def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
72 | if self.save_every_n_steps is not None:
73 | global_step = trainer.global_step
74 | if global_step % self.save_every_n_steps == 0 and global_step > 0:
75 | self._convert_peft_config()
76 | save_path = os.path.join(self.save_dir, f"step_{global_step}")
77 | self.peft_model.save_pretrained(save_path)
78 | print(f"[PeftSaveCallback] Saved LoRA weights to {save_path}")
79 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/utils/misc.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import importlib
4 | from omegaconf import OmegaConf, DictConfig, ListConfig
5 |
6 | import torch
7 | import torch.distributed as dist
8 | from typing import Union
9 |
10 |
11 | def get_config_from_file(config_file: str) -> Union[DictConfig, ListConfig]:
12 | config_file = OmegaConf.load(config_file)
13 |
14 | if 'base_config' in config_file.keys():
15 | if config_file['base_config'] == "default_base":
16 | base_config = OmegaConf.create()
17 | # base_config = get_default_config()
18 | elif config_file['base_config'].endswith(".yaml"):
19 | base_config = get_config_from_file(config_file['base_config'])
20 | else:
21 | raise ValueError(f"{config_file} must be `.yaml` file or it contains `base_config` key.")
22 |
23 |         config_file = {key: value for key, value in config_file.items() if key != "base_config"}
24 |
25 | return OmegaConf.merge(base_config, config_file)
26 |
27 | return config_file
28 |
29 |
30 | def get_obj_from_str(string, reload=False):
31 | module, cls = string.rsplit(".", 1)
32 | if reload:
33 | module_imp = importlib.import_module(module)
34 | importlib.reload(module_imp)
35 | return getattr(importlib.import_module(module, package=None), cls)
36 |
37 |
38 | def get_obj_from_config(config):
39 | if "target" not in config:
40 | raise KeyError("Expected key `target` to instantiate.")
41 |
42 | return get_obj_from_str(config["target"])
43 |
44 |
45 | def instantiate_from_config(config, **kwargs):
46 | if "target" not in config:
47 | raise KeyError("Expected key `target` to instantiate.")
48 |
49 | cls = get_obj_from_str(config["target"])
50 |
51 | if config.get("from_pretrained", None):
52 | return cls.from_pretrained(
53 | config["from_pretrained"],
54 | use_safetensors=config.get('use_safetensors', False),
55 | variant=config.get('variant', 'fp16'))
56 |
57 | params = config.get("params", dict())
58 | # params.update(kwargs)
59 | # instance = cls(**params)
60 | kwargs.update(params)
61 | instance = cls(**kwargs)
62 |
63 | return instance
64 |
65 |
66 | def disabled_train(self, mode=True):
67 | """Overwrite model.train with this function to make sure train/eval mode
68 | does not change anymore."""
69 | return self
70 |
71 |
72 | def instantiate_non_trainable_model(config):
73 | model = instantiate_from_config(config)
74 | model = model.eval()
75 | model.train = disabled_train
76 | for param in model.parameters():
77 | param.requires_grad = False
78 |
79 | return model
80 |
81 |
82 | def is_dist_avail_and_initialized():
83 | if not dist.is_available():
84 | return False
85 | if not dist.is_initialized():
86 | return False
87 | return True
88 |
89 |
90 | def get_rank():
91 | if not is_dist_avail_and_initialized():
92 | return 0
93 | return dist.get_rank()
94 |
95 |
96 | def get_world_size():
97 | if not is_dist_avail_and_initialized():
98 | return 1
99 | return dist.get_world_size()
100 |
101 |
102 | def all_gather_batch(tensors):
103 | """
104 | Performs all_gather operation on the provided tensors.
105 | """
106 | # Queue the gathered tensors
107 | world_size = get_world_size()
108 | # There is no need for reduction in the single-proc case
109 | if world_size == 1:
110 | return tensors
111 | tensor_list = []
112 | output_tensor = []
113 | for tensor in tensors:
114 | tensor_all = [torch.ones_like(tensor) for _ in range(world_size)]
115 | dist.all_gather(
116 | tensor_all,
117 | tensor,
118 | async_op=False # performance opt
119 | )
120 |
121 | tensor_list.append(tensor_all)
122 |
123 | for tensor_all in tensor_list:
124 | output_tensor.append(torch.cat(tensor_all, dim=0))
125 | return output_tensor
126 |
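127 | # Example usage sketch for instantiate_from_config (hypothetical config values):
128 | #
129 | #   cfg = {"target": "torch.nn.Linear", "params": {"in_features": 8, "out_features": 4}}
130 | #   layer = instantiate_from_config(cfg)  # equivalent to torch.nn.Linear(8, 4)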
--------------------------------------------------------------------------------
/hy3dpaint/DifferentiableRenderer/camera_utils.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import math
16 |
17 | import numpy as np
18 | import torch
19 |
20 |
21 | def transform_pos(mtx, pos, keepdim=False):
22 | t_mtx = torch.from_numpy(mtx).to(pos.device) if isinstance(mtx, np.ndarray) else mtx
23 | if pos.shape[-1] == 3:
24 | posw = torch.cat([pos, torch.ones([pos.shape[0], 1]).to(pos.device)], axis=1)
25 | else:
26 | posw = pos
27 |
28 | if keepdim:
29 | return torch.matmul(posw, t_mtx.t())[...]
30 | else:
31 | return torch.matmul(posw, t_mtx.t())[None, ...]
32 |
33 |
34 | def get_mv_matrix(elev, azim, camera_distance, center=None):
35 | elev = -elev
36 | azim += 90
37 |
38 | elev_rad = math.radians(elev)
39 | azim_rad = math.radians(azim)
40 |
41 | camera_position = np.array(
42 | [
43 | camera_distance * math.cos(elev_rad) * math.cos(azim_rad),
44 | camera_distance * math.cos(elev_rad) * math.sin(azim_rad),
45 | camera_distance * math.sin(elev_rad),
46 | ]
47 | )
48 |
49 | if center is None:
50 | center = np.array([0, 0, 0])
51 | else:
52 | center = np.array(center)
53 |
54 | lookat = center - camera_position
55 | lookat = lookat / np.linalg.norm(lookat)
56 |
57 | up = np.array([0, 0, 1.0])
58 | right = np.cross(lookat, up)
59 | right = right / np.linalg.norm(right)
60 | up = np.cross(right, lookat)
61 | up = up / np.linalg.norm(up)
62 |
63 | c2w = np.concatenate([np.stack([right, up, -lookat], axis=-1), camera_position[:, None]], axis=-1)
64 |
65 | w2c = np.zeros((4, 4))
66 | w2c[:3, :3] = np.transpose(c2w[:3, :3], (1, 0))
67 | w2c[:3, 3:] = -np.matmul(np.transpose(c2w[:3, :3], (1, 0)), c2w[:3, 3:])
68 | w2c[3, 3] = 1.0
69 |
70 | return w2c.astype(np.float32)
71 |
72 |
73 | def get_orthographic_projection_matrix(left=-1, right=1, bottom=-1, top=1, near=0, far=2):
74 | """
75 |     Compute an orthographic projection matrix.
76 | 
77 |     Args:
78 |         left (float): Left boundary of the projection volume.
79 |         right (float): Right boundary of the projection volume.
80 |         bottom (float): Bottom boundary of the projection volume.
81 |         top (float): Top boundary of the projection volume.
82 |         near (float): Distance to the near clipping plane.
83 |         far (float): Distance to the far clipping plane.
84 | 
85 |     Returns:
86 |         numpy.ndarray: The orthographic projection matrix.
87 | """
88 | ortho_matrix = np.eye(4, dtype=np.float32)
89 | ortho_matrix[0, 0] = 2 / (right - left)
90 | ortho_matrix[1, 1] = 2 / (top - bottom)
91 | ortho_matrix[2, 2] = -2 / (far - near)
92 | ortho_matrix[0, 3] = -(right + left) / (right - left)
93 | ortho_matrix[1, 3] = -(top + bottom) / (top - bottom)
94 | ortho_matrix[2, 3] = -(far + near) / (far - near)
95 | return ortho_matrix
96 |
97 |
98 | def get_perspective_projection_matrix(fovy, aspect_wh, near, far):
99 | fovy_rad = math.radians(fovy)
100 | return np.array(
101 | [
102 | [1.0 / (math.tan(fovy_rad / 2.0) * aspect_wh), 0, 0, 0],
103 | [0, 1.0 / math.tan(fovy_rad / 2.0), 0, 0],
104 | [0, 0, -(far + near) / (far - near), -2.0 * far * near / (far - near)],
105 | [0, 0, -1, 0],
106 | ]
107 | ).astype(np.float32)
108 |
--------------------------------------------------------------------------------
/hy3dpaint/README.md:
--------------------------------------------------------------------------------
1 | # Hunyuan3D-Paint 2.1
2 |
3 | Hunyuan3D-Paint 2.1 is a high-quality PBR texture generation model for 3D meshes, powered by [RomanTex](https://github.com/oakshy/RomanTex) and [MaterialMVP](https://github.com/ZebinHe/MaterialMVP/).
4 |
5 |
6 | ## Quick Inference
7 | You need to manually download the RealESRGAN weights into the `ckpt` folder using the following command:
8 | ```bash
9 | wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P ckpt
10 | ```
11 |
12 | Given a 3D mesh `mesh.glb` and a reference image `image.png`, you can run inference with the following command. The result will be saved as `textured_mesh.glb`.
13 |
14 | ```bash
15 | python3 demo.py
16 | ```
17 | **Optional arguments in `demo.py`:**
18 |
19 | - `max_num_view` : Maximum number of views, adaptively selected by the model (integer between 6 and 12)
20 |
21 | - `resolution` : Resolution for generated PBR textures (512 or 768)
22 |
23 | **Memory Recommendation:** For `max_num_view=6` and `resolution=512`, we recommend using a GPU with at least **21GB VRAM**.
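
For reference, a minimal end-to-end run might look like the sketch below. This assumes `demo.py` picks up `mesh.glb` and `image.png` from its working directory; check the paths near the top of the script if your copy differs.

```bash
# Hypothetical invocation sketch: stage the inputs demo.py expects, then run it.
cp /path/to/your_mesh.glb mesh.glb
cp /path/to/your_reference.png image.png
python3 demo.py   # writes textured_mesh.glb on success
```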
24 |
25 | ## Training
26 |
27 | ### Data Prepare
28 | We provide a sample training example in `train_examples` for an overfitting training test. The data should be organized as follows:
29 |
30 | ```
31 | train_examples/
32 | ├── examples.json
33 | └── 001/
34 | ├── render_tex/ # Rendered generated PBR images
35 | │ ├── 000.png # Rendered views (RGB images)
36 | │ ├── 000_albedo.png # Albedo maps for each view
37 | │ ├── 000_mr.png # Metallic-Roughness maps for each view, R and G channels
38 |     │   ├── 000_normal.png   # Normal maps
39 |     │   ├── 000_normal.jpg   # Normal maps (JPEG variant)
40 |     │   ├── 000_pos.png      # Position maps
41 |     │   ├── 000_pos.jpg      # Position maps (JPEG variant)
42 | │ ├── 001.png # Additional views...
43 | │ ├── 001_albedo.png
44 | │ ├── 001_mr.png
45 | │ ├── 001_normal.png
46 | │ ├── 001_pos.png
47 | │ └── ... # More views (002, 003, 004, 005, ...)
48 | └── render_cond/ # Rendered reference images (at least two light conditions should be rendered to facilitate consistency loss)
49 | ├── 000_light_AL.png # Light condition 1 (Area Light)
50 | ├── 000_light_ENVMAP.png # Light condition 2 (Environment map)
51 | ├── 000_light_PL.png # Light condition 3 (Point lighting)
52 | ├── 001_light_AL.png
53 | ├── 001_light_ENVMAP.png
54 | ├── 001_light_PL.png
55 | └── ... # More lighting conditions (002-005, ...)
56 | ```
57 |
58 | Each training example contains (a quick layout check is sketched after this list):
59 | - **render_tex/**: Multi-view renderings with PBR material properties
60 | - Main RGB images (`XXX.png`)
61 | - Albedo maps (`XXX_albedo.png`)
62 | - Metallic-Roughness maps (`XXX_mr.png`)
63 | - Normal maps (`XXX_normal.png/jpg`)
64 | - Position maps (`XXX_pos.png/jpg`)
65 | - Camera transforms (`transforms.json`)
66 | - **render_cond/**: Lighting condition maps for each view
67 | - Ambient lighting (`XXX_light_AL.png`)
68 | - Environment map lighting (`XXX_light_ENVMAP.png`)
69 | - Point lighting (`XXX_light_PL.png`)
70 |
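If you assemble your own examples in this layout, a quick check along the lines of the sketch below can catch missing maps before training. It is not part of the repo; the three-digit view indices and the png/jpg fallback are assumptions taken from the structure described above.

```python
import glob
import os


def check_example(example_dir, suffixes=("", "_albedo", "_mr", "_normal", "_pos")):
    """Report view/suffix combinations that are missing under render_tex/."""
    tex_dir = os.path.join(example_dir, "render_tex")
    # View indices are taken from the plain RGB renders: 000.png, 001.png, ...
    views = sorted({os.path.basename(p)[:3] for p in glob.glob(os.path.join(tex_dir, "[0-9][0-9][0-9].png"))})
    missing = []
    for view in views:
        for suffix in suffixes:
            # Each map may be stored as .png or .jpg, as described above
            if not any(os.path.exists(os.path.join(tex_dir, f"{view}{suffix}{ext}")) for ext in (".png", ".jpg")):
                missing.append(f"{view}{suffix}")
    return missing


print(check_example("train_examples/001"))  # an empty list means the layout looks complete
```
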
71 | ### Launch Training
72 |
73 |
74 | ```bash
75 | python3 train.py --base 'cfgs/hunyuan-paint-pbr.yaml' --name overfit --logdir logs/
76 | ```
77 |
78 | ## BibTeX
79 |
80 | If you found Hunyuan3D-Paint 2.1 helpful, please cite our papers:
81 |
82 | ```bibtex
83 | @article{feng2025romantex,
84 | title={RomanTex: Decoupling 3D-aware Rotary Positional Embedded Multi-Attention Network for Texture Synthesis},
85 | author={Feng, Yifei and Yang, Mingxin and Yang, Shuhui and Zhang, Sheng and Yu, Jiaao and Zhao, Zibo and Liu, Yuhong and Jiang, Jie and Guo, Chunchao},
86 | journal={arXiv preprint arXiv:2503.19011},
87 | year={2025}
88 | }
89 |
90 | @article{he2025materialmvp,
91 | title={MaterialMVP: Illumination-Invariant Material Generation via Multi-view PBR Diffusion},
92 | author={He, Zebin and Yang, Mingxin and Yang, Shuhui and Tang, Yixuan and Wang, Tao and Zhang, Kaihao and Chen, Guanying and Liu, Yuhong and Jiang, Jie and Guo, Chunchao and Luo, Wenhan},
93 | journal={arXiv preprint arXiv:2503.10289},
94 | year={2025}
95 | }
96 | ```
97 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/models/autoencoders/attention_processors.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the repsective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import os
16 |
17 | import torch
18 | import torch.nn.functional as F
19 |
20 | scaled_dot_product_attention = F.scaled_dot_product_attention
21 | if os.environ.get('CA_USE_SAGEATTN', '0') == '1':
22 | try:
23 | from sageattention import sageattn
24 | except ImportError:
25 |         raise ImportError('Please install the package "sageattention" to use CA_USE_SAGEATTN.')
26 | scaled_dot_product_attention = sageattn
27 |
28 |
29 | class CrossAttentionProcessor:
30 | def __call__(self, attn, q, k, v):
31 | out = scaled_dot_product_attention(q, k, v)
32 | return out
33 |
34 |
35 | class FlashVDMCrossAttentionProcessor:
36 | def __init__(self, topk=None):
37 | self.topk = topk
38 |
39 | def __call__(self, attn, q, k, v):
40 | if k.shape[-2] == 3072:
41 | topk = 1024
42 | elif k.shape[-2] == 512:
43 | topk = 256
44 | else:
45 | topk = k.shape[-2] // 3
46 |
47 | if self.topk is True:
48 | q1 = q[:, :, ::100, :]
49 | sim = q1 @ k.transpose(-1, -2)
50 | sim = torch.mean(sim, -2)
51 | topk_ind = torch.topk(sim, dim=-1, k=topk).indices.squeeze(-2).unsqueeze(-1)
52 | topk_ind = topk_ind.expand(-1, -1, -1, v.shape[-1])
53 | v0 = torch.gather(v, dim=-2, index=topk_ind)
54 | k0 = torch.gather(k, dim=-2, index=topk_ind)
55 | out = scaled_dot_product_attention(q, k0, v0)
56 | elif self.topk is False:
57 | out = scaled_dot_product_attention(q, k, v)
58 | else:
59 | idx, counts = self.topk
60 | start = 0
61 | outs = []
62 | for grid_coord, count in zip(idx, counts):
63 | end = start + count
64 | q_chunk = q[:, :, start:end, :]
65 | k0, v0 = self.select_topkv(q_chunk, k, v, topk)
66 | out = scaled_dot_product_attention(q_chunk, k0, v0)
67 | outs.append(out)
68 | start += count
69 | out = torch.cat(outs, dim=-2)
70 | self.topk = False
71 | return out
72 |
73 | def select_topkv(self, q_chunk, k, v, topk):
74 | q1 = q_chunk[:, :, ::50, :]
75 | sim = q1 @ k.transpose(-1, -2)
76 | sim = torch.mean(sim, -2)
77 | topk_ind = torch.topk(sim, dim=-1, k=topk).indices.squeeze(-2).unsqueeze(-1)
78 | topk_ind = topk_ind.expand(-1, -1, -1, v.shape[-1])
79 | v0 = torch.gather(v, dim=-2, index=topk_ind)
80 | k0 = torch.gather(k, dim=-2, index=topk_ind)
81 | return k0, v0
82 |
83 |
84 | class FlashVDMTopMCrossAttentionProcessor(FlashVDMCrossAttentionProcessor):
85 | def select_topkv(self, q_chunk, k, v, topk):
86 | q1 = q_chunk[:, :, ::30, :]
87 | sim = q1 @ k.transpose(-1, -2)
88 | # sim = sim.to(torch.float32)
89 | sim = sim.softmax(-1)
90 | sim = torch.mean(sim, 1)
91 | activated_token = torch.where(sim > 1e-6)[2]
92 | index = torch.unique(activated_token, return_counts=True)[0].unsqueeze(0).unsqueeze(0).unsqueeze(-1)
93 | index = index.expand(-1, v.shape[1], -1, v.shape[-1])
94 | v0 = torch.gather(v, dim=-2, index=index)
95 | k0 = torch.gather(k, dim=-2, index=index)
96 | return k0, v0
97 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/utils/trainings/mesh.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
4 | # except for the third-party components listed below.
5 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
6 | # in the respective licenses of these third-party components.
7 | # Users must comply with all terms and conditions of original licenses of these third-party
8 | # components and must ensure that the usage of the third party components adheres to
9 | # all relevant laws and regulations.
10 |
11 | # For avoidance of doubts, Hunyuan 3D means the large language models and
12 | # their software and algorithms, including trained model weights, parameters (including
13 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
14 | # fine-tuning enabling code and other elements of the foregoing made publicly available
15 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
16 |
17 | import os
18 | import cv2
19 | import numpy as np
20 | import PIL.Image
21 | from typing import Optional
22 |
23 | import trimesh
24 |
25 |
26 | def save_obj(pointnp_px3, facenp_fx3, fname):
27 | fid = open(fname, "w")
28 | write_str = ""
29 | for pidx, p in enumerate(pointnp_px3):
30 | pp = p
31 | write_str += "v %f %f %f\n" % (pp[0], pp[1], pp[2])
32 |
33 | for i, f in enumerate(facenp_fx3):
34 | f1 = f + 1
35 | write_str += "f %d %d %d\n" % (f1[0], f1[1], f1[2])
36 | fid.write(write_str)
37 | fid.close()
38 | return
39 |
40 |
41 | def savemeshtes2(pointnp_px3, tcoords_px2, facenp_fx3, facetex_fx3, tex_map, fname):
42 | fol, na = os.path.split(fname)
43 | na, _ = os.path.splitext(na)
44 |
45 | matname = "%s/%s.mtl" % (fol, na)
46 | fid = open(matname, "w")
47 | fid.write("newmtl material_0\n")
48 | fid.write("Kd 1 1 1\n")
49 | fid.write("Ka 0 0 0\n")
50 | fid.write("Ks 0.4 0.4 0.4\n")
51 | fid.write("Ns 10\n")
52 | fid.write("illum 2\n")
53 | fid.write("map_Kd %s.png\n" % na)
54 | fid.close()
55 | ####
56 |
57 | fid = open(fname, "w")
58 | fid.write("mtllib %s.mtl\n" % na)
59 |
60 | for pidx, p3 in enumerate(pointnp_px3):
61 | pp = p3
62 | fid.write("v %f %f %f\n" % (pp[0], pp[1], pp[2]))
63 |
64 | for pidx, p2 in enumerate(tcoords_px2):
65 | pp = p2
66 | fid.write("vt %f %f\n" % (pp[0], pp[1]))
67 |
68 | fid.write("usemtl material_0\n")
69 | for i, f in enumerate(facenp_fx3):
70 | f1 = f + 1
71 | f2 = facetex_fx3[i] + 1
72 | fid.write("f %d/%d %d/%d %d/%d\n" % (f1[0], f2[0], f1[1], f2[1], f1[2], f2[2]))
73 | fid.close()
74 |
75 | PIL.Image.fromarray(np.ascontiguousarray(tex_map), "RGB").save(
76 | os.path.join(fol, "%s.png" % na))
77 |
78 | return
79 |
80 |
81 | class MeshOutput(object):
82 |
83 | def __init__(self,
84 | mesh_v: np.ndarray,
85 | mesh_f: np.ndarray,
86 | vertex_colors: Optional[np.ndarray] = None,
87 | uvs: Optional[np.ndarray] = None,
88 | mesh_tex_idx: Optional[np.ndarray] = None,
89 | tex_map: Optional[np.ndarray] = None):
90 |
91 | self.mesh_v = mesh_v
92 | self.mesh_f = mesh_f
93 | self.vertex_colors = vertex_colors
94 | self.uvs = uvs
95 | self.mesh_tex_idx = mesh_tex_idx
96 | self.tex_map = tex_map
97 |
98 | def contain_uv_texture(self):
99 | return (self.uvs is not None) and (self.mesh_tex_idx is not None) and (self.tex_map is not None)
100 |
101 | def contain_vertex_colors(self):
102 | return self.vertex_colors is not None
103 |
104 | def export(self, fname):
105 |
106 | if self.contain_uv_texture():
107 | savemeshtes2(
108 | self.mesh_v,
109 | self.uvs,
110 | self.mesh_f,
111 | self.mesh_tex_idx,
112 | self.tex_map,
113 | fname
114 | )
115 |
116 | elif self.contain_vertex_colors():
117 | mesh_obj = trimesh.Trimesh(vertices=self.mesh_v, faces=self.mesh_f, vertex_colors=self.vertex_colors)
118 | mesh_obj.export(fname)
119 |
120 | else:
121 | save_obj(
122 | self.mesh_v,
123 | self.mesh_f,
124 | fname
125 | )
126 |
127 |
128 |
129 |
--------------------------------------------------------------------------------
/hy3dpaint/convert_utils.py:
--------------------------------------------------------------------------------
1 | import trimesh
2 | import pygltflib
3 | import numpy as np
4 | from PIL import Image
5 | import base64
6 | import io
7 |
8 |
9 | def combine_metallic_roughness(metallic_path, roughness_path, output_path):
10 | """
11 |     Merge the metallic and roughness maps into a single texture.
12 |     The GLB format expects metallic in the B channel and roughness in the G channel.
13 | """
14 |     # Load the maps
15 |     metallic_img = Image.open(metallic_path).convert("L")  # convert to grayscale
16 |     roughness_img = Image.open(roughness_path).convert("L")  # convert to grayscale
17 |
18 |     # Make sure the two maps have the same size
19 | if metallic_img.size != roughness_img.size:
20 | roughness_img = roughness_img.resize(metallic_img.size)
21 |
22 |     # Create an RGB image
23 | width, height = metallic_img.size
24 | combined = Image.new("RGB", (width, height))
25 |
26 |     # Convert to numpy arrays for easier manipulation
27 | metallic_array = np.array(metallic_img)
28 | roughness_array = np.array(roughness_img)
29 |
30 |     # Build the combined array: (R, G, B) = (AO, Roughness, Metallic)
31 |     combined_array = np.zeros((height, width, 3), dtype=np.uint8)
32 |     combined_array[:, :, 0] = 255  # R channel: AO (white if no AO map is available)
33 |     combined_array[:, :, 1] = roughness_array  # G channel: roughness
34 |     combined_array[:, :, 2] = metallic_array  # B channel: metallic
35 |
36 |     # Convert back to a PIL image and save
37 | combined = Image.fromarray(combined_array)
38 | combined.save(output_path)
39 | return output_path
40 |
41 |
42 | def create_glb_with_pbr_materials(obj_path, textures_dict, output_path):
43 | """
44 |     Create a GLB file with full PBR materials using pygltflib.
45 |
46 | textures_dict = {
47 | 'albedo': 'path/to/albedo.png',
48 | 'metallic': 'path/to/metallic.png',
49 | 'roughness': 'path/to/roughness.png',
50 |         'normal': 'path/to/normal.png',  # optional
51 |         'ao': 'path/to/ao.png'  # optional
52 | }
53 | """
54 |     # 1. Load the OBJ file
55 | mesh = trimesh.load(obj_path)
56 |
57 |     # 2. Export to a temporary GLB first
58 | temp_glb = "temp.glb"
59 | mesh.export(temp_glb)
60 |
61 |     # 3. Load the GLB file to edit its materials
62 | gltf = pygltflib.GLTF2().load(temp_glb)
63 |
64 |     # 4. Prepare the texture data
65 |     def image_to_data_uri(image_path):
66 |         """Convert an image file to a data URI"""
67 | with open(image_path, "rb") as f:
68 | image_data = f.read()
69 | encoded = base64.b64encode(image_data).decode()
70 | return f"data:image/png;base64,{encoded}"
71 |
72 |     # 5. Combine metallic and roughness
73 | if "metallic" in textures_dict and "roughness" in textures_dict:
74 | mr_combined_path = "mr_combined.png"
75 | combine_metallic_roughness(textures_dict["metallic"], textures_dict["roughness"], mr_combined_path)
76 | textures_dict["metallicRoughness"] = mr_combined_path
77 |
78 |     # 6. Add the images and textures to the GLTF
79 |     images = []
80 |     textures = []
81 |     texture_indices = {}  # maps texture type -> index into `textures`
82 | texture_mapping = {
83 | "albedo": "baseColorTexture",
84 | "metallicRoughness": "metallicRoughnessTexture",
85 | "normal": "normalTexture",
86 | "ao": "occlusionTexture",
87 | }
88 |
89 | for tex_type, tex_path in textures_dict.items():
90 | if tex_type in texture_mapping and tex_path:
91 |             # Add the image
92 |             image = pygltflib.Image(uri=image_to_data_uri(tex_path))
93 |             images.append(image)
94 |             # Add the texture and record its slot for this texture type
95 |             texture = pygltflib.Texture(source=len(images) - 1)
96 |             textures.append(texture)
97 |             texture_indices[tex_type] = len(textures) - 1
98 |
99 |     # 7. Create the PBR material
100 | pbr_metallic_roughness = pygltflib.PbrMetallicRoughness(
101 | baseColorFactor=[1.0, 1.0, 1.0, 1.0], metallicFactor=1.0, roughnessFactor=1.0
102 | )
103 |
104 |     # Assign texture slots from the indices recorded above, so the mapping
105 |     # does not depend on the insertion order of textures_dict
106 |     if "albedo" in texture_indices:
107 |         pbr_metallic_roughness.baseColorTexture = pygltflib.TextureInfo(index=texture_indices["albedo"])
108 | 
109 |     if "metallicRoughness" in texture_indices:
110 |         pbr_metallic_roughness.metallicRoughnessTexture = pygltflib.TextureInfo(
111 |             index=texture_indices["metallicRoughness"]
112 |         )
113 | 
114 |     # Create the material
115 |     material = pygltflib.Material(name="PBR_Material", pbrMetallicRoughness=pbr_metallic_roughness)
116 | 
117 |     # Add the normal map
118 |     if "normal" in texture_indices:
119 |         material.normalTexture = pygltflib.NormalTextureInfo(index=texture_indices["normal"])
120 | 
121 |     # Add the ambient-occlusion (AO) map
122 |     if "ao" in texture_indices:
123 |         material.occlusionTexture = pygltflib.OcclusionTextureInfo(index=texture_indices["ao"])
124 | 
125 | 
126 |     # 8. Update the GLTF
127 | gltf.images = images
128 | gltf.textures = textures
129 | gltf.materials = [material]
130 |
131 |     # Make sure the mesh uses the material
132 | if gltf.meshes:
133 | for primitive in gltf.meshes[0].primitives:
134 | primitive.material = 0
135 |
136 |     # 9. Save the final GLB
137 |     gltf.save(output_path)
138 |     print(f"PBR GLB file saved: {output_path}")
139 |
140 |
141 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/utils/utils.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import logging
16 | import os
17 | from functools import wraps
18 |
19 | import torch
20 |
21 |
22 | def get_logger(name):
23 | logger = logging.getLogger(name)
24 | logger.setLevel(logging.INFO)
25 |
26 | console_handler = logging.StreamHandler()
27 | console_handler.setLevel(logging.INFO)
28 |
29 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
30 | console_handler.setFormatter(formatter)
31 | logger.addHandler(console_handler)
32 | return logger
33 |
34 |
35 | logger = get_logger('hy3dgen.shapgen')
36 |
37 |
38 | class synchronize_timer:
39 | """ Synchronized timer to count the inference time of `nn.Module.forward`.
40 |
41 | Supports both context manager and decorator usage.
42 |
43 | Example as context manager:
44 | ```python
45 | with synchronize_timer('name') as t:
46 | run()
47 | ```
48 |
49 | Example as decorator:
50 | ```python
51 | @synchronize_timer('Export to trimesh')
52 | def export_to_trimesh(mesh_output):
53 | pass
54 | ```
55 | """
56 |
57 | def __init__(self, name=None):
58 | self.name = name
59 |
60 | def __enter__(self):
61 | """Context manager entry: start timing."""
62 | if os.environ.get('HY3DGEN_DEBUG', '0') == '1':
63 | self.start = torch.cuda.Event(enable_timing=True)
64 | self.end = torch.cuda.Event(enable_timing=True)
65 | self.start.record()
66 | return lambda: self.time
67 |
68 | def __exit__(self, exc_type, exc_value, exc_tb):
69 | """Context manager exit: stop timing and log results."""
70 | if os.environ.get('HY3DGEN_DEBUG', '0') == '1':
71 | self.end.record()
72 | torch.cuda.synchronize()
73 | self.time = self.start.elapsed_time(self.end)
74 | if self.name is not None:
75 | logger.info(f'{self.name} takes {self.time} ms')
76 |
77 | def __call__(self, func):
78 | """Decorator: wrap the function to time its execution."""
79 |
80 | @wraps(func)
81 | def wrapper(*args, **kwargs):
82 | with self:
83 | result = func(*args, **kwargs)
84 | return result
85 |
86 | return wrapper
87 |
88 |
89 | def smart_load_model(
90 | model_path,
91 | subfolder,
92 | use_safetensors,
93 | variant,
94 | ):
95 | original_model_path = model_path
96 | # try local path
97 | base_dir = os.environ.get('HY3DGEN_MODELS', '~/.cache/hy3dgen')
98 | model_fld = os.path.expanduser(os.path.join(base_dir, model_path))
99 | model_path = os.path.expanduser(os.path.join(base_dir, model_path, subfolder))
100 | logger.info(f'Try to load model from local path: {model_path}')
101 | if not os.path.exists(model_path):
102 |         logger.info('Model path does not exist, trying to download from Hugging Face')
103 | try:
104 | from huggingface_hub import snapshot_download
105 |             # Download only the requested subfolder
106 |             path = snapshot_download(
107 |                 repo_id=original_model_path,
108 |                 allow_patterns=[f"{subfolder}/*"],  # only match files under the subfolder
109 |                 local_dir=model_fld
110 |             )
111 |             model_path = os.path.join(path, subfolder)  # keep the path-joining logic unchanged
112 | except ImportError:
113 | logger.warning(
114 | "You need to install HuggingFace Hub to load models from the hub."
115 | )
116 | raise RuntimeError(f"Model path {model_path} not found")
117 | except Exception as e:
118 | raise e
119 |
120 | if not os.path.exists(model_path):
121 | raise FileNotFoundError(f"Model path {original_model_path} not found")
122 |
123 | extension = 'ckpt' if not use_safetensors else 'safetensors'
124 | variant = '' if variant is None else f'.{variant}'
125 | ckpt_name = f'model{variant}.{extension}'
126 | config_path = os.path.join(model_path, 'config.yaml')
127 | ckpt_path = os.path.join(model_path, ckpt_name)
128 | return config_path, ckpt_path
129 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/models/diffusion/transport/integrators.py:
--------------------------------------------------------------------------------
1 | # This file includes code derived from the SiT project (https://github.com/willisma/SiT),
2 | # which is licensed under the MIT License.
3 | #
4 | # MIT License
5 | #
6 | # Copyright (c) Meta Platforms, Inc. and affiliates.
7 | #
8 | # Permission is hereby granted, free of charge, to any person obtaining a copy
9 | # of this software and associated documentation files (the "Software"), to deal
10 | # in the Software without restriction, including without limitation the rights
11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | # copies of the Software, and to permit persons to whom the Software is
13 | # furnished to do so, subject to the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be included in all
16 | # copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 | # SOFTWARE.
25 |
26 | import numpy as np
27 | import torch as th
28 | import torch.nn as nn
29 | from torchdiffeq import odeint
30 | from functools import partial
31 | from tqdm import tqdm
32 |
33 | class sde:
34 | """SDE solver class"""
35 | def __init__(
36 | self,
37 | drift,
38 | diffusion,
39 | *,
40 | t0,
41 | t1,
42 | num_steps,
43 | sampler_type,
44 | ):
45 | assert t0 < t1, "SDE sampler has to be in forward time"
46 |
47 | self.num_timesteps = num_steps
48 | self.t = th.linspace(t0, t1, num_steps)
49 | self.dt = self.t[1] - self.t[0]
50 | self.drift = drift
51 | self.diffusion = diffusion
52 | self.sampler_type = sampler_type
53 |
54 | def __Euler_Maruyama_step(self, x, mean_x, t, model, **model_kwargs):
55 | w_cur = th.randn(x.size()).to(x)
56 | t = th.ones(x.size(0)).to(x) * t
57 | dw = w_cur * th.sqrt(self.dt)
58 | drift = self.drift(x, t, model, **model_kwargs)
59 | diffusion = self.diffusion(x, t)
60 | mean_x = x + drift * self.dt
61 | x = mean_x + th.sqrt(2 * diffusion) * dw
62 | return x, mean_x
63 |
64 | def __Heun_step(self, x, _, t, model, **model_kwargs):
65 | w_cur = th.randn(x.size()).to(x)
66 | dw = w_cur * th.sqrt(self.dt)
67 | t_cur = th.ones(x.size(0)).to(x) * t
68 | diffusion = self.diffusion(x, t_cur)
69 | xhat = x + th.sqrt(2 * diffusion) * dw
70 | K1 = self.drift(xhat, t_cur, model, **model_kwargs)
71 | xp = xhat + self.dt * K1
72 | K2 = self.drift(xp, t_cur + self.dt, model, **model_kwargs)
73 | return xhat + 0.5 * self.dt * (K1 + K2), xhat # at last time point we do not perform the heun step
74 |
75 | def __forward_fn(self):
76 | """TODO: generalize here by adding all private functions ending with steps to it"""
77 | sampler_dict = {
78 | "Euler": self.__Euler_Maruyama_step,
79 | "Heun": self.__Heun_step,
80 | }
81 |
82 | try:
83 | sampler = sampler_dict[self.sampler_type]
84 | except:
85 |             raise NotImplementedError("Sampler type not implemented.")
86 |
87 | return sampler
88 |
89 | def sample(self, init, model, **model_kwargs):
90 | """forward loop of sde"""
91 | x = init
92 | mean_x = init
93 | samples = []
94 | sampler = self.__forward_fn()
95 | for ti in self.t[:-1]:
96 | with th.no_grad():
97 | x, mean_x = sampler(x, mean_x, ti, model, **model_kwargs)
98 | samples.append(x)
99 |
100 | return samples
101 |
102 | class ode:
103 | """ODE solver class"""
104 | def __init__(
105 | self,
106 | drift,
107 | *,
108 | t0,
109 | t1,
110 | sampler_type,
111 | num_steps,
112 | atol,
113 | rtol,
114 | ):
115 | assert t0 < t1, "ODE sampler has to be in forward time"
116 |
117 | self.drift = drift
118 | self.t = th.linspace(t0, t1, num_steps)
119 | self.atol = atol
120 | self.rtol = rtol
121 | self.sampler_type = sampler_type
122 |
123 | def sample(self, x, model, **model_kwargs):
124 |
125 | device = x[0].device if isinstance(x, tuple) else x.device
126 | def _fn(t, x):
127 | t = th.ones(x[0].size(0)).to(device) * t if isinstance(x, tuple) else th.ones(x.size(0)).to(device) * t
128 | model_output = self.drift(x, t, model, **model_kwargs)
129 | return model_output
130 |
131 | t = self.t.to(device)
132 | atol = [self.atol] * len(x) if isinstance(x, tuple) else [self.atol]
133 | rtol = [self.rtol] * len(x) if isinstance(x, tuple) else [self.rtol]
134 | samples = odeint(
135 | _fn,
136 | x,
137 | t,
138 | method=self.sampler_type,
139 | atol=atol,
140 | rtol=rtol
141 | )
142 | return samples
143 |
--------------------------------------------------------------------------------
/hy3dpaint/utils/torchvision_fix.py:
--------------------------------------------------------------------------------
1 | # Torchvision compatibility fix for functional_tensor module
2 | # This file helps resolve compatibility issues between different torchvision versions
3 |
4 | import sys
5 | import torch
6 | import torchvision
7 |
8 | def fix_torchvision_functional_tensor():
9 | """
10 | Fix torchvision.transforms.functional_tensor import issue
11 | """
12 | try:
13 | # Check if the module exists in the expected location
14 | import torchvision.transforms.functional_tensor
15 | print("torchvision.transforms.functional_tensor is available")
16 | return True
17 | except ImportError:
18 | print("torchvision.transforms.functional_tensor not found, applying compatibility fix...")
19 |
20 | try:
21 | # Create a mock functional_tensor module with the required functions
22 | import torchvision.transforms.functional as F
23 |
24 | class FunctionalTensorMock:
25 | """Mock module to replace functional_tensor"""
26 |
27 | @staticmethod
28 | def _get_grayscale_weights(img):
29 | """Helper to create grayscale weights based on image dimensions"""
30 | weights = torch.tensor([0.299, 0.587, 0.114], device=img.device, dtype=img.dtype)
31 | return weights.view(1, 3, 1, 1) if len(img.shape) == 4 else weights.view(3, 1, 1)
32 |
33 | @staticmethod
34 | def _try_import_fallback(module_names, attr_name):
35 | """Helper to try importing from multiple modules"""
36 | for module_name in module_names:
37 | try:
38 | module = __import__(module_name, fromlist=[attr_name])
39 | if hasattr(module, attr_name):
40 | return getattr(module, attr_name)
41 | except ImportError:
42 | continue
43 | return None
44 |
45 | @staticmethod
46 | def rgb_to_grayscale(img, num_output_channels=1):
47 | """Convert RGB image to grayscale"""
48 | if hasattr(F, 'rgb_to_grayscale'):
49 | return F.rgb_to_grayscale(img, num_output_channels)
50 |
51 | # Fallback implementation
52 | weights = FunctionalTensorMock._get_grayscale_weights(img)
53 | grayscale = torch.sum(img * weights, dim=-3, keepdim=True)
54 |
55 | if num_output_channels == 3:
56 | repeat_dims = (1, 3, 1, 1) if len(img.shape) == 4 else (3, 1, 1)
57 | grayscale = grayscale.repeat(*repeat_dims)
58 |
59 | return grayscale
60 |
61 | @staticmethod
62 | def resize(img, size, interpolation=2, antialias=None):
63 | """Resize function wrapper"""
64 | # Try v2.functional first, then regular functional, then torch.nn.functional
65 | resize_func = FunctionalTensorMock._try_import_fallback([
66 | 'torchvision.transforms.v2.functional',
67 | 'torchvision.transforms.functional'
68 | ], 'resize')
69 |
70 | if resize_func:
71 | try:
72 | return resize_func(img, size, interpolation=interpolation, antialias=antialias)
73 | except TypeError:
74 | # Fallback for older versions without antialias parameter
75 | return resize_func(img, size, interpolation=interpolation)
76 |
77 | # Final fallback using torch.nn.functional
78 | import torch.nn.functional as torch_F
79 | size = (size, size) if isinstance(size, int) else size
80 | img_input = img.unsqueeze(0) if len(img.shape) == 3 else img
81 | return torch_F.interpolate(img_input, size=size, mode='bilinear', align_corners=False)
82 |
83 | def __getattr__(self, name):
84 | """Fallback to regular functional module"""
85 | func = self._try_import_fallback([
86 | 'torchvision.transforms.functional',
87 | 'torchvision.transforms.v2.functional'
88 | ], name)
89 |
90 | if func:
91 | return func
92 |
93 | raise AttributeError(f"'{name}' not found in functional_tensor mock")
94 |
95 | # Create the mock module instance and monkey patch
96 | sys.modules['torchvision.transforms.functional_tensor'] = FunctionalTensorMock()
97 | print("Applied compatibility fix: created functional_tensor mock module")
98 | return True
99 |
100 | except Exception as e:
101 | print(f"Failed to create functional_tensor mock: {e}")
102 | return False
103 |
104 | def apply_fix():
105 | """Apply the torchvision compatibility fix"""
106 | print(f"Torchvision version: {torchvision.__version__}")
107 | return fix_torchvision_functional_tensor()
108 |
109 | if __name__ == "__main__":
110 | apply_fix()
111 |
--------------------------------------------------------------------------------
/hy3dshape/configs/hunyuan3ddit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-512.yaml:
--------------------------------------------------------------------------------
1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518"
2 |
3 | training:
4 | steps: 10_0000_0000
5 | use_amp: true
6 | amp_type: "bf16"
7 | base_lr: 1e-4
8 | gradient_clip_val: 1.0
9 | gradient_clip_algorithm: "norm"
10 | every_n_train_steps: 2000 # 5000
11 | val_check_interval: 50 # 4096
12 | limit_val_batches: 16
13 |
14 | dataset:
15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule
16 | params:
17 | #! Base setting
18 | batch_size: 2
19 | num_workers: 8
20 | val_num_workers: 4
21 |
22 | # Data
23 | train_data_list: tools/mini_trainset/preprocessed
24 | val_data_list: tools/mini_trainset/preprocessed
25 |
26 | #! Image loading
27 | cond_stage_key: "image" # image / text / image_text
28 | image_size: 518
29 | mean: &mean [0.5, 0.5, 0.5]
30 | std: &std [0.5, 0.5, 0.5]
31 |
32 | #! Point cloud sampling
33 | pc_size: &pc_size 10240
34 | pc_sharpedge_size: &pc_sharpedge_size 10240
35 | sharpedge_label: &sharpedge_label true
36 | return_normal: true
37 |
38 | #! Augmentation
39 | padding: true
40 |
41 | model:
42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser
43 | params:
44 | first_stage_key: "surface"
45 | cond_stage_key: "image"
46 | scale_by_std: false
47 | z_scale_factor: &z_scale_factor 0.9990943042622529 # 1 / 1.0009065167661184
48 | torch_compile: false
49 |
50 | # ema_config:
51 | # ema_model: LitEma
52 | # ema_decay: 0.999
53 | # ema_inference: false
54 |
55 | first_stage_config:
56 | target: hy3dshape.models.autoencoders.ShapeVAE
57 | from_pretrained: tencent/Hunyuan3D-2.1
58 | params:
59 | num_latents: &num_latents 512
60 | embed_dim: 64
61 | num_freqs: 8
62 | include_pi: false
63 | heads: 16
64 | width: 1024
65 | point_feats: 4
66 | num_decoder_layers: 16
67 | pc_size: *pc_size
68 | pc_sharpedge_size: *pc_sharpedge_size
69 | qkv_bias: false
70 | qk_norm: true
71 | scale_factor: *z_scale_factor
72 | geo_decoder_mlp_expand_ratio: 4
73 | geo_decoder_downsample_ratio: 1
74 | geo_decoder_ln_post: true
75 |
76 | cond_stage_config:
77 | target: hy3dshape.models.conditioner.SingleImageEncoder
78 | params:
79 | main_image_encoder:
80 | type: DinoImageEncoder # dino giant
81 | kwargs:
82 | config:
83 | attention_probs_dropout_prob: 0.0
84 | drop_path_rate: 0.0
85 | hidden_act: gelu
86 | hidden_dropout_prob: 0.0
87 | hidden_size: 1536
88 | image_size: 518
89 | initializer_range: 0.02
90 | layer_norm_eps: 1.e-6
91 | layerscale_value: 1.0
92 | mlp_ratio: 4
93 | model_type: dinov2
94 | num_attention_heads: 24
95 | num_channels: 3
96 | num_hidden_layers: 40
97 | patch_size: 14
98 | qkv_bias: true
99 | torch_dtype: float32
100 | use_swiglu_ffn: true
101 | image_size: 518
102 |
103 | denoiser_cfg:
104 | target: hy3dshape.models.denoisers.hunyuan3ddit.Hunyuan3DDiT
105 | params:
106 | input_size: *num_latents
107 | context_in_dim: 1536
108 | hidden_size: 1024
109 | mlp_ratio: 4.0
110 | num_heads: 16
111 | depth: 8
112 | depth_single_blocks: 16
113 | axes_dim: [64]
114 | theta: 10000
115 | qkv_bias: true
116 | use_pe: false
117 | force_norm_fp32: true
118 |
119 | scheduler_cfg:
120 | transport:
121 | target: hy3dshape.models.diffusion.transport.create_transport
122 | params:
123 | path_type: Linear
124 | prediction: velocity
125 | sampler:
126 | target: hy3dshape.models.diffusion.transport.Sampler
127 | params: {}
128 | ode_params:
129 | sampling_method: euler # dopri5 ...
130 | num_steps: &num_steps 50
131 |
132 | optimizer_cfg:
133 | optimizer:
134 | target: torch.optim.AdamW
135 | params:
136 | betas: [0.9, 0.99]
137 | eps: 1.e-6
138 | weight_decay: 1.e-2
139 |
140 | scheduler:
141 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
142 | params:
143 | warm_up_steps: 50 # 5000
144 | f_start: 1.e-6
145 | f_min: 1.e-3
146 | f_max: 1.0
147 |
148 | pipeline_cfg:
149 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline
150 |
151 | image_processor_cfg:
152 | target: hy3dshape.preprocessors.ImageProcessorV2
153 | params: {}
154 |
155 | callbacks:
156 | logger:
157 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger
158 | params:
159 | step_frequency: 100 # 10000
160 | num_samples: 1
161 | sample_times: 1
162 | mean: *mean
163 | std: *std
164 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01]
165 | octree_depth: 8
166 | num_chunks: 50000
167 | mc_level: 0.0
168 |
169 | file_loggers:
170 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger
171 | params:
172 | step_frequency: 50 # 5000
173 | test_data_path: "tools/mini_testset/images.json"
174 |
--------------------------------------------------------------------------------
/workflow_examples/Batch_Generator.json:
--------------------------------------------------------------------------------
1 | {
2 | "id": "5ad9bf67-cabe-4ef4-8e0c-bbeee0fc546f",
3 | "revision": 0,
4 | "last_node_id": 8,
5 | "last_link_id": 3,
6 | "nodes": [
7 | {
8 | "id": 6,
9 | "type": "Hy3D21CameraConfig",
10 | "pos": [
11 | -706.4094848632812,
12 | 305.74383544921875
13 | ],
14 | "size": [
15 | 382.7560729980469,
16 | 133.63636779785156
17 | ],
18 | "flags": {},
19 | "order": 0,
20 | "mode": 0,
21 | "inputs": [],
22 | "outputs": [
23 | {
24 | "name": "camera_config",
25 | "type": "HY3D21CAMERA",
26 | "links": [
27 | 1
28 | ]
29 | }
30 | ],
31 | "properties": {
32 | "aux_id": "visualbruno/ComfyUI-Hunyuan3d-2-1",
33 | "ver": "e439689e4b67fb2af5f487ee26ef3a710be92658",
34 | "Node name for S&R": "Hy3D21CameraConfig",
35 | "widget_ue_connectable": {}
36 | },
37 | "widgets_values": [
38 | "0, 90, 180, 270, 0, 180, 45, 315",
39 | "0, 0, 0, 0, 90, -90, 0, 0",
40 | "1, 0.5, 1, 0.5, 1, 1, 0.1, 0.1",
41 | 1.1000000000000003
42 | ]
43 | },
44 | {
45 | "id": 4,
46 | "type": "Hy3D21MeshGenerationBatch",
47 | "pos": [
48 | -732.0762939453125,
49 | 516.9437255859375
50 | ],
51 | "size": [
52 | 427.05511474609375,
53 | 622
54 | ],
55 | "flags": {},
56 | "order": 1,
57 | "mode": 0,
58 | "inputs": [],
59 | "outputs": [
60 | {
61 | "name": "input_folder",
62 | "type": "STRING",
63 | "links": [
64 | 2
65 | ]
66 | },
67 | {
68 | "name": "output_folder",
69 | "type": "STRING",
70 | "links": [
71 | 3
72 | ]
73 | },
74 | {
75 | "name": "processed_input_images",
76 | "type": "STRING",
77 | "links": null
78 | },
79 | {
80 | "name": "processed_output_meshes",
81 | "type": "STRING",
82 | "links": null
83 | }
84 | ],
85 | "properties": {
86 | "aux_id": "visualbruno/ComfyUI-Hunyuan3d-2-1",
87 | "ver": "a1133c7ff88dd2b8c6a85344ffe7acbaa58ec8d4",
88 | "Node name for S&R": "Hy3D21MeshGenerationBatch",
89 | "widget_ue_connectable": {}
90 | },
91 | "widgets_values": [
92 | "C:\\Travaux\\Test",
93 | "C:\\Travaux\\Test\\3D",
94 | "Hunyuan3D-vae-v2-1-fp16.ckpt",
95 | "hunyuan3d-dit-v2-1-fp16.ckpt",
96 | 50,
97 | 7.5,
98 | "sdpa",
99 | 1.01,
100 | 384,
101 | 128000,
102 | 0,
103 | "dmc",
104 | true,
105 | 200000,
106 | 1388,
107 | "randomize",
108 | true,
109 | "obj",
110 | false,
111 | true,
112 | true,
113 | false
114 | ]
115 | },
116 | {
117 | "id": 5,
118 | "type": "Hy3D21GenerateMultiViewsBatch",
119 | "pos": [
120 | -167.3360137939453,
121 | 428.5770568847656
122 | ],
123 | "size": [
124 | 592.5423583984375,
125 | 464.7333679199219
126 | ],
127 | "flags": {},
128 | "order": 2,
129 | "mode": 0,
130 | "inputs": [
131 | {
132 | "name": "camera_config",
133 | "type": "HY3D21CAMERA",
134 | "link": 1
135 | },
136 | {
137 | "name": "input_images_folder",
138 | "shape": 7,
139 | "type": "STRING",
140 | "widget": {
141 | "name": "input_images_folder"
142 | },
143 | "link": 2
144 | },
145 | {
146 | "name": "input_meshes_folder",
147 | "shape": 7,
148 | "type": "STRING",
149 | "widget": {
150 | "name": "input_meshes_folder"
151 | },
152 | "link": 3
153 | }
154 | ],
155 | "outputs": [
156 | {
157 | "name": "processed_meshes",
158 | "type": "STRING",
159 | "links": null
160 | }
161 | ],
162 | "properties": {
163 | "aux_id": "visualbruno/ComfyUI-Hunyuan3d-2-1",
164 | "ver": "f966762862e112b35dfe2e846bfb153f0dd6cae4",
165 | "Node name for S&R": "Hy3D21GenerateMultiViewsBatch",
166 | "widget_ue_connectable": {}
167 | },
168 | "widgets_values": [
169 | "C:\\Travaux\\Test\\Meshes",
170 | 512,
171 | 10,
172 | 3,
173 | 2048,
174 | true,
175 | 411413629,
176 | "randomize",
177 | true,
178 | false,
179 | true,
180 | "CustomModel",
181 | "003_realSR_BSRGAN_DFO_s64w8_SwinIR-M_x4_GAN.pth",
182 | "",
183 | "",
184 | "",
185 | ""
186 | ]
187 | }
188 | ],
189 | "links": [
190 | [
191 | 1,
192 | 6,
193 | 0,
194 | 5,
195 | 0,
196 | "HY3D21CAMERA"
197 | ],
198 | [
199 | 2,
200 | 4,
201 | 0,
202 | 5,
203 | 1,
204 | "STRING"
205 | ],
206 | [
207 | 3,
208 | 4,
209 | 1,
210 | 5,
211 | 2,
212 | "STRING"
213 | ]
214 | ],
215 | "groups": [],
216 | "config": {},
217 | "extra": {
218 | "ue_links": [],
219 | "ds": {
220 | "scale": 0.826446280991736,
221 | "offset": [
222 | 1020.0376340132016,
223 | -189.85887715515295
224 | ]
225 | },
226 | "links_added_by_ue": [],
227 | "frontendVersion": "1.23.4"
228 | },
229 | "version": 0.4
230 | }
--------------------------------------------------------------------------------
/hy3dshape/configs/hunyuan3ddit-full-params-finetuning-flowmatching-dinog518-bf16-lr1e5-512.yaml:
--------------------------------------------------------------------------------
1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518"
2 |
3 | training:
4 | steps: 10_0000_0000
5 | use_amp: true
6 | amp_type: "bf16"
7 | base_lr: 1.e-5
8 | gradient_clip_val: 1.0
9 | gradient_clip_algorithm: "norm"
10 | every_n_train_steps: 2000 # 5000
11 | val_check_interval: 50 # 4096
12 | limit_val_batches: 16
13 |
14 | dataset:
15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule
16 | params:
17 | #! Base setting
18 | batch_size: 4
19 | num_workers: 8
20 | val_num_workers: 4
21 |
22 | # Data
23 | train_data_list: tools/mini_trainset/preprocessed
24 | val_data_list: tools/mini_trainset/preprocessed
25 |
26 | #! Image loading
27 | cond_stage_key: "image" # image / text / image_text
28 | image_size: 518
29 | mean: &mean [0.5, 0.5, 0.5]
30 | std: &std [0.5, 0.5, 0.5]
31 |
32 | #! Point cloud sampling
33 | pc_size: &pc_size 30720
34 | pc_sharpedge_size: &pc_sharpedge_size 30720
35 | sharpedge_label: &sharpedge_label true
36 | return_normal: true
37 |
38 | #! Augmentation
39 | padding: true
40 |
41 | model:
42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser
43 | params:
44 | first_stage_key: "surface"
45 | cond_stage_key: "image"
46 | scale_by_std: false
47 | z_scale_factor: &z_scale_factor 0.9990943042622529 # 1 / 1.0009065167661184
48 | torch_compile: false
49 |
50 | # ema_config:
51 | # ema_model: LitEma
52 | # ema_decay: 0.999
53 | # ema_inference: false
54 |
55 | first_stage_config:
56 | target: hy3dshape.models.autoencoders.ShapeVAE
57 | from_pretrained: tencent/Hunyuan3D-2.1
58 | params:
59 | num_latents: &num_latents 512
60 | embed_dim: 64
61 | num_freqs: 8
62 | include_pi: false
63 | heads: 16
64 | width: 1024
65 | point_feats: 4
66 | num_decoder_layers: 16
67 | pc_size: *pc_size
68 | pc_sharpedge_size: *pc_sharpedge_size
69 | qkv_bias: false
70 | qk_norm: true
71 | scale_factor: *z_scale_factor
72 | geo_decoder_mlp_expand_ratio: 4
73 | geo_decoder_downsample_ratio: 1
74 | geo_decoder_ln_post: true
75 |
76 | cond_stage_config:
77 | target: hy3dshape.models.conditioner.SingleImageEncoder
78 | params:
79 | main_image_encoder:
80 | type: DinoImageEncoder # dino giant
81 | kwargs:
82 | config:
83 | attention_probs_dropout_prob: 0.0
84 | drop_path_rate: 0.0
85 | hidden_act: gelu
86 | hidden_dropout_prob: 0.0
87 | hidden_size: 1536
88 | image_size: 518
89 | initializer_range: 0.02
90 | layer_norm_eps: 1.e-6
91 | layerscale_value: 1.0
92 | mlp_ratio: 4
93 | model_type: dinov2
94 | num_attention_heads: 24
95 | num_channels: 3
96 | num_hidden_layers: 40
97 | patch_size: 14
98 | qkv_bias: true
99 | torch_dtype: float32
100 | use_swiglu_ffn: true
101 | image_size: 518
102 |
103 | denoiser_cfg:
104 | target: hy3dshape.models.denoisers.hunyuan3ddit.Hunyuan3DDiT
105 | params:
106 | ckpt_path: ~/.cache/hy3dgen/tencent/Hunyuan3D-2-1-Shape/dit/model.fp16.ckpt
107 | input_size: *num_latents
108 | context_in_dim: 1536
109 | hidden_size: 1024
110 | mlp_ratio: 4.0
111 | num_heads: 16
112 | depth: 16
113 | depth_single_blocks: 32
114 | axes_dim: [64]
115 | theta: 10000
116 | qkv_bias: true
117 | use_pe: false
118 | force_norm_fp32: true
119 |
120 | scheduler_cfg:
121 | transport:
122 | target: hy3dshape.models.diffusion.transport.create_transport
123 | params:
124 | path_type: Linear
125 | prediction: velocity
126 | sampler:
127 | target: hy3dshape.models.diffusion.transport.Sampler
128 | params: {}
129 | ode_params:
130 | sampling_method: euler # dopri5 ...
131 | num_steps: &num_steps 50
132 |
133 | optimizer_cfg:
134 | optimizer:
135 | target: torch.optim.AdamW
136 | params:
137 | betas: [0.9, 0.99]
138 | eps: 1.e-6
139 | weight_decay: 1.e-2
140 |
141 | scheduler:
142 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
143 | params:
144 | warm_up_steps: 50 # 5000
145 | f_start: 1.e-6
146 | f_min: 1.e-3
147 | f_max: 1.0
148 |
149 | pipeline_cfg:
150 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline
151 |
152 | image_processor_cfg:
153 | target: hy3dshape.preprocessors.ImageProcessorV2
154 | params: {}
155 |
156 | callbacks:
157 | logger:
158 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger
159 | params:
160 | step_frequency: 100 # 10000
161 | num_samples: 1
162 | sample_times: 1
163 | mean: *mean
164 | std: *std
165 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01]
166 | octree_depth: 8
167 | num_chunks: 50000
168 | mc_level: 0.0
169 |
170 | file_loggers:
171 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger
172 | params:
173 | step_frequency: 50 # 5000
174 | test_data_path: "tools/mini_testset/images.json"
175 |
--------------------------------------------------------------------------------
/hy3dshape/configs/hunyuandit-finetuning-flowmatching-dinog518-bf16-lr1e5-4096.yaml:
--------------------------------------------------------------------------------
1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518"
2 |
3 | training:
4 | steps: 10_0000_0000
5 | use_amp: true
6 | amp_type: "bf16"
7 | base_lr: 1e-5
8 | gradient_clip_val: 1.0
9 | gradient_clip_algorithm: "norm"
10 | every_n_train_steps: 2000 # 5000
11 | val_check_interval: 50 # 4096
12 | limit_val_batches: 16
13 |
14 | dataset:
15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule
16 | params:
17 | #! Base setting
18 | batch_size: 4
19 | num_workers: 8
20 | val_num_workers: 4
21 |
22 | # Data
23 | train_data_list: tools/mini_trainset/preprocessed
24 | val_data_list: tools/mini_trainset/preprocessed
25 |
26 | #! Image loading
27 | cond_stage_key: "image" # image / text / image_text
28 | image_size: 518
29 | mean: &mean [0.5, 0.5, 0.5]
30 | std: &std [0.5, 0.5, 0.5]
31 |
32 | #! Point cloud sampling
33 | pc_size: &pc_size 81920
34 | pc_sharpedge_size: &pc_sharpedge_size 0
35 | sharpedge_label: &sharpedge_label true
36 | return_normal: true
37 |
38 | #! Augmentation
39 | padding: true
40 |
41 | model:
42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser
43 | params:
44 | first_stage_key: "surface"
45 | cond_stage_key: "image"
46 | scale_by_std: false
47 | z_scale_factor: &z_scale_factor 1.0039506158752403
48 | torch_compile: false
49 |
50 | # ema_config:
51 | # ema_model: LitEma
52 | # ema_decay: 0.999
53 | # ema_inference: false
54 |
55 | first_stage_config:
56 | target: hy3dshape.models.autoencoders.ShapeVAE
57 | from_pretrained: tencent/Hunyuan3D-2.1
58 | params:
59 | num_latents: &num_latents 4096
60 | embed_dim: 64
61 | num_freqs: 8
62 | include_pi: false
63 | heads: 16
64 | width: 1024
65 | num_encoder_layers: 8
66 | num_decoder_layers: 16
67 | qkv_bias: false
68 | qk_norm: true
69 | scale_factor: *z_scale_factor
70 | geo_decoder_mlp_expand_ratio: 4
71 | geo_decoder_downsample_ratio: 1
72 | geo_decoder_ln_post: true
73 | point_feats: 4
74 | pc_size: *pc_size
75 | pc_sharpedge_size: *pc_sharpedge_size
76 |
77 | cond_stage_config:
78 | target: hy3dshape.models.conditioner.SingleImageEncoder
79 | params:
80 | main_image_encoder:
81 | type: DinoImageEncoder # dino large
82 | kwargs:
83 | config:
84 | attention_probs_dropout_prob: 0.0
85 | drop_path_rate: 0.0
86 | hidden_act: gelu
87 | hidden_dropout_prob: 0.0
88 | hidden_size: 1024
89 | image_size: 518
90 | initializer_range: 0.02
91 | layer_norm_eps: 1.e-6
92 | layerscale_value: 1.0
93 | mlp_ratio: 4
94 | model_type: dinov2
95 | num_attention_heads: 16
96 | num_channels: 3
97 | num_hidden_layers: 24
98 | patch_size: 14
99 | qkv_bias: true
100 | torch_dtype: float32
101 | use_swiglu_ffn: false
102 | image_size: 518
103 | use_cls_token: true
104 |
105 |
106 | denoiser_cfg:
107 | target: hy3dshape.models.denoisers.hunyuandit.HunYuanDiTPlain
108 | params:
109 | input_size: *num_latents
110 | in_channels: 64
111 | hidden_size: 2048
112 | context_dim: 1024
113 | depth: 21
114 | num_heads: 16
115 | qk_norm: true
116 | text_len: 1370
117 | with_decoupled_ca: false
118 | use_attention_pooling: false
119 | qk_norm_type: 'rms'
120 | qkv_bias: false
121 | use_pos_emb: false
122 | num_moe_layers: 6
123 | num_experts: 8
124 | moe_top_k: 2
125 |
126 | scheduler_cfg:
127 | transport:
128 | target: hy3dshape.models.diffusion.transport.create_transport
129 | params:
130 | path_type: Linear
131 | prediction: velocity
132 | sampler:
133 | target: hy3dshape.models.diffusion.transport.Sampler
134 | params: {}
135 | ode_params:
136 | sampling_method: euler # dopri5 ...
137 | num_steps: &num_steps 50
138 |
139 | optimizer_cfg:
140 | optimizer:
141 | target: torch.optim.AdamW
142 | params:
143 | betas: [0.9, 0.99]
144 | eps: 1.e-6
145 | weight_decay: 1.e-2
146 |
147 | scheduler:
148 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
149 | params:
150 | warm_up_steps: 50 # 5000
151 | f_start: 1.e-6
152 | f_min: 1.e-3
153 | f_max: 1.0
154 |
155 | pipeline_cfg:
156 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline
157 |
158 | image_processor_cfg:
159 | target: hy3dshape.preprocessors.ImageProcessorV2
160 | params: {}
161 |
162 | callbacks:
163 | logger:
164 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger
165 | params:
166 | step_frequency: 100 # 10000
167 | num_samples: 1
168 | sample_times: 1
169 | mean: *mean
170 | std: *std
171 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01]
172 | octree_depth: 8
173 | num_chunks: 50000
174 | mc_level: 0.0
175 |
176 | file_loggers:
177 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger
178 | params:
179 | step_frequency: 50 # 5000
180 | test_data_path: "tools/mini_testset/images.json"
181 |
--------------------------------------------------------------------------------
/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-4096.yaml:
--------------------------------------------------------------------------------
1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518"
2 |
3 | training:
4 | steps: 10_0000_0000
5 | use_amp: true
6 | amp_type: "bf16"
7 | base_lr: 1e-4
8 | gradient_clip_val: 1.0
9 | gradient_clip_algorithm: "norm"
10 | every_n_train_steps: 2000 # 5000
11 | val_check_interval: 50 # 4096
12 | limit_val_batches: 16
13 |
14 | dataset:
15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule
16 | params:
17 | #! Base setting
18 | batch_size: 2
19 | num_workers: 8
20 | val_num_workers: 4
21 |
22 | # Data
23 | train_data_list: tools/mini_trainset/preprocessed
24 | val_data_list: tools/mini_trainset/preprocessed
25 |
26 | #! Image loading
27 | cond_stage_key: "image" # image / text / image_text
28 | image_size: 518
29 | mean: &mean [0.5, 0.5, 0.5]
30 | std: &std [0.5, 0.5, 0.5]
31 |
32 | #! Point cloud sampling
33 | pc_size: &pc_size 81920
34 | pc_sharpedge_size: &pc_sharpedge_size 0
35 | sharpedge_label: &sharpedge_label true
36 | return_normal: true
37 |
38 | #! Augmentation
39 | padding: true
40 |
41 | model:
42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser
43 | params:
44 | first_stage_key: "surface"
45 | cond_stage_key: "image"
46 | scale_by_std: false
47 | z_scale_factor: &z_scale_factor 1.0039506158752403
48 | torch_compile: false
49 |
50 | # ema_config:
51 | # ema_model: LitEma
52 | # ema_decay: 0.999
53 | # ema_inference: false
54 |
55 | first_stage_config:
56 | target: hy3dshape.models.autoencoders.ShapeVAE
57 | from_pretrained: tencent/Hunyuan3D-2.1
58 | params:
59 | num_latents: &num_latents 4096
60 | embed_dim: 64
61 | num_freqs: 8
62 | include_pi: false
63 | heads: 16
64 | width: 1024
65 | num_encoder_layers: 8
66 | num_decoder_layers: 16
67 | qkv_bias: false
68 | qk_norm: true
69 | scale_factor: *z_scale_factor
70 | geo_decoder_mlp_expand_ratio: 4
71 | geo_decoder_downsample_ratio: 1
72 | geo_decoder_ln_post: true
73 | point_feats: 4
74 | pc_size: *pc_size
75 | pc_sharpedge_size: *pc_sharpedge_size
76 |
77 | cond_stage_config:
78 | target: hy3dshape.models.conditioner.SingleImageEncoder
79 | params:
80 | main_image_encoder:
81 | type: DinoImageEncoder # dino large
82 | kwargs:
83 | config:
84 | attention_probs_dropout_prob: 0.0
85 | drop_path_rate: 0.0
86 | hidden_act: gelu
87 | hidden_dropout_prob: 0.0
88 | hidden_size: 1024
89 | image_size: 518
90 | initializer_range: 0.02
91 | layer_norm_eps: 1.e-6
92 | layerscale_value: 1.0
93 | mlp_ratio: 4
94 | model_type: dinov2
95 | num_attention_heads: 16
96 | num_channels: 3
97 | num_hidden_layers: 24
98 | patch_size: 14
99 | qkv_bias: true
100 | torch_dtype: float32
101 | use_swiglu_ffn: false
102 | image_size: 518
103 | use_cls_token: true
104 |
105 |
106 | denoiser_cfg:
107 | target: hy3dshape.models.denoisers.hunyuandit.HunYuanDiTPlain
108 | params:
109 | input_size: *num_latents
110 | in_channels: 64
111 | hidden_size: 2048
112 | context_dim: 1024
113 | depth: 11
114 | num_heads: 16
115 | qk_norm: true
116 | text_len: 1370
117 | with_decoupled_ca: false
118 | use_attention_pooling: false
119 | qk_norm_type: 'rms'
120 | qkv_bias: false
121 | use_pos_emb: false
122 | num_moe_layers: 6
123 | num_experts: 8
124 | moe_top_k: 2
125 |
126 | scheduler_cfg:
127 | transport:
128 | target: hy3dshape.models.diffusion.transport.create_transport
129 | params:
130 | path_type: Linear
131 | prediction: velocity
132 | sampler:
133 | target: hy3dshape.models.diffusion.transport.Sampler
134 | params: {}
135 | ode_params:
136 | sampling_method: euler # dopri5 ...
137 | num_steps: &num_steps 50
138 |
139 | optimizer_cfg:
140 | optimizer:
141 | target: torch.optim.AdamW
142 | params:
143 | betas: [0.9, 0.99]
144 | eps: 1.e-6
145 | weight_decay: 1.e-2
146 |
147 | scheduler:
148 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
149 | params:
150 | warm_up_steps: 50 # 5000
151 | f_start: 1.e-6
152 | f_min: 1.e-3
153 | f_max: 1.0
154 |
155 | pipeline_cfg:
156 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline
157 |
158 | image_processor_cfg:
159 | target: hy3dshape.preprocessors.ImageProcessorV2
160 | params: {}
161 |
162 | callbacks:
163 | logger:
164 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger
165 | params:
166 | step_frequency: 100 # 10000
167 | num_samples: 1
168 | sample_times: 1
169 | mean: *mean
170 | std: *std
171 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01]
172 | octree_depth: 8
173 | num_chunks: 50000
174 | mc_level: 0.0
175 |
176 | file_loggers:
177 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger
178 | params:
179 | step_frequency: 50 # 5000
180 | test_data_path: "tools/mini_testset/images.json"
181 |
--------------------------------------------------------------------------------
/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinog518-bf16-lr1e4-512.yaml:
--------------------------------------------------------------------------------
1 | name: "DiT: Flux large flowmatching; VAE: 1024 token length; ImageEncoder: DINO Giant; ImageSize: 518"
2 |
3 | training:
4 | steps: 10_0000_0000
5 | use_amp: true
6 | amp_type: "bf16"
7 | base_lr: 1e-4
8 | gradient_clip_val: 1.0
9 | gradient_clip_algorithm: "norm"
10 | every_n_train_steps: 2000 # 5000
11 | val_check_interval: 50 # 4096
12 | limit_val_batches: 16
13 |
14 | dataset:
15 | target: hy3dshape.data.dit_asl.AlignedShapeLatentModule
16 | params:
17 | #! Base setting
18 | batch_size: 2
19 | num_workers: 8
20 | val_num_workers: 4
21 |
22 | # Data
23 | train_data_list: tools/mini_trainset/preprocessed
24 | val_data_list: tools/mini_trainset/preprocessed
25 |
26 | #! Image loading
27 | cond_stage_key: "image" # image / text / image_text
28 | image_size: 518
29 | mean: &mean [0.5, 0.5, 0.5]
30 | std: &std [0.5, 0.5, 0.5]
31 |
32 | #! Point cloud sampling
33 | pc_size: &pc_size 81920
34 | pc_sharpedge_size: &pc_sharpedge_size 0
35 | sharpedge_label: &sharpedge_label true
36 | return_normal: true
37 |
38 | #! Augmentation
39 | padding: true
40 |
41 | model:
42 | target: hy3dshape.models.diffusion.flow_matching_sit.Diffuser
43 | params:
44 | first_stage_key: "surface"
45 | cond_stage_key: "image"
46 | scale_by_std: false
47 | z_scale_factor: &z_scale_factor 1.0039506158752403
48 | torch_compile: false
49 |
50 | # ema_config:
51 | # ema_model: LitEma
52 | # ema_decay: 0.999
53 | # ema_inference: false
54 |
55 | first_stage_config:
56 | target: hy3dshape.models.autoencoders.ShapeVAE
57 | from_pretrained: tencent/Hunyuan3D-2.1
58 | params:
59 | num_latents: &num_latents 512
60 | embed_dim: 64
61 | num_freqs: 8
62 | include_pi: false
63 | heads: 16
64 | width: 1024
65 | num_encoder_layers: 8
66 | num_decoder_layers: 16
67 | qkv_bias: false
68 | qk_norm: true
69 | scale_factor: *z_scale_factor
70 | geo_decoder_mlp_expand_ratio: 4
71 | geo_decoder_downsample_ratio: 1
72 | geo_decoder_ln_post: true
73 | point_feats: 4
74 | pc_size: *pc_size
75 | pc_sharpedge_size: *pc_sharpedge_size
76 |
77 | cond_stage_config:
78 | target: hy3dshape.models.conditioner.SingleImageEncoder
79 | params:
80 | main_image_encoder:
81 | type: DinoImageEncoder # dino large
82 | kwargs:
83 | config:
84 | attention_probs_dropout_prob: 0.0
85 | drop_path_rate: 0.0
86 | hidden_act: gelu
87 | hidden_dropout_prob: 0.0
88 | hidden_size: 1024
89 | image_size: 518
90 | initializer_range: 0.02
91 | layer_norm_eps: 1.e-6
92 | layerscale_value: 1.0
93 | mlp_ratio: 4
94 | model_type: dinov2
95 | num_attention_heads: 16
96 | num_channels: 3
97 | num_hidden_layers: 24
98 | patch_size: 14
99 | qkv_bias: true
100 | torch_dtype: float32
101 | use_swiglu_ffn: false
102 | image_size: 518
103 | use_cls_token: true
104 |
105 |
106 | denoiser_cfg:
107 | target: hy3dshape.models.denoisers.hunyuandit.HunYuanDiTPlain
108 | params:
109 | input_size: *num_latents
110 | in_channels: 64
111 | hidden_size: 768
112 | context_dim: 1024
113 | depth: 6
114 | num_heads: 12
115 | qk_norm: true
116 | text_len: 1370
117 | with_decoupled_ca: false
118 | use_attention_pooling: false
119 | qk_norm_type: 'rms'
120 | qkv_bias: false
121 | use_pos_emb: false
122 | num_moe_layers: 3
123 | num_experts: 4
124 | moe_top_k: 2
125 |
126 | scheduler_cfg:
127 | transport:
128 | target: hy3dshape.models.diffusion.transport.create_transport
129 | params:
130 | path_type: Linear
131 | prediction: velocity
132 | sampler:
133 | target: hy3dshape.models.diffusion.transport.Sampler
134 | params: {}
135 | ode_params:
136 | sampling_method: euler # dopri5 ...
137 | num_steps: &num_steps 50
138 |
139 | optimizer_cfg:
140 | optimizer:
141 | target: torch.optim.AdamW
142 | params:
143 | betas: [0.9, 0.99]
144 | eps: 1.e-6
145 | weight_decay: 1.e-2
146 |
147 | scheduler:
148 | target: hy3dshape.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
149 | params:
150 | warm_up_steps: 50 # 5000
151 | f_start: 1.e-6
152 | f_min: 1.e-3
153 | f_max: 1.0
154 |
155 | pipeline_cfg:
156 | target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline
157 |
158 | image_processor_cfg:
159 | target: hy3dshape.preprocessors.ImageProcessorV2
160 | params: {}
161 |
162 | callbacks:
163 | logger:
164 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger
165 | params:
166 | step_frequency: 100 # 10000
167 | num_samples: 1
168 | sample_times: 1
169 | mean: *mean
170 | std: *std
171 | bounds: [-1.01, -1.01, -1.01, 1.01, 1.01, 1.01]
172 | octree_depth: 8
173 | num_chunks: 50000
174 | mc_level: 0.0
175 |
176 | file_loggers:
177 | target: hy3dshape.utils.trainings.mesh_log_callback.ImageConditionalFixASLDiffuserLogger
178 | params:
179 | step_frequency: 50 # 5000
180 | test_data_path: "tools/mini_testset/images.json"
181 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🌀 ComfyUI Wrapper for [Hunyuan3D-2.1](https://github.com/Tencent-Hunyuan/Hunyuan3D-2.1)
2 |
3 | > **ComfyUI integration** for Tencent's powerful **Hunyuan3D-2.1** model. Supports textured 3D generation with optional high-quality UV mapping.
4 |
5 | ---
6 |
7 | ## 📦 Repository & Models
8 |
9 | * **GitHub:** [Tencent-Hunyuan/Hunyuan3D-2.1](https://github.com/Tencent-Hunyuan/Hunyuan3D-2.1)
10 | * **Model Weights (HuggingFace):**
11 | 👉 [Main page](https://huggingface.co/tencent/Hunyuan3D-2.1/tree/main)
12 |
13 | ### 🔧 Required Checkpoints
14 |
15 | Place the following checkpoints into the corresponding folders under your `ComfyUI` directory:
16 |
17 | ```
18 | ComfyUI/
19 | ├── models/
20 | │ ├── diffusion_models/
21 | │ │ └── hunyuan3d-dit-v2-1.ckpt
22 | │ ├── vae/
23 | │ │ └── hunyuan3d-vae-v2-1.ckpt
24 | ```
25 |
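If you prefer to fetch the weights from a script, `huggingface_hub` can download just these two subfolders of the model repository (a minimal sketch: the subfolder names match the Hugging Face links in the Directory Overview below, but you still need to move/rename the checkpoints into the folders shown above, and the exact filenames inside each subfolder may differ):

```python
from huggingface_hub import snapshot_download

# Download only the shape DiT and VAE subfolders from the Hunyuan3D-2.1 repo.
# Moving/renaming the checkpoints into ComfyUI/models/... is left to you.
local_dir = snapshot_download(
    repo_id="tencent/Hunyuan3D-2.1",
    allow_patterns=["hunyuan3d-dit-v2-1/*", "hunyuan3d-vae-v2-1/*"],
)
print("weights downloaded to:", local_dir)
```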
26 | ---
27 |
28 | ## ⚙️ Installation Guide
29 |
30 | > Tested on **Windows 11** with **Python 3.12** and **Torch >= 2.6.0 + cu126**. Compatible with the latest ComfyUI Portable release.
31 |
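Before installing anything, you can confirm the interpreter and Torch build you are about to use (a minimal sketch; run it with `python_embeded\python.exe` for the Portable release):

```python
import sys
import torch

# The wrapper was tested with Python 3.12 and Torch >= 2.6.0 built against cu126.
print(sys.version)
print(torch.__version__, torch.version.cuda, torch.cuda.is_available())
```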
32 | ### 1. Install Python Dependencies
33 |
34 | For a standard Python environment:
35 |
36 | ```bash
37 | python -m pip install -r ComfyUI/custom_nodes/ComfyUI-Hunyuan3DWrapper/requirements.txt
38 | ```
39 |
40 | For **ComfyUI Portable**:
41 |
42 | ```bash
43 | python_embeded\python.exe -m pip install -r ComfyUI\custom_nodes\ComfyUI-Hunyuan3d-2-1\requirements.txt
44 | ```
45 |
46 | ---
47 |
48 | ### 2. Install or Compile Texture Generation Modules
49 |
50 | Two critical C++ extensions need to be installed: the **custom rasterizer** and the **differentiable renderer**.
51 |
52 | #### Option A: Use Precompiled Wheels (Recommended)
53 |
54 | #### Custom Rasterizer
55 |
56 | You will find precompiled wheels in the `hy3dpaint\custom_rasterizer\dist` folder.
57 |
58 | For standard Python:
59 |
60 | For example, if you are on Python 3.12:
61 |
62 | ```bash
63 | pip install custom_rasterizer-0.1-cp312-cp312-win_amd64.whl
64 | ```
65 |
66 | For ComfyUI Portable:
67 |
68 | ```bash
69 | python_embeded\python.exe -m pip install ComfyUI\custom_nodes\ComfyUI-Hunyuan3d-2-1\hy3dpaint\custom_rasterizer\dist\custom_rasterizer-0.1-cp312-cp312-win_amd64.whl
70 | ```
71 |
72 | #### Differentiable Renderer
73 |
74 | You will find precompiled wheels in the `hy3dpaint\DifferentiableRenderer\dist` folder.
75 |
76 | For standard Python:
77 |
78 | For example, if you are on Python 3.12:
79 |
80 | ```bash
81 | pip install mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl
82 | ```
83 |
84 | For ComfyUI Portable:
85 |
86 | ```bash
87 | python_embeded\python.exe -m pip install ComfyUI\custom_nodes\ComfyUI-Hunyuan3d-2-1\hy3dpaint\DifferentiableRenderer\dist\mesh_inpaint_processor-0.0.0-cp312-cp312-win_amd64.whl
88 | ```
89 |
90 | ---
91 |
92 | #### Option B: Manual Compilation (for advanced users)
93 |
94 | ```bash
95 | # Compile custom rasterizer
96 | cd ComfyUI/custom_nodes/ComfyUI-Hunyuan3d-2-1/hy3dpaint/custom_rasterizer
97 | python setup.py install
98 |
99 | # Compile differentiable renderer
100 | cd ../DifferentiableRenderer
101 | python setup.py install
102 | ```
103 |
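Whichever option you choose, a quick import check confirms both extensions are visible to the interpreter (a minimal sketch; the module names follow the wheels shipped in this repo and may differ if your build names them otherwise):

```python
# Run with the same interpreter the wheels were installed into,
# e.g. python_embeded\python.exe for ComfyUI Portable.
import custom_rasterizer
import mesh_inpaint_processor

print("custom_rasterizer:", custom_rasterizer.__file__)
print("mesh_inpaint_processor:", mesh_inpaint_processor.__file__)
```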
104 | ---
105 |
106 | ## 🩻 Optional: Fix UV Wrapping for High Poly Meshes (Patched Xatlas)
107 |
108 | This upgrade improves UV unwrapping stability for complex meshes.
109 |
110 | ```bash
111 | # Step 1: Uninstall existing xatlas
112 | python_embeded\python.exe -m pip uninstall xatlas
113 |
114 | # Step 2: Clone updated xatlas-python wrapper
115 | cd ComfyUI_windows_portable
116 | git clone --recursive https://github.com/mworchel/xatlas-python.git
117 |
118 | # Step 3: Replace internal xatlas source
119 | cd xatlas-python\extern
120 | del /s /q xatlas
121 | git clone --recursive https://github.com/jpcy/xatlas
122 |
123 | # Step 4: Patch source file
124 | # In xatlas-python/extern/xatlas/source/xatlas/xatlas.cpp:
125 | #   Line 6774: change `#if 0` → `//#if 0`
126 | #   Line 6778: change `#endif` → `//#endif`
127 |
128 | # Step 5: Install patched xatlas wrapper
129 | cd ../../..
130 | python_embeded\python.exe -m pip install .\xatlas-python\
131 | ```
132 |
133 | ```powershell
134 | python_embeded\python.exe -m pip uninstall -y xatlas; `
135 | cd ComfyUI_windows_portable; `
136 | if (Test-Path xatlas-python) { Remove-Item xatlas-python -Recurse -Force }; `
137 | git clone --recursive https://github.com/mworchel/xatlas-python.git; `
138 | cd xatlas-python\extern; `
139 | if (Test-Path xatlas) { Remove-Item xatlas -Recurse -Force }; `
140 | git clone --recursive https://github.com/jpcy/xatlas; `
141 | (Get-Content .\xatlas\source\xatlas\xatlas.cpp) -replace '#if 0', '//#if 0' -replace '#endif', '//#endif' | Set-Content .\xatlas\source\xatlas\xatlas.cpp; `
142 | cd ..\..\..; `
143 | python_embeded\python.exe -m pip install .\xatlas-python\
144 | ```
145 |
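After reinstalling, a short smoke test shows the patched build unwrapping a trivial mesh (a minimal sketch; any small triangle mesh works):

```python
import numpy as np
import xatlas

# Two triangles forming a unit quad; parametrize() returns a vertex remapping,
# new face indices, and one (u, v) pair per output vertex.
vertices = np.array([[0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0]], dtype=np.float32)
faces = np.array([[0, 1, 2], [0, 2, 3]], dtype=np.uint32)
vmapping, indices, uvs = xatlas.parametrize(vertices, faces)
print("uv coordinates:", uvs.shape)
```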
146 | ---
147 |
148 | ## 📂 Directory Overview
149 |
150 | ```
151 | ComfyUI/
152 | ├── custom_nodes/
153 | │ └── ComfyUI-Hunyuan3d-2-1/
154 | │ ├── hy3dpaint/
155 | │ │ ├── custom_rasterizer/ # Custom rasterizer module
156 | │ │ │ ├── setup.py
157 | │ │ │ └── dist/ # Precompiled wheels
158 | │ │ ├── DifferentiableRenderer/ # Differentiable renderer
159 | │ │ │ ├── setup.py
160 | │ │ │ └── dist/ # Precompiled wheels
161 | ├── models/
162 | │ ├── diffusion_models/
163 | │ │ └── hunyuan3d-dit-v2-1.ckpt   # https://huggingface.co/tencent/Hunyuan3D-2.1/tree/main/hunyuan3d-dit-v2-1
164 | │ └── vae/
165 | │ └── hunyuan3d-vae-v2-1.ckpt   # https://huggingface.co/tencent/Hunyuan3D-2.1/tree/main/hunyuan3d-vae-v2-1
166 | ├── xatlas-python/ # Patched UV unwrapper (optional)
167 | │ └── extern/
168 | │ └── xatlas/
169 | ```
170 |
171 | ---
172 |
173 | ## 🙏 Acknowledgements
174 |
175 | * **[kijai](https://github.com/kijai/ComfyUI-Hunyuan3DWrapper)** — Original wrapper developer for Hunyuan3D v2.0
176 | * TrueMike, Agee, Palindar, and the vibrant Discord community
177 | * Tencent team for the incredible [Hunyuan3D-2.1](https://github.com/Tencent-Hunyuan/Hunyuan3D-2.1) model
178 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/data/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved.
4 | # This file is part of the WebDataset library.
5 | # See the LICENSE file for licensing terms (BSD-style).
6 |
7 |
8 | """Miscellaneous utility functions."""
9 |
10 | import importlib
11 | import itertools as itt
12 | import os
13 | import re
14 | import sys
15 | from typing import Any, Callable, Iterator, Union
16 | import torch
17 | import numpy as np
18 |
19 |
20 | def make_seed(*args):
21 | seed = 0
22 | for arg in args:
23 | seed = (seed * 31 + hash(arg)) & 0x7FFFFFFF
24 | return seed
25 |
26 |
27 | class PipelineStage:
28 | def invoke(self, *args, **kw):
29 | raise NotImplementedError
30 |
31 |
32 | def identity(x: Any) -> Any:
33 | """Return the argument as is."""
34 | return x
35 |
36 |
37 | def safe_eval(s: str, expr: str = "{}"):
38 | """Evaluate the given expression more safely."""
39 | if re.sub("[^A-Za-z0-9_]", "", s) != s:
40 | raise ValueError(f"safe_eval: illegal characters in: '{s}'")
41 | return eval(expr.format(s))
42 |
43 |
44 | def lookup_sym(sym: str, modules: list):
45 | """Look up a symbol in a list of modules."""
46 | for mname in modules:
47 | module = importlib.import_module(mname, package="webdataset")
48 | result = getattr(module, sym, None)
49 | if result is not None:
50 | return result
51 | return None
52 |
53 |
54 | def repeatedly0(
55 | loader: Iterator, nepochs: int = sys.maxsize, nbatches: int = sys.maxsize
56 | ):
57 | """Repeatedly returns batches from a DataLoader."""
58 | for _ in range(nepochs):
59 | yield from itt.islice(loader, nbatches)
60 |
61 |
62 | def guess_batchsize(batch: Union[tuple, list]):
63 | """Guess the batch size by looking at the length of the first element in a tuple."""
64 | return len(batch[0])
65 |
66 |
67 | def repeatedly(
68 | source: Iterator,
69 | nepochs: int = None,
70 | nbatches: int = None,
71 | nsamples: int = None,
72 | batchsize: Callable[..., int] = guess_batchsize,
73 | ):
74 | """Repeatedly yield samples from an iterator."""
75 | epoch = 0
76 | batch = 0
77 | total = 0
78 | while True:
79 | for sample in source:
80 | yield sample
81 | batch += 1
82 | if nbatches is not None and batch >= nbatches:
83 | return
84 | if nsamples is not None:
85 | total += guess_batchsize(sample)
86 | if total >= nsamples:
87 | return
88 | epoch += 1
89 | if nepochs is not None and epoch >= nepochs:
90 | return
91 |
92 |
93 | def pytorch_worker_info(group=None): # sourcery skip: use-contextlib-suppress
94 | """Return node and worker info for PyTorch and some distributed environments."""
95 | rank = 0
96 | world_size = 1
97 | worker = 0
98 | num_workers = 1
99 | if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
100 | rank = int(os.environ["RANK"])
101 | world_size = int(os.environ["WORLD_SIZE"])
102 | else:
103 | try:
104 | import torch.distributed
105 |
106 | if torch.distributed.is_available() and torch.distributed.is_initialized():
107 | group = group or torch.distributed.group.WORLD
108 | rank = torch.distributed.get_rank(group=group)
109 | world_size = torch.distributed.get_world_size(group=group)
110 | except ModuleNotFoundError:
111 | pass
112 | if "WORKER" in os.environ and "NUM_WORKERS" in os.environ:
113 | worker = int(os.environ["WORKER"])
114 | num_workers = int(os.environ["NUM_WORKERS"])
115 | else:
116 | try:
117 | import torch.utils.data
118 |
119 | worker_info = torch.utils.data.get_worker_info()
120 | if worker_info is not None:
121 | worker = worker_info.id
122 | num_workers = worker_info.num_workers
123 | except ModuleNotFoundError:
124 | pass
125 |
126 | return rank, world_size, worker, num_workers
127 |
128 |
129 | def pytorch_worker_seed(group=None):
130 | """Compute a distinct, deterministic RNG seed for each worker and node."""
131 | rank, world_size, worker, num_workers = pytorch_worker_info(group=group)
132 | return rank * 1000 + worker
133 |
134 | def worker_init_fn(_):
135 | worker_info = torch.utils.data.get_worker_info()
136 | worker_id = worker_info.id
137 |
138 | # dataset = worker_info.dataset
139 | # split_size = dataset.num_records // worker_info.num_workers
140 | # # reset num_records to the true number to retain reliable length information
141 | # dataset.sample_ids = dataset.valid_ids[worker_id * split_size:(worker_id + 1) * split_size]
142 | # current_id = np.random.choice(len(np.random.get_state()[1]), 1)
143 | # return np.random.seed(np.random.get_state()[1][current_id] + worker_id)
144 |
145 | return np.random.seed(np.random.get_state()[1][0] + worker_id)
146 |
147 |
148 | def collation_fn(samples, combine_tensors=True, combine_scalars=True):
149 |     """Collate a list of sample dicts into a single batch dict.
150 | 
151 |     Args:
152 |         samples (list[dict]): samples that all share the same keys.
153 |         combine_tensors: stack torch.Tensor / np.ndarray values along a new batch dim.
154 |         combine_scalars: convert per-key lists of int/float values to np.ndarray.
155 | 
156 |     Returns:
157 |         dict: one entry per key, with values stacked or converted where possible.
158 |     """
159 |
160 | result = {}
161 |
162 | keys = samples[0].keys()
163 |
164 | for key in keys:
165 | result[key] = []
166 |
167 | for sample in samples:
168 | for key in keys:
169 | val = sample[key]
170 | result[key].append(val)
171 |
172 | for key in keys:
173 | val_list = result[key]
174 | if isinstance(val_list[0], (int, float)):
175 | if combine_scalars:
176 | result[key] = np.array(result[key])
177 |
178 | elif isinstance(val_list[0], torch.Tensor):
179 | if combine_tensors:
180 | result[key] = torch.stack(val_list)
181 |
182 | elif isinstance(val_list[0], np.ndarray):
183 | if combine_tensors:
184 | result[key] = np.stack(val_list)
185 |
186 | return result
187 |
--------------------------------------------------------------------------------
/hy3dpaint/utils/multiview_utils.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import os
16 | import torch
17 | import random
18 | import numpy as np
19 | from PIL import Image
20 | from typing import List
21 | import huggingface_hub
22 | from omegaconf import OmegaConf
23 | from diffusers import DiffusionPipeline
24 | from diffusers import EulerAncestralDiscreteScheduler, DDIMScheduler, UniPCMultistepScheduler
25 | from ..hunyuanpaintpbr.pipeline import HunyuanPaintPipeline
26 |
27 |
28 | class multiviewDiffusionNet:
29 | def __init__(self, config) -> None:
30 | self.device = config.device
31 |
32 | cfg_path = config.multiview_cfg_path
33 | custom_pipeline = config.custom_pipeline
34 | cfg = OmegaConf.load(cfg_path)
35 | self.cfg = cfg
36 | self.mode = self.cfg.model.params.stable_diffusion_config.custom_pipeline[2:]
37 |
38 | model_path = huggingface_hub.snapshot_download(
39 | repo_id=config.multiview_pretrained_path,
40 | allow_patterns=["hunyuan3d-paintpbr-v2-1/*"],
41 | )
42 |
43 | model_path = os.path.join(model_path, "hunyuan3d-paintpbr-v2-1")
44 |
45 | pipeline = HunyuanPaintPipeline.from_pretrained(
46 | model_path,
47 | torch_dtype=torch.float16
48 | )
49 |
50 | pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config, timestep_spacing="trailing")
51 | pipeline.set_progress_bar_config(disable=False)
52 | pipeline.eval()
53 | setattr(pipeline, "view_size", cfg.model.params.get("view_size", 320))
54 | pipeline.enable_model_cpu_offload()
55 | self.pipeline = pipeline.to(self.device)
56 | self.pipeline.enable_vae_slicing()
57 | self.pipeline.enable_vae_tiling()
58 |
59 | if hasattr(self.pipeline.unet, "use_dino") and self.pipeline.unet.use_dino:
60 | from ..hunyuanpaintpbr.unet.modules import Dino_v2
61 | self.dino_v2 = Dino_v2(config.dino_ckpt_path).to(torch.float16)
62 | self.dino_v2 = self.dino_v2.to(self.device)
63 |
64 | def seed_everything(self, seed):
65 | random.seed(seed)
66 | np.random.seed(seed)
67 | torch.manual_seed(seed)
68 | os.environ["PL_GLOBAL_SEED"] = str(seed)
69 |
70 | @torch.no_grad()
71 | def __call__(self, images, conditions, prompt=None, custom_view_size=None, resize_input=False, num_steps=10, guidance_scale=3.0, seed=0):
72 | pils = self.forward_one(
73 | images, conditions, prompt=prompt, custom_view_size=custom_view_size, resize_input=resize_input, num_steps=num_steps, guidance_scale=guidance_scale, seed=seed
74 | )
75 | return pils
76 |
77 | def forward_one(self, input_images, control_images, prompt=None, custom_view_size=None, resize_input=False, num_steps=10, guidance_scale=3.0, seed=0):
78 | self.seed_everything(seed)
79 | custom_view_size = custom_view_size if custom_view_size is not None else self.pipeline.view_size
80 |
81 | if not isinstance(input_images, List):
82 | input_images = [input_images]
83 |
84 | if not resize_input:
85 | input_images = [
86 | input_image.resize((self.pipeline.view_size, self.pipeline.view_size)) for input_image in input_images
87 | ]
88 | else:
89 | input_images = [input_image.resize((custom_view_size, custom_view_size)) for input_image in input_images]
90 |
91 | for i in range(len(control_images)):
92 | control_images[i] = control_images[i].resize((custom_view_size, custom_view_size))
93 | if control_images[i].mode == "L":
94 | control_images[i] = control_images[i].point(lambda x: 255 if x > 1 else 0, mode="1")
95 | kwargs = dict(generator=torch.Generator(device=self.pipeline.device).manual_seed(0))
96 |
97 | num_view = len(control_images) // 2
98 | normal_image = [[control_images[i] for i in range(num_view)]]
99 | position_image = [[control_images[i + num_view] for i in range(num_view)]]
100 |
101 | kwargs["width"] = custom_view_size
102 | kwargs["height"] = custom_view_size
103 | kwargs["num_in_batch"] = num_view
104 | kwargs["images_normal"] = normal_image
105 | kwargs["images_position"] = position_image
106 |
107 | if hasattr(self.pipeline.unet, "use_dino") and self.pipeline.unet.use_dino:
108 | dino_hidden_states = self.dino_v2(input_images[0])
109 | kwargs["dino_hidden_states"] = dino_hidden_states
110 |
111 | sync_condition = None
112 |
113 | infer_steps_dict = {
114 | "EulerAncestralDiscreteScheduler": 10,
115 | "UniPCMultistepScheduler": 10,
116 | "DDIMScheduler": 10,
117 | "ShiftSNRScheduler": 10,
118 | }
119 |
120 | mvd_image = self.pipeline(
121 | input_images[0:1],
122 | num_inference_steps=num_steps,
123 | prompt=prompt,
124 | sync_condition=sync_condition,
125 | guidance_scale=guidance_scale,
126 | **kwargs,
127 | ).images
128 |
129 | if "pbr" in self.mode:
130 | mvd_image = {"albedo": mvd_image[:num_view], "mr": mvd_image[num_view:]}
131 | # mvd_image = {'albedo':mvd_image[:num_view]}
132 | else:
133 | mvd_image = {"hdr": mvd_image}
134 |
135 | return mvd_image
136 |
--------------------------------------------------------------------------------
/hy3dpaint/utils/pipeline_utils.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import torch
16 | import numpy as np
17 |
18 |
19 | class ViewProcessor:
20 | def __init__(self, config, render):
21 | self.config = config
22 | self.render = render
23 |
24 | def render_normal_multiview(self, camera_elevs, camera_azims, use_abs_coor=True):
25 | normal_maps = []
26 | for elev, azim in zip(camera_elevs, camera_azims):
27 | normal_map = self.render.render_normal(elev, azim, use_abs_coor=use_abs_coor, return_type="pl")
28 | normal_maps.append(normal_map)
29 |
30 | return normal_maps
31 |
32 | def render_position_multiview(self, camera_elevs, camera_azims):
33 | position_maps = []
34 | for elev, azim in zip(camera_elevs, camera_azims):
35 | position_map = self.render.render_position(elev, azim, return_type="pl")
36 | position_maps.append(position_map)
37 |
38 | return position_maps
39 |
40 | def bake_view_selection(
41 | self, candidate_camera_elevs, candidate_camera_azims, candidate_view_weights, max_selected_view_num
42 | ):
43 |
44 | original_resolution = self.render.default_resolution
45 | self.render.set_default_render_resolution(1024)
46 |
47 | selected_camera_elevs = []
48 | selected_camera_azims = []
49 | selected_view_weights = []
50 | selected_alpha_maps = []
51 | viewed_tri_idxs = []
52 | viewed_masks = []
53 |
54 |         # Compute the area of each triangle face
55 | face_areas = self.render.get_face_areas(from_one_index=True)
56 | total_area = face_areas.sum()
57 | face_area_ratios = face_areas / total_area
58 |
59 | candidate_view_num = len(candidate_camera_elevs)
60 | self.render.set_boundary_unreliable_scale(2)
61 |
62 | for elev, azim in zip(candidate_camera_elevs, candidate_camera_azims):
63 | viewed_tri_idx = self.render.render_alpha(elev, azim, return_type="np")
64 | viewed_tri_idxs.append(set(np.unique(viewed_tri_idx.flatten())))
65 | viewed_masks.append(viewed_tri_idx[0, :, :, 0] > 0)
66 |
67 | is_selected = [False for _ in range(candidate_view_num)]
68 | total_viewed_tri_idxs = set()
69 | total_viewed_area = 0.0
70 |
71 | for idx in range(6):
72 | selected_camera_elevs.append(candidate_camera_elevs[idx])
73 | selected_camera_azims.append(candidate_camera_azims[idx])
74 | selected_view_weights.append(candidate_view_weights[idx])
75 | selected_alpha_maps.append(viewed_masks[idx])
76 | is_selected[idx] = True
77 | total_viewed_tri_idxs.update(viewed_tri_idxs[idx])
78 |
79 | total_viewed_area = face_area_ratios[list(total_viewed_tri_idxs)].sum()
80 |         for _ in range(max_selected_view_num - len(selected_view_weights)):
81 | max_inc = 0
82 | max_idx = -1
83 |
84 | for idx, (elev, azim, weight) in enumerate(
85 | zip(candidate_camera_elevs, candidate_camera_azims, candidate_view_weights)
86 | ):
87 | if is_selected[idx]:
88 | continue
89 | new_tri_idxs = viewed_tri_idxs[idx] - total_viewed_tri_idxs
90 | new_inc_area = face_area_ratios[list(new_tri_idxs)].sum()
91 |
92 | if new_inc_area > max_inc:
93 | max_inc = new_inc_area
94 | max_idx = idx
95 |
96 | if max_inc > 0.0001:
97 | is_selected[max_idx] = True
98 | selected_camera_elevs.append(candidate_camera_elevs[max_idx])
99 | selected_camera_azims.append(candidate_camera_azims[max_idx])
100 | selected_view_weights.append(candidate_view_weights[max_idx])
101 | selected_alpha_maps.append(viewed_masks[max_idx])
102 | total_viewed_tri_idxs = total_viewed_tri_idxs.union(viewed_tri_idxs[max_idx])
103 | total_viewed_area += max_inc
104 | else:
105 | break
106 |
107 | self.render.set_default_render_resolution(original_resolution)
108 |
109 | return selected_camera_elevs, selected_camera_azims, selected_view_weights
110 |
111 | def bake_from_multiview(self, views, camera_elevs, camera_azims, view_weights):
112 | project_textures, project_weighted_cos_maps = [], []
113 | project_boundary_maps = []
114 |
115 | for view, camera_elev, camera_azim, weight in zip(views, camera_elevs, camera_azims, view_weights):
116 | project_texture, project_cos_map, project_boundary_map = self.render.back_project(
117 | view, camera_elev, camera_azim
118 | )
119 | project_cos_map = weight * (project_cos_map**self.config.bake_exp)
120 | project_textures.append(project_texture)
121 | project_weighted_cos_maps.append(project_cos_map)
122 | project_boundary_maps.append(project_boundary_map)
123 | texture, ori_trust_map = self.render.fast_bake_texture(project_textures, project_weighted_cos_maps)
124 | return texture, ori_trust_map > 1e-8
125 |
126 |     def texture_inpaint(self, texture, mask, vertex_inpaint=True, method="NS", default=None):
127 | if default is not None:
128 | mask = mask.astype(bool)
129 | inpaint_value = torch.tensor(default, dtype=texture.dtype, device=texture.device)
130 | texture[~mask] = inpaint_value
131 | else:
132 | texture_np = self.render.uv_inpaint(texture, mask, vertex_inpaint, method)
133 | texture = torch.tensor(texture_np / 255).float().to(texture.device)
134 |
135 | return texture
136 |
--------------------------------------------------------------------------------
/hy3dpaint/src/data/dataloader/objaverse_loader_forTexturePBR.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import os
16 | import time
17 | import glob
18 | import json
19 | import random
20 | import numpy as np
21 | import torch
22 | from .loader_util import BaseDataset
23 |
24 |
25 | class TextureDataset(BaseDataset):
26 |
27 | def __init__(
28 | self, json_path, num_view=6, image_size=512, lighting_suffix_pool=["light_PL", "light_AL", "light_ENVMAP"]
29 | ):
30 | self.data = list()
31 | self.num_view = num_view
32 | self.image_size = image_size
33 | self.lighting_suffix_pool = lighting_suffix_pool
34 | if isinstance(json_path, str):
35 | json_path = [json_path]
36 | for jp in json_path:
37 | with open(jp) as f:
38 | self.data.extend(json.load(f))
39 | print("============= length of dataset %d =============" % len(self.data))
40 |
41 | def __getitem__(self, index):
42 | try_sleep_interval = 20
43 | total_try_num = 100
44 | cnt = try_sleep_interval * total_try_num
45 | # try:
46 | images_ref = list()
47 | images_albedo = list()
48 | images_mr = list()
49 | images_normal = list()
50 | images_position = list()
51 | bg_white = [1.0, 1.0, 1.0]
52 | bg_black = [0.0, 0.0, 0.0]
53 | bg_gray = [127 / 255.0, 127 / 255.0, 127 / 255.0]
54 | dirx = self.data[index]
55 |
56 | condition_dict = {}
57 |
58 | # 6view
59 | fix_num_view = self.num_view
60 | available_views = []
61 | for ext in ["*_albedo.png", "*_albedo.jpg", "*_albedo.jpeg"]:
62 | available_views.extend(glob.glob(os.path.join(dirx, "render_tex", ext)))
63 | cond_images = (
64 | glob.glob(os.path.join(dirx, "render_cond", "*.png"))
65 | + glob.glob(os.path.join(dirx, "render_cond", "*.jpg"))
66 | + glob.glob(os.path.join(dirx, "render_cond", "*.jpeg"))
67 | )
68 |
69 |         # Ensure there are enough views to sample from
70 | if len(available_views) < fix_num_view:
71 | print(
72 | f"Warning: Only {len(available_views)} views available, but {fix_num_view} requested."
73 |                 " Using all available views."
74 | )
75 | images_gen = available_views
76 | else:
77 | images_gen = random.sample(available_views, fix_num_view)
78 |
79 | if not cond_images:
80 | raise ValueError(f"No condition images found in {os.path.join(dirx, 'render_cond')}")
81 | ref_image_path = random.choice(cond_images)
82 | light_suffix = None
83 | for suffix in self.lighting_suffix_pool:
84 | if suffix in ref_image_path:
85 | light_suffix = suffix
86 | break
87 | if light_suffix is None:
88 | raise ValueError(f"light suffix not found in {ref_image_path}")
89 | ref_image_diff_light_path = random.choice(
90 | [
91 | ref_image_path.replace(light_suffix, tar_suffix)
92 | for tar_suffix in self.lighting_suffix_pool
93 | if tar_suffix != light_suffix
94 | ]
95 | )
96 | images_ref_paths = [ref_image_path, ref_image_diff_light_path]
97 |
98 | # Data aug
99 | bg_c_record = None
100 | for i, image_ref in enumerate(images_ref_paths):
101 | if random.random() < 0.6:
102 | bg_c = bg_gray
103 | else:
104 | if random.random() < 0.5:
105 | bg_c = bg_black
106 | else:
107 | bg_c = bg_white
108 | if i == 0:
109 | bg_c_record = bg_c
110 | image, alpha = self.load_image(image_ref, bg_c_record)
111 | image = self.augment_image(image, bg_c_record).float()
112 | images_ref.append(image)
113 | condition_dict["images_cond"] = torch.stack(images_ref, dim=0).float()
114 |
115 | for i, image_gen in enumerate(images_gen):
116 | images_albedo.append(self.augment_image(self.load_image(image_gen, bg_gray)[0], bg_gray))
117 | images_mr.append(
118 | self.augment_image(self.load_image(image_gen.replace("_albedo", "_mr"), bg_gray)[0], bg_gray)
119 | )
120 | images_normal.append(
121 | self.augment_image(self.load_image(image_gen.replace("_albedo", "_normal"), bg_gray)[0], bg_gray)
122 | )
123 | images_position.append(
124 | self.augment_image(self.load_image(image_gen.replace("_albedo", "_pos"), bg_gray)[0], bg_gray)
125 | )
126 |
127 | condition_dict["images_albedo"] = torch.stack(images_albedo, dim=0).float()
128 | condition_dict["images_mr"] = torch.stack(images_mr, dim=0).float()
129 | condition_dict["images_normal"] = torch.stack(images_normal, dim=0).float()
130 | condition_dict["images_position"] = torch.stack(images_position, dim=0).float()
131 | condition_dict["name"] = dirx # .replace('/', '_')
132 | return condition_dict # (N, 3, H, W)
133 |
134 | # except Exception as e:
135 | # print(e, self.data[index])
136 | # # exit()
137 |
138 |
139 | if __name__ == "__main__":
140 | dataset = TextureDataset(json_path=["../../../train_examples/examples.json"])
141 | print("images_cond", dataset[0]["images_cond"].shape)
142 | print("images_albedo", dataset[0]["images_albedo"].shape)
143 | print("images_mr", dataset[0]["images_mr"].shape)
144 | print("images_normal", dataset[0]["images_normal"].shape)
145 | print("images_position", dataset[0]["images_position"].shape)
146 | print("name", dataset[0]["name"])
147 |
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/preprocessors.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import cv2
16 | import numpy as np
17 | import torch
18 | from PIL import Image
19 | from einops import repeat, rearrange
20 |
21 |
22 | def array_to_tensor(np_array):
23 | image_pt = torch.tensor(np_array).float()
24 | image_pt = image_pt / 255 * 2 - 1
25 | image_pt = rearrange(image_pt, "h w c -> c h w")
26 | image_pts = repeat(image_pt, "c h w -> b c h w", b=1)
27 | return image_pts
28 |
29 |
30 | class ImageProcessorV2:
31 | def __init__(self, size=512, border_ratio=None):
32 | self.size = size
33 | self.border_ratio = border_ratio
34 |
35 | @staticmethod
36 | def recenter(image, border_ratio: float = 0.2):
37 | """ recenter an image to leave some empty space at the image border.
38 |
39 | Args:
40 | image (ndarray): input image, float/uint8 [H, W, 3/4]
41 | mask (ndarray): alpha mask, bool [H, W]
42 | border_ratio (float, optional): border ratio, image will be resized to (1 - border_ratio). Defaults to 0.2.
43 |
44 | Returns:
45 | ndarray: output image, float/uint8 [H, W, 3/4]
46 | """
47 |
48 | if image.shape[-1] == 4:
49 | mask = image[..., 3]
50 | else:
51 | mask = np.ones_like(image[..., 0:1]) * 255
52 | image = np.concatenate([image, mask], axis=-1)
53 | mask = mask[..., 0]
54 |
55 | H, W, C = image.shape
56 |
57 | size = max(H, W)
58 | result = np.zeros((size, size, C), dtype=np.uint8)
59 |
60 | coords = np.nonzero(mask)
61 | x_min, x_max = coords[0].min(), coords[0].max()
62 | y_min, y_max = coords[1].min(), coords[1].max()
63 | h = x_max - x_min
64 | w = y_max - y_min
65 | if h == 0 or w == 0:
66 | raise ValueError('input image is empty')
67 | desired_size = int(size * (1 - border_ratio))
68 | scale = desired_size / max(h, w)
69 | h2 = int(h * scale)
70 | w2 = int(w * scale)
71 | x2_min = (size - h2) // 2
72 | x2_max = x2_min + h2
73 |
74 | y2_min = (size - w2) // 2
75 | y2_max = y2_min + w2
76 |
77 | result[x2_min:x2_max, y2_min:y2_max] = cv2.resize(image[x_min:x_max, y_min:y_max], (w2, h2),
78 | interpolation=cv2.INTER_AREA)
79 |
80 | bg = np.ones((result.shape[0], result.shape[1], 3), dtype=np.uint8) * 255
81 |
82 | mask = result[..., 3:].astype(np.float32) / 255
83 | result = result[..., :3] * mask + bg * (1 - mask)
84 |
85 | mask = mask * 255
86 | result = result.clip(0, 255).astype(np.uint8)
87 | mask = mask.clip(0, 255).astype(np.uint8)
88 | return result, mask
89 |
90 | def load_image(self, image, border_ratio=0.15, to_tensor=True):
91 | if isinstance(image, str):
92 | image = cv2.imread(image, cv2.IMREAD_UNCHANGED)
93 | image, mask = self.recenter(image, border_ratio=border_ratio)
94 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
95 | elif isinstance(image, Image.Image):
96 | image = image.convert("RGBA")
97 | image = np.asarray(image)
98 | image, mask = self.recenter(image, border_ratio=border_ratio)
99 |
100 | image = cv2.resize(image, (self.size, self.size), interpolation=cv2.INTER_CUBIC)
101 | mask = cv2.resize(mask, (self.size, self.size), interpolation=cv2.INTER_NEAREST)
102 | mask = mask[..., np.newaxis]
103 |
104 | if to_tensor:
105 | image = array_to_tensor(image)
106 | mask = array_to_tensor(mask)
107 | return image, mask
108 |
109 | def __call__(self, image, border_ratio=0.15, to_tensor=True, **kwargs):
110 | if self.border_ratio is not None:
111 | border_ratio = self.border_ratio
112 | image, mask = self.load_image(image, border_ratio=border_ratio, to_tensor=to_tensor)
113 | outputs = {
114 | 'image': image,
115 | 'mask': mask
116 | }
117 | return outputs
118 |
119 |
120 | class MVImageProcessorV2(ImageProcessorV2):
121 | """
122 | view order: front, front clockwise 90, back, front clockwise 270
123 | """
124 | return_view_idx = True
125 |
126 | def __init__(self, size=512, border_ratio=None):
127 | super().__init__(size, border_ratio)
128 | self.view2idx = {
129 | 'front': 0,
130 | 'left': 1,
131 | 'back': 2,
132 | 'right': 3
133 | }
134 |
135 | def __call__(self, image_dict, border_ratio=0.15, to_tensor=True, **kwargs):
136 | if self.border_ratio is not None:
137 | border_ratio = self.border_ratio
138 |
139 | images = []
140 | masks = []
141 | view_idxs = []
142 | for idx, (view_tag, image) in enumerate(image_dict.items()):
143 | view_idxs.append(self.view2idx[view_tag])
144 | image, mask = self.load_image(image, border_ratio=border_ratio, to_tensor=to_tensor)
145 | images.append(image)
146 | masks.append(mask)
147 |
148 | zipped_lists = zip(view_idxs, images, masks)
149 | sorted_zipped_lists = sorted(zipped_lists)
150 | view_idxs, images, masks = zip(*sorted_zipped_lists)
151 |
152 | image = torch.cat(images, 0).unsqueeze(0)
153 | mask = torch.cat(masks, 0).unsqueeze(0)
154 | outputs = {
155 | 'image': image,
156 | 'mask': mask,
157 | 'view_idxs': view_idxs
158 | }
159 | return outputs
160 |
161 |
162 | IMAGE_PROCESSORS = {
163 | "v2": ImageProcessorV2,
164 | 'mv_v2': MVImageProcessorV2,
165 | }
166 |
167 | DEFAULT_IMAGEPROCESSOR = 'v2'
168 |
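169 | # --- Illustrative usage sketch (not part of the original file) ---
170 | # A minimal example, assuming a local RGBA image "example.png": the processor
171 | # recenters the foreground, composites it over white, and returns the image as
172 | # a (1, 3, size, size) tensor in [-1, 1] together with the matching mask.
173 | #
174 | #   processor = IMAGE_PROCESSORS[DEFAULT_IMAGEPROCESSOR](size=512)
175 | #   outputs = processor("example.png", border_ratio=0.15)
176 | #   image, mask = outputs["image"], outputs["mask"]   # (1, 3, 512, 512), (1, 1, 512, 512)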
--------------------------------------------------------------------------------
/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu:
--------------------------------------------------------------------------------
1 | #include "rasterizer.h"
2 |
3 | __device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) {
4 | float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0]));
5 | float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0]));
6 | float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1]));
7 | float y_max = std::max(vt0[1], std::max(vt1[1],vt2[1]));
8 |
9 | for (int px = x_min; px < x_max + 1; ++px) {
10 | if (px < 0 || px >= width)
11 | continue;
12 | for (int py = y_min; py < y_max + 1; ++py) {
13 | if (py < 0 || py >= height)
14 | continue;
15 | float vt[2] = {px + 0.5f, py + 0.5f};
16 | float baryCentricCoordinate[3];
17 | calculateBarycentricCoordinate(vt0, vt1, vt2, vt, baryCentricCoordinate);
18 | if (isBarycentricCoordInBounds(baryCentricCoordinate)) {
19 | int pixel = py * width + px;
20 | if (zbuffer == 0) {
21 |                     atomicExch(reinterpret_cast<unsigned long long int*>(&zbuffer[pixel]), static_cast<unsigned long long int>(idx + 1));
22 | continue;
23 | }
24 | float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2];
25 | float depth_thres = 0;
26 | if (d) {
27 | depth_thres = d[pixel] * 0.49999f + 0.5f + occlusion_truncation;
28 | }
29 |
30 | int z_quantize = depth * (2<<17);
31 | INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1);
32 | if (depth < depth_thres)
33 | continue;
34 |                 atomicMin(reinterpret_cast<unsigned long long int*>(&zbuffer[pixel]), static_cast<unsigned long long int>(token));
35 | }
36 | }
37 | }
38 | }
39 |
40 | __global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces,
41 | float* barycentric_map)
42 | {
43 | int pix = blockIdx.x * blockDim.x + threadIdx.x;
44 | if (pix >= width * height)
45 | return;
46 | INT64 f = zbuffer[pix] % MAXINT;
47 | if (f == (MAXINT-1)) {
48 | findices[pix] = 0;
49 | barycentric_map[pix * 3] = 0;
50 | barycentric_map[pix * 3 + 1] = 0;
51 | barycentric_map[pix * 3 + 2] = 0;
52 | return;
53 | }
54 | findices[pix] = f;
55 | f -= 1;
56 | float barycentric[3] = {0, 0, 0};
57 | if (f >= 0) {
58 | float vt[2] = {float(pix % width) + 0.5f, float(pix / width) + 0.5f};
59 | float* vt0_ptr = V + (F[f * 3] * 4);
60 | float* vt1_ptr = V + (F[f * 3 + 1] * 4);
61 | float* vt2_ptr = V + (F[f * 3 + 2] * 4);
62 |
63 | float vt0[2] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f};
64 | float vt1[2] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f};
65 | float vt2[2] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f};
66 |
67 | calculateBarycentricCoordinate(vt0, vt1, vt2, vt, barycentric);
68 |
69 | barycentric[0] = barycentric[0] / vt0_ptr[3];
70 | barycentric[1] = barycentric[1] / vt1_ptr[3];
71 | barycentric[2] = barycentric[2] / vt2_ptr[3];
72 | float w = 1.0f / (barycentric[0] + barycentric[1] + barycentric[2]);
73 | barycentric[0] *= w;
74 | barycentric[1] *= w;
75 | barycentric[2] *= w;
76 |
77 | }
78 | barycentric_map[pix * 3] = barycentric[0];
79 | barycentric_map[pix * 3 + 1] = barycentric[1];
80 | barycentric_map[pix * 3 + 2] = barycentric[2];
81 | }
82 |
83 | __global__ void rasterizeImagecoordsKernelGPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces)
84 | {
85 | int f = blockIdx.x * blockDim.x + threadIdx.x;
86 | if (f >= num_faces)
87 | return;
88 |
89 | float* vt0_ptr = V + (F[f * 3] * 4);
90 | float* vt1_ptr = V + (F[f * 3 + 1] * 4);
91 | float* vt2_ptr = V + (F[f * 3 + 2] * 4);
92 |
93 | float vt0[3] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f, vt0_ptr[2] / vt0_ptr[3] * 0.49999f + 0.5f};
94 | float vt1[3] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f, vt1_ptr[2] / vt1_ptr[3] * 0.49999f + 0.5f};
95 | float vt2[3] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f, vt2_ptr[2] / vt2_ptr[3] * 0.49999f + 0.5f};
96 |
97 | rasterizeTriangleGPU(f, vt0, vt1, vt2, width, height, zbuffer, d, occlusion_trunc);
98 | }
99 |
100 | std::vector<torch::Tensor> rasterize_image_gpu(torch::Tensor V, torch::Tensor F, torch::Tensor D,
101 | int width, int height, float occlusion_truncation, int use_depth_prior)
102 | {
103 | int device_id = V.get_device();
104 | cudaSetDevice(device_id);
105 | int num_faces = F.size(0);
106 | int num_vertices = V.size(0);
107 | auto options = torch::TensorOptions().dtype(torch::kInt32).device(torch::kCUDA, device_id).requires_grad(false);
108 | auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA, device_id).requires_grad(false);
109 | auto findices = torch::zeros({height, width}, options);
110 | INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1);
111 | auto z_min = torch::ones({height, width}, INT64_options) * (int64_t)maxint;
112 |
113 | if (!use_depth_prior) {
114 |         rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), 0,
115 | (INT64*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces);
116 | } else {
117 |         rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(),
118 | (INT64*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces);
119 | }
120 |
121 | auto float_options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, device_id).requires_grad(false);
122 | auto barycentric = torch::zeros({height, width, 3}, float_options);
123 |     barycentricFromImgcoordGPU<<<(width * height + 255)/256, 256>>>(V.data_ptr<float>(), F.data_ptr<int>(),
124 |         findices.data_ptr<int>(), (INT64*)z_min.data_ptr(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>());
125 |
126 | return {findices, barycentric};
127 | }
128 |
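129 | // --- Descriptive note (not part of the original file) ---
130 | // The 64-bit z-buffer token packs the quantized depth into the high bits and the
131 | // 1-based face index into the low bits (token = z_quantize * MAXINT + idx + 1),
132 | // so the per-pixel atomicMin keeps the closest face, and barycentricFromImgcoordGPU
133 | // recovers the face id with `zbuffer[pix] % MAXINT`.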
--------------------------------------------------------------------------------
/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp:
--------------------------------------------------------------------------------
1 | #include "rasterizer.h"
2 |
3 | void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) {
4 | float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0]));
5 | float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0]));
6 | float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1]));
7 | float y_max = std::max(vt0[1], std::max(vt1[1],vt2[1]));
8 |
9 | for (int px = x_min; px < x_max + 1; ++px) {
10 | if (px < 0 || px >= width)
11 | continue;
12 | for (int py = y_min; py < y_max + 1; ++py) {
13 | if (py < 0 || py >= height)
14 | continue;
15 | float vt[2] = {px + 0.5f, py + 0.5f};
16 | float baryCentricCoordinate[3];
17 | calculateBarycentricCoordinate(vt0, vt1, vt2, vt, baryCentricCoordinate);
18 | if (isBarycentricCoordInBounds(baryCentricCoordinate)) {
19 | int pixel = py * width + px;
20 | if (zbuffer == 0) {
21 | zbuffer[pixel] = (INT64)(idx + 1);
22 | continue;
23 | }
24 |
25 | float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2];
26 | float depth_thres = 0;
27 | if (d) {
28 | depth_thres = d[pixel] * 0.49999f + 0.5f + occlusion_truncation;
29 | }
30 |
31 | int z_quantize = depth * (2<<17);
32 | INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1);
33 | if (depth < depth_thres)
34 | continue;
35 | zbuffer[pixel] = std::min(zbuffer[pixel], token);
36 | }
37 | }
38 | }
39 | }
40 |
41 | void barycentricFromImgcoordCPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces,
42 | float* barycentric_map, int pix)
43 | {
44 | INT64 f = zbuffer[pix] % MAXINT;
45 | if (f == (MAXINT-1)) {
46 | findices[pix] = 0;
47 | barycentric_map[pix * 3] = 0;
48 | barycentric_map[pix * 3 + 1] = 0;
49 | barycentric_map[pix * 3 + 2] = 0;
50 | return;
51 | }
52 | findices[pix] = f;
53 | f -= 1;
54 | float barycentric[3] = {0, 0, 0};
55 | if (f >= 0) {
56 | float vt[2] = {float(pix % width) + 0.5f, float(pix / width) + 0.5f};
57 | float* vt0_ptr = V + (F[f * 3] * 4);
58 | float* vt1_ptr = V + (F[f * 3 + 1] * 4);
59 | float* vt2_ptr = V + (F[f * 3 + 2] * 4);
60 |
61 | float vt0[2] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f};
62 | float vt1[2] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f};
63 | float vt2[2] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f};
64 |
65 | calculateBarycentricCoordinate(vt0, vt1, vt2, vt, barycentric);
66 |
67 | barycentric[0] = barycentric[0] / vt0_ptr[3];
68 | barycentric[1] = barycentric[1] / vt1_ptr[3];
69 | barycentric[2] = barycentric[2] / vt2_ptr[3];
70 | float w = 1.0f / (barycentric[0] + barycentric[1] + barycentric[2]);
71 | barycentric[0] *= w;
72 | barycentric[1] *= w;
73 | barycentric[2] *= w;
74 |
75 | }
76 | barycentric_map[pix * 3] = barycentric[0];
77 | barycentric_map[pix * 3 + 1] = barycentric[1];
78 | barycentric_map[pix * 3 + 2] = barycentric[2];
79 | }
80 |
81 | void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces, int f)
82 | {
83 | float* vt0_ptr = V + (F[f * 3] * 4);
84 | float* vt1_ptr = V + (F[f * 3 + 1] * 4);
85 | float* vt2_ptr = V + (F[f * 3 + 2] * 4);
86 |
87 | float vt0[3] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f, vt0_ptr[2] / vt0_ptr[3] * 0.49999f + 0.5f};
88 | float vt1[3] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f, vt1_ptr[2] / vt1_ptr[3] * 0.49999f + 0.5f};
89 | float vt2[3] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f, vt2_ptr[2] / vt2_ptr[3] * 0.49999f + 0.5f};
90 |
91 | rasterizeTriangleCPU(f, vt0, vt1, vt2, width, height, zbuffer, d, occlusion_trunc);
92 | }
93 |
94 | std::vector<torch::Tensor> rasterize_image_cpu(torch::Tensor V, torch::Tensor F, torch::Tensor D,
95 | int width, int height, float occlusion_truncation, int use_depth_prior)
96 | {
97 | int num_faces = F.size(0);
98 | int num_vertices = V.size(0);
99 | auto options = torch::TensorOptions().dtype(torch::kInt32).requires_grad(false);
100 | auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false);
101 | auto findices = torch::zeros({height, width}, options);
102 | INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1);
103 | auto z_min = torch::ones({height, width}, INT64_options) * (int64_t)maxint;
104 |
105 | if (!use_depth_prior) {
106 | for (int i = 0; i < num_faces; ++i) {
107 |             rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), 0,
108 | (INT64*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces, i);
109 | }
110 | } else {
111 | for (int i = 0; i < num_faces; ++i)
112 |             rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(),
113 | (INT64*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces, i);
114 | }
115 |
116 | auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false);
117 | auto barycentric = torch::zeros({height, width, 3}, float_options);
118 | for (int i = 0; i < width * height; ++i)
119 |         barycentricFromImgcoordCPU(V.data_ptr<float>(), F.data_ptr<int>(),
120 |             findices.data_ptr<int>(), (INT64*)z_min.data_ptr(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>(), i);
121 |
122 | return {findices, barycentric};
123 | }
124 |
125 | std::vector<torch::Tensor> rasterize_image(torch::Tensor V, torch::Tensor F, torch::Tensor D,
126 | int width, int height, float occlusion_truncation, int use_depth_prior)
127 | {
128 | int device_id = V.get_device();
129 | if (device_id == -1)
130 | return rasterize_image_cpu(V, F, D, width, height, occlusion_truncation, use_depth_prior);
131 | else
132 | return rasterize_image_gpu(V, F, D, width, height, occlusion_truncation, use_depth_prior);
133 | }
134 |
135 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
136 | m.def("rasterize_image", &rasterize_image, "Custom image rasterization");
137 | m.def("build_hierarchy", &build_hierarchy, "Custom image rasterization");
138 | m.def("build_hierarchy_with_feat", &build_hierarchy_with_feat, "Custom image rasterization");
139 | }
140 |
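141 | // --- Illustrative usage sketch (not part of the original file) ---
142 | // From Python, assuming the extension is built and imported as
143 | // `custom_rasterizer_kernel`, the binding is typically called as:
144 | //
145 | //   findices, barycentric = custom_rasterizer_kernel.rasterize_image(
146 | //       V, F, D, width, height, occlusion_truncation, use_depth_prior)
147 | //
148 | // where V is an (N, 4) float32 tensor of clip-space vertices, F an (M, 3) int32
149 | // face-index tensor, and D an optional per-pixel depth prior (ignored when
150 | // use_depth_prior is 0). Placing the tensors on a CUDA device selects the GPU path.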
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/models/autoencoders/surface_extractors.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | from typing import Union, Tuple, List
16 |
17 | import numpy as np
18 | import torch
19 | from skimage import measure
20 |
21 |
22 | class Latent2MeshOutput:
23 | def __init__(self, mesh_v=None, mesh_f=None):
24 | self.mesh_v = mesh_v
25 | self.mesh_f = mesh_f
26 |
27 |
28 | def center_vertices(vertices):
29 | """Translate the vertices so that bounding box is centered at zero."""
30 | vert_min = vertices.min(dim=0)[0]
31 | vert_max = vertices.max(dim=0)[0]
32 | vert_center = 0.5 * (vert_min + vert_max)
33 | return vertices - vert_center
34 |
35 |
36 | class SurfaceExtractor:
37 | def _compute_box_stat(self, bounds: Union[Tuple[float], List[float], float], octree_resolution: int):
38 | """
39 | Compute grid size, bounding box minimum coordinates, and bounding box size based on input
40 | bounds and resolution.
41 |
42 | Args:
43 | bounds (Union[Tuple[float], List[float], float]): Bounding box coordinates or a single
44 | float representing half side length.
45 | If float, bounds are assumed symmetric around zero in all axes.
46 | Expected format if list/tuple: [xmin, ymin, zmin, xmax, ymax, zmax].
47 | octree_resolution (int): Resolution of the octree grid.
48 |
49 | Returns:
50 | grid_size (List[int]): Grid size along each axis (x, y, z), each equal to octree_resolution + 1.
51 | bbox_min (np.ndarray): Minimum coordinates of the bounding box (xmin, ymin, zmin).
52 | bbox_size (np.ndarray): Size of the bounding box along each axis (xmax - xmin, etc.).
53 | """
54 | if isinstance(bounds, float):
55 | bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds]
56 |
57 | bbox_min, bbox_max = np.array(bounds[0:3]), np.array(bounds[3:6])
58 | bbox_size = bbox_max - bbox_min
59 | grid_size = [int(octree_resolution) + 1, int(octree_resolution) + 1, int(octree_resolution) + 1]
60 | return grid_size, bbox_min, bbox_size
61 |
62 | def run(self, *args, **kwargs):
63 | """
64 | Abstract method to extract surface mesh from grid logits.
65 |
66 | This method should be implemented by subclasses.
67 |
68 | Raises:
69 | NotImplementedError: Always, since this is an abstract method.
70 | """
71 |         raise NotImplementedError
72 |
73 | def __call__(self, grid_logits, **kwargs):
74 | """
75 | Process a batch of grid logits to extract surface meshes.
76 |
77 | Args:
78 | grid_logits (torch.Tensor): Batch of grid logits with shape (batch_size, ...).
79 | **kwargs: Additional keyword arguments passed to the `run` method.
80 |
81 | Returns:
82 | List[Optional[Latent2MeshOutput]]: List of mesh outputs for each grid in the batch.
83 | If extraction fails for a grid, None is appended at that position.
84 | """
85 | outputs = []
86 | for i in range(grid_logits.shape[0]):
87 | try:
88 | vertices, faces = self.run(grid_logits[i], **kwargs)
89 | vertices = vertices.astype(np.float32)
90 | faces = np.ascontiguousarray(faces)
91 | outputs.append(Latent2MeshOutput(mesh_v=vertices, mesh_f=faces))
92 |
93 | except Exception:
94 | import traceback
95 | traceback.print_exc()
96 | outputs.append(None)
97 |
98 | return outputs
99 |
100 |
101 | class MCSurfaceExtractor(SurfaceExtractor):
102 | def run(self, grid_logit, *, mc_level, bounds, octree_resolution, **kwargs):
103 | """
104 | Extract surface mesh using the Marching Cubes algorithm.
105 |
106 | Args:
107 | grid_logit (torch.Tensor): 3D grid logits tensor representing the scalar field.
108 | mc_level (float): The level (iso-value) at which to extract the surface.
109 | bounds (Union[Tuple[float], List[float], float]): Bounding box coordinates or half side length.
110 | octree_resolution (int): Resolution of the octree grid.
111 | **kwargs: Additional keyword arguments (ignored).
112 |
113 | Returns:
114 | Tuple[np.ndarray, np.ndarray]: Tuple containing:
115 | - vertices (np.ndarray): Extracted mesh vertices, scaled and translated to bounding
116 | box coordinates.
117 | - faces (np.ndarray): Extracted mesh faces (triangles).
118 | """
119 | vertices, faces, normals, _ = measure.marching_cubes(grid_logit.cpu().numpy(),
120 | mc_level,
121 | method="lewiner")
122 | grid_size, bbox_min, bbox_size = self._compute_box_stat(bounds, octree_resolution)
123 | vertices = vertices / grid_size * bbox_size + bbox_min
124 | return vertices, faces
125 |
126 |
127 | class DMCSurfaceExtractor(SurfaceExtractor):
128 | def run(self, grid_logit, *, octree_resolution, **kwargs):
129 | """
130 | Extract surface mesh using Differentiable Marching Cubes (DMC) algorithm.
131 |
132 | Args:
133 | grid_logit (torch.Tensor): 3D grid logits tensor representing the scalar field.
134 | octree_resolution (int): Resolution of the octree grid.
135 | **kwargs: Additional keyword arguments (ignored).
136 |
137 | Returns:
138 | Tuple[np.ndarray, np.ndarray]: Tuple containing:
139 | - vertices (np.ndarray): Extracted mesh vertices, centered and converted to numpy.
140 | - faces (np.ndarray): Extracted mesh faces (triangles), with reversed vertex order.
141 |
142 | Raises:
143 | ImportError: If the 'diso' package is not installed.
144 | """
145 | device = grid_logit.device
146 | if not hasattr(self, 'dmc'):
147 | try:
148 | from diso import DiffDMC
149 | self.dmc = DiffDMC(dtype=torch.float32).to(device)
150 |             except ImportError:
151 | raise ImportError("Please install diso via `pip install diso`, or set mc_algo to 'mc'")
152 | sdf = -grid_logit / octree_resolution
153 | sdf = sdf.to(torch.float32).contiguous()
154 | verts, faces = self.dmc(sdf, deform=None, return_quads=False, normalize=True)
155 | verts = center_vertices(verts)
156 | vertices = verts.detach().cpu().numpy()
157 | faces = faces.detach().cpu().numpy()[:, ::-1]
158 | return vertices, faces
159 |
160 |
161 | SurfaceExtractors = {
162 | 'mc': MCSurfaceExtractor,
163 | 'dmc': DMCSurfaceExtractor,
164 | }
165 |
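166 | # --- Illustrative usage sketch (not part of the original file) ---
167 | # A minimal example, assuming `grid_logits` is a batched scalar-field tensor of
168 | # shape (B, R+1, R+1, R+1) produced by the shape VAE decoder:
169 | #
170 | #   extractor = SurfaceExtractors['mc']()
171 | #   meshes = extractor(grid_logits, mc_level=0.0, bounds=1.01, octree_resolution=256)
172 | #   mesh = meshes[0]                     # Latent2MeshOutput, or None if extraction failed
173 | #   vertices, faces = mesh.mesh_v, mesh.mesh_f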
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/postprocessors.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import os
16 | import tempfile
17 | from typing import Union
18 |
19 | import numpy as np
20 | import pymeshlab
21 | import torch
22 | import trimesh
23 |
24 | from .models.autoencoders import Latent2MeshOutput
25 | from .utils import synchronize_timer
26 |
27 |
28 | def load_mesh(path):
29 | if path.endswith(".glb"):
30 | mesh = trimesh.load(path)
31 | else:
32 | mesh = pymeshlab.MeshSet()
33 | mesh.load_new_mesh(path)
34 | return mesh
35 |
36 |
37 | def reduce_face(mesh: pymeshlab.MeshSet, max_facenum: int = 200000):
38 | if max_facenum > mesh.current_mesh().face_number():
39 | return mesh
40 |
41 | mesh.apply_filter(
42 | "meshing_decimation_quadric_edge_collapse",
43 | targetfacenum=max_facenum,
44 | qualitythr=1.0,
45 | preserveboundary=True,
46 | boundaryweight=3,
47 | preservenormal=True,
48 | preservetopology=True,
49 | autoclean=True
50 | )
51 | return mesh
52 |
53 |
54 | def remove_floater(mesh: pymeshlab.MeshSet):
55 | mesh.apply_filter("compute_selection_by_small_disconnected_components_per_face",
56 | nbfaceratio=0.005)
57 | mesh.apply_filter("compute_selection_transfer_face_to_vertex", inclusive=False)
58 | mesh.apply_filter("meshing_remove_selected_vertices_and_faces")
59 | return mesh
60 |
61 |
62 | def pymeshlab2trimesh(mesh: pymeshlab.MeshSet):
63 | with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file:
64 | mesh.save_current_mesh(temp_file.name)
65 | mesh = trimesh.load(temp_file.name)
66 |     # Check what type of object was loaded
67 | if isinstance(mesh, trimesh.Scene):
68 | combined_mesh = trimesh.Trimesh()
69 |         # If it is a Scene, iterate over all geometries and merge them
70 | for geom in mesh.geometry.values():
71 | combined_mesh = trimesh.util.concatenate([combined_mesh, geom])
72 | mesh = combined_mesh
73 | return mesh
74 |
75 |
76 | def trimesh2pymeshlab(mesh: trimesh.Trimesh):
77 | with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file:
78 | if isinstance(mesh, trimesh.scene.Scene):
79 | for idx, obj in enumerate(mesh.geometry.values()):
80 | if idx == 0:
81 | temp_mesh = obj
82 | else:
83 | temp_mesh = temp_mesh + obj
84 | mesh = temp_mesh
85 | mesh.export(temp_file.name)
86 | mesh = pymeshlab.MeshSet()
87 | mesh.load_new_mesh(temp_file.name)
88 | return mesh
89 |
90 |
91 | def export_mesh(input, output):
92 | if isinstance(input, pymeshlab.MeshSet):
93 | mesh = output
94 |     elif isinstance(input, Latent2MeshOutput):
95 |         out = Latent2MeshOutput()
96 |         out.mesh_v = output.current_mesh().vertex_matrix()
97 |         out.mesh_f = output.current_mesh().face_matrix()
98 |         mesh = out
99 | else:
100 | mesh = pymeshlab2trimesh(output)
101 | return mesh
102 |
103 |
104 | def import_mesh(mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str]) -> pymeshlab.MeshSet:
105 | if isinstance(mesh, str):
106 | mesh = load_mesh(mesh)
107 | elif isinstance(mesh, Latent2MeshOutput):
108 | mesh = pymeshlab.MeshSet()
109 | mesh_pymeshlab = pymeshlab.Mesh(vertex_matrix=mesh.mesh_v, face_matrix=mesh.mesh_f)
110 | mesh.add_mesh(mesh_pymeshlab, "converted_mesh")
111 |
112 | if isinstance(mesh, (trimesh.Trimesh, trimesh.scene.Scene)):
113 | mesh = trimesh2pymeshlab(mesh)
114 |
115 | return mesh
116 |
117 |
118 | class FaceReducer:
119 | @synchronize_timer('FaceReducer')
120 | def __call__(
121 | self,
122 | mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str],
123 | max_facenum: int = 40000
124 | ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh]:
125 | ms = import_mesh(mesh)
126 | ms = reduce_face(ms, max_facenum=max_facenum)
127 | mesh = export_mesh(mesh, ms)
128 | return mesh
129 |
130 |
131 | class FloaterRemover:
132 | @synchronize_timer('FloaterRemover')
133 | def __call__(
134 | self,
135 | mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str],
136 | ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput]:
137 | ms = import_mesh(mesh)
138 | ms = remove_floater(ms)
139 | mesh = export_mesh(mesh, ms)
140 | return mesh
141 |
142 |
143 | class DegenerateFaceRemover:
144 | @synchronize_timer('DegenerateFaceRemover')
145 | def __call__(
146 | self,
147 | mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str],
148 | ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput]:
149 | ms = import_mesh(mesh)
150 |
151 | with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file:
152 | ms.save_current_mesh(temp_file.name)
153 | ms = pymeshlab.MeshSet()
154 | ms.load_new_mesh(temp_file.name)
155 |
156 | mesh = export_mesh(mesh, ms)
157 | return mesh
158 |
159 |
160 | def mesh_normalize(mesh):
161 | """
162 | Normalize mesh vertices to sphere
163 | """
164 | scale_factor = 1.2
165 | vtx_pos = np.asarray(mesh.vertices)
166 | max_bb = (vtx_pos - 0).max(0)[0]
167 | min_bb = (vtx_pos - 0).min(0)[0]
168 |
169 | center = (max_bb + min_bb) / 2
170 |
171 | scale = torch.norm(torch.tensor(vtx_pos - center, dtype=torch.float32), dim=1).max() * 2.0
172 |
173 | vtx_pos = (vtx_pos - center) * (scale_factor / float(scale))
174 | mesh.vertices = vtx_pos
175 |
176 | return mesh
177 |
178 |
179 | class MeshSimplifier:
180 | def __init__(self, executable: str = None):
181 | if executable is None:
182 | CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
183 | executable = os.path.join(CURRENT_DIR, "mesh_simplifier.bin")
184 | self.executable = executable
185 |
186 | @synchronize_timer('MeshSimplifier')
187 | def __call__(
188 | self,
189 | mesh: Union[trimesh.Trimesh],
190 | ) -> Union[trimesh.Trimesh]:
191 | with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as temp_input:
192 | with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as temp_output:
193 | mesh.export(temp_input.name)
194 | os.system(f'{self.executable} {temp_input.name} {temp_output.name}')
195 | ms = trimesh.load(temp_output.name, process=False)
196 | if isinstance(ms, trimesh.Scene):
197 | combined_mesh = trimesh.Trimesh()
198 | for geom in ms.geometry.values():
199 | combined_mesh = trimesh.util.concatenate([combined_mesh, geom])
200 | ms = combined_mesh
201 | ms = mesh_normalize(ms)
202 | return ms
203 |
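204 | # --- Illustrative usage sketch (not part of the original file) ---
205 | # A typical clean-up chain, assuming `mesh` is a trimesh.Trimesh produced by the
206 | # shape pipeline; each step accepts and returns the same mesh representation:
207 | #
208 | #   mesh = FloaterRemover()(mesh)
209 | #   mesh = DegenerateFaceRemover()(mesh)
210 | #   mesh = FaceReducer()(mesh, max_facenum=40000)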
--------------------------------------------------------------------------------
/hy3dshape/hy3dshape/models/denoisers/moe_layers.py:
--------------------------------------------------------------------------------
1 | # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
2 | # except for the third-party components listed below.
3 | # Hunyuan 3D does not impose any additional limitations beyond what is outlined
4 | # in the respective licenses of these third-party components.
5 | # Users must comply with all terms and conditions of original licenses of these third-party
6 | # components and must ensure that the usage of the third party components adheres to
7 | # all relevant laws and regulations.
8 |
9 | # For avoidance of doubts, Hunyuan 3D means the large language models and
10 | # their software and algorithms, including trained model weights, parameters (including
11 | # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
12 | # fine-tuning enabling code and other elements of the foregoing made publicly available
13 | # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
14 |
15 | import torch
16 | import torch.nn as nn
17 | import numpy as np
18 | import math
19 | from timm.models.vision_transformer import PatchEmbed, Attention, Mlp
20 |
21 | import torch.nn.functional as F
22 | from diffusers.models.attention import FeedForward
23 |
24 | class AddAuxiliaryLoss(torch.autograd.Function):
25 | """
26 |     Autograd helper that attaches an auxiliary (aux) loss to a tensor so that
27 |     the gradient of the aux loss is included during backpropagation.
28 | """
29 | @staticmethod
30 | def forward(ctx, x, loss):
31 | assert loss.numel() == 1
32 | ctx.dtype = loss.dtype
33 | ctx.required_aux_loss = loss.requires_grad
34 | return x
35 |
36 | @staticmethod
37 | def backward(ctx, grad_output):
38 | grad_loss = None
39 | if ctx.required_aux_loss:
40 | grad_loss = torch.ones(1, dtype=ctx.dtype, device=grad_output.device)
41 | return grad_output, grad_loss
42 |
43 | class MoEGate(nn.Module):
44 | def __init__(self, embed_dim, num_experts=16, num_experts_per_tok=2, aux_loss_alpha=0.01):
45 | super().__init__()
46 | self.top_k = num_experts_per_tok
47 | self.n_routed_experts = num_experts
48 |
49 | self.scoring_func = 'softmax'
50 | self.alpha = aux_loss_alpha
51 | self.seq_aux = False
52 |
53 | # topk selection algorithm
54 | self.norm_topk_prob = False
55 | self.gating_dim = embed_dim
56 | self.weight = nn.Parameter(torch.empty((self.n_routed_experts, self.gating_dim)))
57 | self.reset_parameters()
58 |
59 | def reset_parameters(self) -> None:
60 | import torch.nn.init as init
61 | init.kaiming_uniform_(self.weight, a=math.sqrt(5))
62 |
63 | def forward(self, hidden_states):
64 | bsz, seq_len, h = hidden_states.shape
65 | # print(bsz, seq_len, h)
66 | ### compute gating score
67 | hidden_states = hidden_states.view(-1, h)
68 | logits = F.linear(hidden_states, self.weight, None)
69 | if self.scoring_func == 'softmax':
70 | scores = logits.softmax(dim=-1)
71 | else:
72 |             raise NotImplementedError(f'Unsupported scoring function for MoE gating: {self.scoring_func}')
73 |
74 | ### select top-k experts
75 | topk_weight, topk_idx = torch.topk(scores, k=self.top_k, dim=-1, sorted=False)
76 |
77 | ### norm gate to sum 1
78 | if self.top_k > 1 and self.norm_topk_prob:
79 | denominator = topk_weight.sum(dim=-1, keepdim=True) + 1e-20
80 | topk_weight = topk_weight / denominator
81 |
82 | ### expert-level computation auxiliary loss
83 | if self.training and self.alpha > 0.0:
84 | scores_for_aux = scores
85 | aux_topk = self.top_k
86 | # always compute aux loss based on the naive greedy topk method
87 | topk_idx_for_aux_loss = topk_idx.view(bsz, -1)
88 | if self.seq_aux:
89 | scores_for_seq_aux = scores_for_aux.view(bsz, seq_len, -1)
90 | ce = torch.zeros(bsz, self.n_routed_experts, device=hidden_states.device)
91 | ce.scatter_add_(
92 | 1,
93 | topk_idx_for_aux_loss,
94 | torch.ones(
95 | bsz, seq_len * aux_topk,
96 | device=hidden_states.device
97 | )
98 | ).div_(seq_len * aux_topk / self.n_routed_experts)
99 | aux_loss = (ce * scores_for_seq_aux.mean(dim = 1)).sum(dim = 1).mean()
100 | aux_loss = aux_loss * self.alpha
101 | else:
102 | mask_ce = F.one_hot(topk_idx_for_aux_loss.view(-1),
103 | num_classes=self.n_routed_experts)
104 | ce = mask_ce.float().mean(0)
105 | Pi = scores_for_aux.mean(0)
106 | fi = ce * self.n_routed_experts
107 | aux_loss = (Pi * fi).sum() * self.alpha
108 | else:
109 | aux_loss = None
110 | return topk_idx, topk_weight, aux_loss
111 |
112 | class MoEBlock(nn.Module):
113 | def __init__(self, dim, num_experts=8, moe_top_k=2,
114 | activation_fn = "gelu", dropout=0.0, final_dropout = False,
115 | ff_inner_dim = None, ff_bias = True):
116 | super().__init__()
117 | self.moe_top_k = moe_top_k
118 | self.experts = nn.ModuleList([
119 | FeedForward(dim,dropout=dropout,
120 | activation_fn=activation_fn,
121 | final_dropout=final_dropout,
122 | inner_dim=ff_inner_dim,
123 | bias=ff_bias)
124 | for i in range(num_experts)])
125 | self.gate = MoEGate(embed_dim=dim, num_experts=num_experts, num_experts_per_tok=moe_top_k)
126 |
127 | self.shared_experts = FeedForward(dim,dropout=dropout, activation_fn=activation_fn,
128 | final_dropout=final_dropout, inner_dim=ff_inner_dim,
129 | bias=ff_bias)
130 |
131 | def initialize_weight(self):
132 | pass
133 |
134 | def forward(self, hidden_states):
135 | identity = hidden_states
136 | orig_shape = hidden_states.shape
137 | topk_idx, topk_weight, aux_loss = self.gate(hidden_states)
138 |
139 | hidden_states = hidden_states.view(-1, hidden_states.shape[-1])
140 | flat_topk_idx = topk_idx.view(-1)
141 | if self.training:
142 | hidden_states = hidden_states.repeat_interleave(self.moe_top_k, dim=0)
143 | y = torch.empty_like(hidden_states, dtype=hidden_states.dtype)
144 | for i, expert in enumerate(self.experts):
145 | tmp = expert(hidden_states[flat_topk_idx == i])
146 | y[flat_topk_idx == i] = tmp.to(hidden_states.dtype)
147 | y = (y.view(*topk_weight.shape, -1) * topk_weight.unsqueeze(-1)).sum(dim=1)
148 | y = y.view(*orig_shape)
149 | y = AddAuxiliaryLoss.apply(y, aux_loss)
150 | else:
151 | y = self.moe_infer(hidden_states, flat_topk_idx, topk_weight.view(-1, 1)).view(*orig_shape)
152 | y = y + self.shared_experts(identity)
153 | return y
154 |
155 |
156 | @torch.no_grad()
157 | def moe_infer(self, x, flat_expert_indices, flat_expert_weights):
158 | expert_cache = torch.zeros_like(x)
159 | idxs = flat_expert_indices.argsort()
160 | tokens_per_expert = flat_expert_indices.bincount().cpu().numpy().cumsum(0)
161 | token_idxs = idxs // self.moe_top_k
162 | for i, end_idx in enumerate(tokens_per_expert):
163 | start_idx = 0 if i == 0 else tokens_per_expert[i-1]
164 | if start_idx == end_idx:
165 | continue
166 | expert = self.experts[i]
167 | exp_token_idx = token_idxs[start_idx:end_idx]
168 | expert_tokens = x[exp_token_idx]
169 | expert_out = expert(expert_tokens)
170 | expert_out.mul_(flat_expert_weights[idxs[start_idx:end_idx]])
171 |
172 | # for fp16 and other dtype
173 | expert_cache = expert_cache.to(expert_out.dtype)
174 | expert_cache.scatter_reduce_(0, exp_token_idx.view(-1, 1).repeat(1, x.shape[-1]),
175 | expert_out,
176 | reduce='sum')
177 | return expert_cache
178 |
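179 | # --- Illustrative usage sketch (not part of the original file) ---
180 | # MoEBlock is a drop-in replacement for a feed-forward block on (batch, seq, dim)
181 | # hidden states: the gate routes each token to `moe_top_k` of the routed experts,
182 | # and the shared expert output is always added back.
183 | #
184 | #   block = MoEBlock(dim=1024, num_experts=8, moe_top_k=2)
185 | #   x = torch.randn(2, 16, 1024)
186 | #   y = block(x)          # shape (2, 16, 1024); aux load-balancing loss attached in training mode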
--------------------------------------------------------------------------------