├── .editorconfig ├── .github └── workflows │ └── pre-commit.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── DOCUMENTATION.md ├── LICENSE ├── README.md ├── assets ├── Interactive3d.jpg ├── arc.png ├── config.json ├── interactive3d.png └── results.png ├── configs ├── control4d-static.yaml ├── dreamfusion-if.yaml ├── dreamfusion-sd.yaml ├── experimental │ ├── co3d-imagecondition.yaml │ ├── imagecondition.yaml │ ├── imagecondition_zero123nerf.yaml │ └── imagecondition_zero123nerf_refine.yaml ├── fantasia3d-texture.yaml ├── fantasia3d.yaml ├── fit_gs.yaml ├── gaussian_splatting.yaml ├── geo_refine.yaml ├── gradio │ ├── dreamfusion-if.yaml │ ├── dreamfusion-sd.yaml │ ├── fantasia3d.yaml │ ├── latentnerf.yaml │ ├── sjc.yaml │ └── textmesh-if.yaml ├── instructnerf2nerf.yaml ├── interested_refine.yaml ├── interested_refine_pixart.yaml ├── latentnerf-refine.yaml ├── latentnerf.yaml ├── magic123-coarse-sd.yaml ├── magic3d-coarse-if.yaml ├── magic3d-coarse-sd.yaml ├── magic3d-refine-sd.yaml ├── mvdream-sd21-gaussian.yaml ├── mvdream-sd21-shading.yaml ├── mvdream-sd21.yaml ├── post_geo_refine.yaml ├── prolificdreamer-geometry-from.yaml ├── prolificdreamer-geometry.yaml ├── prolificdreamer-patch.yaml ├── prolificdreamer-scene.yaml ├── prolificdreamer-texture.yaml ├── prolificdreamer.yaml ├── sjc.yaml ├── sketchshape-refine.yaml ├── sketchshape.yaml ├── textmesh-if.yaml ├── zero123-geometry.yaml ├── zero123.yaml └── zero123_64.yaml ├── docker ├── Dockerfile └── compose.yaml ├── docs └── installation.md ├── extern ├── MVDream │ ├── .gitignore │ ├── LICENSE-CODE │ ├── README.md │ ├── mvdream │ │ ├── __init__.py │ │ ├── camera_utils.py │ │ ├── configs │ │ │ ├── sd-v1.yaml │ │ │ └── sd-v2-base.yaml │ │ ├── ldm │ │ │ ├── __init__.py │ │ │ ├── interface.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── autoencoder.py │ │ │ │ └── diffusion │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── ddim.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── attention.py │ │ │ │ ├── diffusionmodules │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── model.py │ │ │ │ │ ├── openaimodel.py │ │ │ │ │ └── util.py │ │ │ │ ├── distributions │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── distributions.py │ │ │ │ ├── ema.py │ │ │ │ └── encoders │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── modules.py │ │ │ └── util.py │ │ └── model_zoo.py │ ├── requirements.txt │ ├── scripts │ │ ├── gradio_app.py │ │ └── t2i.py │ └── setup.py ├── ldm_zero123 │ ├── extras.py │ ├── guidance.py │ ├── lr_scheduler.py │ ├── models │ │ ├── autoencoder.py │ │ └── diffusion │ │ │ ├── __init__.py │ │ │ ├── classifier.py │ │ │ ├── ddim.py │ │ │ ├── ddpm.py │ │ │ ├── plms.py │ │ │ └── sampling_util.py │ ├── modules │ │ ├── attention.py │ │ ├── diffusionmodules │ │ │ ├── __init__.py │ │ │ ├── model.py │ │ │ ├── openaimodel.py │ │ │ └── util.py │ │ ├── distributions │ │ │ ├── __init__.py │ │ │ └── distributions.py │ │ ├── ema.py │ │ ├── encoders │ │ │ ├── __init__.py │ │ │ └── modules.py │ │ ├── evaluate │ │ │ ├── adm_evaluator.py │ │ │ ├── evaluate_perceptualsim.py │ │ │ ├── frechet_video_distance.py │ │ │ ├── ssim.py │ │ │ └── torch_frechet_video_distance.py │ │ ├── image_degradation │ │ │ ├── __init__.py │ │ │ ├── bsrgan.py │ │ │ ├── bsrgan_light.py │ │ │ └── utils_image.py │ │ ├── losses │ │ │ ├── __init__.py │ │ │ ├── contperceptual.py │ │ │ └── vqperceptual.py │ │ └── x_transformer.py │ ├── thirdp │ │ └── psp │ │ │ ├── helpers.py │ │ │ ├── id_loss.py │ │ │ └── model_irse.py │ └── util.py └── zero123.py ├── gradio_app.py ├── keyboard.py ├── launch.py ├── 
requirements-dev.txt ├── requirements.txt ├── threestudio ├── __init__.py ├── data │ ├── __init__.py │ ├── co3d.py │ ├── edit_multiview.py │ ├── edit_multiview_gs.py │ ├── image.py │ ├── multiview.py │ └── uncond.py ├── models │ ├── __init__.py │ ├── background │ │ ├── __init__.py │ │ ├── base.py │ │ ├── neural_environment_map_background.py │ │ ├── solid_color_background.py │ │ └── textured_background.py │ ├── exporters │ │ ├── __init__.py │ │ ├── base.py │ │ └── mesh_exporter.py │ ├── geometry │ │ ├── __init__.py │ │ ├── base.py │ │ ├── gaussian.py │ │ ├── implicit_sdf.py │ │ ├── implicit_volume.py │ │ ├── implicit_volume_edit.py │ │ ├── tetrahedra_sdf_grid.py │ │ └── volume_grid.py │ ├── guidance │ │ ├── __init__.py │ │ ├── controlnet_guidance.py │ │ ├── deep_floyd_guidance.py │ │ ├── deep_floyd_guidance_stage2.py │ │ ├── instructpix2pix_guidance.py │ │ ├── multiview_diffusion_guidance.py │ │ ├── pixart_guidance.py │ │ ├── stable_diffusion_guidance.py │ │ ├── stable_diffusion_unified_guidance.py │ │ ├── stable_diffusion_vsd_guidance.py │ │ ├── zero123_guidance.py │ │ └── zero123_unified_guidance.py │ ├── isosurface.py │ ├── materials │ │ ├── __init__.py │ │ ├── base.py │ │ ├── diffuse_with_point_light_material.py │ │ ├── hybrid_rgb_latent_material.py │ │ ├── neural_radiance_material.py │ │ ├── no_material.py │ │ ├── no_material_backup.py │ │ ├── pbr_material.py │ │ └── sd_latent_adapter_material.py │ ├── mesh.py │ ├── networks.py │ ├── prompt_processors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── deepfloyd_prompt_processor.py │ │ ├── dummy_prompt_processor.py │ │ ├── pixart_prompt_processor.py │ │ └── stable_diffusion_prompt_processor.py │ └── renderers │ │ ├── __init__.py │ │ ├── base.py │ │ ├── deferred_volume_renderer.py │ │ ├── diff_gaussian_rasterizer.py │ │ ├── gan_volume_renderer.py │ │ ├── gsgen_renderer.py │ │ ├── magic123_renderer.py │ │ ├── nerf_volume_renderer.py │ │ ├── neus_volume_renderer.py │ │ ├── nvdiff_rasterizer.py │ │ ├── patch_renderer.py │ │ └── threestudio_renderer.py ├── scripts │ ├── make_training_vid.py │ ├── run_gaussian.py │ ├── run_zero123.sh │ ├── run_zero123_comparison.sh │ ├── run_zero123_phase.sh │ ├── run_zero123_phase2.sh │ ├── run_zero123_sbatch.py │ ├── zero123_demo.py │ └── zero123_sbatch.sh ├── systems │ ├── __init__.py │ ├── base.py │ ├── control4d_multiview.py │ ├── dreamfusion.py │ ├── fantasia3d.py │ ├── gaussian_splatting.py │ ├── imagedreamfusion.py │ ├── instructnerf2nerf.py │ ├── interactive3d.py │ ├── latentnerf.py │ ├── magic123.py │ ├── magic3d.py │ ├── optimizers.py │ ├── prolificdreamer.py │ ├── sjc.py │ ├── textmesh.py │ ├── utils.py │ └── zero123.py └── utils │ ├── GAN │ ├── attention.py │ ├── discriminator.py │ ├── distribution.py │ ├── loss.py │ ├── mobilenet.py │ ├── network_util.py │ ├── util.py │ └── vae.py │ ├── __init__.py │ ├── base.py │ ├── callbacks.py │ ├── config.py │ ├── mesh.py │ ├── misc.py │ ├── ops.py │ ├── perceptual │ ├── __init__.py │ ├── perceptual.py │ └── utils.py │ ├── rasterize.py │ ├── saving.py │ └── typing.py └── utils ├── region_select_tool.py ├── test_pixart.py └── test_sdxl.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*.py] 4 | charset = utf-8 5 | trim_trailing_whitespace = true 6 | end_of_line = lf 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 4 10 | 11 | [*.md] 12 | trim_trailing_whitespace = false 13 | -------------------------------------------------------------------------------- 
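The tree above lists threestudio-style experiment configs under `configs/`, which `launch.py` consumes; they rely on OmegaConf features such as `???` mandatory fields and `${...}` interpolation (e.g. `${rmspace:${system.prompt_processor.prompt},_}`). The sketch below is a minimal, illustrative way such a config might be loaded and overridden from the command line; the `rmspace` resolver body, the chosen config path, and the example prompt are assumptions for illustration, not the project's actual launch code.

```python
# Minimal sketch (assumption: not the repo's actual launch.py logic) of loading
# one of the YAML configs under configs/ with OmegaConf.
from omegaconf import OmegaConf

# Stand-in resolver for tags like "${rmspace:${system.prompt_processor.prompt},_}";
# threestudio registers its own resolvers, this lambda is only illustrative.
OmegaConf.register_new_resolver(
    "rmspace", lambda s, sub: s.replace(" ", sub), replace=True
)

# Load the experiment config; fields set to "???" (e.g. system.prompt_processor.prompt)
# remain mandatory until an override supplies them.
cfg = OmegaConf.load("configs/dreamfusion-sd.yaml")

# Dot-list overrides, roughly what a CLI invocation along the lines of
#   python launch.py --config configs/dreamfusion-sd.yaml --train \
#       system.prompt_processor.prompt="a DSLR photo of a hamburger"
# would pass through.
overrides = OmegaConf.from_dotlist(
    ["system.prompt_processor.prompt=a DSLR photo of a hamburger"]
)
cfg = OmegaConf.merge(cfg, overrides)

# Interpolations resolve lazily on access.
print(cfg.tag)                             # -> "a_DSLR_photo_of_a_hamburger"
print(cfg.checkpoint.every_n_train_steps)  # -> same value as cfg.trainer.max_steps
```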
/.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | on: [push, pull_request] 3 | 4 | concurrency: 5 | group: ${{ github.workflow }}-${{ github.ref }} 6 | cancel-in-progress: true 7 | 8 | jobs: 9 | pre-commit: 10 | runs-on: ubuntu-22.04 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Set up Python 3.8 14 | uses: actions/setup-python@v4 15 | with: 16 | python-version: '3.8' 17 | - name: Install pre-commit 18 | run: | 19 | pip install pre-commit 20 | pre-commit install 21 | - name: Run pre-commit 22 | run: pre-commit run --all-files 23 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.4.0 7 | hooks: 8 | - id: trailing-whitespace 9 | - id: check-ast 10 | - id: check-merge-conflict 11 | - id: check-yaml 12 | - id: end-of-file-fixer 13 | - id: trailing-whitespace 14 | args: [--markdown-linebreak-ext=md] 15 | 16 | - repo: https://github.com/psf/black 17 | rev: 23.3.0 18 | hooks: 19 | - id: black 20 | language_version: python3.8 21 | 22 | - repo: https://github.com/pycqa/isort 23 | rev: 5.12.0 24 | hooks: 25 | - id: isort 26 | exclude: README.md 27 | args: ["--profile", "black"] 28 | 29 | # temporarily disable static type checking 30 | # - repo: https://github.com/pre-commit/mirrors-mypy 31 | # rev: v1.2.0 32 | # hooks: 33 | # - id: mypy 34 | # args: ["--ignore-missing-imports", "--scripts-are-modules", "--pretty"] 35 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | disable=R,C 2 | 3 | [TYPECHECK] 4 | # List of members which are set dynamically and missed by pylint inference 5 | # system, and so shouldn't trigger E1101 when accessed. Python regular 6 | # expressions are accepted. 
7 | generated-members=numpy.*,torch.*,cv2.* 8 | -------------------------------------------------------------------------------- /assets/Interactive3d.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/assets/Interactive3d.jpg -------------------------------------------------------------------------------- /assets/arc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/assets/arc.png -------------------------------------------------------------------------------- /assets/config.json: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /assets/interactive3d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/assets/interactive3d.png -------------------------------------------------------------------------------- /assets/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/assets/results.png -------------------------------------------------------------------------------- /configs/control4d-static.yaml: -------------------------------------------------------------------------------- 1 | name: "control4d-static" 2 | tag: "${basename:${data.dataroot}}_${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "multiview-camera-datamodule" 7 | data: 8 | train_downsample_resolution: 2 9 | eval_downsample_resolution: 2 10 | dataroot: ??? 11 | 12 | system_type: "control4d-multiview-system" 13 | system: 14 | start_editing_step: 2000 15 | 16 | geometry_type: "implicit-volume" 17 | geometry: 18 | radius: 2. 19 | n_feature_dims: 11 20 | normal_type: analytic 21 | pos_encoding_config: 22 | otype: HashGrid 23 | n_levels: 16 24 | n_features_per_level: 2 25 | log2_hashmap_size: 19 26 | base_resolution: 16 27 | per_level_scale: 1.4472692374403782 # max resolution 4096 28 | density_bias: "blob_magic3d" 29 | density_activation: softplus 30 | density_blob_scale: 10. 31 | density_blob_std: 0.5 32 | isosurface_resolution: 128 33 | isosurface_threshold: auto 34 | isosurface_coarse_to_fine: true 35 | 36 | material_type: "hybrid-rgb-latent-material" 37 | material: 38 | n_output_dims: 11 39 | requires_normal: true 40 | 41 | background_type: "solid-color-background" 42 | background: 43 | n_output_dims: 11 44 | color: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 45 | 46 | renderer_type: "gan-volume-renderer" 47 | renderer: 48 | base_renderer_type: "nerf-volume-renderer" 49 | base_renderer: 50 | radius: ${system.geometry.radius} 51 | num_samples_per_ray: 512 52 | 53 | guidance_type: "stable-diffusion-controlnet-guidance" 54 | guidance: 55 | control_type: "normal" 56 | min_step_percent: 0.05 57 | max_step_percent: 0.8 58 | condition_scale: 1.0 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 63 | 64 | loggers: 65 | wandb: 66 | enable: false 67 | project: 'threestudio' 68 | 69 | loss: 70 | lambda_sds: 0. 
71 | lambda_orient: [0, 10.0, 1000., 5000.0] 72 | lambda_sparsity: 1.0 73 | lambda_opaque: 1.0 74 | lambda_l1: 10. 75 | lambda_p: 10. 76 | lambda_kl: 0.000001 77 | lambda_G: 0.01 78 | lambda_D: 1. 79 | optimizer: 80 | name: Adam 81 | args: 82 | lr: 0.01 83 | betas: [0.9, 0.99] 84 | eps: 1.e-15 85 | params: 86 | geometry: 87 | lr: 0.01 88 | background: 89 | lr: 0.001 90 | renderer.generator: 91 | lr: 0.0001 92 | renderer.local_encoder: 93 | lr: 0.0001 94 | renderer.global_encoder: 95 | lr: 0.0001 96 | optimizer_dis: 97 | name: Adam 98 | args: 99 | lr: 0.01 100 | betas: [0.9, 0.99] 101 | eps: 1.e-15 102 | params: 103 | renderer.discriminator: 104 | lr: 0.00001 105 | 106 | trainer: 107 | max_steps: 50000 108 | log_every_n_steps: 1 109 | num_sanity_val_steps: 0 110 | val_check_interval: 200 111 | enable_progress_bar: true 112 | precision: 16-mixed 113 | 114 | checkpoint: 115 | save_last: true 116 | save_top_k: -1 117 | every_n_train_steps: ${trainer.max_steps} 118 | -------------------------------------------------------------------------------- /configs/dreamfusion-if.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.447269237440378 # max resolution 4096 47 | start_level: 8 # resolution ~200 48 | start_step: 2000 49 | update_steps: 500 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: scale_-11_01 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: scale_-11_01 59 | 60 | renderer_type: "nerf-volume-renderer" 61 | renderer: 62 | radius: ${system.geometry.radius} 63 | num_samples_per_ray: 512 64 | 65 | prompt_processor_type: "deep-floyd-prompt-processor" 66 | prompt_processor: 67 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/.cache/huggingface/hub/models--DeepFloyd--IF-I-XL-v1.0/snapshots/c03d510e9b75bce9f9db5bb85148c1402ad7e694" # "DeepFloyd/IF-I-XL-v1.0" 68 | prompt: ??? 
69 | 70 | guidance_type: "deep-floyd-guidance" 71 | guidance: 72 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/.cache/huggingface/hub/models--DeepFloyd--IF-I-XL-v1.0/snapshots/c03d510e9b75bce9f9db5bb85148c1402ad7e694" # "DeepFloyd/IF-I-XL-v1.0" 73 | guidance_scale: 20. 74 | weighting_strategy: sds 75 | min_step_percent: 0.02 76 | max_step_percent: 0.98 77 | 78 | loggers: 79 | wandb: 80 | enable: false 81 | project: 'threestudio' 82 | name: None 83 | 84 | loss: 85 | lambda_sds: 1. 86 | lambda_orient: [0, 10., 1000., 5000] 87 | lambda_sparsity: 1. 88 | lambda_opaque: 0.0 89 | optimizer: 90 | name: Adam 91 | args: 92 | lr: 0.01 93 | betas: [0.9, 0.99] 94 | eps: 1.e-15 95 | params: 96 | geometry: 97 | lr: 0.01 98 | background: 99 | lr: 0.001 100 | 101 | trainer: 102 | max_steps: 10000 103 | log_every_n_steps: 1 104 | num_sanity_val_steps: 0 105 | val_check_interval: 200 106 | enable_progress_bar: true 107 | precision: 16-mixed 108 | 109 | checkpoint: 110 | save_last: true # save at each validation time 111 | save_top_k: -1 112 | every_n_train_steps: ${trainer.max_steps} 113 | -------------------------------------------------------------------------------- /configs/dreamfusion-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.447269237440378 # max resolution 4096 47 | start_level: 8 # resolution ~200 48 | start_step: 2000 49 | update_steps: 500 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: sigmoid 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: sigmoid 59 | 60 | renderer_type: "nerf-volume-renderer" 61 | renderer: 62 | radius: ${system.geometry.radius} 63 | num_samples_per_ray: 512 64 | 65 | prompt_processor_type: "stable-diffusion-prompt-processor" 66 | prompt_processor: 67 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 68 | prompt: ??? 
69 | 70 | guidance_type: "stable-diffusion-guidance" 71 | guidance: 72 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 73 | guidance_scale: 100. 74 | weighting_strategy: sds 75 | min_step_percent: 0.02 76 | max_step_percent: 0.98 77 | 78 | loggers: 79 | wandb: 80 | enable: false 81 | project: "threestudio" 82 | name: None 83 | 84 | loss: 85 | lambda_sds: 1. 86 | lambda_orient: [0, 10., 1000., 5000] 87 | lambda_sparsity: 1. 88 | lambda_opaque: 0. 89 | optimizer: 90 | name: Adam 91 | args: 92 | lr: 0.01 93 | betas: [0.9, 0.99] 94 | eps: 1.e-15 95 | params: 96 | geometry: 97 | lr: 0.01 98 | background: 99 | lr: 0.001 100 | 101 | trainer: 102 | max_steps: 10000 103 | log_every_n_steps: 1 104 | num_sanity_val_steps: 0 105 | val_check_interval: 200 106 | enable_progress_bar: true 107 | precision: 16-mixed 108 | 109 | checkpoint: 110 | save_last: true # save at each validation time 111 | save_top_k: -1 112 | every_n_train_steps: ${trainer.max_steps} 113 | -------------------------------------------------------------------------------- /configs/experimental/co3d-imagecondition.yaml: -------------------------------------------------------------------------------- 1 | name: "co3d-imagecondition" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "co3d-datamodule" 7 | data: 8 | root_dir: ??? 9 | height: 256 10 | width: 256 11 | scale_radius: 3.0 12 | load_preprocessed: false 13 | cam_scale_factor: 0.95 # inherited from plenoxels 14 | max_num_frames: 300 # use less frames for debugging 15 | v2_mode: true 16 | use_mask: true 17 | box_crop: true 18 | box_crop_mask_thr: 0.4 19 | box_crop_context: 0.1 # The amount of additional padding added to each dimention of the cropping bounding box, relative to vox size. 20 | train_num_rays: 4096 21 | train_split: "train" 22 | val_split: "val" 23 | test_split: "test" 24 | render_path: "circle" 25 | train_views: [0, 50, 100] 26 | random_camera: 27 | eval_height: 256 28 | eval_width: 256 29 | eval_elevation_deg: 0. 30 | eval_camera_distance: 1.2 31 | eval_fovy_deg: 60. 32 | 33 | system_type: "image-condition-dreamfusion-system" 34 | system: 35 | geometry_type: "implicit-volume" 36 | geometry: 37 | isosurface_method: "mc-cpu" 38 | isosurface_resolution: 128 39 | isosurface_threshold: 0.0 40 | normal_type: "finite_difference" 41 | finite_difference_normal_eps: 0.004 42 | n_feature_dims: 32 43 | mlp_network_config: 44 | otype: "VanillaMLP" 45 | activation: "ReLU" 46 | output_activation: "none" 47 | n_neurons: 64 48 | n_hidden_layers: 2 49 | 50 | material_type: "diffuse-with-point-light-material" 51 | material: 52 | diffuse_prob: 1.0 53 | textureless_prob: 0.2 54 | ambient_light_color: [1.0, 1.0, 1.0] 55 | diffuse_light_color: [0.0, 0.0, 0.0] 56 | ambient_only_steps: ${system.freq.ref_only_steps} 57 | 58 | background_type: "neural-environment-map-background" 59 | background: 60 | dir_encoding_config: 61 | otype: ProgressiveBandFrequency 62 | n_frequencies: 6 63 | mlp_network_config: 64 | otype: VanillaMLP 65 | n_neurons: 32 66 | n_hidden_layers: 1 67 | activation: "ReLU" 68 | 69 | renderer_type: "nerf-volume-renderer" 70 | renderer: 71 | num_samples_per_ray: 512 72 | 73 | prompt_processor_type: "stable-diffusion-prompt-processor" 74 | prompt_processor: 75 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 76 | prompt: ??? 
77 | 78 | guidance_type: "stable-diffusion-guidance" 79 | guidance: 80 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 81 | guidance_scale: 100. 82 | weighting_strategy: sds 83 | 84 | freq: 85 | n_ref: 2 86 | ref_only_steps: 1000 87 | 88 | loggers: 89 | wandb: 90 | enable: false 91 | project: 'threestudio' 92 | name: None 93 | 94 | loss: 95 | lambda_sds: 0.1 96 | lambda_rgb: 10. 97 | lambda_mask: 1. 98 | lambda_depth: 0. 99 | # lambda_depth: [0.0, 0.0, 1.0, 10000] 100 | lambda_normal_smooth: 0.0 101 | lambda_orient: 1.0 102 | # lambda_orient: [1000, 0.0, 10, 6000] 103 | lambda_sparsity: 0.0 104 | lambda_opaque: 0.01 105 | optimizer: 106 | name: Adan 107 | args: 108 | eps: 1.0e-8 109 | weight_decay: 2.0e-5 110 | max_grad_norm: 5.0 111 | foreach: False 112 | params: 113 | geometry.encoding: 114 | lr: 0.05 115 | geometry.network: 116 | lr: 0.005 117 | background.network: 118 | lr: 0.005 119 | 120 | trainer: 121 | max_steps: 10000 122 | log_every_n_steps: 1 123 | num_sanity_val_steps: 0 124 | val_check_interval: 500 125 | limit_val_batches: 6 126 | enable_progress_bar: true 127 | precision: 16-mixed 128 | 129 | checkpoint: 130 | save_last: true # save at each validation time 131 | save_top_k: -1 132 | every_n_train_steps: ${trainer.max_steps} 133 | -------------------------------------------------------------------------------- /configs/fantasia3d-texture.yaml: -------------------------------------------------------------------------------- 1 | name: "fantasia3d-texture" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [3, 3] 12 | fovy_range: [25, 45] 13 | camera_perturb: 0. 14 | center_perturb: 0. 15 | up_perturb: 0. 16 | elevation_range: [-10, 45] 17 | azimuth_range: [-180, 180] 18 | batch_uniform_azimuth: true 19 | eval_camera_distance: 3. 20 | eval_fovy_deg: 45. 21 | 22 | system_type: "fantasia3d-system" 23 | system: 24 | # do texture training 25 | texture: true 26 | geometry_convert_from: ??? 27 | geometry_convert_inherit_texture: false 28 | geometry_type: "tetrahedra-sdf-grid" 29 | geometry: 30 | radius: 1.0 # consistent with coarse 31 | isosurface_resolution: 128 32 | isosurface_deformable_grid: true 33 | pos_encoding_config: 34 | otype: HashGrid 35 | n_levels: 16 36 | n_features_per_level: 2 37 | log2_hashmap_size: 19 38 | base_resolution: 16 39 | per_level_scale: 1.4472692374403782 # max resolution 4096 40 | n_feature_dims: 8 # albedo3 + roughness1 + metallic1 + bump3 41 | fix_geometry: true 42 | 43 | material_type: "pbr-material" 44 | material: 45 | material_activation: sigmoid 46 | environment_texture: "load/lights/mud_road_puresky_1k.hdr" 47 | environment_scale: 2.0 48 | min_metallic: 0.0 49 | max_metallic: 0.9 50 | min_roughness: 0.08 51 | max_roughness: 0.9 52 | use_bump: true 53 | 54 | background_type: "solid-color-background" 55 | 56 | renderer_type: "nvdiff-rasterizer" 57 | renderer: 58 | context_type: cuda 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 63 | prompt: ??? 
64 | 65 | guidance_type: "stable-diffusion-guidance" 66 | guidance: 67 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 68 | guidance_scale: 100 69 | weighting_strategy: sds 70 | min_step_percent: 0.02 71 | max_step_percent: 0.50 72 | 73 | loggers: 74 | wandb: 75 | enable: false 76 | project: "threestudio" 77 | 78 | loss: 79 | lambda_sds: 1. 80 | lambda_normal_consistency: 0. 81 | 82 | optimizer: 83 | name: AdamW 84 | args: 85 | lr: 0.01 86 | betas: [0.9, 0.99] 87 | eps: 1.e-15 88 | 89 | trainer: 90 | max_steps: 5000 91 | log_every_n_steps: 1 92 | num_sanity_val_steps: 1 93 | val_check_interval: 500 94 | enable_progress_bar: true 95 | precision: 16-mixed 96 | 97 | checkpoint: 98 | save_last: true # save at each validation time 99 | save_top_k: -1 100 | every_n_train_steps: ${trainer.max_steps} 101 | -------------------------------------------------------------------------------- /configs/fantasia3d.yaml: -------------------------------------------------------------------------------- 1 | name: "fantasia3d" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [3, 3] 12 | fovy_range: [25, 45] 13 | camera_perturb: 0. 14 | center_perturb: 0. 15 | up_perturb: 0. 16 | elevation_range: [-10, 45] 17 | azimuth_range: [-180, 180] 18 | batch_uniform_azimuth: true 19 | eval_camera_distance: 3. 20 | eval_fovy_deg: 45. 21 | 22 | system_type: "fantasia3d-system" 23 | system: 24 | latent_steps: 1000 25 | geometry_type: "implicit-sdf" 26 | geometry: 27 | radius: 1.0 28 | n_feature_dims: 0 29 | isosurface_resolution: 128 30 | isosurface_deformable_grid: true 31 | isosurface_coarse_to_fine: false 32 | 33 | # initialize SDF by optimization 34 | shape_init: sphere 35 | shape_init_params: 0.5 36 | 37 | # or you can initialize SDF using a guide mesh 38 | # shape_init: mesh:load/shapes/human.obj 39 | # shape_init_params: 0.9 40 | # shape_init_mesh_up: +y 41 | # shape_init_mesh_front: +z 42 | 43 | # an alternative initialization implementation: 44 | # you can initialize SDF to sphere/ellipsoid by adding a bias value 45 | # which leads to more smooth initialized shape 46 | # sdf_bias: sphere 47 | # sdf_bias_params: 0.5 48 | # DO NOT use the two initialization methods together 49 | 50 | material_type: "no-material" # unused 51 | material: 52 | n_output_dims: 0 53 | 54 | background_type: "solid-color-background" # unused 55 | 56 | renderer_type: "nvdiff-rasterizer" 57 | renderer: 58 | context_type: cuda 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 63 | prompt: ??? 64 | 65 | guidance_type: "stable-diffusion-guidance" 66 | guidance: 67 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 68 | guidance_scale: 100. 69 | max_step_percent: 0.5 70 | weighting_strategy: fantasia3d 71 | 72 | loggers: 73 | wandb: 74 | enable: false 75 | project: 'threestudio' 76 | name: None 77 | 78 | loss: 79 | lambda_sds: 1. 80 | lambda_normal_consistency: 0. 
81 | 82 | optimizer: 83 | name: AdamW 84 | args: 85 | lr: 0.001 86 | betas: [0.9, 0.99] 87 | eps: 1.e-15 88 | 89 | trainer: 90 | max_steps: 10000 91 | log_every_n_steps: 1 92 | num_sanity_val_steps: 1 93 | val_check_interval: 500 94 | enable_progress_bar: true 95 | precision: 16-mixed 96 | 97 | checkpoint: 98 | save_last: true # save at each validation time 99 | save_top_k: -1 100 | every_n_train_steps: ${trainer.max_steps} 101 | -------------------------------------------------------------------------------- /configs/gaussian_splatting.yaml: -------------------------------------------------------------------------------- 1 | name: "gs-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs_gs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 4 9 | width: 512 10 | height: 512 11 | camera_distance_range: [2.5, 2.5] 12 | fovy_range: [60, 70] 13 | elevation_range: [0, 30] # [-20, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.5 16 | eval_fovy_deg: 70 17 | near_far: [0.01, 100] 18 | 19 | system_type: "gaussian-splatting-system" 20 | system: 21 | invert_bg_prob: 0.0 22 | 23 | geometry_type: "gaussian" 24 | geometry: 25 | position_lr_init: 0.005 26 | position_lr_final: 0.00003 27 | position_lr_delay_mult: 0.02 28 | position_lr_max_steps: ${trainer.max_steps} 29 | scale_lr_init: 0.003 30 | scale_lr_final: 0.001 31 | scale_lr_max_steps: ${trainer.max_steps} 32 | feature_lr: 0.01 33 | opacity_lr: 0.003 34 | scaling_lr: 0.003 35 | rotation_lr: 0.003 36 | densification_interval: 1000 37 | prune_interval: 500 38 | opacity_reset_interval: 100000 39 | densify_from_iter: 1000 40 | densify_until_iter: 10000 41 | prune_from_iter: 500 42 | prune_until_iter: ${trainer.max_steps} 43 | # prune_until_iter: 0 44 | densify_grad_threshold: 0.02 45 | min_opac_prune: 0.05 46 | split_thresh: 0.02 47 | radii2d_thresh: 1000 48 | init_num_pts: 4096 49 | pc_init_radius: 0.8 50 | opacity_init: 0.8 51 | scales_init: 0.02 # 0.04 # ? 0.02 52 | # mesh init 53 | init: true 54 | type: mesh 55 | mesh: debug_data/sample_128.ply 56 | rotate_xy: true 57 | flip_z: true 58 | flip_x: true 59 | prompt: a human face 60 | num_points: 4096 61 | mean_std: 0.8 62 | svec_val: 0.02 63 | alpha_val: 0.8 64 | random_color: true 65 | facex: true 66 | 67 | renderer_type: "diff-gaussian-rasterizer" 68 | renderer: 69 | debug: false 70 | 71 | material_type: "no-material" # unused 72 | material: 73 | n_output_dims: 0 74 | 75 | background_type: "solid-color-background" # unused 76 | 77 | prompt_processor_type: "stable-diffusion-prompt-processor" 78 | prompt_processor: 79 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 80 | prompt: ??? 
81 | 82 | guidance_type: "stable-diffusion-guidance" 83 | guidance: 84 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 85 | guidance_scale: 100.0 86 | weighting_strategy: sds 87 | min_step_percent: 0.02 88 | max_step_percent: [2000, 0.98, 0.5, 2001] 89 | 90 | loggers: 91 | wandb: 92 | enable: false 93 | project: 'threestudio' 94 | name: None 95 | 96 | loss: 97 | lambda_sds: 0.1 98 | lambda_position: 0.0 99 | lambda_opacity: 0.0 100 | 101 | trainer: 102 | max_steps: 15000 103 | log_every_n_steps: 1 104 | num_sanity_val_steps: 0 105 | val_check_interval: 100 106 | enable_progress_bar: true 107 | precision: 32-true 108 | 109 | checkpoint: 110 | save_last: true # save at each validation time 111 | save_top_k: -1 112 | every_n_train_steps: ${trainer.max_steps} -------------------------------------------------------------------------------- /configs/gradio/dreamfusion-if.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.381912879967776 # max resolution 2048 47 | start_level: 10 # resolution ~300 48 | start_step: 2000 49 | update_steps: 400 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: scale_-11_01 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: scale_-11_01 59 | random_aug: true 60 | 61 | renderer_type: "nerf-volume-renderer" 62 | renderer: 63 | radius: ${system.geometry.radius} 64 | num_samples_per_ray: 512 65 | 66 | prompt_processor_type: "deep-floyd-prompt-processor" 67 | prompt_processor: 68 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 69 | prompt: ??? 70 | 71 | guidance_type: "deep-floyd-guidance" 72 | guidance: 73 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 74 | guidance_scale: 20. 75 | weighting_strategy: sds 76 | min_step_percent: 0.02 77 | max_step_percent: 0.98 78 | 79 | exporter_type: "mesh-exporter" 80 | exporter: 81 | fmt: obj 82 | save_uv: false 83 | context_type: cuda 84 | 85 | loggers: 86 | wandb: 87 | enable: false 88 | project: "threestudio" 89 | name: None 90 | 91 | loss: 92 | lambda_sds: 1. 
93 | lambda_orient: [0, 10., 1000., 5000] 94 | lambda_sparsity: 1. 95 | lambda_opaque: 0.0 96 | optimizer: 97 | name: Adam 98 | args: 99 | lr: 0.01 100 | betas: [0.9, 0.99] 101 | eps: 1.e-15 102 | params: 103 | geometry: 104 | lr: 0.01 105 | background: 106 | lr: 0.001 107 | 108 | trainer: 109 | max_steps: 5000 110 | log_every_n_steps: 1 111 | num_sanity_val_steps: 0 112 | val_check_interval: 100 113 | enable_progress_bar: true 114 | precision: 16-mixed 115 | 116 | checkpoint: 117 | save_last: false 118 | save_top_k: -1 119 | every_n_train_steps: 0 # do not save checkpoints during training 120 | -------------------------------------------------------------------------------- /configs/gradio/dreamfusion-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.381912879967776 # max resolution 2048 47 | start_level: 10 # resolution ~300 48 | start_step: 2000 49 | update_steps: 400 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: sigmoid 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: sigmoid 59 | random_aug: true 60 | 61 | renderer_type: "nerf-volume-renderer" 62 | renderer: 63 | radius: ${system.geometry.radius} 64 | num_samples_per_ray: 512 65 | 66 | prompt_processor_type: "stable-diffusion-prompt-processor" 67 | prompt_processor: 68 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 69 | prompt: ??? 70 | 71 | guidance_type: "stable-diffusion-guidance" 72 | guidance: 73 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 74 | guidance_scale: 100. 75 | weighting_strategy: sds 76 | min_step_percent: 0.02 77 | max_step_percent: 0.98 78 | grad_clip: [0, 0.5, 2.0, 5000] 79 | 80 | exporter_type: "mesh-exporter" 81 | exporter: 82 | fmt: obj 83 | save_uv: false 84 | context_type: cuda 85 | 86 | loggers: 87 | wandb: 88 | enable: false 89 | project: "threestudio" 90 | name: None 91 | 92 | loss: 93 | lambda_sds: 1. 94 | lambda_orient: [0, 10., 1000., 5000] 95 | lambda_sparsity: 1. 96 | lambda_opaque: 0. 
97 | optimizer: 98 | name: Adam 99 | args: 100 | lr: 0.01 101 | betas: [0.9, 0.99] 102 | eps: 1.e-15 103 | params: 104 | geometry: 105 | lr: 0.01 106 | background: 107 | lr: 0.001 108 | 109 | trainer: 110 | max_steps: 5000 111 | log_every_n_steps: 1 112 | num_sanity_val_steps: 0 113 | val_check_interval: 100 114 | enable_progress_bar: true 115 | precision: 16-mixed 116 | 117 | checkpoint: 118 | save_last: false 119 | save_top_k: -1 120 | every_n_train_steps: 0 # do not save checkpoints during training 121 | -------------------------------------------------------------------------------- /configs/gradio/fantasia3d.yaml: -------------------------------------------------------------------------------- 1 | name: "fantasia3d" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [3, 3] 12 | fovy_range: [25, 45] 13 | camera_perturb: 0. 14 | center_perturb: 0. 15 | up_perturb: 0. 16 | elevation_range: [-10, 45] 17 | azimuth_range: [-180, 180] 18 | batch_uniform_azimuth: true 19 | eval_camera_distance: 3. 20 | eval_fovy_deg: 45. 21 | 22 | system_type: "fantasia3d-system" 23 | system: 24 | latent_steps: 1000 25 | geometry_type: "implicit-sdf" 26 | geometry: 27 | radius: 1.0 28 | n_feature_dims: 0 29 | isosurface_resolution: 128 30 | isosurface_deformable_grid: true 31 | isosurface_coarse_to_fine: false 32 | 33 | # initialize SDF by optimization 34 | shape_init: sphere 35 | shape_init_params: 0.5 36 | 37 | # or you can initialize SDF using a guide mesh 38 | # shape_init: mesh:load/shapes/human.obj 39 | # shape_init_params: 0.9 40 | # shape_init_mesh_up: +y 41 | # shape_init_mesh_front: +z 42 | 43 | # an alternative initialization implementation: 44 | # you can initialize SDF to sphere/ellipsoid by adding a bias value 45 | # which leads to more smooth initialized shape 46 | # sdf_bias: sphere 47 | # sdf_bias_params: 0.5 48 | # DO NOT use the two initialization methods together 49 | 50 | material_type: "no-material" # unused 51 | material: 52 | n_output_dims: 0 53 | 54 | background_type: "solid-color-background" # unused 55 | 56 | renderer_type: "nvdiff-rasterizer" 57 | renderer: 58 | context_type: cuda 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 63 | prompt: ??? 64 | 65 | guidance_type: "stable-diffusion-guidance" 66 | guidance: 67 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 68 | guidance_scale: 100. 69 | max_step_percent: 0.5 70 | weighting_strategy: fantasia3d 71 | 72 | exporter_type: "mesh-exporter" 73 | exporter: 74 | fmt: obj 75 | save_uv: false 76 | save_texture: false 77 | context_type: cuda 78 | 79 | loggers: 80 | wandb: 81 | enable: false 82 | project: "threestudio" 83 | name: None 84 | 85 | loss: 86 | lambda_sds: 1. 87 | lambda_normal_consistency: 0. 
88 | 89 | optimizer: 90 | name: AdamW 91 | args: 92 | lr: 0.001 93 | betas: [0.9, 0.99] 94 | eps: 1.e-15 95 | 96 | trainer: 97 | max_steps: 5000 98 | log_every_n_steps: 1 99 | num_sanity_val_steps: 1 100 | val_check_interval: 200 101 | enable_progress_bar: true 102 | precision: 16-mixed 103 | 104 | checkpoint: 105 | save_last: false 106 | save_top_k: -1 107 | every_n_train_steps: 0 # do not save checkpoints during training 108 | -------------------------------------------------------------------------------- /configs/gradio/latentnerf.yaml: -------------------------------------------------------------------------------- 1 | name: "latentnerf" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | geometry_type: "implicit-volume" 13 | geometry: 14 | n_feature_dims: 4 15 | normal_type: null 16 | 17 | density_bias: "blob_dreamfusion" 18 | density_activation: trunc_exp 19 | density_blob_scale: 5. 20 | density_blob_std: 0.2 21 | 22 | pos_encoding_config: 23 | otype: HashGrid 24 | n_levels: 16 25 | n_features_per_level: 2 26 | log2_hashmap_size: 19 27 | base_resolution: 16 28 | per_level_scale: 1.381912879967776 # max resolution 2048 29 | 30 | material_type: "no-material" 31 | material: 32 | n_output_dims: 4 33 | color_activation: none 34 | 35 | background_type: "neural-environment-map-background" 36 | background: 37 | n_output_dims: 4 38 | color_activation: none 39 | 40 | renderer_type: "nerf-volume-renderer" 41 | renderer: 42 | num_samples_per_ray: 512 43 | 44 | prompt_processor_type: "stable-diffusion-prompt-processor" 45 | prompt_processor: 46 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 47 | prompt: ??? 48 | 49 | guidance_type: "stable-diffusion-guidance" 50 | guidance: 51 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 52 | guidance_scale: 100. 53 | weighting_strategy: sds 54 | grad_clip: [0, 2.0, 8.0, 5000] 55 | 56 | exporter_type: "dummy-exporter" 57 | 58 | loggers: 59 | wandb: 60 | enable: false 61 | project: "threestudio" 62 | name: None 63 | 64 | loss: 65 | lambda_sds: 1. 
66 | lambda_sparsity: 5.e-4 67 | lambda_opaque: 0.0 68 | lambda_orient: 0.0 69 | optimizer: 70 | name: Adam 71 | args: 72 | lr: 0.01 73 | betas: [0.9, 0.99] 74 | eps: 1.e-15 75 | scheduler: 76 | name: SequentialLR 77 | interval: step 78 | warmup_steps: 100 79 | milestones: 80 | - ${system.scheduler.warmup_steps} 81 | schedulers: 82 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 83 | args: 84 | start_factor: 0.1 85 | end_factor: 1.0 86 | total_iters: ${system.scheduler.warmup_steps} 87 | - name: ExponentialLR 88 | args: 89 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 90 | 91 | trainer: 92 | max_steps: 5000 93 | log_every_n_steps: 1 94 | num_sanity_val_steps: 0 95 | val_check_interval: 200 96 | enable_progress_bar: true 97 | precision: 16-mixed 98 | 99 | checkpoint: 100 | save_last: false 101 | save_top_k: -1 102 | every_n_train_steps: 0 # do not save checkpoints during training 103 | -------------------------------------------------------------------------------- /configs/gradio/sjc.yaml: -------------------------------------------------------------------------------- 1 | name: sjc 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: random-camera-datamodule 7 | data: 8 | camera_distance_range: [1.50, 1.50] 9 | elevation_range: [-10, 45] 10 | camera_perturb: 0.0 11 | center_perturb: 0.0 12 | up_perturb: 0.0 13 | light_position_perturb: 0.0 14 | eval_elevation_deg: 20.0 15 | 16 | system_type: sjc-system 17 | system: 18 | subpixel_rendering: false 19 | 20 | geometry_type: volume-grid 21 | geometry: 22 | normal_type: null 23 | grid_size: [100, 100, 100] 24 | density_bias: -1.0 25 | n_feature_dims: 4 26 | 27 | material_type: no-material 28 | material: 29 | n_output_dims: 4 30 | color_activation: none 31 | 32 | background_type: textured-background 33 | background: 34 | n_output_dims: 4 35 | color_activation: none 36 | height: 4 37 | width: 4 38 | 39 | renderer_type: nerf-volume-renderer 40 | renderer: 41 | num_samples_per_ray: 512 42 | grid_prune: false 43 | 44 | prompt_processor_type: stable-diffusion-prompt-processor 45 | prompt_processor: 46 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 47 | prompt: ??? 48 | view_dependent_prompt_front: true 49 | 50 | guidance_type: stable-diffusion-guidance 51 | guidance: 52 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 53 | guidance_scale: 100. 54 | use_sjc: true 55 | var_red: true 56 | min_step_percent: 0.01 57 | max_step_percent: 0.97 58 | grad_clip: [0, 2.0, 8.0, 5000] 59 | 60 | exporter_type: "dummy-exporter" 61 | 62 | loggers: 63 | wandb: 64 | enable: false 65 | project: "threestudio" 66 | name: None 67 | 68 | loss: 69 | lambda_sds: 1. 
70 | center_ratio: 0.78125 # = 50 / 64 71 | lambda_depth: 0 # or try 10 72 | lambda_emptiness: [5000, 1.e+4, 2.e+5, 5001] 73 | emptiness_scale: 10 74 | 75 | optimizer: 76 | name: Adamax 77 | args: 78 | lr: 0.05 79 | params: 80 | geometry: 81 | lr: 0.05 82 | background: 83 | lr: 0.0001 # maybe 0.001/0.01 is better 84 | 85 | trainer: 86 | max_steps: 5000 87 | log_every_n_steps: 1 88 | num_sanity_val_steps: 0 89 | val_check_interval: 200 90 | enable_progress_bar: true 91 | precision: 16-mixed 92 | 93 | checkpoint: 94 | save_last: false 95 | save_top_k: -1 96 | every_n_train_steps: 0 # do not save checkpoints during training 97 | -------------------------------------------------------------------------------- /configs/gradio/textmesh-if.yaml: -------------------------------------------------------------------------------- 1 | name: "textmesh-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "textmesh-system" 19 | system: 20 | geometry_type: "implicit-sdf" 21 | geometry: 22 | radius: 2.0 23 | normal_type: finite_difference 24 | # progressive eps from Neuralangelo 25 | finite_difference_normal_eps: progressive 26 | 27 | sdf_bias: sphere 28 | sdf_bias_params: 0.5 29 | 30 | # coarse to fine hash grid encoding 31 | pos_encoding_config: 32 | otype: ProgressiveBandHashGrid 33 | n_levels: 16 34 | n_features_per_level: 2 35 | log2_hashmap_size: 19 36 | base_resolution: 16 37 | per_level_scale: 1.381912879967776 # max resolution 2048 38 | start_level: 10 # resolution ~300 39 | start_step: 2000 40 | update_steps: 400 41 | 42 | material_type: "diffuse-with-point-light-material" 43 | material: 44 | ambient_only_steps: 2001 45 | albedo_activation: sigmoid 46 | 47 | background_type: "neural-environment-map-background" 48 | background: 49 | color_activation: sigmoid 50 | random_aug: true 51 | 52 | renderer_type: "neus-volume-renderer" 53 | renderer: 54 | radius: ${system.geometry.radius} 55 | num_samples_per_ray: 512 56 | cos_anneal_end_steps: ${trainer.max_steps} 57 | eval_chunk_size: 8192 58 | 59 | prompt_processor_type: "deep-floyd-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 62 | prompt: ??? 63 | 64 | guidance_type: "deep-floyd-guidance" 65 | guidance: 66 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 67 | guidance_scale: 20. 68 | weighting_strategy: sds 69 | min_step_percent: 0.02 70 | max_step_percent: 0.98 71 | 72 | exporter_type: "mesh-exporter" 73 | exporter: 74 | fmt: obj 75 | save_uv: false 76 | context_type: cuda 77 | 78 | loss: 79 | lambda_sds: 1. 80 | lambda_orient: 0.0 81 | lambda_sparsity: 0.0 82 | lambda_opaque: 0.0 83 | lambda_eikonal: 1000. 
84 | optimizer: 85 | name: Adam 86 | args: 87 | betas: [0.9, 0.99] 88 | eps: 1.e-15 89 | params: 90 | geometry.encoding: 91 | lr: 0.01 92 | geometry.sdf_network: 93 | lr: 0.001 94 | geometry.feature_network: 95 | lr: 0.001 96 | background: 97 | lr: 0.001 98 | renderer: 99 | lr: 0.001 100 | 101 | trainer: 102 | max_steps: 5000 103 | log_every_n_steps: 1 104 | num_sanity_val_steps: 0 105 | val_check_interval: 100 106 | enable_progress_bar: true 107 | precision: 16-mixed 108 | 109 | checkpoint: 110 | save_last: false 111 | save_top_k: -1 112 | every_n_train_steps: 0 # do not save checkpoints during training 113 | -------------------------------------------------------------------------------- /configs/instructnerf2nerf.yaml: -------------------------------------------------------------------------------- 1 | name: "instructnerf2nerf" 2 | tag: "${basename:${data.dataroot}}_${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "multiview-camera-datamodule" 7 | data: 8 | train_downsample_resolution: 2 9 | eval_downsample_resolution: 2 10 | dataroot: ??? 11 | 12 | system_type: "instructnerf2nerf-system" 13 | system: 14 | start_editing_step: 600 15 | per_editing_step: 10 16 | 17 | geometry_type: "implicit-volume" 18 | geometry: 19 | radius: 1. 20 | normal_type: analytic 21 | 22 | pos_encoding_config: 23 | otype: HashGrid 24 | n_levels: 16 25 | n_features_per_level: 2 26 | log2_hashmap_size: 19 27 | base_resolution: 16 28 | per_level_scale: 1.4472692374403782 # max resolution 4096 29 | 30 | density_bias: "blob_magic3d" 31 | density_activation: softplus 32 | density_blob_scale: 10. 33 | density_blob_std: 0.5 34 | 35 | 36 | material_type: "diffuse-with-point-light-material" 37 | material: 38 | ambient_only_steps: 9999999 39 | albedo_activation: sigmoid 40 | 41 | background_type: "neural-environment-map-background" 42 | background: 43 | color_activation: sigmoid 44 | random_aug: false 45 | 46 | renderer_type: "patch-renderer" 47 | renderer: 48 | base_renderer_type: "nerf-volume-renderer" 49 | base_renderer: 50 | radius: ${system.geometry.radius} 51 | num_samples_per_ray: 384 52 | patch_size: 128 53 | 54 | guidance_type: "stable-diffusion-instructpix2pix-guidance" 55 | guidance: 56 | min_step_percent: 0.02 57 | max_step_percent: 0.98 58 | 59 | prompt_processor_type: "stable-diffusion-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 62 | prompt: "Turn him into Elon Musk" 63 | 64 | loggers: 65 | wandb: 66 | enable: false 67 | project: 'threestudio' 68 | 69 | loss: 70 | lambda_sds: 0. 71 | lambda_orient: [0, 10.0, 1000., 5000.0] 72 | lambda_sparsity: 1.0 73 | lambda_opaque: 1.0 74 | lambda_l1: 10. 75 | lambda_p: 10. 
76 | optimizer: 77 | name: Adam 78 | args: 79 | lr: 0.01 80 | betas: [0.9, 0.99] 81 | eps: 1.e-15 82 | params: 83 | geometry: 84 | lr: 0.01 85 | background: 86 | lr: 0.001 87 | 88 | trainer: 89 | max_steps: 20000 90 | log_every_n_steps: 1 91 | num_sanity_val_steps: 0 92 | val_check_interval: 600 93 | enable_progress_bar: true 94 | precision: 16-mixed 95 | 96 | checkpoint: 97 | save_last: true 98 | save_top_k: -1 99 | every_n_train_steps: ${trainer.max_steps} 100 | -------------------------------------------------------------------------------- /configs/latentnerf-refine.yaml: -------------------------------------------------------------------------------- 1 | name: "latentnerf-refine" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | refinement: true 13 | weights: ??? 14 | weights_ignore_modules: ["material", "background"] 15 | 16 | geometry_type: "implicit-volume" 17 | geometry: 18 | n_feature_dims: 4 19 | normal_type: null 20 | 21 | density_bias: "blob_dreamfusion" 22 | density_activation: trunc_exp 23 | density_blob_scale: 5. 24 | density_blob_std: 0.2 25 | 26 | material_type: "sd-latent-adapter-material" 27 | 28 | background_type: "neural-environment-map-background" 29 | 30 | renderer_type: "nerf-volume-renderer" 31 | renderer: 32 | num_samples_per_ray: 512 33 | 34 | prompt_processor_type: "stable-diffusion-prompt-processor" 35 | prompt_processor: 36 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 37 | prompt: ??? 38 | 39 | guidance_type: "stable-diffusion-guidance" 40 | guidance: 41 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 42 | guidance_scale: 100. 43 | weighting_strategy: sds 44 | 45 | loggers: 46 | wandb: 47 | enable: false 48 | project: "threestudio" 49 | name: None 50 | 51 | loss: 52 | lambda_sds: 1. 
53 | lambda_sparsity: 5.e-4 54 | lambda_opaque: 0.0 55 | lambda_orient: 0.0 56 | optimizer: 57 | name: Adam 58 | args: 59 | lr: 0.01 60 | betas: [0.9, 0.99] 61 | eps: 1.e-15 62 | scheduler: 63 | name: SequentialLR 64 | interval: step 65 | warmup_steps: 100 66 | milestones: 67 | - ${system.scheduler.warmup_steps} 68 | schedulers: 69 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 70 | args: 71 | start_factor: 0.1 72 | end_factor: 1.0 73 | total_iters: ${system.scheduler.warmup_steps} 74 | - name: ExponentialLR 75 | args: 76 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 77 | 78 | trainer: 79 | max_steps: 10000 80 | log_every_n_steps: 1 81 | num_sanity_val_steps: 1 82 | val_check_interval: 200 83 | enable_progress_bar: true 84 | precision: 16-mixed 85 | 86 | checkpoint: 87 | save_last: true # save at each validation time 88 | save_top_k: -1 89 | every_n_train_steps: ${trainer.max_steps} 90 | -------------------------------------------------------------------------------- /configs/latentnerf.yaml: -------------------------------------------------------------------------------- 1 | name: "latentnerf" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | geometry_type: "implicit-volume" 13 | geometry: 14 | n_feature_dims: 4 15 | normal_type: null 16 | 17 | density_bias: "blob_dreamfusion" 18 | density_activation: trunc_exp 19 | density_blob_scale: 5. 20 | density_blob_std: 0.2 21 | 22 | material_type: "no-material" 23 | material: 24 | n_output_dims: 4 25 | color_activation: none 26 | 27 | background_type: "neural-environment-map-background" 28 | background: 29 | n_output_dims: 4 30 | color_activation: none 31 | 32 | renderer_type: "nerf-volume-renderer" 33 | renderer: 34 | num_samples_per_ray: 512 35 | 36 | prompt_processor_type: "stable-diffusion-prompt-processor" 37 | prompt_processor: 38 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 39 | prompt: ??? 40 | 41 | guidance_type: "stable-diffusion-guidance" 42 | guidance: 43 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 44 | guidance_scale: 100. 45 | weighting_strategy: sds 46 | 47 | loggers: 48 | wandb: 49 | enable: false 50 | project: "threestudio" 51 | name: None 52 | 53 | loss: 54 | lambda_sds: 1. 
55 | lambda_sparsity: 5.e-4 56 | lambda_opaque: 0.0 57 | lambda_orient: 0.0 58 | optimizer: 59 | name: Adam 60 | args: 61 | lr: 0.01 62 | betas: [0.9, 0.99] 63 | eps: 1.e-15 64 | scheduler: 65 | name: SequentialLR 66 | interval: step 67 | warmup_steps: 100 68 | milestones: 69 | - ${system.scheduler.warmup_steps} 70 | schedulers: 71 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 72 | args: 73 | start_factor: 0.1 74 | end_factor: 1.0 75 | total_iters: ${system.scheduler.warmup_steps} 76 | - name: ExponentialLR 77 | args: 78 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 79 | 80 | trainer: 81 | max_steps: 10000 82 | log_every_n_steps: 1 83 | num_sanity_val_steps: 0 84 | val_check_interval: 200 85 | enable_progress_bar: true 86 | precision: 16-mixed 87 | 88 | checkpoint: 89 | save_last: true # save at each validation time 90 | save_top_k: -1 91 | every_n_train_steps: ${trainer.max_steps} 92 | -------------------------------------------------------------------------------- /configs/magic3d-coarse-if.yaml: -------------------------------------------------------------------------------- 1 | name: "magic3d-coarse-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | width: 64 9 | height: 64 10 | camera_distance_range: [1.5, 2.0] 11 | light_sample_strategy: "magic3d" 12 | eval_camera_distance: 2.0 13 | eval_fovy_deg: 70. 14 | 15 | system_type: "magic3d-system" 16 | system: 17 | geometry_type: "implicit-volume" 18 | geometry: 19 | radius: 2. 20 | normal_type: analytic 21 | pos_encoding_config: 22 | otype: HashGrid 23 | n_levels: 16 24 | n_features_per_level: 2 25 | log2_hashmap_size: 19 26 | base_resolution: 16 27 | per_level_scale: 1.4472692374403782 # max resolution 4096 28 | density_bias: "blob_magic3d" 29 | density_activation: softplus 30 | density_blob_scale: 10. 31 | density_blob_std: 0.5 32 | isosurface_resolution: 128 33 | isosurface_threshold: auto 34 | isosurface_coarse_to_fine: true 35 | 36 | material_type: "diffuse-with-point-light-material" 37 | material: 38 | ambient_only_steps: 2001 39 | soft_shading: true 40 | 41 | background_type: "neural-environment-map-background" 42 | 43 | renderer_type: "nerf-volume-renderer" 44 | renderer: 45 | radius: ${system.geometry.radius} 46 | num_samples_per_ray: 512 47 | 48 | prompt_processor_type: "deep-floyd-prompt-processor" 49 | prompt_processor: 50 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 51 | prompt: ??? 52 | 53 | guidance_type: "deep-floyd-guidance" 54 | guidance: 55 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 56 | weighting_strategy: uniform 57 | guidance_scale: 20. 58 | min_step_percent: 0.02 59 | max_step_percent: 0.98 60 | 61 | loggers: 62 | wandb: 63 | enable: false 64 | project: 'threestudio' 65 | name: None 66 | 67 | loss: 68 | lambda_sds: 1. 69 | lambda_orient: [0, 10., 1000., 5000] 70 | lambda_sparsity: 1. 71 | lambda_opaque: 0. 
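# NOTE: list-valued loss weights such as lambda_orient above appear to use threestudio's
# scheduled-value convention (start_step, start_value, end_value, end_step), the same format
# documented for min_step_percent/max_step_percent in the MVDream configs further down:
# read this way, the orientation loss weight would ramp from 10. at step 0 to 1000. at step 5000.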
72 | optimizer: 73 | name: Adam 74 | args: 75 | lr: 0.01 76 | betas: [0.9, 0.99] 77 | eps: 1.e-15 78 | params: 79 | geometry: 80 | lr: 0.01 81 | background: 82 | lr: 0.001 83 | 84 | trainer: 85 | max_steps: 10000 86 | log_every_n_steps: 1 87 | num_sanity_val_steps: 0 88 | val_check_interval: 200 89 | enable_progress_bar: true 90 | precision: 16-mixed 91 | 92 | checkpoint: 93 | save_last: true 94 | save_top_k: -1 95 | every_n_train_steps: ${trainer.max_steps} 96 | -------------------------------------------------------------------------------- /configs/magic3d-coarse-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "magic3d-coarse-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | width: 64 9 | height: 64 10 | camera_distance_range: [1.5, 2.0] 11 | elevation_range: [-10, 45] 12 | light_sample_strategy: "magic3d" 13 | eval_camera_distance: 2.0 14 | eval_fovy_deg: 70. 15 | 16 | system_type: "magic3d-system" 17 | system: 18 | geometry_type: "implicit-volume" 19 | geometry: 20 | radius: 2. 21 | normal_type: analytic 22 | pos_encoding_config: 23 | otype: HashGrid 24 | n_levels: 16 25 | n_features_per_level: 2 26 | log2_hashmap_size: 19 27 | base_resolution: 16 28 | per_level_scale: 1.4472692374403782 # max resolution 4096 29 | density_bias: "blob_magic3d" 30 | density_activation: softplus 31 | density_blob_scale: 10. 32 | density_blob_std: 0.5 33 | isosurface_resolution: 128 34 | isosurface_threshold: auto 35 | isosurface_coarse_to_fine: true 36 | 37 | material_type: "diffuse-with-point-light-material" 38 | material: 39 | ambient_only_steps: 2001 40 | soft_shading: true 41 | 42 | background_type: "neural-environment-map-background" 43 | 44 | renderer_type: "nerf-volume-renderer" 45 | renderer: 46 | radius: ${system.geometry.radius} 47 | num_samples_per_ray: 512 48 | 49 | prompt_processor_type: "stable-diffusion-prompt-processor" 50 | prompt_processor: 51 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 52 | prompt: ??? 53 | 54 | guidance_type: "stable-diffusion-guidance" 55 | guidance: 56 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 57 | weighting_strategy: uniform 58 | guidance_scale: 100. 59 | min_step_percent: 0.02 60 | max_step_percent: 0.98 61 | 62 | loggers: 63 | wandb: 64 | enable: false 65 | project: "threestudio" 66 | name: None 67 | 68 | loss: 69 | lambda_sds: 1. 70 | lambda_orient: [0, 10., 1000., 5000] 71 | lambda_sparsity: 1. 72 | lambda_opaque: 0. 
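# NOTE on the hash-grid encoding used throughout these configs: per_level_scale is the growth
# factor between levels, per_level_scale = (max_resolution / base_resolution) ** (1 / (n_levels - 1)).
# For example, (4096 / 16) ** (1 / 15) = 1.4472692374403782, matching the "max resolution 4096"
# comment above, and (2048 / 16) ** (1 / 15) = 1.381912879967776 in the textmesh config below.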
73 | optimizer: 74 | name: Adam 75 | args: 76 | lr: 0.01 77 | betas: [0.9, 0.99] 78 | eps: 1.e-15 79 | params: 80 | geometry: 81 | lr: 0.01 82 | background: 83 | lr: 0.001 84 | 85 | trainer: 86 | max_steps: 10000 87 | log_every_n_steps: 1 88 | num_sanity_val_steps: 0 89 | val_check_interval: 200 90 | enable_progress_bar: true 91 | precision: 16-mixed 92 | 93 | checkpoint: 94 | save_last: true 95 | save_top_k: -1 96 | every_n_train_steps: ${trainer.max_steps} 97 | -------------------------------------------------------------------------------- /configs/magic3d-refine-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "magic3d-refine-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | width: 512 9 | height: 512 10 | camera_distance_range: [1.5, 2.0] 11 | elevation_range: [-10, 45] 12 | light_sample_strategy: "magic3d" 13 | fovy_range: [30, 45] 14 | eval_camera_distance: 2.0 15 | eval_fovy_deg: 70. 16 | 17 | system_type: "magic3d-system" 18 | system: 19 | refinement: true 20 | geometry_convert_from: ??? 21 | geometry_convert_inherit_texture: true 22 | geometry_type: "tetrahedra-sdf-grid" 23 | geometry: 24 | radius: 2.0 # consistent with coarse 25 | isosurface_resolution: 128 26 | isosurface_deformable_grid: true 27 | pos_encoding_config: # consistent with coarse, no progressive band 28 | otype: HashGrid 29 | n_levels: 16 30 | n_features_per_level: 2 31 | log2_hashmap_size: 19 32 | base_resolution: 16 33 | per_level_scale: 1.4472692374403782 # max resolution 4096 34 | fix_geometry: false # optimize grid sdf and deformation 35 | 36 | material_type: "diffuse-with-point-light-material" 37 | material: 38 | ambient_only_steps: 0 39 | soft_shading: true 40 | 41 | background_type: "neural-environment-map-background" 42 | 43 | renderer_type: "nvdiff-rasterizer" 44 | renderer: 45 | context_type: cuda # gl 46 | 47 | prompt_processor_type: "stable-diffusion-prompt-processor" 48 | prompt_processor: 49 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 50 | prompt: ??? 51 | 52 | guidance_type: "stable-diffusion-guidance" 53 | guidance: 54 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 55 | weighting_strategy: sds 56 | guidance_scale: 100. 57 | min_step_percent: 0.02 58 | max_step_percent: 0.5 59 | 60 | loggers: 61 | wandb: 62 | enable: false 63 | project: "threestudio" 64 | name: None 65 | 66 | loss: 67 | lambda_sds: 1. 68 | lambda_normal_consistency: 10000. 
69 | 70 | optimizer: 71 | name: Adam 72 | args: 73 | lr: 0.01 74 | betas: [0.9, 0.99] 75 | eps: 1.e-15 76 | 77 | trainer: 78 | max_steps: 5000 79 | log_every_n_steps: 1 80 | num_sanity_val_steps: 1 81 | val_check_interval: 100 82 | enable_progress_bar: true 83 | precision: 16-mixed 84 | 85 | checkpoint: 86 | save_last: true 87 | save_top_k: -1 88 | every_n_train_steps: ${trainer.max_steps} 89 | -------------------------------------------------------------------------------- /configs/mvdream-sd21-shading.yaml: -------------------------------------------------------------------------------- 1 | name: "mvdream-sd21-rescale0.5-shading" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-multiview-camera-datamodule" 7 | data: 8 | batch_size: [8,4] # must be dividable by n_view 9 | n_view: 4 10 | # 0-4999: 64x64, >=5000: 256x256 11 | width: [64, 256] 12 | height: [64, 256] 13 | resolution_milestones: [5000] 14 | camera_distance_range: [0.8, 1.0] # relative 15 | fovy_range: [15, 60] 16 | elevation_range: [0, 30] 17 | camera_perturb: 0. 18 | center_perturb: 0. 19 | up_perturb: 0. 20 | n_val_views: 4 21 | eval_camera_distance: 3.0 22 | eval_fovy_deg: 40. 23 | 24 | system_type: "mvdream-system" 25 | system: 26 | geometry_type: "implicit-volume" 27 | geometry: 28 | radius: 1.0 29 | normal_type: "analytic" 30 | 31 | density_bias: "blob_magic3d" 32 | density_activation: softplus 33 | density_blob_scale: 10. 34 | density_blob_std: 0.5 35 | 36 | pos_encoding_config: 37 | otype: HashGrid 38 | n_levels: 16 39 | n_features_per_level: 2 40 | log2_hashmap_size: 19 41 | base_resolution: 16 42 | per_level_scale: 1.447269237440378 # max resolution 4096 43 | 44 | material_type: "diffuse-with-point-light-material" 45 | material: 46 | ambient_only_steps: 5000 47 | textureless_prob: 0.5 48 | ambient_light_color: [1.0, 1.0, 1.0] 49 | diffuse_light_color: [0.0, 0.0, 0.0] 50 | soft_shading: true 51 | albedo_activation: sigmoid 52 | 53 | background_type: "neural-environment-map-background" 54 | background: 55 | color_activation: sigmoid 56 | random_aug: true 57 | share_aug_bg: true 58 | 59 | renderer_type: "nerf-volume-renderer" 60 | renderer: 61 | radius: ${system.geometry.radius} 62 | num_samples_per_ray: 512 63 | 64 | prompt_processor_type: "stable-diffusion-prompt-processor" 65 | prompt_processor: 66 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 67 | prompt: ??? 68 | negative_prompt: "ugly, bad anatomy, blurry, pixelated obscure, unnatural colors, poor lighting, dull, and unclear, cropped, lowres, low quality, artifacts, duplicate, morbid, mutilated, poorly drawn face, deformed, dehydrated, bad proportions" 69 | front_threshold: 30. 70 | back_threshold: 30. 71 | 72 | guidance_type: "multiview-diffusion-guidance" 73 | guidance: 74 | model_name: "sd-v2.1-base-4view" 75 | ckpt_path: null # path to a pre-downloaded checkpoint file (null for loading from URL) 76 | guidance_scale: 50.0 77 | min_step_percent: [0, 0.98, 0.02, 8000] # (start_iter, start_val, end_val, end_iter) 78 | max_step_percent: [0, 0.98, 0.50, 8000] 79 | recon_loss: true 80 | recon_std_rescale: 0.5 81 | 82 | loggers: 83 | wandb: 84 | enable: false 85 | project: "threestudio" 86 | 87 | loss: 88 | lambda_sds: 1. 89 | lambda_orient: [0, 10., 1000., 5000] 90 | lambda_sparsity: 0. 91 | lambda_opaque: 0. 92 | lambda_z_variance: 0. 
93 | optimizer: 94 | name: AdamW 95 | args: 96 | betas: [0.9, 0.99] 97 | eps: 1.e-15 98 | params: 99 | geometry.encoding: 100 | lr: 0.01 101 | geometry.density_network: 102 | lr: 0.001 103 | geometry.feature_network: 104 | lr: 0.001 105 | background: 106 | lr: 0.001 107 | 108 | trainer: 109 | max_steps: 10000 110 | log_every_n_steps: 1 111 | num_sanity_val_steps: 0 112 | val_check_interval: 200 113 | enable_progress_bar: true 114 | precision: 16-mixed 115 | 116 | checkpoint: 117 | save_last: true 118 | save_top_k: -1 119 | every_n_train_steps: ${trainer.max_steps} 120 | -------------------------------------------------------------------------------- /configs/mvdream-sd21.yaml: -------------------------------------------------------------------------------- 1 | name: "mvdream-sd21-rescale0.5" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs2" 4 | seed: 0 5 | 6 | data_type: "random-multiview-camera-datamodule" 7 | data: 8 | batch_size: [8,4] # must be dividable by n_view 9 | n_view: 4 10 | # 0-4999: 64x64, >=5000: 256x256 11 | width: [64, 256] 12 | height: [64, 256] 13 | resolution_milestones: [5000] 14 | camera_distance_range: [0.8, 1.0] # relative 15 | fovy_range: [15, 60] 16 | elevation_range: [0, 30] 17 | camera_perturb: 0. 18 | center_perturb: 0. 19 | up_perturb: 0. 20 | n_val_views: 4 21 | eval_camera_distance: 3.0 22 | eval_fovy_deg: 40. 23 | 24 | system_type: "mvdream-system" 25 | system: 26 | geometry_type: "implicit-volume" 27 | geometry: 28 | radius: 1.0 29 | normal_type: null 30 | 31 | density_bias: "blob_magic3d" 32 | density_activation: softplus 33 | density_blob_scale: 10. 34 | density_blob_std: 0.5 35 | 36 | pos_encoding_config: 37 | otype: HashGrid 38 | n_levels: 16 39 | n_features_per_level: 2 40 | log2_hashmap_size: 19 41 | base_resolution: 16 42 | per_level_scale: 1.447269237440378 # max resolution 4096 43 | 44 | material_type: "no-material" 45 | material: 46 | n_output_dims: 3 47 | color_activation: sigmoid 48 | 49 | background_type: "neural-environment-map-background" 50 | background: 51 | color_activation: sigmoid 52 | random_aug: true 53 | share_aug_bg: true 54 | 55 | renderer_type: "nerf-volume-renderer" 56 | renderer: 57 | radius: ${system.geometry.radius} 58 | num_samples_per_ray: 512 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 63 | prompt: ??? 64 | negative_prompt: "ugly, bad anatomy, blurry, pixelated obscure, unnatural colors, poor lighting, dull, and unclear, cropped, lowres, low quality, artifacts, duplicate, morbid, mutilated, poorly drawn face, deformed, dehydrated, bad proportions" 65 | front_threshold: 30. 66 | back_threshold: 30. 67 | 68 | guidance_type: "multiview-diffusion-guidance" 69 | guidance: 70 | model_name: "sd-v2.1-base-4view" 71 | ckpt_path: null # path to a pre-downloaded checkpoint file (null for loading from URL) 72 | guidance_scale: 50.0 73 | min_step_percent: [0, 0.98, 0.02, 8000] # (start_iter, start_val, end_val, end_iter) 74 | max_step_percent: [0, 0.98, 0.50, 8000] 75 | recon_loss: true 76 | recon_std_rescale: 0.5 77 | 78 | loggers: 79 | wandb: 80 | enable: false 81 | project: "threestudio" 82 | 83 | loss: 84 | lambda_sds: 1. 85 | lambda_orient: 0. 86 | lambda_sparsity: 0. 87 | lambda_opaque: 0. 88 | lambda_z_variance: 0. 
89 | optimizer: 90 | name: AdamW 91 | args: 92 | betas: [0.9, 0.99] 93 | eps: 1.e-15 94 | params: 95 | geometry.encoding: 96 | lr: 0.01 97 | geometry.density_network: 98 | lr: 0.001 99 | geometry.feature_network: 100 | lr: 0.001 101 | background: 102 | lr: 0.001 103 | 104 | trainer: 105 | max_steps: 10000 106 | log_every_n_steps: 1 107 | num_sanity_val_steps: 0 108 | val_check_interval: 200 109 | enable_progress_bar: true 110 | precision: 16-mixed 111 | 112 | checkpoint: 113 | save_last: true 114 | save_top_k: -1 115 | every_n_train_steps: ${trainer.max_steps} 116 | -------------------------------------------------------------------------------- /configs/prolificdreamer-geometry-from.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer-geometry" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [1.0, 1.5] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | camera_perturb: 0. 15 | center_perturb: 0. 16 | up_perturb: 0. 17 | eval_camera_distance: 1.5 18 | eval_fovy_deg: 70. 19 | 20 | system_type: "prolificdreamer-system" 21 | system: 22 | stage: geometry 23 | geometry_convert_from: ??? 24 | geometry_type: "tetrahedra-sdf-grid" 25 | geometry: 26 | radius: 1.0 # consistent with coarse 27 | isosurface_resolution: 512 # 400 # 128 28 | isosurface_deformable_grid: true 29 | geometry_only: true 30 | 31 | material_type: "no-material" # unused 32 | material: 33 | n_output_dims: 0 34 | 35 | background_type: "solid-color-background" # unused 36 | 37 | renderer_type: "nvdiff-rasterizer" 38 | renderer: 39 | context_type: cuda # gl 40 | 41 | prompt_processor_type: "stable-diffusion-prompt-processor" 42 | prompt_processor: 43 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 44 | prompt: lib:michelangelo_dog 45 | 46 | guidance_type: "stable-diffusion-guidance" 47 | guidance: 48 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 49 | guidance_scale: 100. 50 | min_step_percent: 0.02 51 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 52 | weighting_strategy: sds 53 | 54 | loggers: 55 | wandb: 56 | enable: false 57 | project: "threestudio" 58 | name: None 59 | 60 | loss: 61 | lambda_sds: 1. 62 | lambda_normal_consistency: 10000. 63 | lambda_laplacian_smoothness: 10000. 
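# NOTE: lambda_normal_consistency and lambda_laplacian_smoothness are standard mesh regularizers
# on the surface extracted from the tetrahedra SDF grid: the first penalizes differences between
# the normals of adjacent faces, the second penalizes each vertex's deviation from the mean of its
# neighbors. Values marked "???" (e.g. geometry_convert_from above) are OmegaConf mandatory fields
# and must be supplied at launch time, typically as a command-line override such as
# system.geometry_convert_from=path/to/coarse-stage/ckpts/last.ckpt (illustrative path).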
64 | 65 | optimizer: 66 | name: Adam 67 | args: 68 | lr: 0.005 69 | betas: [0.9, 0.99] 70 | eps: 1.e-15 71 | 72 | trainer: 73 | max_steps: 15000 74 | log_every_n_steps: 1 75 | num_sanity_val_steps: 1 76 | val_check_interval: 200 77 | enable_progress_bar: true 78 | precision: 32 79 | 80 | checkpoint: 81 | save_last: true 82 | save_top_k: -1 83 | every_n_train_steps: ${trainer.max_steps} 84 | -------------------------------------------------------------------------------- /configs/prolificdreamer-geometry.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer-geometry" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [1.0, 1.5] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | camera_perturb: 0. 15 | center_perturb: 0. 16 | up_perturb: 0. 17 | eval_camera_distance: 1.5 18 | eval_fovy_deg: 70. 19 | 20 | system_type: "prolificdreamer-system" 21 | system: 22 | stage: geometry 23 | geometry_convert_from: ??? 24 | geometry_type: "tetrahedra-sdf-grid" 25 | geometry: 26 | radius: 1.0 # consistent with coarse 27 | isosurface_resolution: 128 28 | isosurface_deformable_grid: true 29 | geometry_only: true 30 | 31 | material_type: "no-material" # unused 32 | material: 33 | n_output_dims: 0 34 | 35 | background_type: "solid-color-background" # unused 36 | 37 | renderer_type: "nvdiff-rasterizer" 38 | renderer: 39 | context_type: cuda # gl 40 | 41 | prompt_processor_type: "stable-diffusion-prompt-processor" 42 | prompt_processor: 43 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 44 | prompt: lib:michelangelo_dog 45 | 46 | guidance_type: "stable-diffusion-guidance" 47 | guidance: 48 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 49 | guidance_scale: 100. 50 | min_step_percent: 0.02 51 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 52 | weighting_strategy: sds 53 | 54 | loggers: 55 | wandb: 56 | enable: false 57 | project: "threestudio" 58 | name: None 59 | 60 | loss: 61 | lambda_sds: 1. 62 | lambda_normal_consistency: 10000. 63 | lambda_laplacian_smoothness: 10000. 64 | 65 | optimizer: 66 | name: Adam 67 | args: 68 | lr: 0.005 69 | betas: [0.9, 0.99] 70 | eps: 1.e-15 71 | 72 | trainer: 73 | max_steps: 15000 74 | log_every_n_steps: 1 75 | num_sanity_val_steps: 1 76 | val_check_interval: 200 77 | enable_progress_bar: true 78 | precision: 32 79 | 80 | checkpoint: 81 | save_last: true 82 | save_top_k: -1 83 | every_n_train_steps: ${trainer.max_steps} 84 | -------------------------------------------------------------------------------- /configs/prolificdreamer-patch.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer-patch" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [1.0, 1.5] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | camera_perturb: 0. 15 | center_perturb: 0. 16 | up_perturb: 0. 17 | eval_camera_distance: 1.5 18 | eval_fovy_deg: 70. 
19 | 20 | system_type: "prolificdreamer-system" 21 | system: 22 | stage: coarse 23 | geometry_type: "implicit-volume" 24 | geometry: 25 | radius: 1.0 26 | normal_type: null 27 | 28 | density_bias: "blob_magic3d" 29 | density_activation: softplus 30 | density_blob_scale: 10. 31 | density_blob_std: 0.5 32 | 33 | pos_encoding_config: 34 | otype: HashGrid 35 | n_levels: 16 36 | n_features_per_level: 2 37 | log2_hashmap_size: 19 38 | base_resolution: 16 39 | per_level_scale: 1.447269237440378 # max resolution 4096 40 | 41 | material_type: "no-material" 42 | material: 43 | n_output_dims: 3 44 | color_activation: sigmoid 45 | 46 | background_type: "neural-environment-map-background" 47 | background: 48 | color_activation: sigmoid 49 | random_aug: true 50 | 51 | renderer_type: "patch-renderer" 52 | renderer: 53 | base_renderer_type: "nerf-volume-renderer" 54 | base_renderer: 55 | radius: ${system.geometry.radius} 56 | num_samples_per_ray: 512 57 | patch_size: 128 58 | 59 | prompt_processor_type: "stable-diffusion-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 62 | prompt: ??? 63 | front_threshold: 30. 64 | back_threshold: 30. 65 | 66 | guidance_type: "stable-diffusion-vsd-guidance" 67 | guidance: 68 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 69 | pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1" 70 | guidance_scale: 7.5 71 | min_step_percent: 0.02 72 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 73 | 74 | loggers: 75 | wandb: 76 | enable: false 77 | project: "threestudio" 78 | 79 | loss: 80 | lambda_vsd: 1. 81 | lambda_lora: 1. 82 | lambda_orient: 0. 83 | lambda_sparsity: 10. 84 | lambda_opaque: [10000, 0.0, 1000.0, 10001] 85 | lambda_z_variance: 0. 86 | optimizer: 87 | name: AdamW 88 | args: 89 | betas: [0.9, 0.99] 90 | eps: 1.e-15 91 | params: 92 | geometry.encoding: 93 | lr: 0.01 94 | geometry.density_network: 95 | lr: 0.001 96 | geometry.feature_network: 97 | lr: 0.001 98 | background: 99 | lr: 0.001 100 | guidance: 101 | lr: 0.0001 102 | 103 | trainer: 104 | max_steps: 25000 105 | log_every_n_steps: 1 106 | num_sanity_val_steps: 0 107 | val_check_interval: 200 108 | enable_progress_bar: true 109 | precision: 32 110 | 111 | checkpoint: 112 | save_last: true 113 | save_top_k: -1 114 | every_n_train_steps: ${trainer.max_steps} 115 | -------------------------------------------------------------------------------- /configs/prolificdreamer-scene.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: [1, 1] 9 | # 0-4999: 64x64, >=5000: 512x512 10 | # this drastically reduces VRAM usage as empty space is pruned in early training 11 | width: [64, 512] 12 | height: [64, 512] 13 | resolution_milestones: [5000] 14 | camera_distance_range: [0.1, 2.3] 15 | fovy_range: [40, 70] 16 | elevation_range: [-10, 45] 17 | camera_perturb: 0. 18 | center_perturb: 0. 19 | up_perturb: 0. 20 | eval_camera_distance: 2.0 21 | eval_fovy_deg: 70. 22 | 23 | system_type: "prolificdreamer-system" 24 | system: 25 | stage: coarse 26 | geometry_type: "implicit-volume" 27 | geometry: 28 | radius: 5.0 29 | normal_type: null 30 | 31 | density_bias: "blob_magic3d" 32 | density_activation: softplus 33 | density_blob_scale: -10. 
34 | density_blob_std: 2.5 35 | 36 | pos_encoding_config: 37 | otype: HashGrid 38 | n_levels: 16 39 | n_features_per_level: 2 40 | log2_hashmap_size: 19 41 | base_resolution: 16 42 | per_level_scale: 1.447269237440378 # max resolution 4096 43 | 44 | material_type: "no-material" 45 | material: 46 | n_output_dims: 3 47 | color_activation: sigmoid 48 | 49 | background_type: "neural-environment-map-background" 50 | background: 51 | color_activation: sigmoid 52 | 53 | renderer_type: "nerf-volume-renderer" 54 | renderer: 55 | radius: ${system.geometry.radius} 56 | num_samples_per_ray: 512 57 | 58 | prompt_processor_type: "stable-diffusion-prompt-processor" 59 | prompt_processor: 60 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 61 | prompt: ??? 62 | 63 | guidance_type: "stable-diffusion-vsd-guidance" 64 | guidance: 65 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 66 | pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1" 67 | guidance_scale: 7.5 68 | min_step_percent: 0.02 69 | max_step_percent: [10000, 0.98, 0.5, 10001] # annealed to 0.5 after 10000 steps 70 | view_dependent_prompting: false 71 | 72 | loggers: 73 | wandb: 74 | enable: false 75 | project: "threestudio" 76 | name: None 77 | 78 | loss: 79 | lambda_vsd: 1. 80 | lambda_lora: 1. 81 | lambda_orient: 0. 82 | lambda_sparsity: 0. 83 | lambda_opaque: 0. 84 | lambda_z_variance: 1. 85 | optimizer: 86 | name: AdamW 87 | args: 88 | betas: [0.9, 0.99] 89 | eps: 1.e-15 90 | params: 91 | geometry.encoding: 92 | lr: 0.01 93 | geometry.density_network: 94 | lr: 0.001 95 | geometry.feature_network: 96 | lr: 0.001 97 | background: 98 | lr: 0.001 99 | guidance: 100 | lr: 0.0001 101 | 102 | trainer: 103 | max_steps: 25000 104 | log_every_n_steps: 1 105 | num_sanity_val_steps: 0 106 | val_check_interval: 200 107 | enable_progress_bar: true 108 | precision: 32 109 | 110 | checkpoint: 111 | save_last: true 112 | save_top_k: -1 113 | every_n_train_steps: ${trainer.max_steps} 114 | -------------------------------------------------------------------------------- /configs/prolificdreamer-texture.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer-texture" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [1.0, 1.5] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | camera_perturb: 0. 15 | center_perturb: 0. 16 | up_perturb: 0. 17 | eval_camera_distance: 1.5 18 | eval_fovy_deg: 70. 19 | 20 | system_type: "prolificdreamer-system" 21 | system: 22 | stage: texture 23 | geometry_convert_from: ??? 
24 | geometry_type: "tetrahedra-sdf-grid" 25 | geometry: 26 | radius: 1.0 # consistent with last stage 27 | isosurface_resolution: 400 # 128 # consistent with last stage 28 | isosurface_deformable_grid: true 29 | isosurface_remove_outliers: true 30 | pos_encoding_config: 31 | otype: HashGrid 32 | n_levels: 16 33 | n_features_per_level: 2 34 | log2_hashmap_size: 19 35 | base_resolution: 16 36 | per_level_scale: 1.447269237440378 # max resolution 4096 37 | fix_geometry: true 38 | 39 | material_type: "no-material" 40 | material: 41 | n_output_dims: 3 42 | color_activation: sigmoid 43 | 44 | background_type: "neural-environment-map-background" 45 | background: 46 | color_activation: sigmoid 47 | 48 | renderer_type: "nvdiff-rasterizer" 49 | renderer: 50 | context_type: cuda # gl 51 | 52 | prompt_processor_type: "stable-diffusion-prompt-processor" 53 | prompt_processor: 54 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 55 | prompt: ??? 56 | front_threshold: 30. 57 | back_threshold: 30. 58 | 59 | guidance_type: "stable-diffusion-vsd-guidance" 60 | guidance: 61 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 62 | pretrained_model_name_or_path_lora: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-lora" # "stabilityai/stable-diffusion-2-1" 63 | guidance_scale: 7.5 64 | min_step_percent: 0.02 65 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 66 | 67 | loggers: 68 | wandb: 69 | enable: false 70 | project: "threestudio" 71 | name: None 72 | 73 | loss: 74 | lambda_vsd: 1. 75 | lambda_lora: 1. 76 | optimizer: 77 | name: AdamW 78 | args: 79 | betas: [0.9, 0.99] 80 | eps: 1.e-15 81 | params: 82 | geometry.encoding: 83 | lr: 0.01 84 | geometry.feature_network: 85 | lr: 0.001 86 | background: 87 | lr: 0.001 88 | guidance: 89 | lr: 0.0001 90 | 91 | trainer: 92 | max_steps: 30000 93 | log_every_n_steps: 1 94 | num_sanity_val_steps: 1 95 | val_check_interval: 200 96 | enable_progress_bar: true 97 | precision: 32 98 | 99 | checkpoint: 100 | save_last: true 101 | save_top_k: -1 102 | every_n_train_steps: ${trainer.max_steps} 103 | -------------------------------------------------------------------------------- /configs/prolificdreamer.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: [1, 1] 9 | # 0-4999: 64x64, >=5000: 512x512 10 | # this drastically reduces VRAM usage as empty space is pruned in early training 11 | width: [64, 512] 12 | height: [64, 512] 13 | resolution_milestones: [5000] 14 | camera_distance_range: [1.0, 1.5] 15 | fovy_range: [40, 70] 16 | elevation_range: [-10, 45] 17 | camera_perturb: 0. 18 | center_perturb: 0. 19 | up_perturb: 0. 20 | eval_camera_distance: 1.5 21 | eval_fovy_deg: 70. 22 | 23 | system_type: "prolificdreamer-system" 24 | system: 25 | stage: coarse 26 | geometry_type: "implicit-volume" 27 | geometry: 28 | radius: 1.0 29 | normal_type: null 30 | 31 | density_bias: "blob_magic3d" 32 | density_activation: softplus 33 | density_blob_scale: 10. 
34 | density_blob_std: 0.5 35 | 36 | pos_encoding_config: 37 | otype: HashGrid 38 | n_levels: 16 39 | n_features_per_level: 2 40 | log2_hashmap_size: 19 41 | base_resolution: 16 42 | per_level_scale: 1.447269237440378 # max resolution 4096 43 | 44 | material_type: "no-material" 45 | material: 46 | n_output_dims: 3 47 | color_activation: sigmoid 48 | 49 | background_type: "neural-environment-map-background" 50 | background: 51 | color_activation: sigmoid 52 | random_aug: true 53 | 54 | renderer_type: "nerf-volume-renderer" 55 | renderer: 56 | radius: ${system.geometry.radius} 57 | num_samples_per_ray: 512 58 | 59 | prompt_processor_type: "stable-diffusion-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 62 | prompt: ??? 63 | front_threshold: 30. 64 | back_threshold: 30. 65 | 66 | guidance_type: "stable-diffusion-vsd-guidance" 67 | guidance: 68 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 69 | pretrained_model_name_or_path_lora: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-lora" # "stabilityai/stable-diffusion-2-1" 70 | guidance_scale: 7.5 71 | min_step_percent: 0.02 72 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 73 | 74 | loggers: 75 | wandb: 76 | enable: false 77 | project: "threestudio" 78 | name: None 79 | 80 | loss: 81 | lambda_vsd: 1. 82 | lambda_lora: 1. 83 | lambda_orient: 0. 84 | lambda_sparsity: 10. 85 | lambda_opaque: [10000, 0.0, 1000.0, 10001] 86 | lambda_z_variance: 0. 87 | optimizer: 88 | name: AdamW 89 | args: 90 | betas: [0.9, 0.99] 91 | eps: 1.e-15 92 | params: 93 | geometry.encoding: 94 | lr: 0.01 95 | geometry.density_network: 96 | lr: 0.001 97 | geometry.feature_network: 98 | lr: 0.001 99 | background: 100 | lr: 0.001 101 | guidance: 102 | lr: 0.0001 103 | 104 | trainer: 105 | max_steps: 25000 106 | log_every_n_steps: 1 107 | num_sanity_val_steps: 0 108 | val_check_interval: 200 109 | enable_progress_bar: true 110 | precision: 32 111 | 112 | checkpoint: 113 | save_last: true 114 | save_top_k: -1 115 | every_n_train_steps: ${trainer.max_steps} 116 | -------------------------------------------------------------------------------- /configs/sjc.yaml: -------------------------------------------------------------------------------- 1 | name: sjc 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: outputs 4 | seed: 0 5 | 6 | data_type: random-camera-datamodule 7 | data: 8 | camera_distance_range: [1.50, 1.50] 9 | elevation_range: [-10, 45] 10 | camera_perturb: 0.0 11 | center_perturb: 0.0 12 | up_perturb: 0.0 13 | light_position_perturb: 0.0 14 | eval_elevation_deg: 20.0 15 | 16 | system_type: sjc-system 17 | system: 18 | geometry_type: volume-grid 19 | geometry: 20 | normal_type: null 21 | grid_size: [100, 100, 100] 22 | density_bias: -1.0 23 | n_feature_dims: 4 24 | 25 | material_type: no-material 26 | material: 27 | n_output_dims: 4 28 | color_activation: none 29 | 30 | background_type: textured-background 31 | background: 32 | n_output_dims: 4 33 | color_activation: none 34 | height: 4 35 | width: 4 36 | 37 | renderer_type: nerf-volume-renderer 38 | renderer: 39 | num_samples_per_ray: 512 40 | grid_prune: false 41 | 42 | prompt_processor_type: stable-diffusion-prompt-processor 43 | prompt_processor: 44 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 45 | 
prompt: ??? 46 | view_dependent_prompt_front: true 47 | 48 | guidance_type: stable-diffusion-guidance 49 | guidance: 50 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 51 | guidance_scale: 100. 52 | use_sjc: true 53 | var_red: true 54 | min_step_percent: 0.01 55 | max_step_percent: 0.97 56 | 57 | loggers: 58 | wandb: 59 | enable: false 60 | project: "threestudio" 61 | name: None 62 | 63 | loss: 64 | lambda_sds: 1. 65 | center_ratio: 0.78125 # = 50 / 64 66 | lambda_depth: 0 # or try 10 67 | lambda_emptiness: [5000, 1.e+4, 2.e+5, 5001] 68 | emptiness_scale: 10 69 | 70 | optimizer: 71 | name: Adamax 72 | args: 73 | lr: 0.05 74 | params: 75 | geometry: 76 | lr: 0.05 77 | background: 78 | lr: 0.0001 # maybe 0.001/0.01 is better 79 | 80 | trainer: 81 | max_steps: 10000 82 | log_every_n_steps: 1 83 | num_sanity_val_steps: 0 84 | val_check_interval: 200 85 | enable_progress_bar: true 86 | precision: 16-mixed 87 | 88 | checkpoint: 89 | save_last: true # save at each validation tim 90 | save_top_k: -1 91 | every_n_train_steps: ${trainer.max_steps} 92 | -------------------------------------------------------------------------------- /configs/sketchshape-refine.yaml: -------------------------------------------------------------------------------- 1 | name: "sketchshape-refine" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | refinement: true 13 | weights: ??? 14 | weights_ignore_modules: ["material", "background"] 15 | guide_shape: ??? 16 | 17 | geometry_type: "implicit-volume" 18 | geometry: 19 | n_feature_dims: 4 20 | normal_type: null 21 | 22 | material_type: "sd-latent-adapter-material" 23 | 24 | background_type: "neural-environment-map-background" 25 | 26 | renderer_type: "nerf-volume-renderer" 27 | renderer: 28 | num_samples_per_ray: 512 29 | 30 | prompt_processor_type: "stable-diffusion-prompt-processor" 31 | prompt_processor: 32 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 33 | prompt: ??? 34 | 35 | guidance_type: "stable-diffusion-guidance" 36 | guidance: 37 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 38 | guidance_scale: 100. 39 | weighting_strategy: sds 40 | 41 | loggers: 42 | wandb: 43 | enable: false 44 | project: "threestudio" 45 | name: None 46 | 47 | loss: 48 | lambda_sds: 1. 49 | lambda_sparsity: 0.0 50 | lambda_shape: 1. 
51 | lambda_opaque: 0.0 52 | lambda_orient: 0.0 53 | optimizer: 54 | name: Adam 55 | args: 56 | lr: 0.01 57 | betas: [0.9, 0.99] 58 | eps: 1.e-15 59 | scheduler: 60 | name: SequentialLR 61 | interval: step 62 | warmup_steps: 100 63 | milestones: 64 | - ${system.scheduler.warmup_steps} 65 | schedulers: 66 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 67 | args: 68 | start_factor: 0.1 69 | end_factor: 1.0 70 | total_iters: ${system.scheduler.warmup_steps} 71 | - name: ExponentialLR 72 | args: 73 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 74 | 75 | trainer: 76 | max_steps: 10000 77 | log_every_n_steps: 1 78 | num_sanity_val_steps: 1 79 | val_check_interval: 200 80 | enable_progress_bar: true 81 | precision: 16-mixed 82 | 83 | checkpoint: 84 | save_last: true # save at each validation time 85 | save_top_k: -1 86 | every_n_train_steps: ${trainer.max_steps} 87 | -------------------------------------------------------------------------------- /configs/sketchshape.yaml: -------------------------------------------------------------------------------- 1 | name: "sketchshape" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | guide_shape: ??? 13 | 14 | geometry_type: "implicit-volume" 15 | geometry: 16 | n_feature_dims: 4 17 | normal_type: null 18 | 19 | material_type: "no-material" 20 | material: 21 | n_output_dims: 4 22 | color_activation: none 23 | 24 | background_type: "neural-environment-map-background" 25 | background: 26 | n_output_dims: 4 27 | color_activation: none 28 | 29 | renderer_type: "nerf-volume-renderer" 30 | renderer: 31 | num_samples_per_ray: 512 32 | 33 | prompt_processor_type: "stable-diffusion-prompt-processor" 34 | prompt_processor: 35 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 36 | prompt: ??? 37 | 38 | guidance_type: "stable-diffusion-guidance" 39 | guidance: 40 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 41 | guidance_scale: 100. 
42 | weighting_strategy: sds 43 | 44 | loggers: 45 | wandb: 46 | enable: false 47 | project: "threestudio" 48 | name: None 49 | 50 | loss: 51 | lambda_sds: 1.0 52 | lambda_sparsity: 0.0 53 | lambda_shape: 1.0 54 | lambda_opaque: 0.0 55 | lambda_orient: 0.0 56 | optimizer: 57 | name: Adam 58 | args: 59 | lr: 0.01 60 | betas: [0.9, 0.99] 61 | eps: 1.e-15 62 | scheduler: 63 | name: SequentialLR 64 | interval: step 65 | warmup_steps: 100 66 | milestones: 67 | - ${system.scheduler.warmup_steps} 68 | schedulers: 69 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 70 | args: 71 | start_factor: 0.1 72 | end_factor: 1.0 73 | total_iters: ${system.scheduler.warmup_steps} 74 | - name: ExponentialLR 75 | args: 76 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 77 | 78 | trainer: 79 | max_steps: 10000 80 | log_every_n_steps: 1 81 | num_sanity_val_steps: 0 82 | val_check_interval: 200 83 | enable_progress_bar: true 84 | precision: 16-mixed 85 | 86 | checkpoint: 87 | save_last: true # save at each validation time 88 | save_top_k: -1 89 | every_n_train_steps: ${trainer.max_steps} 90 | -------------------------------------------------------------------------------- /configs/textmesh-if.yaml: -------------------------------------------------------------------------------- 1 | name: "textmesh-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "textmesh-system" 19 | system: 20 | geometry_type: "implicit-sdf" 21 | geometry: 22 | radius: 2.0 23 | normal_type: finite_difference 24 | # progressive eps from Neuralangelo 25 | finite_difference_normal_eps: progressive 26 | 27 | sdf_bias: sphere 28 | sdf_bias_params: 0.5 29 | 30 | # coarse to fine hash grid encoding 31 | pos_encoding_config: 32 | otype: ProgressiveBandHashGrid 33 | n_levels: 16 34 | n_features_per_level: 2 35 | log2_hashmap_size: 19 36 | base_resolution: 16 37 | per_level_scale: 1.381912879967776 # max resolution 2048 38 | start_level: 8 # resolution ~200 39 | start_step: 2000 40 | update_steps: 500 41 | 42 | material_type: "diffuse-with-point-light-material" 43 | material: 44 | ambient_only_steps: 2001 45 | albedo_activation: sigmoid 46 | 47 | background_type: "neural-environment-map-background" 48 | background: 49 | color_activation: sigmoid 50 | 51 | renderer_type: "neus-volume-renderer" 52 | renderer: 53 | radius: ${system.geometry.radius} 54 | num_samples_per_ray: 512 55 | cos_anneal_end_steps: ${trainer.max_steps} 56 | eval_chunk_size: 8192 57 | 58 | prompt_processor_type: "deep-floyd-prompt-processor" 59 | prompt_processor: 60 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 61 | prompt: ??? 62 | 63 | guidance_type: "deep-floyd-guidance" 64 | guidance: 65 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 66 | guidance_scale: 20. 67 | weighting_strategy: sds 68 | min_step_percent: 0.02 69 | max_step_percent: 0.98 70 | 71 | loss: 72 | lambda_sds: 1. 73 | lambda_orient: 0.0 74 | lambda_sparsity: 0.0 75 | lambda_opaque: 0.0 76 | lambda_eikonal: 1000. 
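# NOTE: the eikonal term is only meaningful for SDF-based geometry such as the "implicit-sdf"
# type used here; it regularizes the signed distance field so that ||grad sdf(x)|| stays close
# to 1, roughly mean((||grad sdf(x)|| - 1)^2) over sampled points.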
77 | optimizer: 78 | name: Adam 79 | args: 80 | betas: [0.9, 0.99] 81 | eps: 1.e-15 82 | params: 83 | geometry.encoding: 84 | lr: 0.01 85 | geometry.sdf_network: 86 | lr: 0.001 87 | geometry.feature_network: 88 | lr: 0.001 89 | background: 90 | lr: 0.001 91 | renderer: 92 | lr: 0.001 93 | 94 | trainer: 95 | max_steps: 10000 96 | log_every_n_steps: 1 97 | num_sanity_val_steps: 0 98 | val_check_interval: 200 99 | enable_progress_bar: true 100 | precision: 16-mixed 101 | 102 | checkpoint: 103 | save_last: true # save at each validation time 104 | save_top_k: -1 105 | every_n_train_steps: ${trainer.max_steps} 106 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Reference: 2 | # https://github.com/cvpaperchallenge/Ascender 3 | # https://github.com/nerfstudio-project/nerfstudio 4 | 5 | FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 6 | 7 | ARG USER_NAME=dreamer 8 | ARG GROUP_NAME=dreamers 9 | ARG UID=1000 10 | ARG GID=1000 11 | 12 | # Set compute capability for nerfacc and tiny-cuda-nn 13 | # See https://developer.nvidia.com/cuda-gpus and limit number to speed-up build 14 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX" 15 | ENV TCNN_CUDA_ARCHITECTURES=90;89;86;80;75;70;61;60 16 | # Speed-up build for RTX 30xx 17 | # ENV TORCH_CUDA_ARCH_LIST="8.6" 18 | # ENV TCNN_CUDA_ARCHITECTURES=86 19 | # Speed-up build for RTX 40xx 20 | # ENV TORCH_CUDA_ARCH_LIST="8.9" 21 | # ENV TCNN_CUDA_ARCHITECTURES=89 22 | 23 | ENV CUDA_HOME=/usr/local/cuda 24 | ENV PATH=${CUDA_HOME}/bin:/home/${USER_NAME}/.local/bin:${PATH} 25 | ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 26 | ENV LIBRARY_PATH=${CUDA_HOME}/lib64/stubs:${LIBRARY_PATH} 27 | 28 | # apt install by root user 29 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 30 | build-essential \ 31 | curl \ 32 | git \ 33 | libegl1-mesa-dev \ 34 | libgl1-mesa-dev \ 35 | libgles2-mesa-dev \ 36 | libglib2.0-0 \ 37 | libsm6 \ 38 | libxext6 \ 39 | libxrender1 \ 40 | python-is-python3 \ 41 | python3.10-dev \ 42 | python3-pip \ 43 | wget \ 44 | && rm -rf /var/lib/apt/lists/* 45 | 46 | # Change user to non-root user 47 | RUN groupadd -g ${GID} ${GROUP_NAME} \ 48 | && useradd -ms /bin/sh -u ${UID} -g ${GID} ${USER_NAME} 49 | USER ${USER_NAME} 50 | 51 | RUN pip install --upgrade pip setuptools ninja 52 | RUN pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 --index-url https://download.pytorch.org/whl/cu118 53 | # Install nerfacc and tiny-cuda-nn before installing requirements.txt 54 | # because these two installations are time consuming and error prone 55 | RUN pip install git+https://github.com/KAIR-BAIR/nerfacc.git@v0.5.2 56 | RUN pip install git+https://github.com/NVlabs/tiny-cuda-nn.git#subdirectory=bindings/torch 57 | 58 | COPY requirements.txt /tmp 59 | RUN cd /tmp && pip install -r requirements.txt 60 | WORKDIR /home/${USER_NAME}/threestudio 61 | -------------------------------------------------------------------------------- /docker/compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | threestudio: 3 | build: 4 | context: ../ 5 | dockerfile: docker/Dockerfile 6 | args: 7 | # you can set environment variables, otherwise default values will be used 8 | USER_NAME: ${HOST_USER_NAME:-dreamer} # export HOST_USER_NAME=$USER 9 | GROUP_NAME: ${HOST_GROUP_NAME:-dreamers} 10 | UID: ${HOST_UID:-1000} # 
export HOST_UID=$(id -u) 11 | GID: ${HOST_GID:-1000} # export HOST_GID=$(id -g) 12 | shm_size: '4gb' 13 | environment: 14 | NVIDIA_DISABLE_REQUIRE: 1 # avoid wrong `nvidia-container-cli: requirement error` 15 | tty: true 16 | volumes: 17 | - ../:/home/${HOST_USER_NAME:-dreamer}/threestudio 18 | deploy: 19 | resources: 20 | reservations: 21 | devices: 22 | - driver: nvidia 23 | capabilities: [gpu] 24 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Prerequisite 4 | 5 | - NVIDIA GPU with at least 6GB VRAM. The more memory you have, the more methods and higher resolutions you can try. 6 | - [NVIDIA Driver](https://www.nvidia.com/Download/index.aspx) whose version is higher than the [Minimum Required Driver Version](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html) of CUDA Toolkit you want to use. 7 | 8 | ## Install CUDA Toolkit 9 | 10 | You can skip this step if you have installed sufficiently new version or you use Docker. 11 | 12 | Install [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive). 13 | 14 | - Example for Ubuntu 22.04: 15 | - Run [command for CUDA 11.8 Ubuntu 22.04](https://developer.nvidia.com/cuda-11-8-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=deb_local) 16 | - Example for Ubuntu on WSL2: 17 | - `sudo apt-key del 7fa2af80` 18 | - Run [command for CUDA 11.8 WSL-Ubuntu](https://developer.nvidia.com/cuda-11-8-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=WSL-Ubuntu&target_version=2.0&target_type=deb_local) 19 | 20 | ## Install threestudio via Docker 21 | 22 | 1. [Install Docker Engine](https://docs.docker.com/engine/install/). 23 | This document assumes you [install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/). 24 | 2. [Create `docker` group](https://docs.docker.com/engine/install/linux-postinstall/). 25 | Otherwise, you need to type `sudo docker` instead of `docker`. 26 | 3. [Install NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#setting-up-nvidia-container-toolkit). 27 | 4. If you use WSL2, [enable systemd](https://learn.microsoft.com/en-us/windows/wsl/wsl-config#systemd-support). 28 | 5. Edit [Dockerfile](../docker/Dockerfile) for your GPU to speed-up build. 29 | The default Dockerfile takes into account many types of GPUs. 30 | 6. Run Docker via `docker compose`. 31 | 32 | ```bash 33 | cd docker/ 34 | docker compose build # build Docker image 35 | docker compose up -d # create and start a container in background 36 | docker compose exec threestudio bash # run bash in the container 37 | 38 | # Enjoy threestudio! 39 | 40 | exit # or Ctrl+D 41 | docker compose stop # stop the container 42 | docker compose start # start the container 43 | docker compose down # stop and remove the container 44 | ``` 45 | 46 | Note: The current Dockerfile will cause errors when using the OpenGL-based rasterizer of nvdiffrast. 47 | You can use the CUDA-based rasterizer by adding commands or editing configs. 48 | 49 | - `system.renderer.context_type=cuda` for training 50 | - `system.exporter.context_type=cuda` for exporting meshes 51 | 52 | [This comment by the nvdiffrast author](https://github.com/NVlabs/nvdiffrast/issues/94#issuecomment-1288566038) could be a guide to resolve this limitation. 
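As a concrete (illustrative) example, assuming the usual threestudio `launch.py` invocation with dotted command-line overrides, switching to the CUDA rasterizer for a training run would look like the following; the config file, prompt, and GPU index are placeholders, adjust them to your setup:

```bash
# pass the override on the command line instead of editing the YAML config
python launch.py --config configs/magic3d-refine-sd.yaml --train --gpu 0 \
    system.prompt_processor.prompt="a delicious hamburger" \
    system.renderer.context_type=cuda
```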
53 | -------------------------------------------------------------------------------- /extern/MVDream/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | */__pycache__/ 6 | 7 | # dataset-related, pre-trained models, 8 | vae_models/vqgan 9 | vae_models/*.gz 10 | vae_models/*.pt 11 | vae_models/*vqgan 12 | *.pt 13 | *.pth 14 | 15 | # log files 16 | log/*.log 17 | out* 18 | test_results 19 | err* 20 | 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | pip-wheel-metadata/ 40 | share/python-wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .nox/ 60 | .coverage 61 | .coverage.* 62 | .cache 63 | nosetests.xml 64 | coverage.xml 65 | *.cover 66 | *.py,cover 67 | .hypothesis/ 68 | .pytest_cache/ 69 | 70 | # Translations 71 | *.mo 72 | *.pot 73 | 74 | # Django stuff: 75 | *.log 76 | local_settings.py 77 | db.sqlite3 78 | db.sqlite3-journal 79 | 80 | # Flask stuff: 81 | instance/ 82 | .webassets-cache 83 | 84 | # Scrapy stuff: 85 | .scrapy 86 | 87 | # Sphinx documentation 88 | docs/_build/ 89 | 90 | # PyBuilder 91 | target/ 92 | 93 | # Jupyter Notebook 94 | .ipynb_checkpoints 95 | 96 | # IPython 97 | profile_default/ 98 | ipython_config.py 99 | 100 | # pyenv 101 | .python-version 102 | 103 | # pipenv 104 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 105 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 106 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 107 | # install all needed dependencies. 108 | #Pipfile.lock 109 | 110 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 111 | __pypackages__/ 112 | 113 | # Celery stuff 114 | celerybeat-schedule 115 | celerybeat.pid 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # Environments 121 | .env 122 | .venv 123 | env/ 124 | venv/ 125 | ENV/ 126 | env.bak/ 127 | venv.bak/ 128 | 129 | # Spyder project settings 130 | .spyderproject 131 | .spyproject 132 | 133 | # Rope project settings 134 | .ropeproject 135 | 136 | # mkdocs documentation 137 | /site 138 | 139 | # mypy 140 | .mypy_cache/ 141 | .dmypy.json 142 | dmypy.json 143 | 144 | # Pyre type checker 145 | .pyre/ 146 | 147 | *.zip 148 | *.pkl 149 | *.csv 150 | *.ckpt 151 | *.parquet 152 | 153 | *.whl 154 | *.th 155 | *.onnx -------------------------------------------------------------------------------- /extern/MVDream/LICENSE-CODE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 ByteDance 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /extern/MVDream/mvdream/__init__.py: -------------------------------------------------------------------------------- 1 | from .model_zoo import build_model -------------------------------------------------------------------------------- /extern/MVDream/mvdream/camera_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def create_camera_to_world_matrix(elevation, azimuth): 6 | elevation = np.radians(elevation) 7 | azimuth = np.radians(azimuth) 8 | # Convert elevation and azimuth angles to Cartesian coordinates on a unit sphere 9 | x = np.cos(elevation) * np.sin(azimuth) 10 | y = np.sin(elevation) 11 | z = np.cos(elevation) * np.cos(azimuth) 12 | 13 | # Calculate camera position, target, and up vectors 14 | camera_pos = np.array([x, y, z]) 15 | target = np.array([0, 0, 0]) 16 | up = np.array([0, 1, 0]) 17 | 18 | # Construct view matrix 19 | forward = target - camera_pos 20 | forward /= np.linalg.norm(forward) 21 | right = np.cross(forward, up) 22 | right /= np.linalg.norm(right) 23 | new_up = np.cross(right, forward) 24 | new_up /= np.linalg.norm(new_up) 25 | cam2world = np.eye(4) 26 | cam2world[:3, :3] = np.array([right, new_up, -forward]).T 27 | cam2world[:3, 3] = camera_pos 28 | return cam2world 29 | 30 | 31 | def convert_opengl_to_blender(camera_matrix): 32 | if isinstance(camera_matrix, np.ndarray): 33 | # Construct transformation matrix to convert from OpenGL space to Blender space 34 | flip_yz = np.array([[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) 35 | camera_matrix_blender = np.dot(flip_yz, camera_matrix) 36 | else: 37 | # Construct transformation matrix to convert from OpenGL space to Blender space 38 | flip_yz = torch.tensor([[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) 39 | if camera_matrix.ndim == 3: 40 | flip_yz = flip_yz.unsqueeze(0) 41 | camera_matrix_blender = torch.matmul(flip_yz.to(camera_matrix), camera_matrix) 42 | return camera_matrix_blender 43 | 44 | 45 | def normalize_camera(camera_matrix): 46 | ''' normalize the camera location onto a unit-sphere''' 47 | if isinstance(camera_matrix, np.ndarray): 48 | camera_matrix = camera_matrix.reshape(-1,4,4) 49 | translation = camera_matrix[:,:3,3] 50 | translation = translation / (np.linalg.norm(translation, axis=1, keepdims=True) + 1e-8) 51 | camera_matrix[:,:3,3] = translation 52 | else: 53 | camera_matrix = camera_matrix.reshape(-1,4,4) 54 | translation = camera_matrix[:,:3,3] 55 | translation = translation / (torch.norm(translation, dim=1, keepdim=True) + 1e-8) 56 | camera_matrix[:,:3,3] = translation 57 | return camera_matrix.reshape(-1,16) 58 | 59 | 60 | def get_camera(num_frames, elevation=15, azimuth_start=0, azimuth_span=360, blender_coord=True): 61 | angle_gap = azimuth_span / num_frames 62 | cameras = [] 63 | for azimuth in np.arange(azimuth_start, azimuth_span+azimuth_start, angle_gap): 64 | camera_matrix = create_camera_to_world_matrix(elevation, azimuth) 65 | if blender_coord: 66 | camera_matrix = convert_opengl_to_blender(camera_matrix) 67 | cameras.append(camera_matrix.flatten()) 68 | return torch.tensor(np.stack(cameras, 0)).float() -------------------------------------------------------------------------------- /extern/MVDream/mvdream/configs/sd-v1.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | target: mvdream.ldm.interface.LatentDiffusionInterface 3 | 
params: 4 | linear_start: 0.00085 5 | linear_end: 0.0120 6 | timesteps: 1000 7 | scale_factor: 0.18215 8 | parameterization: "eps" 9 | 10 | unet_config: 11 | target: mvdream.ldm.modules.diffusionmodules.openaimodel.MultiViewUNetModel 12 | params: 13 | image_size: 32 # unused 14 | in_channels: 4 15 | out_channels: 4 16 | model_channels: 320 17 | attention_resolutions: [ 4, 2, 1 ] 18 | num_res_blocks: 2 19 | channel_mult: [ 1, 2, 4, 4 ] 20 | num_heads: 8 21 | use_spatial_transformer: True 22 | transformer_depth: 1 23 | context_dim: 768 24 | use_checkpoint: False 25 | legacy: False 26 | camera_dim: 16 27 | 28 | first_stage_config: 29 | target: mvdream.ldm.models.autoencoder.AutoencoderKL 30 | params: 31 | embed_dim: 4 32 | monitor: val/rec_loss 33 | ddconfig: 34 | double_z: true 35 | z_channels: 4 36 | resolution: 256 37 | in_channels: 3 38 | out_ch: 3 39 | ch: 128 40 | ch_mult: 41 | - 1 42 | - 2 43 | - 4 44 | - 4 45 | num_res_blocks: 2 46 | attn_resolutions: [] 47 | dropout: 0.0 48 | lossconfig: 49 | target: torch.nn.Identity 50 | 51 | cond_stage_config: 52 | target: mvdream.ldm.modules.encoders.modules.FrozenCLIPEmbedder 53 | -------------------------------------------------------------------------------- /extern/MVDream/mvdream/configs/sd-v2-base.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | target: mvdream.ldm.interface.LatentDiffusionInterface 3 | params: 4 | linear_start: 0.00085 5 | linear_end: 0.0120 6 | timesteps: 1000 7 | scale_factor: 0.18215 8 | parameterization: "eps" 9 | 10 | unet_config: 11 | target: mvdream.ldm.modules.diffusionmodules.openaimodel.MultiViewUNetModel 12 | params: 13 | image_size: 32 # unused 14 | in_channels: 4 15 | out_channels: 4 16 | model_channels: 320 17 | attention_resolutions: [ 4, 2, 1 ] 18 | num_res_blocks: 2 19 | channel_mult: [ 1, 2, 4, 4 ] 20 | num_head_channels: 64 # need to fix for flash-attn 21 | use_spatial_transformer: True 22 | use_linear_in_transformer: True 23 | transformer_depth: 1 24 | context_dim: 1024 25 | use_checkpoint: False 26 | legacy: False 27 | camera_dim: 16 28 | 29 | first_stage_config: 30 | target: mvdream.ldm.models.autoencoder.AutoencoderKL 31 | params: 32 | embed_dim: 4 33 | monitor: val/rec_loss 34 | ddconfig: 35 | #attn_type: "vanilla-xformers" 36 | double_z: true 37 | z_channels: 4 38 | resolution: 256 39 | in_channels: 3 40 | out_ch: 3 41 | ch: 128 42 | ch_mult: 43 | - 1 44 | - 2 45 | - 4 46 | - 4 47 | num_res_blocks: 2 48 | attn_resolutions: [] 49 | dropout: 0.0 50 | lossconfig: 51 | target: torch.nn.Identity 52 | 53 | cond_stage_config: 54 | target: mvdream.ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 55 | params: 56 | freeze: True 57 | layer: "penultimate" -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/MVDream/mvdream/ldm/__init__.py -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/MVDream/mvdream/ldm/models/__init__.py -------------------------------------------------------------------------------- 
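Both configs above set camera_dim: 16, which is consistent with the flattened 4x4 camera-to-world matrices produced by camera_utils.get_camera shown earlier. A minimal usage sketch (illustration only, not part of the repository; it assumes the mvdream package under extern/MVDream has been installed via its setup.py so that the import path resolves):

from mvdream.camera_utils import get_camera

# Four evenly spaced azimuths (0, 90, 180, 270 degrees) at 15 degrees elevation.
# blender_coord=True applies the OpenGL-to-Blender axis flip (the function's default).
cameras = get_camera(num_frames=4, elevation=15, azimuth_start=0,
                     azimuth_span=360, blender_coord=True)
print(cameras.shape)  # torch.Size([4, 16]); each row is a flattened cam2world matrix

model_zoo.build_model below follows the same pattern for the configs themselves: OmegaConf.load on one of these YAML files, then instantiate_from_config(config.model).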
/extern/MVDream/mvdream/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/MVDream/mvdream/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/MVDream/mvdream/ldm/modules/__init__.py -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/MVDream/mvdream/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/MVDream/mvdream/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to(device=self.parameters.device) 34 | 35 | def sample(self): 36 | x = self.mean + self.std * torch.randn(self.mean.shape).to(device=self.parameters.device) 37 | return x 38 | 39 | def kl(self, other=None): 40 | if self.deterministic: 41 | return torch.Tensor([0.]) 42 | else: 43 | if other is None: 44 | return 0.5 * torch.sum(torch.pow(self.mean, 2) 45 | + self.var - 1.0 - self.logvar, 46 | dim=[1, 2, 3]) 47 | else: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean - other.mean, 2) / other.var 50 | + self.var / other.var - 1.0 - self.logvar + other.logvar, 51 | dim=[1, 2, 3]) 52 | 53 | def nll(self, sample, dims=[1,2,3]): 54 | if self.deterministic: 55 | return torch.Tensor([0.]) 56 | logtwopi = np.log(2.0 * np.pi) 57 | return 0.5 * torch.sum( 58 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 59 | dim=dims) 60 | 61 | def mode(self): 62 | return self.mean 63 | 64 | 65 | def normal_kl(mean1, logvar1, mean2, logvar2): 66 | """ 67 | source: 
https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 68 | Compute the KL divergence between two gaussians. 69 | Shapes are automatically broadcasted, so batches can be compared to 70 | scalars, among other use cases. 71 | """ 72 | tensor = None 73 | for obj in (mean1, logvar1, mean2, logvar2): 74 | if isinstance(obj, torch.Tensor): 75 | tensor = obj 76 | break 77 | assert tensor is not None, "at least one argument must be a Tensor" 78 | 79 | # Force variances to be Tensors. Broadcasting helps convert scalars to 80 | # Tensors, but it does not work for torch.exp(). 81 | logvar1, logvar2 = [ 82 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 83 | for x in (logvar1, logvar2) 84 | ] 85 | 86 | return 0.5 * ( 87 | -1.0 88 | + logvar2 89 | - logvar1 90 | + torch.exp(logvar1 - logvar2) 91 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 92 | ) 93 | -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates 14 | else torch.tensor(-1, dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | # remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.', '') 20 | self.m_name2s_name.update({name: s_name}) 21 | self.register_buffer(s_name, p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def reset_num_updates(self): 26 | del self.num_updates 27 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int)) 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 47 | else: 48 | assert not key in self.m_name2s_name 49 | 50 | def copy_to(self, model): 51 | m_param = dict(model.named_parameters()) 52 | shadow_params = dict(self.named_buffers()) 53 | for key in m_param: 54 | if m_param[key].requires_grad: 55 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 56 | else: 57 | assert not key in self.m_name2s_name 58 | 59 | def store(self, parameters): 60 | """ 61 | Save the current parameters for restoring later. 62 | Args: 63 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 64 | temporarily stored. 65 | """ 66 | self.collected_params = [param.clone() for param in parameters] 67 | 68 | def restore(self, parameters): 69 | """ 70 | Restore the parameters stored with the `store` method. 
71 | Useful to validate the model with EMA parameters without affecting the 72 | original optimization process. Store the parameters before the 73 | `copy_to` method. After validation (or model saving), use this to 74 | restore the former parameters. 75 | Args: 76 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 77 | updated with the stored parameters. 78 | """ 79 | for c_param, param in zip(self.collected_params, parameters): 80 | param.data.copy_(c_param.data) -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/MVDream/mvdream/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /extern/MVDream/mvdream/model_zoo.py: -------------------------------------------------------------------------------- 1 | ''' Utiliy functions to load pre-trained models more easily ''' 2 | import os 3 | import pkg_resources 4 | from omegaconf import OmegaConf 5 | 6 | import torch 7 | from huggingface_hub import hf_hub_download 8 | 9 | from mvdream.ldm.util import instantiate_from_config 10 | 11 | 12 | PRETRAINED_MODELS = { 13 | "sd-v2.1-base-4view": { 14 | "config": "sd-v2-base.yaml", 15 | "repo_id": "MVDream/MVDream", 16 | "filename": "sd-v2.1-base-4view.pt" 17 | }, 18 | "sd-v1.5-4view": { 19 | "config": "sd-v1.yaml", 20 | "repo_id": "MVDream/MVDream", 21 | "filename": "sd-v1.5-4view.pt" 22 | } 23 | } 24 | 25 | 26 | def get_config_file(config_path): 27 | cfg_file = pkg_resources.resource_filename( 28 | "mvdream", os.path.join("configs", config_path) 29 | ) 30 | if not os.path.exists(cfg_file): 31 | raise RuntimeError(f"Config {config_path} not available!") 32 | return cfg_file 33 | 34 | 35 | def build_model(model_name, ckpt_path=None, cache_dir=None): 36 | print("========building model=======") 37 | print(model_name, ckpt_path, cache_dir) 38 | if not model_name in PRETRAINED_MODELS: 39 | raise RuntimeError( 40 | f"Model name {model_name} is not a pre-trained model. 
Available models are:\n- " + \ 41 | "\n- ".join(PRETRAINED_MODELS.keys()) 42 | ) 43 | model_info = PRETRAINED_MODELS[model_name] 44 | 45 | # Instiantiate the model 46 | print(f"Loading model from config: {model_info['config']}") 47 | config_file = get_config_file(model_info["config"]) 48 | print("++++++++++++++++++++++ 1") 49 | config = OmegaConf.load(config_file) 50 | print("++++++++++++++++++++++ 2") 51 | model = instantiate_from_config(config.model) 52 | 53 | print("++++++++++++++++++++++") 54 | 55 | # Load pre-trained checkpoint from huggingface 56 | if not ckpt_path: 57 | ckpt_path = hf_hub_download( 58 | repo_id=model_info["repo_id"], 59 | filename=model_info["filename"], 60 | cache_dir=cache_dir 61 | ) 62 | print(f"Loading model from cache file: {ckpt_path}") 63 | model.load_state_dict(torch.load(ckpt_path, map_location="cpu")) 64 | return model 65 | -------------------------------------------------------------------------------- /extern/MVDream/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | imageio 3 | imageio-ffmpeg 4 | omegaconf 5 | einops 6 | transformers==4.27.1 7 | open-clip-torch==2.7.0 8 | gradio>=3.13.2 9 | xformers==0.0.16 10 | -------------------------------------------------------------------------------- /extern/MVDream/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='mvdream', 5 | version='0.0.1', 6 | description='Multi-view Diffusion Models', 7 | author="ByteDance", 8 | packages=find_packages(), 9 | package_data={"mvdream": ["configs/*.yaml"]} , 10 | install_requires=[ 11 | 'torch', 12 | 'numpy', 13 | 'tqdm', 14 | 'omegaconf', 15 | 'einops', 16 | 'huggingface_hub', 17 | "transformers", 18 | "open-clip-torch", 19 | ], 20 | ) 21 | -------------------------------------------------------------------------------- /extern/ldm_zero123/extras.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from contextlib import contextmanager 3 | from pathlib import Path 4 | 5 | import torch 6 | from omegaconf import OmegaConf 7 | 8 | from extern.ldm_zero123.util import instantiate_from_config 9 | 10 | 11 | @contextmanager 12 | def all_logging_disabled(highest_level=logging.CRITICAL): 13 | """ 14 | A context manager that will prevent any logging messages 15 | triggered during the body from being processed. 16 | 17 | :param highest_level: the maximum logging level in use. 18 | This would only need to be changed if a custom level greater than CRITICAL 19 | is defined. 20 | 21 | https://gist.github.com/simon-weber/7853144 22 | """ 23 | # two kind-of hacks here: 24 | # * can't get the highest logging level in effect => delegate to the user 25 | # * can't get the current module-level override => use an undocumented 26 | # (but non-private!) 
interface 27 | 28 | previous_level = logging.root.manager.disable 29 | 30 | logging.disable(highest_level) 31 | 32 | try: 33 | yield 34 | finally: 35 | logging.disable(previous_level) 36 | 37 | 38 | def load_training_dir(train_dir, device, epoch="last"): 39 | """Load a checkpoint and config from training directory""" 40 | train_dir = Path(train_dir) 41 | ckpt = list(train_dir.rglob(f"*{epoch}.ckpt")) 42 | assert len(ckpt) == 1, f"found {len(ckpt)} matching ckpt files" 43 | config = list(train_dir.rglob(f"*-project.yaml")) 44 | assert len(ckpt) > 0, f"didn't find any config in {train_dir}" 45 | if len(config) > 1: 46 | print(f"found {len(config)} matching config files") 47 | config = sorted(config)[-1] 48 | print(f"selecting {config}") 49 | else: 50 | config = config[0] 51 | 52 | config = OmegaConf.load(config) 53 | return load_model_from_config(config, ckpt[0], device) 54 | 55 | 56 | def load_model_from_config(config, ckpt, device="cpu", verbose=False): 57 | """Loads a model from config and a ckpt 58 | if config is a path will use omegaconf to load 59 | """ 60 | if isinstance(config, (str, Path)): 61 | config = OmegaConf.load(config) 62 | 63 | with all_logging_disabled(): 64 | print(f"Loading model from {ckpt}") 65 | pl_sd = torch.load(ckpt, map_location="cpu") 66 | global_step = pl_sd["global_step"] 67 | sd = pl_sd["state_dict"] 68 | model = instantiate_from_config(config.model) 69 | m, u = model.load_state_dict(sd, strict=False) 70 | if len(m) > 0 and verbose: 71 | print("missing keys:") 72 | print(m) 73 | if len(u) > 0 and verbose: 74 | print("unexpected keys:") 75 | model.to(device) 76 | model.eval() 77 | model.cond_stage_model.device = device 78 | return model 79 | -------------------------------------------------------------------------------- /extern/ldm_zero123/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/ldm_zero123/models/diffusion/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/models/diffusion/sampling_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def append_dims(x, target_dims): 6 | """Appends dimensions to the end of a tensor until it has target_dims dimensions. 7 | From https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/utils.py""" 8 | dims_to_append = target_dims - x.ndim 9 | if dims_to_append < 0: 10 | raise ValueError( 11 | f"input has {x.ndim} dims but target_dims is {target_dims}, which is less" 12 | ) 13 | return x[(...,) + (None,) * dims_to_append] 14 | 15 | 16 | def renorm_thresholding(x0, value): 17 | # renorm 18 | pred_max = x0.max() 19 | pred_min = x0.min() 20 | pred_x0 = (x0 - pred_min) / (pred_max - pred_min) # 0 ... 1 21 | pred_x0 = 2 * pred_x0 - 1.0 # -1 ... 1 22 | 23 | s = torch.quantile(rearrange(pred_x0, "b ... 
-> b (...)").abs(), value, dim=-1) 24 | s.clamp_(min=1.0) 25 | s = s.view(-1, *((1,) * (pred_x0.ndim - 1))) 26 | 27 | # clip by threshold 28 | # pred_x0 = pred_x0.clamp(-s, s) / s # needs newer pytorch # TODO bring back to pure-gpu with min/max 29 | 30 | # temporary hack: numpy on cpu 31 | pred_x0 = ( 32 | np.clip(pred_x0.cpu().numpy(), -s.cpu().numpy(), s.cpu().numpy()) 33 | / s.cpu().numpy() 34 | ) 35 | pred_x0 = torch.tensor(pred_x0).to(self.model.device) 36 | 37 | # re.renorm 38 | pred_x0 = (pred_x0 + 1.0) / 2.0 # 0 ... 1 39 | pred_x0 = (pred_max - pred_min) * pred_x0 + pred_min # orig range 40 | return pred_x0 41 | 42 | 43 | def norm_thresholding(x0, value): 44 | s = append_dims(x0.pow(2).flatten(1).mean(1).sqrt().clamp(min=value), x0.ndim) 45 | return x0 * (value / s) 46 | 47 | 48 | def spatial_norm_thresholding(x0, value): 49 | # b c h w 50 | s = x0.pow(2).mean(1, keepdim=True).sqrt().clamp(min=value) 51 | return x0 * (value / s) 52 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/ldm_zero123/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/ldm_zero123/modules/distributions/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to( 34 | device=self.parameters.device 35 | ) 36 | 37 | def sample(self): 38 | x = self.mean + self.std * torch.randn(self.mean.shape).to( 39 | device=self.parameters.device 40 | ) 41 | return x 42 | 43 | def kl(self, other=None): 44 | if self.deterministic: 45 | return torch.Tensor([0.0]) 46 | else: 47 | if other is None: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, 50 | dim=[1, 2, 3], 51 | ) 52 | else: 53 | return 0.5 * torch.sum( 54 | torch.pow(self.mean - other.mean, 2) / other.var 55 | + self.var / other.var 56 | - 1.0 57 | - self.logvar 58 | + other.logvar, 59 | dim=[1, 2, 3], 60 | ) 61 | 62 | def nll(self, sample, dims=[1, 2, 3]): 63 | if self.deterministic: 64 | return 
torch.Tensor([0.0]) 65 | logtwopi = np.log(2.0 * np.pi) 66 | return 0.5 * torch.sum( 67 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 68 | dim=dims, 69 | ) 70 | 71 | def mode(self): 72 | return self.mean 73 | 74 | 75 | def normal_kl(mean1, logvar1, mean2, logvar2): 76 | """ 77 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 78 | Compute the KL divergence between two gaussians. 79 | Shapes are automatically broadcasted, so batches can be compared to 80 | scalars, among other use cases. 81 | """ 82 | tensor = None 83 | for obj in (mean1, logvar1, mean2, logvar2): 84 | if isinstance(obj, torch.Tensor): 85 | tensor = obj 86 | break 87 | assert tensor is not None, "at least one argument must be a Tensor" 88 | 89 | # Force variances to be Tensors. Broadcasting helps convert scalars to 90 | # Tensors, but it does not work for torch.exp(). 91 | logvar1, logvar2 = [ 92 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 93 | for x in (logvar1, logvar2) 94 | ] 95 | 96 | return 0.5 * ( 97 | -1.0 98 | + logvar2 99 | - logvar1 100 | + torch.exp(logvar1 - logvar2) 101 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 102 | ) 103 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError("Decay must be between 0 and 1") 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer("decay", torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer( 14 | "num_updates", 15 | torch.tensor(0, dtype=torch.int) 16 | if use_num_upates 17 | else torch.tensor(-1, dtype=torch.int), 18 | ) 19 | 20 | for name, p in model.named_parameters(): 21 | if p.requires_grad: 22 | # remove as '.'-character is not allowed in buffers 23 | s_name = name.replace(".", "") 24 | self.m_name2s_name.update({name: s_name}) 25 | self.register_buffer(s_name, p.clone().detach().data) 26 | 27 | self.collected_params = [] 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_( 47 | one_minus_decay * (shadow_params[sname] - m_param[key]) 48 | ) 49 | else: 50 | assert not key in self.m_name2s_name 51 | 52 | def copy_to(self, model): 53 | m_param = dict(model.named_parameters()) 54 | shadow_params = dict(self.named_buffers()) 55 | for key in m_param: 56 | if m_param[key].requires_grad: 57 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 58 | else: 59 | assert not key in self.m_name2s_name 60 | 61 | def store(self, parameters): 62 | """ 63 | Save the current parameters for restoring later. 64 | Args: 65 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 66 | temporarily stored. 
67 | """ 68 | self.collected_params = [param.clone() for param in parameters] 69 | 70 | def restore(self, parameters): 71 | """ 72 | Restore the parameters stored with the `store` method. 73 | Useful to validate the model with EMA parameters without affecting the 74 | original optimization process. Store the parameters before the 75 | `copy_to` method. After validation (or model saving), use this to 76 | restore the former parameters. 77 | Args: 78 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 79 | updated with the stored parameters. 80 | """ 81 | for c_param, param in zip(self.collected_params, parameters): 82 | param.data.copy_(c_param.data) 83 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/ldm_zero123/modules/encoders/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/image_degradation/__init__.py: -------------------------------------------------------------------------------- 1 | from extern.ldm_zero123.modules.image_degradation.bsrgan import ( 2 | degradation_bsrgan_variant as degradation_fn_bsr, 3 | ) 4 | from extern.ldm_zero123.modules.image_degradation.bsrgan_light import ( 5 | degradation_bsrgan_variant as degradation_fn_bsr_light, 6 | ) 7 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from extern.ldm_zero123.modules.losses.contperceptual import LPIPSWithDiscriminator 2 | -------------------------------------------------------------------------------- /extern/ldm_zero123/thirdp/psp/id_loss.py: -------------------------------------------------------------------------------- 1 | # https://github.com/eladrich/pixel2style2pixel 2 | import torch 3 | from torch import nn 4 | 5 | from extern.ldm_zero123.thirdp.psp.model_irse import Backbone 6 | 7 | 8 | class IDFeatures(nn.Module): 9 | def __init__(self, model_path): 10 | super(IDFeatures, self).__init__() 11 | print("Loading ResNet ArcFace") 12 | self.facenet = Backbone( 13 | input_size=112, num_layers=50, drop_ratio=0.6, mode="ir_se" 14 | ) 15 | self.facenet.load_state_dict(torch.load(model_path, map_location="cpu")) 16 | self.face_pool = torch.nn.AdaptiveAvgPool2d((112, 112)) 17 | self.facenet.eval() 18 | 19 | def forward(self, x, crop=False): 20 | # Not sure of the image range here 21 | if crop: 22 | x = torch.nn.functional.interpolate(x, (256, 256), mode="area") 23 | x = x[:, :, 35:223, 32:220] 24 | x = self.face_pool(x) 25 | x_feats = self.facenet(x) 26 | return x_feats 27 | -------------------------------------------------------------------------------- /extern/ldm_zero123/thirdp/psp/model_irse.py: -------------------------------------------------------------------------------- 1 | # https://github.com/eladrich/pixel2style2pixel 2 | 3 | from torch.nn import ( 4 | BatchNorm1d, 5 | BatchNorm2d, 6 | Conv2d, 7 | Dropout, 8 | Linear, 9 | Module, 10 | PReLU, 11 | Sequential, 12 | ) 13 | 14 | from extern.ldm_zero123.thirdp.psp.helpers import ( 15 | Flatten, 16 | bottleneck_IR, 17 | bottleneck_IR_SE, 18 | get_blocks, 19 | l2_norm, 20 | ) 21 | 22 | """ 23 | Modified Backbone implementation from 
[TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch) 24 | """ 25 | 26 | 27 | class Backbone(Module): 28 | def __init__(self, input_size, num_layers, mode="ir", drop_ratio=0.4, affine=True): 29 | super(Backbone, self).__init__() 30 | assert input_size in [112, 224], "input_size should be 112 or 224" 31 | assert num_layers in [50, 100, 152], "num_layers should be 50, 100 or 152" 32 | assert mode in ["ir", "ir_se"], "mode should be ir or ir_se" 33 | blocks = get_blocks(num_layers) 34 | if mode == "ir": 35 | unit_module = bottleneck_IR 36 | elif mode == "ir_se": 37 | unit_module = bottleneck_IR_SE 38 | self.input_layer = Sequential( 39 | Conv2d(3, 64, (3, 3), 1, 1, bias=False), BatchNorm2d(64), PReLU(64) 40 | ) 41 | if input_size == 112: 42 | self.output_layer = Sequential( 43 | BatchNorm2d(512), 44 | Dropout(drop_ratio), 45 | Flatten(), 46 | Linear(512 * 7 * 7, 512), 47 | BatchNorm1d(512, affine=affine), 48 | ) 49 | else: 50 | self.output_layer = Sequential( 51 | BatchNorm2d(512), 52 | Dropout(drop_ratio), 53 | Flatten(), 54 | Linear(512 * 14 * 14, 512), 55 | BatchNorm1d(512, affine=affine), 56 | ) 57 | 58 | modules = [] 59 | for block in blocks: 60 | for bottleneck in block: 61 | modules.append( 62 | unit_module( 63 | bottleneck.in_channel, bottleneck.depth, bottleneck.stride 64 | ) 65 | ) 66 | self.body = Sequential(*modules) 67 | 68 | def forward(self, x): 69 | x = self.input_layer(x) 70 | x = self.body(x) 71 | x = self.output_layer(x) 72 | return l2_norm(x) 73 | 74 | 75 | def IR_50(input_size): 76 | """Constructs a ir-50 model.""" 77 | model = Backbone(input_size, num_layers=50, mode="ir", drop_ratio=0.4, affine=False) 78 | return model 79 | 80 | 81 | def IR_101(input_size): 82 | """Constructs a ir-101 model.""" 83 | model = Backbone( 84 | input_size, num_layers=100, mode="ir", drop_ratio=0.4, affine=False 85 | ) 86 | return model 87 | 88 | 89 | def IR_152(input_size): 90 | """Constructs a ir-152 model.""" 91 | model = Backbone( 92 | input_size, num_layers=152, mode="ir", drop_ratio=0.4, affine=False 93 | ) 94 | return model 95 | 96 | 97 | def IR_SE_50(input_size): 98 | """Constructs a ir_se-50 model.""" 99 | model = Backbone( 100 | input_size, num_layers=50, mode="ir_se", drop_ratio=0.4, affine=False 101 | ) 102 | return model 103 | 104 | 105 | def IR_SE_101(input_size): 106 | """Constructs a ir_se-101 model.""" 107 | model = Backbone( 108 | input_size, num_layers=100, mode="ir_se", drop_ratio=0.4, affine=False 109 | ) 110 | return model 111 | 112 | 113 | def IR_SE_152(input_size): 114 | """Constructs a ir_se-152 model.""" 115 | model = Backbone( 116 | input_size, num_layers=152, mode="ir_se", drop_ratio=0.4, affine=False 117 | ) 118 | return model 119 | -------------------------------------------------------------------------------- /keyboard.py: -------------------------------------------------------------------------------- 1 | import curses 2 | 3 | def interactive_mode(stdscr): 4 | a = 0 5 | b = 0 6 | 7 | curses.noecho() 8 | curses.cbreak() 9 | stdscr.keypad(1) 10 | 11 | stdscr.addstr(0, 10, "Press 'a', 'b' or 'q'...") 12 | stdscr.refresh() 13 | 14 | while True: 15 | key = stdscr.getch() 16 | if key == ord('a'): 17 | a += 1 18 | stdscr.addstr(1, 10, f"Variable 'a' incremented. Value: {a} ") # Added spaces to clear previous message 19 | stdscr.refresh() 20 | elif key == ord('b'): 21 | b += 1 22 | stdscr.addstr(1, 10, f"Variable 'b' incremented. Value: {b} ") 23 | stdscr.refresh() 24 | elif key == ord('q'): 25 | stdscr.addstr(1, 10, "Exiting interactive mode. 
") 26 | stdscr.refresh() 27 | break 28 | else: 29 | stdscr.addstr(1, 10, f"Unknown key: {chr(key)}. Press 'a', 'b' or 'q'... ") 30 | stdscr.refresh() 31 | 32 | # Run the function 33 | curses.wrapper(interactive_mode) 34 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | mypy 3 | pylint 4 | pre-commit 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=2.0.1 2 | boto3 3 | lightning==2.0.0 4 | omegaconf==2.3.0 5 | jaxtyping 6 | typeguard 7 | git+https://github.com/KAIR-BAIR/nerfacc.git@v0.5.2 8 | git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch 9 | diffusers 10 | transformers 11 | accelerate 12 | opencv-python 13 | tensorboard 14 | matplotlib 15 | imageio>=2.28.0 16 | imageio[ffmpeg] 17 | git+https://github.com/NVlabs/nvdiffrast.git 18 | libigl 19 | xatlas 20 | trimesh[easy] 21 | networkx 22 | pysdf 23 | PyMCubes 24 | wandb 25 | gradio 26 | git+https://github.com/ashawkey/envlight.git 27 | torchmetrics 28 | 29 | # deepfloyd 30 | xformers 31 | bitsandbytes 32 | sentencepiece 33 | safetensors 34 | huggingface_hub 35 | 36 | # for zero123 37 | einops 38 | kornia 39 | taming-transformers-rom1504 40 | git+https://github.com/openai/CLIP.git 41 | 42 | #controlnet 43 | controlnet_aux 44 | 45 | # mvdream 46 | open-clip-torch==2.7.0 47 | git+https://github.com/bytedance/MVDream 48 | 49 | git+https://github.com/graphdeco-inria/diff-gaussian-rasterization.git -------------------------------------------------------------------------------- /threestudio/__init__.py: -------------------------------------------------------------------------------- 1 | __modules__ = {} 2 | 3 | 4 | def register(name): 5 | def decorator(cls): 6 | __modules__[name] = cls 7 | return cls 8 | 9 | return decorator 10 | 11 | 12 | def find(name): 13 | return __modules__[name] 14 | 15 | 16 | ### grammar sugar for logging utilities ### 17 | import logging 18 | 19 | logger = logging.getLogger("pytorch_lightning") 20 | 21 | from pytorch_lightning.utilities.rank_zero import ( 22 | rank_zero_debug, 23 | rank_zero_info, 24 | rank_zero_only, 25 | ) 26 | 27 | debug = rank_zero_debug 28 | info = rank_zero_info 29 | 30 | 31 | @rank_zero_only 32 | def warn(*args, **kwargs): 33 | logger.warn(*args, **kwargs) 34 | 35 | 36 | from . import data, models, systems 37 | -------------------------------------------------------------------------------- /threestudio/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import co3d, image, multiview, uncond, edit_multiview, edit_multiview_gs 2 | -------------------------------------------------------------------------------- /threestudio/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | background, 3 | exporters, 4 | geometry, 5 | guidance, 6 | materials, 7 | prompt_processors, 8 | renderers, 9 | ) 10 | -------------------------------------------------------------------------------- /threestudio/models/background/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | base, 3 | neural_environment_map_background, 4 | solid_color_background, 5 | textured_background, 6 | ) 7 | -------------------------------------------------------------------------------- /threestudio/models/background/base.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.utils.base import BaseModule 10 | from threestudio.utils.typing import * 11 | 12 | 13 | class BaseBackground(BaseModule): 14 | @dataclass 15 | class Config(BaseModule.Config): 16 | pass 17 | 18 | cfg: Config 19 | 20 | def configure(self): 21 | pass 22 | 23 | def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]: 24 | raise NotImplementedError 25 | -------------------------------------------------------------------------------- /threestudio/models/background/neural_environment_map_background.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.background.base import BaseBackground 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("neural-environment-map-background") 16 | class NeuralEnvironmentMapBackground(BaseBackground): 17 | @dataclass 18 | class Config(BaseBackground.Config): 19 | n_output_dims: int = 3 20 | color_activation: str = "sigmoid" 21 | dir_encoding_config: dict = field( 22 | default_factory=lambda: {"otype": "SphericalHarmonics", "degree": 3} 23 | ) 24 | mlp_network_config: dict = field( 25 | default_factory=lambda: { 26 | "otype": "VanillaMLP", 27 | "activation": "ReLU", 28 | "n_neurons": 16, 29 | "n_hidden_layers": 2, 30 | } 31 | ) 32 | random_aug: bool = False 33 | random_aug_prob: float = 0.5 34 | share_aug_bg: bool = False 35 | eval_color: Optional[Tuple[float, float, float]] = None 36 | 37 | cfg: Config 38 | 39 | def configure(self) -> None: 40 | self.encoding = get_encoding(3, self.cfg.dir_encoding_config) 41 | self.network = get_mlp( 42 | self.encoding.n_output_dims, 43 | self.cfg.n_output_dims, 44 | self.cfg.mlp_network_config, 45 | ) 46 | self.cur_bg = None 47 | 48 | def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]: 49 | if not self.training and self.cfg.eval_color is not None: 50 | return torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to( 51 | dirs 52 | ) * torch.as_tensor(self.cfg.eval_color).to(dirs) 53 | # viewdirs must be normalized before passing to this function 54 | dirs = (dirs + 1.0) / 2.0 # (-1, 1) => (0, 1) 55 | dirs_embd = self.encoding(dirs.view(-1, 3)) 56 | color = self.network(dirs_embd).view(*dirs.shape[:-1], self.cfg.n_output_dims) 57 | color = get_activation(self.cfg.color_activation)(color) 58 | if ( 59 | self.training 60 | and self.cfg.random_aug 61 | and random.random() < self.cfg.random_aug_prob 62 | ): 63 | # use random background color with probability random_aug_prob 64 | n_color = 1 if self.cfg.share_aug_bg else dirs.shape[0] 65 | color = color * 0 + ( # prevent checking for unused parameters in DDP 66 | torch.rand(n_color, 1, 1, self.cfg.n_output_dims) 67 | .to(dirs) 68 | 
.expand(*dirs.shape[:-1], -1) 69 | ) 70 | self.cur_bg = color 71 | return color 72 | -------------------------------------------------------------------------------- /threestudio/models/background/solid_color_background.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.background.base import BaseBackground 10 | from threestudio.utils.typing import * 11 | 12 | 13 | @threestudio.register("solid-color-background") 14 | class SolidColorBackground(BaseBackground): 15 | @dataclass 16 | class Config(BaseBackground.Config): 17 | n_output_dims: int = 3 18 | color: Tuple = (1.0, 1.0, 1.0) 19 | learned: bool = False 20 | random_aug: bool = False 21 | random_aug_prob: float = 0.5 22 | 23 | cfg: Config 24 | 25 | def configure(self) -> None: 26 | self.env_color: Float[Tensor, "Nc"] 27 | if self.cfg.learned: 28 | self.env_color = nn.Parameter( 29 | torch.as_tensor(self.cfg.color, dtype=torch.float32) 30 | ) 31 | else: 32 | self.register_buffer( 33 | "env_color", torch.as_tensor(self.cfg.color, dtype=torch.float32) 34 | ) 35 | 36 | def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]: 37 | color = ( 38 | torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to(dirs) 39 | * self.env_color 40 | ) 41 | if ( 42 | self.training 43 | and self.cfg.random_aug 44 | and random.random() < self.cfg.random_aug_prob 45 | ): 46 | # use random background color with probability random_aug_prob 47 | color = color * 0 + ( # prevent checking for unused parameters in DDP 48 | torch.rand(dirs.shape[0], 1, 1, self.cfg.n_output_dims) 49 | .to(dirs) 50 | .expand(*dirs.shape[:-1], -1) 51 | ) 52 | return color 53 | -------------------------------------------------------------------------------- /threestudio/models/background/textured_background.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | import threestudio 8 | from threestudio.models.background.base import BaseBackground 9 | from threestudio.utils.ops import get_activation 10 | from threestudio.utils.typing import * 11 | 12 | 13 | @threestudio.register("textured-background") 14 | class TexturedBackground(BaseBackground): 15 | @dataclass 16 | class Config(BaseBackground.Config): 17 | n_output_dims: int = 3 18 | height: int = 64 19 | width: int = 64 20 | color_activation: str = "sigmoid" 21 | 22 | cfg: Config 23 | 24 | def configure(self) -> None: 25 | self.texture = nn.Parameter( 26 | torch.randn((1, self.cfg.n_output_dims, self.cfg.height, self.cfg.width)) 27 | ) 28 | 29 | def spherical_xyz_to_uv(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B 2"]: 30 | x, y, z = dirs[..., 0], dirs[..., 1], dirs[..., 2] 31 | xy = (x**2 + y**2) ** 0.5 32 | u = torch.atan2(xy, z) / torch.pi 33 | v = torch.atan2(y, x) / (torch.pi * 2) + 0.5 34 | uv = torch.stack([u, v], -1) 35 | return uv 36 | 37 | def forward(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B Nc"]: 38 | dirs_shape = dirs.shape[:-1] 39 | uv = self.spherical_xyz_to_uv(dirs.reshape(-1, dirs.shape[-1])) 40 | uv = 2 * uv - 1 # rescale to [-1, 1] for grid_sample 41 | uv = uv.reshape(1, -1, 1, 2) 42 | color = ( 43 | F.grid_sample( 44 | self.texture, 45 | uv, 46 | mode="bilinear", 47 | 
padding_mode="reflection", 48 | align_corners=False, 49 | ) 50 | .reshape(self.cfg.n_output_dims, -1) 51 | .T.reshape(*dirs_shape, self.cfg.n_output_dims) 52 | ) 53 | color = get_activation(self.cfg.color_activation)(color) 54 | return color 55 | -------------------------------------------------------------------------------- /threestudio/models/exporters/__init__.py: -------------------------------------------------------------------------------- 1 | from . import base, mesh_exporter 2 | -------------------------------------------------------------------------------- /threestudio/models/exporters/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import threestudio 4 | from threestudio.models.background.base import BaseBackground 5 | from threestudio.models.geometry.base import BaseImplicitGeometry 6 | from threestudio.models.materials.base import BaseMaterial 7 | from threestudio.utils.base import BaseObject 8 | from threestudio.utils.typing import * 9 | 10 | 11 | @dataclass 12 | class ExporterOutput: 13 | save_name: str 14 | save_type: str 15 | params: Dict[str, Any] 16 | 17 | 18 | class Exporter(BaseObject): 19 | @dataclass 20 | class Config(BaseObject.Config): 21 | save_video: bool = False 22 | 23 | cfg: Config 24 | 25 | def configure( 26 | self, 27 | geometry: BaseImplicitGeometry, 28 | material: BaseMaterial, 29 | background: BaseBackground, 30 | ) -> None: 31 | @dataclass 32 | class SubModules: 33 | geometry: BaseImplicitGeometry 34 | material: BaseMaterial 35 | background: BaseBackground 36 | 37 | self.sub_modules = SubModules(geometry, material, background) 38 | 39 | @property 40 | def geometry(self) -> BaseImplicitGeometry: 41 | return self.sub_modules.geometry 42 | 43 | @property 44 | def material(self) -> BaseMaterial: 45 | return self.sub_modules.material 46 | 47 | @property 48 | def background(self) -> BaseBackground: 49 | return self.sub_modules.background 50 | 51 | def __call__(self, *args, **kwargs) -> List[ExporterOutput]: 52 | raise NotImplementedError 53 | 54 | 55 | @threestudio.register("dummy-exporter") 56 | class DummyExporter(Exporter): 57 | def __call__(self, *args, **kwargs) -> List[ExporterOutput]: 58 | # DummyExporter does not export anything 59 | return [] 60 | -------------------------------------------------------------------------------- /threestudio/models/geometry/__init__.py: -------------------------------------------------------------------------------- 1 | from . import base, implicit_sdf, implicit_volume, tetrahedra_sdf_grid, volume_grid, gaussian, implicit_volume_edit 2 | -------------------------------------------------------------------------------- /threestudio/models/guidance/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | controlnet_guidance, 3 | deep_floyd_guidance, 4 | instructpix2pix_guidance, 5 | stable_diffusion_guidance, 6 | stable_diffusion_vsd_guidance, 7 | zero123_guidance, 8 | multiview_diffusion_guidance, 9 | stable_diffusion_unified_guidance, 10 | # zero123_unified_guidance, 11 | deep_floyd_guidance_stage2, 12 | pixart_guidance, 13 | ) 14 | -------------------------------------------------------------------------------- /threestudio/models/materials/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | base, 3 | diffuse_with_point_light_material, 4 | hybrid_rgb_latent_material, 5 | neural_radiance_material, 6 | no_material, 7 | pbr_material, 8 | sd_latent_adapter_material, 9 | ) 10 | -------------------------------------------------------------------------------- /threestudio/models/materials/base.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.utils.base import BaseModule 10 | from threestudio.utils.typing import * 11 | 12 | 13 | class BaseMaterial(BaseModule): 14 | @dataclass 15 | class Config(BaseModule.Config): 16 | pass 17 | 18 | cfg: Config 19 | requires_normal: bool = False 20 | requires_tangent: bool = False 21 | 22 | def configure(self): 23 | pass 24 | 25 | def forward(self, *args, **kwargs) -> Float[Tensor, "*B 3"]: 26 | raise NotImplementedError 27 | 28 | def export(self, *args, **kwargs) -> Dict[str, Any]: 29 | return {} 30 | -------------------------------------------------------------------------------- /threestudio/models/materials/hybrid_rgb_latent_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import dot, get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("hybrid-rgb-latent-material") 16 | class HybridRGBLatentMaterial(BaseMaterial): 17 | @dataclass 18 | class Config(BaseMaterial.Config): 19 | n_output_dims: int = 3 20 | color_activation: str = "sigmoid" 21 | requires_normal: bool = True 22 | 23 | cfg: Config 24 | 25 | def configure(self) -> None: 26 | self.requires_normal = self.cfg.requires_normal 27 | 28 | def forward( 29 | self, features: Float[Tensor, "B ... Nf"], **kwargs 30 | ) -> Float[Tensor, "B ... Nc"]: 31 | assert ( 32 | features.shape[-1] == self.cfg.n_output_dims 33 | ), f"Expected {self.cfg.n_output_dims} output dims, only got {features.shape[-1]} dims input." 
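        # Only the first three feature channels are treated as RGB and squashed by the
        # configured activation below; any remaining channels pass through unchanged
        # as latent features (hence the "hybrid" RGB + latent material).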
34 | color = features 35 | color[..., :3] = get_activation(self.cfg.color_activation)(color[..., :3]) 36 | return color 37 | -------------------------------------------------------------------------------- /threestudio/models/materials/neural_radiance_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import dot, get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("neural-radiance-material") 16 | class NeuralRadianceMaterial(BaseMaterial): 17 | @dataclass 18 | class Config(BaseMaterial.Config): 19 | input_feature_dims: int = 8 20 | color_activation: str = "sigmoid" 21 | dir_encoding_config: dict = field( 22 | default_factory=lambda: {"otype": "SphericalHarmonics", "degree": 3} 23 | ) 24 | mlp_network_config: dict = field( 25 | default_factory=lambda: { 26 | "otype": "FullyFusedMLP", 27 | "activation": "ReLU", 28 | "n_neurons": 16, 29 | "n_hidden_layers": 2, 30 | } 31 | ) 32 | 33 | cfg: Config 34 | 35 | def configure(self) -> None: 36 | self.encoding = get_encoding(3, self.cfg.dir_encoding_config) 37 | self.n_input_dims = self.cfg.input_feature_dims + self.encoding.n_output_dims # type: ignore 38 | self.network = get_mlp(self.n_input_dims, 3, self.cfg.mlp_network_config) 39 | 40 | def forward( 41 | self, 42 | features: Float[Tensor, "*B Nf"], 43 | viewdirs: Float[Tensor, "*B 3"], 44 | **kwargs, 45 | ) -> Float[Tensor, "*B 3"]: 46 | # viewdirs and normals must be normalized before passing to this function 47 | viewdirs = (viewdirs + 1.0) / 2.0 # (-1, 1) => (0, 1) 48 | viewdirs_embd = self.encoding(viewdirs.view(-1, 3)) 49 | network_inp = torch.cat( 50 | [features.view(-1, features.shape[-1]), viewdirs_embd], dim=-1 51 | ) 52 | color = self.network(network_inp).view(*features.shape[:-1], 3) 53 | color = get_activation(self.cfg.color_activation)(color) 54 | return color 55 | -------------------------------------------------------------------------------- /threestudio/models/materials/no_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import dot, get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("no-material") 16 | class NoMaterial(BaseMaterial): 17 | @dataclass 18 | class Config(BaseMaterial.Config): 19 | n_output_dims: int = 3 20 | color_activation: str = "sigmoid" 21 | input_feature_dims: Optional[int] = None 22 | mlp_network_config: Optional[dict] = None 23 | requires_normal: bool = False 24 | 25 | cfg: Config 26 | 27 | def configure(self) -> None: 28 | self.use_network = False 29 | if ( 30 | self.cfg.input_feature_dims is not None 31 | and self.cfg.mlp_network_config is not None 32 | ): 33 | self.network = get_mlp( 34 | self.cfg.input_feature_dims, 35 | self.cfg.n_output_dims, 36 | self.cfg.mlp_network_config, 37 | ) 38 | self.use_network = True 39 | 
self.requires_normal = self.cfg.requires_normal 40 | 41 | def forward( 42 | self, features: Float[Tensor, "B ... Nf"], **kwargs 43 | ) -> Float[Tensor, "B ... Nc"]: 44 | if not self.use_network: 45 | assert ( 46 | features.shape[-1] == self.cfg.n_output_dims 47 | ), f"Expected {self.cfg.n_output_dims} output dims, only got {features.shape[-1]} dims input." 48 | color = get_activation(self.cfg.color_activation)(features) 49 | else: 50 | color = self.network(features.view(-1, features.shape[-1])).view( 51 | *features.shape[:-1], self.cfg.n_output_dims 52 | ) 53 | color = get_activation(self.cfg.color_activation)(color) 54 | return color 55 | 56 | def export(self, features: Float[Tensor, "*N Nf"], **kwargs) -> Dict[str, Any]: 57 | color = self(features, **kwargs).clamp(0, 1) 58 | assert color.shape[-1] >= 3, "Output color must have at least 3 channels" 59 | if color.shape[-1] > 3: 60 | threestudio.warn( 61 | "Output color has >3 channels, treating the first 3 as RGB" 62 | ) 63 | return {"albedo": color[..., :3]} 64 | -------------------------------------------------------------------------------- /threestudio/models/materials/no_material_backup.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import dot, get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("no-material") 16 | class NoMaterial(BaseMaterial): 17 | @dataclass 18 | class Config(BaseMaterial.Config): 19 | n_output_dims: int = 3 20 | color_activation: str = "sigmoid" 21 | input_feature_dims: Optional[int] = None 22 | mlp_network_config: Optional[dict] = None 23 | 24 | cfg: Config 25 | 26 | def configure(self) -> None: 27 | self.use_network = False 28 | if ( 29 | self.cfg.input_feature_dims is not None 30 | and self.cfg.mlp_network_config is not None 31 | ): 32 | self.network = get_mlp( 33 | self.cfg.input_feature_dims, 34 | self.cfg.n_output_dims, 35 | self.cfg.mlp_network_config, 36 | ) 37 | self.use_network = True 38 | 39 | def forward( 40 | self, features: Float[Tensor, "B ... Nf"], **kwargs 41 | ) -> Float[Tensor, "B ... Nc"]: 42 | if not self.use_network: 43 | assert ( 44 | features.shape[-1] == self.cfg.n_output_dims 45 | ), f"Expected {self.cfg.n_output_dims} output dims, only got {features.shape[-1]} dims input." 
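            # Without an MLP head, the geometry features are used directly as the
            # output color channels and only mapped through the activation function.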
46 | color = get_activation(self.cfg.color_activation)(features) 47 | else: 48 | color = self.network(features.view(-1, features.shape[-1])).view( 49 | *features.shape[:-1], self.cfg.n_output_dims 50 | ) 51 | color = get_activation(self.cfg.color_activation)(color) 52 | return color 53 | 54 | def export(self, features: Float[Tensor, "*N Nf"], **kwargs) -> Dict[str, Any]: 55 | color = self(features, **kwargs).clamp(0, 1) 56 | assert color.shape[-1] >= 3, "Output color must have at least 3 channels" 57 | if color.shape[-1] > 3: 58 | threestudio.warn( 59 | "Output color has >3 channels, treating the first 3 as RGB" 60 | ) 61 | return {"albedo": color[..., :3]} 62 | -------------------------------------------------------------------------------- /threestudio/models/materials/sd_latent_adapter_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.utils.typing import * 11 | 12 | 13 | @threestudio.register("sd-latent-adapter-material") 14 | class StableDiffusionLatentAdapterMaterial(BaseMaterial): 15 | @dataclass 16 | class Config(BaseMaterial.Config): 17 | pass 18 | 19 | cfg: Config 20 | 21 | def configure(self) -> None: 22 | adapter = nn.Parameter( 23 | torch.as_tensor( 24 | [ 25 | # R G B 26 | [0.298, 0.207, 0.208], # L1 27 | [0.187, 0.286, 0.173], # L2 28 | [-0.158, 0.189, 0.264], # L3 29 | [-0.184, -0.271, -0.473], # L4 30 | ] 31 | ) 32 | ) 33 | self.register_parameter("adapter", adapter) 34 | 35 | def forward( 36 | self, features: Float[Tensor, "B ... 4"], **kwargs 37 | ) -> Float[Tensor, "B ... 3"]: 38 | assert features.shape[-1] == 4 39 | color = features @ self.adapter 40 | color = (color + 1) / 2 41 | color = color.clamp(0.0, 1.0) 42 | return color 43 | -------------------------------------------------------------------------------- /threestudio/models/prompt_processors/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | base, 3 | deepfloyd_prompt_processor, 4 | dummy_prompt_processor, 5 | stable_diffusion_prompt_processor, 6 | pixart_prompt_processor, 7 | ) 8 | -------------------------------------------------------------------------------- /threestudio/models/prompt_processors/deepfloyd_prompt_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from dataclasses import dataclass 4 | 5 | import torch 6 | import torch.nn as nn 7 | from diffusers import IFPipeline 8 | from transformers import T5EncoderModel, T5Tokenizer 9 | 10 | import threestudio 11 | from threestudio.models.prompt_processors.base import PromptProcessor, hash_prompt 12 | from threestudio.utils.misc import cleanup 13 | from threestudio.utils.typing import * 14 | 15 | 16 | @threestudio.register("deep-floyd-prompt-processor") 17 | class DeepFloydPromptProcessor(PromptProcessor): 18 | @dataclass 19 | class Config(PromptProcessor.Config): 20 | pretrained_model_name_or_path: str = "DeepFloyd/IF-I-XL-v1.0" 21 | 22 | cfg: Config 23 | 24 | ### these functions are unused, kept for debugging ### 25 | def configure_text_encoder(self) -> None: 26 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 27 | self.text_encoder = T5EncoderModel.from_pretrained( 28 | self.cfg.pretrained_model_name_or_path, 29 | subfolder="text_encoder", 30 | load_in_8bit=True, 31 | variant="8bit", 32 | device_map="auto", 33 | ) # FIXME: behavior of auto device map in multi-GPU training 34 | self.pipe = IFPipeline.from_pretrained( 35 | self.cfg.pretrained_model_name_or_path, 36 | text_encoder=self.text_encoder, # pass the previously instantiated 8bit text encoder 37 | unet=None, 38 | local_files_only=True 39 | ) 40 | 41 | def destroy_text_encoder(self) -> None: 42 | del self.text_encoder 43 | del self.pipe 44 | cleanup() 45 | 46 | def get_text_embeddings( 47 | self, prompt: Union[str, List[str]], negative_prompt: Union[str, List[str]] 48 | ) -> Tuple[Float[Tensor, "B 77 4096"], Float[Tensor, "B 77 4096"]]: 49 | text_embeddings, uncond_text_embeddings = self.pipe.encode_prompt( 50 | prompt=prompt, negative_prompt=negative_prompt, device=self.device 51 | ) 52 | return text_embeddings, uncond_text_embeddings 53 | 54 | ### 55 | 56 | @staticmethod 57 | def spawn_func(pretrained_model_name_or_path, prompts, cache_dir): 58 | max_length = 77 59 | tokenizer = T5Tokenizer.from_pretrained( 60 | pretrained_model_name_or_path, subfolder="tokenizer" 61 | ) 62 | print("==== pretrained_model_name_or_path ===", pretrained_model_name_or_path) 63 | text_encoder = T5EncoderModel.from_pretrained( 64 | pretrained_model_name_or_path, 65 | subfolder="text_encoder", 66 | torch_dtype=torch.float16, # suppress warning 67 | load_in_8bit=True, 68 | variant="8bit", 69 | device_map="auto", 70 | ) 71 | with torch.no_grad(): 72 | text_inputs = tokenizer( 73 | prompts, 74 | padding="max_length", 75 | max_length=max_length, 76 | truncation=True, 77 | add_special_tokens=True, 78 | return_tensors="pt", 79 | ) 80 | text_input_ids = text_inputs.input_ids 81 | attention_mask = text_inputs.attention_mask 82 | text_embeddings = text_encoder( 83 | text_input_ids, 84 | attention_mask=attention_mask, 85 | ) 86 | text_embeddings = text_embeddings[0] 87 | 88 | for prompt, embedding in zip(prompts, text_embeddings): 89 | torch.save( 90 | embedding, 91 | os.path.join( 92 | cache_dir, 93 | f"{hash_prompt(pretrained_model_name_or_path, prompt)}.pt", 94 | ), 95 | ) 96 | 97 | del text_encoder 98 | 
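A minimal sketch, assuming only the cache layout used by spawn_func above, of how a saved embedding can be read back for a given prompt; load_cached_embedding is a hypothetical helper, not part of this module:

    import os
    import torch
    from threestudio.models.prompt_processors.base import hash_prompt

    def load_cached_embedding(cache_dir, pretrained_model_name_or_path, prompt):
        # spawn_func stores one tensor per prompt (77 tokens x 4096 dims,
        # per the annotations in get_text_embeddings) under a hash of
        # (model name, prompt)
        path = os.path.join(
            cache_dir, f"{hash_prompt(pretrained_model_name_or_path, prompt)}.pt"
        )
        return torch.load(path, map_location="cpu")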
-------------------------------------------------------------------------------- /threestudio/models/prompt_processors/dummy_prompt_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from dataclasses import dataclass 4 | 5 | import threestudio 6 | from threestudio.models.prompt_processors.base import PromptProcessor, hash_prompt 7 | from threestudio.utils.misc import cleanup 8 | from threestudio.utils.typing import * 9 | 10 | 11 | @threestudio.register("dummy-prompt-processor") 12 | class DummyPromptProcessor(PromptProcessor): 13 | @dataclass 14 | class Config(PromptProcessor.Config): 15 | pretrained_model_name_or_path: str = "" 16 | prompt: str = "" 17 | 18 | cfg: Config 19 | -------------------------------------------------------------------------------- /threestudio/models/renderers/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | base, 3 | deferred_volume_renderer, 4 | gan_volume_renderer, 5 | nerf_volume_renderer, 6 | neus_volume_renderer, 7 | nvdiff_rasterizer, 8 | patch_renderer, 9 | diff_gaussian_rasterizer, 10 | gsgen_renderer, 11 | magic123_renderer, 12 | threestudio_renderer, 13 | ) 14 | -------------------------------------------------------------------------------- /threestudio/models/renderers/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import nerfacc 4 | import torch 5 | import torch.nn.functional as F 6 | 7 | import threestudio 8 | from threestudio.models.background.base import BaseBackground 9 | from threestudio.models.geometry.base import BaseImplicitGeometry 10 | from threestudio.models.materials.base import BaseMaterial 11 | from threestudio.utils.base import BaseModule 12 | from threestudio.utils.typing import * 13 | 14 | 15 | class Renderer(BaseModule): 16 | @dataclass 17 | class Config(BaseModule.Config): 18 | radius: float = 1.0 19 | 20 | cfg: Config 21 | 22 | def configure( 23 | self, 24 | geometry: BaseImplicitGeometry, 25 | material: BaseMaterial, 26 | background: BaseBackground, 27 | ) -> None: 28 | # keep references to submodules using namedtuple, avoid being registered as modules 29 | @dataclass 30 | class SubModules: 31 | geometry: BaseImplicitGeometry 32 | material: BaseMaterial 33 | background: BaseBackground 34 | 35 | self.sub_modules = SubModules(geometry, material, background) 36 | 37 | # set up bounding box 38 | self.bbox: Float[Tensor, "2 3"] 39 | self.register_buffer( 40 | "bbox", 41 | torch.as_tensor( 42 | [ 43 | [-self.cfg.radius, -self.cfg.radius, -self.cfg.radius], 44 | [self.cfg.radius, self.cfg.radius, self.cfg.radius], 45 | ], 46 | dtype=torch.float32, 47 | ), 48 | ) 49 | 50 | def forward(self, *args, **kwargs) -> Dict[str, Any]: 51 | raise NotImplementedError 52 | 53 | @property 54 | def geometry(self) -> BaseImplicitGeometry: 55 | return self.sub_modules.geometry 56 | 57 | @property 58 | def material(self) -> BaseMaterial: 59 | return self.sub_modules.material 60 | 61 | @property 62 | def background(self) -> BaseBackground: 63 | return self.sub_modules.background 64 | 65 | def set_geometry(self, geometry: BaseImplicitGeometry) -> None: 66 | self.sub_modules.geometry = geometry 67 | 68 | def set_material(self, material: BaseMaterial) -> None: 69 | self.sub_modules.material = material 70 | 71 | def set_background(self, background: BaseBackground) -> None: 72 | self.sub_modules.background = background 73 | 74 | 
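# Note on the SubModules wrapper used in Renderer.configure(): assigning
# geometry/material/background directly as attributes of this nn.Module would
# register them as child modules, duplicating their parameters in the
# renderer's state_dict and optimizer parameter groups. Holding them inside a
# plain dataclass keeps them as references only, so the owning system stays
# responsible for their parameters and checkpointing.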
75 | class VolumeRenderer(Renderer): 76 | pass 77 | 78 | 79 | class Rasterizer(Renderer): 80 | pass 81 | -------------------------------------------------------------------------------- /threestudio/models/renderers/deferred_volume_renderer.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | import threestudio 7 | from threestudio.models.renderers.base import VolumeRenderer 8 | 9 | 10 | class DeferredVolumeRenderer(VolumeRenderer): 11 | pass 12 | -------------------------------------------------------------------------------- /threestudio/models/renderers/gsgen_renderer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import sys 4 | import threestudio 5 | from threestudio.utils.typing import * 6 | from gs.gaussian_splatting import GaussianSplattingRenderer 7 | 8 | import numpy as np 9 | 10 | from torchvision.utils import save_image 11 | import pdb 12 | from omegaconf import OmegaConf 13 | 14 | @threestudio.register("gs-renderer") 15 | class GSRenderer(nn.Module): 16 | def __init__(self, ckpt=None): 17 | super(GSRenderer, self).__init__() 18 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 19 | if ckpt is None: 20 | ckpt = "path_to_your_ckpt.pt" 21 | ckpt = torch.load(ckpt, map_location="cpu") 22 | cfg = OmegaConf.create(ckpt["cfg"]) 23 | self.renderer = GaussianSplattingRenderer.load( 24 | cfg.renderer, ckpt["params"] 25 | ).to(device) 26 | cfg.type = 'fixed' 27 | cfg.color = [0.5, 0.5, 0.5] 28 | cfg.random_aug = False 29 | cfg.random_aug_prob = 0. 30 | self.renderer.setup_bg(cfg) 31 | print("===== [NOTE]: set gs bg color to 0.5 ====== ") 32 | self.cfg = cfg 33 | 34 | def forward( 35 | self, 36 | sampled_cameras, 37 | cam_id=None, 38 | gt_img=None, 39 | **kwargs 40 | ) -> Dict[str, Float[Tensor, "..."]]: 41 | with torch.cuda.amp.autocast(enabled=False): 42 | # rotate 43 | c2w = sampled_cameras['c2w'] 44 | c2w = torch.cat( 45 | [c2w, torch.zeros_like(c2w[:, :1])], dim=1 46 | ) 47 | c2w[:, 3, 3] = 1.0 48 | trans = torch.zeros_like(c2w).type(c2w.dtype) # b, 4, 4 49 | trans[:, 3, 3] = 1.0 50 | 51 | # trans[:, 0, 0] = -1. 52 | # trans[:, 1, 1] = -1. 53 | # trans[:, 2, 2] = 1. 54 | 55 | trans[:, 0, 0] = 0. 56 | trans[:, 0, 1] = -1. 57 | trans[:, 1, 0] = 1. 58 | trans[:, 1, 1] = 0. 59 | trans[:, 2, 2] = 1. 
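# The block above builds a homogeneous transform whose rotation part is
# [[0, -1, 0], [1, 0, 0], [0, 0, 1]], i.e. a 90-degree rotation about the
# world z axis; left-multiplying c2w by it (below) rotates the sampled camera
# poses in azimuth, presumably to reconcile the gsgen camera convention with
# the one used here. The commented-out block is an alternative that flips the
# sign of the x and y axes instead.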
60 | 61 | c2w = torch.matmul(trans, c2w) 62 | sampled_cameras['c2w'] = c2w[:, :3].cuda() 63 | out = self.renderer(sampled_cameras, self.cfg.use_bg, self.cfg.rgb_only) 64 | comp_rgb = out["rgb"] 65 | # save_image(out['rgb'].permute(0, 3, 1, 2), f'debug_data/gsgen_gt.png') 66 | 67 | return {"comp_rgb": comp_rgb} -------------------------------------------------------------------------------- /threestudio/models/renderers/magic123_renderer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import sys 4 | import threestudio 5 | from threestudio.utils.typing import * 6 | 7 | from threestudio.utils.config import ExperimentConfig, load_config 8 | import numpy as np 9 | from torchvision.utils import save_image 10 | import pdb 11 | from omegaconf import OmegaConf 12 | from dataclasses import dataclass, field 13 | 14 | @threestudio.register("magic123-renderer") 15 | class Magic123Renderer(nn.Module): 16 | def __init__(self, ckpt=None): 17 | super(Magic123Renderer, self).__init__() 18 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 19 | ckpt = "path_to_your_checkpoint.ckpt" if ckpt is None else ckpt 20 | cfg_path = "parsed.yaml" 21 | # parse YAML config to OmegaConf 22 | cfg: ExperimentConfig 23 | # cfg = load_config(args.config, cli_args=extras, n_gpus=n_gpus) 24 | cfg = load_config(cfg_path) 25 | self.cfg = cfg.system 26 | ckpt = torch.load(ckpt, map_location="cpu") 27 | self.geometry = threestudio.find(self.cfg.geometry_type)(self.cfg.geometry) 28 | self.material = threestudio.find(self.cfg.material_type)(self.cfg.material) if self.cfg.material_type != 'none' else None 29 | self.background = threestudio.find(self.cfg.background_type)( 30 | self.cfg.background 31 | ) if self.cfg.background_type != 'none' else None 32 | self.renderer = threestudio.find(self.cfg.renderer_type)( 33 | self.cfg.renderer, 34 | geometry=self.geometry, 35 | material=self.material, 36 | background=self.background, 37 | ) 38 | ckpt['state_dict']['background.env_color'] = torch.tensor([0.5, 0.5, 0.5]) 39 | 40 | self.load_state_dict(ckpt['state_dict'], strict=True) 41 | self.geometry.encoding.encoding.disable_mask = True 42 | 43 | 44 | def forward( 45 | self, 46 | **kwargs 47 | ) -> Dict[str, Float[Tensor, "..."]]: 48 | # with torch.no_grad(): 49 | out = self.renderer(**kwargs) 50 | comp_rgb = out["comp_rgb"] 51 | save_image(out['comp_rgb'].permute(0, 3, 1, 2), f'debug_data/magic123_gt.png') 52 | 53 | return {"comp_rgb": comp_rgb} -------------------------------------------------------------------------------- /threestudio/models/renderers/threestudio_renderer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import sys 4 | import threestudio 5 | from threestudio.utils.typing import * 6 | from threestudio.utils.config import ExperimentConfig, load_config 7 | from torchvision.utils import save_image 8 | 9 | @threestudio.register("threestudio-renderer") 10 | class ThreestudioRenderer(nn.Module): 11 | def __init__(self, ckpt=None): 12 | super(ThreestudioRenderer, self).__init__() 13 | ckpt_root = 'path_to_your_ckpt_root' 14 | ckpt = ckpt_root + '/ckpts/last.ckpt' 15 | ckpt = torch.load(ckpt, map_location="cpu") 16 | 17 | cfg_path = ckpt_root + '/configs/parsed.yaml' 18 | cfg = load_config(cfg_path) 19 | self.cfg = cfg.system 20 | 21 | self.configure() 22 | 23 | self.load_state_dict(ckpt['state_dict'], strict=False) 24 | print(f"==== loaded state 
dict from {ckpt_root} ====") 25 | 26 | 27 | def configure(self) -> None: 28 | 29 | self.geometry = threestudio.find(self.cfg.geometry_type)(self.cfg.geometry) 30 | 31 | self.material = threestudio.find(self.cfg.material_type)(self.cfg.material) 32 | self.background = threestudio.find(self.cfg.background_type)( 33 | self.cfg.background 34 | ) 35 | self.renderer = threestudio.find(self.cfg.renderer_type)( 36 | self.cfg.renderer, 37 | geometry=self.geometry, 38 | material=self.material, 39 | background=self.background, 40 | ) 41 | 42 | def forward( 43 | self, 44 | bg_color=None, 45 | force_shading=None, 46 | **kwargs 47 | ) -> Dict[str, Float[Tensor, "..."]]: 48 | with torch.cuda.amp.autocast(enabled=False): 49 | out = self.renderer(bg_color=bg_color, force_shading=force_shading, **kwargs) 50 | comp_rgb = out["comp_rgb"] 51 | save_image(out['comp_rgb'].permute(0, 3, 1, 2), f'debug_data/three_gt.png') 52 | 53 | return {"comp_rgb": comp_rgb} -------------------------------------------------------------------------------- /threestudio/scripts/make_training_vid.py: -------------------------------------------------------------------------------- 1 | # make_training_vid("outputs/zero123/64_teddy_rgba.png@20230627-195615", frames_per_vid=30, fps=20, max_iters=200) 2 | import argparse 3 | import glob 4 | import os 5 | 6 | import imageio 7 | import numpy as np 8 | from PIL import Image, ImageDraw 9 | from tqdm import tqdm 10 | 11 | 12 | def draw_text_in_image(img, texts): 13 | img = Image.fromarray(img) 14 | draw = ImageDraw.Draw(img) 15 | black, white = (0, 0, 0), (255, 255, 255) 16 | for i, text in enumerate(texts): 17 | draw.text((2, (img.size[1] // len(texts)) * i + 1), f"{text}", white) 18 | draw.text((0, (img.size[1] // len(texts)) * i + 1), f"{text}", white) 19 | draw.text((2, (img.size[1] // len(texts)) * i - 1), f"{text}", white) 20 | draw.text((0, (img.size[1] // len(texts)) * i - 1), f"{text}", white) 21 | draw.text((1, (img.size[1] // len(texts)) * i), f"{text}", black) 22 | return np.asarray(img) 23 | 24 | 25 | def make_training_vid(exp, frames_per_vid=1, fps=3, max_iters=None, max_vids=None): 26 | # exp = "/admin/home-vikram/git/threestudio/outputs/zero123/64_teddy_rgba.png@20230627-195615" 27 | files = glob.glob(os.path.join(exp, "save", "*.mp4")) 28 | if os.path.join(exp, "save", "training_vid.mp4") in files: 29 | files.remove(os.path.join(exp, "save", "training_vid.mp4")) 30 | its = [int(os.path.basename(file).split("-")[0].split("it")[-1]) for file in files] 31 | it_sort = np.argsort(its) 32 | files = list(np.array(files)[it_sort]) 33 | its = list(np.array(its)[it_sort]) 34 | max_vids = max_iters // its[0] if max_iters is not None else max_vids 35 | files, its = files[:max_vids], its[:max_vids] 36 | frames, i = [], 0 37 | for it, file in tqdm(zip(its, files), total=len(files)): 38 | vid = imageio.mimread(file) 39 | for _ in range(frames_per_vid): 40 | frame = vid[i % len(vid)] 41 | frame = draw_text_in_image(frame, [str(it)]) 42 | frames.append(frame) 43 | i += 1 44 | # Save 45 | imageio.mimwrite(os.path.join(exp, "save", "training_vid.mp4"), frames, fps=fps) 46 | 47 | 48 | def join(file1, file2, name): 49 | # file1 = "/admin/home-vikram/git/threestudio/outputs/zero123/OLD_64_dragon2_rgba.png@20230629-023028/save/it200-val.mp4" 50 | # file2 = "/admin/home-vikram/git/threestudio/outputs/zero123/64_dragon2_rgba.png@20230628-152734/save/it200-val.mp4" 51 | vid1 = imageio.mimread(file1) 52 | vid2 = imageio.mimread(file2) 53 | frames = [] 54 | for f1, f2 in zip(vid1, vid2): 55 | 
frames.append( 56 | np.concatenate([f1[:, : f1.shape[0]], f2[:, : f2.shape[0]]], axis=1) 57 | ) 58 | imageio.mimwrite(name, frames) 59 | 60 | 61 | if __name__ == "__main__": 62 | parser = argparse.ArgumentParser() 63 | parser.add_argument("--exp", help="directory of experiment") 64 | parser.add_argument( 65 | "--frames_per_vid", type=int, default=1, help="# of frames from each val vid" 66 | ) 67 | parser.add_argument("--fps", type=int, help="max # of iters to save") 68 | parser.add_argument("--max_iters", type=int, help="max # of iters to save") 69 | parser.add_argument( 70 | "--max_vids", 71 | type=int, 72 | help="max # of val videos to save. Will be overridden by max_iters", 73 | ) 74 | args = parser.parse_args() 75 | make_training_vid( 76 | args.exp, args.frames_per_vid, args.fps, args.max_iters, args.max_vids 77 | ) 78 | -------------------------------------------------------------------------------- /threestudio/scripts/run_gaussian.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | prompt_list = [ 4 | "a delicious hamburger", 5 | "A DSLR photo of a roast turkey on a platter", 6 | "A high quality photo of a dragon", 7 | "A DSLR photo of a bald eagle", 8 | "A bunch of blue rose, highly detailed", 9 | "A 3D model of an adorable cottage with a thatched roof", 10 | "A high quality photo of a furry corgi", 11 | "A DSLR photo of a panda", 12 | "a DSLR photo of a cat lying on its side batting at a ball of yarn", 13 | "a beautiful dress made out of fruit, on a mannequin. Studio lighting, high quality, high resolution", 14 | "a DSLR photo of a corgi wearing a beret and holding a baguette, standing up on two hind legs", 15 | "a zoomed out DSLR photo of a stack of pancakes", 16 | "a zoomed out DSLR photo of a baby bunny sitting on top of a stack of pancakes", 17 | ] 18 | negative_prompt = "oversaturated color, ugly, tiling, low quality, noise, ugly pattern" 19 | 20 | gpu_id = 0 21 | max_steps = 10 22 | val_check = 1 23 | out_name = "gsgen_baseline" 24 | for prompt in prompt_list: 25 | print(f"Running model on device {gpu_id}: ", prompt) 26 | command = [ 27 | "python", "launch.py", 28 | "--config", "configs/gaussian_splatting.yaml", 29 | "--train", 30 | f"system.prompt_processor.prompt={prompt}", 31 | f"system.prompt_processor.negative_prompt={negative_prompt}", 32 | f"name={out_name}", 33 | "--gpu", f"{gpu_id}" 34 | ] 35 | subprocess.run(command) 36 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123.sh: -------------------------------------------------------------------------------- 1 | NAME="dragon2" 2 | 3 | # Phase 1 - 64x64 4 | python launch.py --config configs/zero123.yaml --train --gpu 7 data.image_path=./load/images/${NAME}_rgba.png use_timestamp=False name=${NAME} tag=Phase1 # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase1 5 | 6 | # Phase 1.5 - 512 refine 7 | python launch.py --config configs/zero123-geometry.yaml --train --gpu 4 data.image_path=./load/images/${NAME}_rgba.png system.geometry_convert_from=./outputs/${NAME}/Phase1/ckpts/last.ckpt use_timestamp=False name=${NAME} tag=Phase1p5 # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase1p5 8 | 9 | # Phase 2 - dreamfusion 10 | python launch.py --config configs/experimental/imagecondition_zero123nerf.yaml --train --gpu 5 
data.image_path=./load/images/${NAME}_rgba.png system.prompt_processor.prompt="A 3D model of a friendly dragon" system.weights="/admin/home-vikram/git/threestudio/outputs/${NAME}/Phase1/ckpts/last.ckpt" name=${NAME} tag=Phase2 # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase2 11 | 12 | # Phase 2 - SDF + dreamfusion 13 | python launch.py --config configs/experimental/imagecondition_zero123nerf_refine.yaml --train --gpu 5 data.image_path=./load/images/${NAME}_rgba.png system.prompt_processor.prompt="A 3D model of a friendly dragon" system.geometry_convert_from="/admin/home-vikram/git/threestudio/outputs/${NAME}/Phase1/ckpts/last.ckpt" name=${NAME} tag=Phase2_refine # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase2_refine 14 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_comparison.sh: -------------------------------------------------------------------------------- 1 | # with standard zero123 2 | threestudio/scripts/run_zero123_phase.sh 6 anya_front 105000 0 3 | 4 | # with zero123XL (not released yet!) 5 | threestudio/scripts/run_zero123_phase.sh 1 anya_front XL_20230604 0 6 | threestudio/scripts/run_zero123_phase.sh 2 baby_phoenix_on_ice XL_20230604 20 7 | threestudio/scripts/run_zero123_phase.sh 3 beach_house_1 XL_20230604 50 8 | threestudio/scripts/run_zero123_phase.sh 4 bollywood_actress XL_20230604 0 9 | threestudio/scripts/run_zero123_phase.sh 5 beach_house_2 XL_20230604 30 10 | threestudio/scripts/run_zero123_phase.sh 6 hamburger XL_20230604 10 11 | threestudio/scripts/run_zero123_phase.sh 7 cactus XL_20230604 8 12 | threestudio/scripts/run_zero123_phase.sh 0 catstatue XL_20230604 50 13 | threestudio/scripts/run_zero123_phase.sh 1 church_ruins XL_20230604 0 14 | threestudio/scripts/run_zero123_phase.sh 2 firekeeper XL_20230604 10 15 | threestudio/scripts/run_zero123_phase.sh 3 futuristic_car XL_20230604 20 16 | threestudio/scripts/run_zero123_phase.sh 4 mona_lisa XL_20230604 10 17 | threestudio/scripts/run_zero123_phase.sh 5 teddy XL_20230604 20 18 | 19 | # set guidance_eval to 0, to greatly speed up training 20 | threestudio/scripts/run_zero123_phase.sh 7 anya_front XL_20230604 0 system.freq.guidance_eval=0 21 | 22 | # disable wandb for faster training (or if you don't want to use it) 23 | threestudio/scripts/run_zero123_phase.sh 7 anya_front XL_20230604 0 system.loggers.wandb.enable=false system.freq.guidance_eval=0 24 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_phase.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=$1 # e.g. 0 3 | IMAGE_PREFIX=$2 # e.g. "anya_front" 4 | ZERO123_PREFIX=$3 # e.g. "zero123-xl" 5 | ELEVATION=$4 # e.g. 0 6 | REST=${@:5:99} # e.g. 
"system.guidance.min_step_percent=0.1 system.guidance.max_step_percent=0.9" 7 | 8 | # change this config if you don't use wandb or want to speed up training 9 | python launch.py --config configs/zero123.yaml --train --gpu $GPU_ID system.loggers.wandb.enable=true system.loggers.wandb.project="claforte-noise_atten" \ 10 | system.loggers.wandb.name="${IMAGE_PREFIX}_zero123_${ZERO123_PREFIX}...fov20_${REST}" \ 11 | data.image_path=./load/images/${IMAGE_PREFIX}_rgba.png system.freq.guidance_eval=37 \ 12 | system.guidance.pretrained_model_name_or_path="./load/zero123/${ZERO123_PREFIX}.ckpt" \ 13 | system.guidance.cond_elevation_deg=$ELEVATION \ 14 | ${REST} 15 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_phase2.sh: -------------------------------------------------------------------------------- 1 | # Reconstruct Anya using latest Zero123XL, in <2000 steps. 2 | python launch.py --config configs/zero123.yaml --train --gpu 0 system.loggers.wandb.enable=true system.loggers.wandb.project="voletiv-anya-new" system.loggers.wandb.name="claforte_params" data.image_path=./load/images/anya_front_rgba.png system.freq.ref_or_zero123="accumulate" system.freq.guidance_eval=13 system.guidance.pretrained_model_name_or_path="./load/zero123/zero123-xl.ckpt" 3 | 4 | # PHASE 2 5 | python launch.py --config configs/experimental/imagecondition_zero123nerf.yaml --train --gpu 0 system.prompt_processor.prompt="A DSLR 3D photo of a cute anime schoolgirl stands proudly with her arms in the air, pink hair ( unreal engine 5 trending on Artstation Ghibli 4k )" system.weights=outputs/zero123/128_anya_front_rgba.png@20230623-145711/ckpts/last.ckpt system.freq.guidance_eval=13 system.loggers.wandb.enable=true system.loggers.wandb.project="voletiv-anya-new" data.image_path=./load/images/anya_front_rgba.png system.loggers.wandb.name="anya" data.random_camera.progressive_until=500 6 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_sbatch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | files = [ 5 | "~/git/threestudio/load/images/dog1_rgba.png", 6 | "~/git/threestudio/load/images/dragon2_rgba.png", 7 | ] 8 | 9 | for file in files: 10 | name = os.path.basename(file).split("_rgba.png")[0] 11 | with open( 12 | os.path.expanduser("~/git/threestudio/threestudio/scripts/zero123_sbatch.sh"), 13 | "w", 14 | ) as f: 15 | f.write("#!/bin/bash\n") 16 | f.write(f"#SBATCH --job-name=vikky_{name}\n") 17 | f.write("#SBATCH --account=mod3d\n") 18 | f.write("#SBATCH --partition=g40\n") 19 | f.write("#SBATCH --gpus=1\n") 20 | f.write("#SBATCH --time=0-00:07:00\n") 21 | f.write("conda activate three\n") 22 | f.write("cd ~/git/threestudio/\n") 23 | f.write(f"NAME={name}\n") 24 | # Phase 1 25 | f.write( 26 | "python launch.py --config configs/zero123.yaml --train data.image_path=./load/images/${NAME}_rgba.png use_timestamp=true name=${NAME} tag=Phase1 system.loggers.wandb.enable=false system.loggers.wandb.project='zero123' system.loggers.wandb.name=${NAME}_Phase1\n" 27 | ) 28 | # # Phase 1.5 29 | # f.write( 30 | # "python launch.py --config configs/zero123-geometry.yaml --train data.image_path=./load/images/${NAME}_rgba.png system.geometry_convert_from=./outputs/${NAME}/Phase1/ckpts/last.ckpt use_timestamp=False name=${NAME} tag=Phase1p5 system.loggers.wandb.enable=true system.loggers.wandb.project='zero123' 
system.loggers.wandb.name=${NAME}_Phase1p5\n" 31 | # ) 32 | os.system("sbatch ~/git/threestudio/threestudio/scripts/zero123_sbatch.sh") 33 | time.sleep(1) 34 | -------------------------------------------------------------------------------- /threestudio/scripts/zero123_demo.py: -------------------------------------------------------------------------------- 1 | # 1. Generate using StableDiffusionXL https://clipdrop.co/stable-diffusion 2 | 3 | # 2. Remove background https://clipdrop.co/remove-background 4 | 5 | # 3. Resize to 512x512 https://www.iloveimg.com/resize-image 6 | 7 | # (OPTIONAL) 8 | # 4. Estimate depth and normal https://omnidata.vision/demo/ (I used Omnidata Normal (with X-TC & 3DCC), and MiDaS Depth) 9 | 10 | 11 | # (OPTIONAL) 12 | # 5. Convert depth image from RGB to greyscale 13 | def depth_rgb_to_grey(depth_filename): 14 | # depth_filename = "image_depth.png" 15 | import cv2 16 | import numpy as np 17 | 18 | # import shutil 19 | # shutil.copyfile(depth_filename, depth_filename.replace("_depth", "_depth_orig")) 20 | depth = cv2.imread(depth_filename) 21 | depth = cv2.cvtColor(depth, cv2.COLOR_BGR2GRAY) 22 | mask = ( 23 | cv2.resize( 24 | cv2.imread(depth_filename.replace("_depth", "_rgba"), cv2.IMREAD_UNCHANGED)[ 25 | :, :, -1 26 | ], 27 | depth.shape, 28 | ) 29 | > 0 30 | ) 31 | # depth[mask] = (depth[mask] - depth.min()) / (depth.max() - depth.min() + 1e-9) 32 | depth = (depth - depth.min()) / (depth.max() - depth.min() + 1e-9) 33 | depth[~mask] = 0 34 | depth = (depth * 255).astype(np.uint8) 35 | cv2.imwrite(depth_filename, depth) 36 | 37 | 38 | # (OPTIONAL) 39 | # 6. Mask normal 40 | def normal_mask(normal_filename): 41 | # filename = "image_normal.png" 42 | import cv2 43 | 44 | # import shutil 45 | # shutil.copyfile(normal_filename, normal_filename.replace("_normal", "_normal_orig")) 46 | normal = cv2.imread(normal_filename) 47 | mask = ( 48 | cv2.resize( 49 | cv2.imread( 50 | normal_filename.replace("_normal", "_rgba"), cv2.IMREAD_UNCHANGED 51 | )[:, :, -1], 52 | normal.shape[:2], 53 | ) 54 | > 0 55 | ) 56 | normal[~mask] = 0 57 | cv2.imwrite(normal_filename, normal) 58 | 59 | 60 | # 5. Run Zero123 61 | # python launch.py --config configs/zero123.yaml --train data.image_path=./load/images/grootplant_rgba.png 62 | -------------------------------------------------------------------------------- /threestudio/scripts/zero123_sbatch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=vikky 3 | #SBATCH --account=mod3d 4 | #SBATCH --partition=g40 5 | #SBATCH --gpus=1 6 | #SBATCH --time=0-00:07:00 7 | conda activate three 8 | cd ~/git/threestudio/ 9 | NAME="dog1" 10 | python launch.py --config configs/zero123.yaml --train data.image_path=./load/images/${NAME}_rgba.png use_timestamp=False name=${NAME} tag=Phase1 system.loggers.wandb.enable=true system.loggers.wandb.project='zero123' system.loggers.wandb.name=${NAME}_Phase1 11 | -------------------------------------------------------------------------------- /threestudio/systems/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | control4d_multiview, 3 | dreamfusion, 4 | fantasia3d, 5 | imagedreamfusion, 6 | instructnerf2nerf, 7 | interactive3d, 8 | latentnerf, 9 | magic3d, 10 | prolificdreamer, 11 | sjc, 12 | textmesh, 13 | zero123, 14 | gaussian_splatting, 15 | magic123, 16 | ) 17 | -------------------------------------------------------------------------------- /threestudio/systems/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import warnings 3 | from bisect import bisect_right 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch.optim import lr_scheduler 8 | 9 | import threestudio 10 | 11 | 12 | def get_scheduler(name): 13 | if hasattr(lr_scheduler, name): 14 | return getattr(lr_scheduler, name) 15 | else: 16 | raise NotImplementedError 17 | 18 | 19 | def getattr_recursive(m, attr): 20 | for name in attr.split("."): 21 | m = getattr(m, name) 22 | return m 23 | 24 | 25 | def get_parameters(model, name): 26 | module = getattr_recursive(model, name) 27 | if isinstance(module, nn.Module): 28 | return module.parameters() 29 | elif isinstance(module, nn.Parameter): 30 | return module 31 | return [] 32 | 33 | 34 | def parse_optimizer(config, model): 35 | if hasattr(config, "params"): 36 | params = [ 37 | {"params": get_parameters(model, name), "name": name, **args} 38 | for name, args in config.params.items() 39 | ] 40 | threestudio.debug(f"Specify optimizer params: {config.params}") 41 | # print("=====params===") 42 | # print(params) 43 | # print(" len paprams ++++++++: ", len(params)) 44 | else: 45 | params = model.parameters() 46 | if config.name in ["FusedAdam"]: 47 | import apex 48 | 49 | optim = getattr(apex.optimizers, config.name)(params, **config.args) 50 | elif config.name in ["Adan"]: 51 | from threestudio.systems import optimizers 52 | 53 | optim = getattr(optimizers, config.name)(params, **config.args) 54 | else: 55 | optim = getattr(torch.optim, config.name)(params, **config.args) 56 | # print("===========++++++++++", optim.state_dict()) 57 | return optim 58 | 59 | 60 | def parse_scheduler(config, optimizer): 61 | interval = config.get("interval", "epoch") 62 | assert interval in ["epoch", "step"] 63 | if config.name == "SequentialLR": 64 | scheduler = { 65 | "scheduler": lr_scheduler.SequentialLR( 66 | optimizer, 67 | [ 68 | parse_scheduler(conf, optimizer)["scheduler"] 69 | for conf in config.schedulers 70 | ], 71 | milestones=config.milestones, 72 | ), 73 | "interval": interval, 74 | } 75 | elif config.name == "ChainedScheduler": 76 | scheduler = { 77 | "scheduler": lr_scheduler.ChainedScheduler( 78 | [ 79 | parse_scheduler(conf, optimizer)["scheduler"] 80 | for conf in config.schedulers 81 | ] 82 | ), 83 | "interval": interval, 84 | } 85 | else: 86 | scheduler = { 87 | "scheduler": get_scheduler(config.name)(optimizer, **config.args), 88 | "interval": interval, 89 | } 90 | return scheduler 91 | -------------------------------------------------------------------------------- /threestudio/utils/GAN/distribution.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class 
DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to( 34 | device=self.parameters.device 35 | ) 36 | 37 | def sample(self): 38 | x = self.mean + self.std * torch.randn(self.mean.shape).to( 39 | device=self.parameters.device 40 | ) 41 | return x 42 | 43 | def kl(self, other=None): 44 | if self.deterministic: 45 | return torch.Tensor([0.0]) 46 | else: 47 | if other is None: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, 50 | dim=[1, 2, 3], 51 | ) 52 | else: 53 | return 0.5 * torch.sum( 54 | torch.pow(self.mean - other.mean, 2) / other.var 55 | + self.var / other.var 56 | - 1.0 57 | - self.logvar 58 | + other.logvar, 59 | dim=[1, 2, 3], 60 | ) 61 | 62 | def nll(self, sample, dims=[1, 2, 3]): 63 | if self.deterministic: 64 | return torch.Tensor([0.0]) 65 | logtwopi = np.log(2.0 * np.pi) 66 | return 0.5 * torch.sum( 67 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 68 | dim=dims, 69 | ) 70 | 71 | def mode(self): 72 | return self.mean 73 | 74 | 75 | def normal_kl(mean1, logvar1, mean2, logvar2): 76 | """ 77 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 78 | Compute the KL divergence between two gaussians. 79 | Shapes are automatically broadcasted, so batches can be compared to 80 | scalars, among other use cases. 81 | """ 82 | tensor = None 83 | for obj in (mean1, logvar1, mean2, logvar2): 84 | if isinstance(obj, torch.Tensor): 85 | tensor = obj 86 | break 87 | assert tensor is not None, "at least one argument must be a Tensor" 88 | 89 | # Force variances to be Tensors. Broadcasting helps convert scalars to 90 | # Tensors, but it does not work for torch.exp(). 
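# Closed-form KL between two diagonal Gaussians, which the expression below
# evaluates elementwise with logvar = log(sigma^2):
#   KL(N(mu1, var1) || N(mu2, var2))
#     = 0.5 * ( log(var2 / var1) + var1 / var2 + (mu1 - mu2)^2 / var2 - 1 )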
91 | logvar1, logvar2 = [ 92 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 93 | for x in (logvar1, logvar2) 94 | ] 95 | 96 | return 0.5 * ( 97 | -1.0 98 | + logvar2 99 | - logvar1 100 | + torch.exp(logvar1 - logvar2) 101 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 102 | ) 103 | -------------------------------------------------------------------------------- /threestudio/utils/GAN/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def generator_loss(discriminator, inputs, reconstructions, cond=None): 6 | if cond is None: 7 | logits_fake = discriminator(reconstructions.contiguous()) 8 | else: 9 | logits_fake = discriminator( 10 | torch.cat((reconstructions.contiguous(), cond), dim=1) 11 | ) 12 | g_loss = -torch.mean(logits_fake) 13 | return g_loss 14 | 15 | 16 | def hinge_d_loss(logits_real, logits_fake): 17 | loss_real = torch.mean(F.relu(1.0 - logits_real)) 18 | loss_fake = torch.mean(F.relu(1.0 + logits_fake)) 19 | d_loss = 0.5 * (loss_real + loss_fake) 20 | return d_loss 21 | 22 | 23 | def discriminator_loss(discriminator, inputs, reconstructions, cond=None): 24 | if cond is None: 25 | logits_real = discriminator(inputs.contiguous().detach()) 26 | logits_fake = discriminator(reconstructions.contiguous().detach()) 27 | else: 28 | logits_real = discriminator( 29 | torch.cat((inputs.contiguous().detach(), cond), dim=1) 30 | ) 31 | logits_fake = discriminator( 32 | torch.cat((reconstructions.contiguous().detach(), cond), dim=1) 33 | ) 34 | d_loss = hinge_d_loss(logits_real, logits_fake).mean() 35 | return d_loss 36 | -------------------------------------------------------------------------------- /threestudio/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import base 2 | -------------------------------------------------------------------------------- /threestudio/utils/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from threestudio.utils.config import parse_structured 7 | from threestudio.utils.misc import get_device, load_module_weights 8 | from threestudio.utils.typing import * 9 | 10 | 11 | class Configurable: 12 | @dataclass 13 | class Config: 14 | pass 15 | 16 | def __init__(self, cfg: Optional[dict] = None) -> None: 17 | super().__init__() 18 | self.cfg = parse_structured(self.Config, cfg) 19 | 20 | 21 | class Updateable: 22 | def do_update_step( 23 | self, epoch: int, global_step: int, on_load_weights: bool = False 24 | ): 25 | for attr in self.__dir__(): 26 | if attr.startswith("_"): 27 | continue 28 | try: 29 | module = getattr(self, attr) 30 | except: 31 | continue # ignore attributes like property, which can't be retrived using getattr? 
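# Recurse into any Updateable attribute first, then run this object's own
# update_step, so per-step schedules propagate through nested modules.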
32 | if isinstance(module, Updateable): 33 | module.do_update_step( 34 | epoch, global_step, on_load_weights=on_load_weights 35 | ) 36 | self.update_step(epoch, global_step, on_load_weights=on_load_weights) 37 | 38 | def update_step(self, epoch: int, global_step: int, on_load_weights: bool = False): 39 | # override this method to implement custom update logic 40 | # if on_load_weights is True, you should be careful doing things related to model evaluations, 41 | # as the models and tensors are not guarenteed to be on the same device 42 | pass 43 | 44 | 45 | def update_if_possible(module: Any, epoch: int, global_step: int) -> None: 46 | if isinstance(module, Updateable): 47 | module.do_update_step(epoch, global_step) 48 | 49 | 50 | class BaseObject(Updateable): 51 | @dataclass 52 | class Config: 53 | pass 54 | 55 | cfg: Config # add this to every subclass of BaseObject to enable static type checking 56 | 57 | def __init__( 58 | self, cfg: Optional[Union[dict, DictConfig]] = None, *args, **kwargs 59 | ) -> None: 60 | super().__init__() 61 | self.cfg = parse_structured(self.Config, cfg) 62 | self.device = get_device() 63 | self.configure(*args, **kwargs) 64 | 65 | def configure(self, *args, **kwargs) -> None: 66 | pass 67 | 68 | 69 | class BaseModule(nn.Module, Updateable): 70 | @dataclass 71 | class Config: 72 | weights: Optional[str] = None 73 | 74 | cfg: Config # add this to every subclass of BaseModule to enable static type checking 75 | 76 | def __init__( 77 | self, cfg: Optional[Union[dict, DictConfig]] = None, *args, **kwargs 78 | ) -> None: 79 | super().__init__() 80 | self.cfg = parse_structured(self.Config, cfg) 81 | self.device = get_device() 82 | self.configure(*args, **kwargs) 83 | if self.cfg.weights is not None: 84 | # format: path/to/weights:module_name 85 | weights_path, module_name = self.cfg.weights.split(":") 86 | state_dict, epoch, global_step = load_module_weights( 87 | weights_path, module_name=module_name, map_location="cpu" 88 | ) 89 | self.load_state_dict(state_dict) 90 | self.do_update_step( 91 | epoch, global_step, on_load_weights=True 92 | ) # restore states 93 | # dummy tensor to indicate model state 94 | self._dummy: Float[Tensor, "..."] 95 | self.register_buffer("_dummy", torch.zeros(0).float(), persistent=False) 96 | 97 | def configure(self, *args, **kwargs) -> None: 98 | pass 99 | -------------------------------------------------------------------------------- /threestudio/utils/mesh.py: -------------------------------------------------------------------------------- 1 | import trimesh 2 | from pathlib import Path 3 | import torch 4 | import numpy as np 5 | from vedo import Mesh 6 | 7 | 8 | def as_mesh(scene_or_mesh): 9 | """ 10 | Convert a possible scene to a mesh. 11 | 12 | If conversion occurs, the returned mesh has only vertex and face data. 
13 | 14 | reference: https://github.com/mikedh/trimesh/issues/507#issuecomment-514973337 15 | """ 16 | if isinstance(scene_or_mesh, trimesh.Scene): 17 | if len(scene_or_mesh.geometry) == 0: 18 | mesh = None # empty scene 19 | else: 20 | # we lose texture information here 21 | mesh = trimesh.util.concatenate( 22 | tuple( 23 | trimesh.Trimesh(vertices=g.vertices, faces=g.faces) 24 | for g in scene_or_mesh.geometry.values() 25 | ) 26 | ) 27 | else: 28 | assert isinstance(scene_or_mesh, trimesh.Trimesh) 29 | mesh = scene_or_mesh 30 | return mesh 31 | 32 | 33 | def load_mesh_obj(obj_file, texture_file=None): 34 | mesh = Mesh(str(obj_file)) 35 | if texture_file is not None: 36 | mesh.texture(texture_file) 37 | 38 | xyz = mesh.points() 39 | rgb = mesh.pointcolors.astype(np.float32) / 255.0 40 | 41 | return torch.from_numpy(xyz), torch.from_numpy(rgb) 42 | 43 | 44 | def load_mesh_as_pcd(mesh_file, texture_file): 45 | mesh_file = Path(mesh_file) 46 | if mesh_file.suffix == ".obj": 47 | return load_mesh_obj(mesh_file, texture_file) 48 | else: 49 | raise NotImplementedError(f"Unknown mesh file {mesh_file}") 50 | 51 | 52 | def load_mesh_as_pcd_trimesh(mesh_file, num_points): 53 | mesh = as_mesh(trimesh.load_mesh(mesh_file)) 54 | n = num_points 55 | points = [] 56 | while n > 0: 57 | p, _ = trimesh.sample.sample_surface_even(mesh, n) 58 | n -= p.shape[0] 59 | if n >= 0: 60 | points.append(p) 61 | else: 62 | points.append(p[:n]) 63 | if len(points) > 1: 64 | points = np.concatenate(points, axis=0) 65 | else: 66 | points = points[0] 67 | points = torch.from_numpy(points.astype(np.float32)) 68 | 69 | return points, torch.rand_like(points) 70 | -------------------------------------------------------------------------------- /threestudio/utils/perceptual/__init__.py: -------------------------------------------------------------------------------- 1 | from .perceptual import PerceptualLoss 2 | -------------------------------------------------------------------------------- /threestudio/utils/rasterize.py: -------------------------------------------------------------------------------- 1 | import nvdiffrast.torch as dr 2 | import torch 3 | 4 | from threestudio.utils.typing import * 5 | 6 | 7 | class NVDiffRasterizerContext: 8 | def __init__(self, context_type: str, device: torch.device) -> None: 9 | self.device = device 10 | self.ctx = self.initialize_context(context_type, device) 11 | 12 | def initialize_context( 13 | self, context_type: str, device: torch.device 14 | ) -> Union[dr.RasterizeGLContext, dr.RasterizeCudaContext]: 15 | if context_type == "gl": 16 | return dr.RasterizeGLContext(device=device) 17 | elif context_type == "cuda": 18 | return dr.RasterizeCudaContext(device=device) 19 | else: 20 | raise ValueError(f"Unknown rasterizer context type: {context_type}") 21 | 22 | def vertex_transform( 23 | self, verts: Float[Tensor, "Nv 3"], mvp_mtx: Float[Tensor, "B 4 4"] 24 | ) -> Float[Tensor, "B Nv 4"]: 25 | verts_homo = torch.cat( 26 | [verts, torch.ones([verts.shape[0], 1]).to(verts)], dim=-1 27 | ) 28 | return torch.matmul(verts_homo, mvp_mtx.permute(0, 2, 1)) 29 | 30 | def rasterize( 31 | self, 32 | pos: Float[Tensor, "B Nv 4"], 33 | tri: Integer[Tensor, "Nf 3"], 34 | resolution: Union[int, Tuple[int, int]], 35 | ): 36 | # rasterize in instance mode (single topology) 37 | return dr.rasterize(self.ctx, pos.float(), tri.int(), resolution, grad_db=True) 38 | 39 | def rasterize_one( 40 | self, 41 | pos: Float[Tensor, "Nv 4"], 42 | tri: Integer[Tensor, "Nf 3"], 43 | resolution: Union[int, Tuple[int, 
int]], 44 | ): 45 | # rasterize one single mesh under a single viewpoint 46 | rast, rast_db = self.rasterize(pos[None, ...], tri, resolution) 47 | return rast[0], rast_db[0] 48 | 49 | def antialias( 50 | self, 51 | color: Float[Tensor, "B H W C"], 52 | rast: Float[Tensor, "B H W 4"], 53 | pos: Float[Tensor, "B Nv 4"], 54 | tri: Integer[Tensor, "Nf 3"], 55 | ) -> Float[Tensor, "B H W C"]: 56 | return dr.antialias(color.float(), rast, pos.float(), tri.int()) 57 | 58 | def interpolate( 59 | self, 60 | attr: Float[Tensor, "B Nv C"], 61 | rast: Float[Tensor, "B H W 4"], 62 | tri: Integer[Tensor, "Nf 3"], 63 | rast_db=None, 64 | diff_attrs=None, 65 | ) -> Float[Tensor, "B H W C"]: 66 | return dr.interpolate( 67 | attr.float(), rast, tri.int(), rast_db=rast_db, diff_attrs=diff_attrs 68 | ) 69 | 70 | def interpolate_one( 71 | self, 72 | attr: Float[Tensor, "Nv C"], 73 | rast: Float[Tensor, "B H W 4"], 74 | tri: Integer[Tensor, "Nf 3"], 75 | rast_db=None, 76 | diff_attrs=None, 77 | ) -> Float[Tensor, "B H W C"]: 78 | return self.interpolate(attr[None, ...], rast, tri, rast_db, diff_attrs) 79 | -------------------------------------------------------------------------------- /threestudio/utils/typing.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains type annotations for the project, using 3 | 1. Python type hints (https://docs.python.org/3/library/typing.html) for Python objects 4 | 2. jaxtyping (https://github.com/google/jaxtyping/blob/main/API.md) for PyTorch tensors 5 | 6 | Two types of typing checking can be used: 7 | 1. Static type checking with mypy (install with pip and enabled as the default linter in VSCode) 8 | 2. Runtime type checking with typeguard (install with pip and triggered at runtime, mainly for tensor dtype and shape checking) 9 | """ 10 | 11 | # Basic types 12 | from typing import ( 13 | Any, 14 | Callable, 15 | Dict, 16 | Iterable, 17 | List, 18 | Literal, 19 | NamedTuple, 20 | NewType, 21 | Optional, 22 | Sized, 23 | Tuple, 24 | Type, 25 | TypeVar, 26 | Union, 27 | ) 28 | 29 | # Tensor dtype 30 | # for jaxtyping usage, see https://github.com/google/jaxtyping/blob/main/API.md 31 | from jaxtyping import Bool, Complex, Float, Inexact, Int, Integer, Num, Shaped, UInt 32 | 33 | # Config type 34 | from omegaconf import DictConfig 35 | 36 | # PyTorch Tensor type 37 | from torch import Tensor 38 | 39 | # Runtime type checking decorator 40 | from typeguard import typechecked as typechecker 41 | -------------------------------------------------------------------------------- /utils/test_pixart.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from diffusers import PixArtAlphaPipeline, ConsistencyDecoderVAE, AutoencoderKL 3 | from diffusers import DDIMScheduler, DDPMScheduler, StableDiffusionPipeline 4 | 5 | scheduler_params = { 6 | "beta_end": 0.02, # 0.012, 7 | "beta_schedule": "linear", # "scaled_linear", 8 | "beta_start": 0.0001, # 0.00085, 9 | "dynamic_thresholding_ratio": 0.995, 10 | "clip_sample": False, 11 | "num_train_timesteps": 1000, 12 | "prediction_type": "epsilon", # "v_prediction", 13 | "timestep_spacing": "linspace", 14 | "set_alpha_to_one": False, 15 | # "skip_prk_steps": True, 16 | # "steps_offset": 1, 17 | # "trained_betas": None 18 | } 19 | scheduler = DDIMScheduler(**scheduler_params) 20 | 21 | pipe = PixArtAlphaPipeline.from_pretrained("PixArt-alpha/PixArt-XL-2-512x512", torch_dtype=torch.float16, use_safetensors=True, scheduler=scheduler) 22 | 
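# Passing scheduler= here overrides the scheduler bundled with the PixArt-alpha
# checkpoint; the inline "# ..." values above appear to be the Stable-Diffusion
# style settings (scaled_linear betas, v_prediction) that were swapped out for
# a plain linear schedule with epsilon prediction.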
23 | # Enable memory optimizations. 24 | pipe.enable_model_cpu_offload() 25 | 26 | prompt = "A gundam robot holding a sword with angel wings" 27 | image = pipe(prompt, num_inference_steps=50).images[0] 28 | image.save("./test_pixart.png") -------------------------------------------------------------------------------- /utils/test_sdxl.py: -------------------------------------------------------------------------------- 1 | from diffusers import StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline 2 | from diffusers import DDIMScheduler 3 | import torch 4 | 5 | scheduler_params = { 6 | "beta_end": 0.012, 7 | "beta_schedule": "scaled_linear", 8 | "beta_start": 0.00085, 9 | # "dynamic_thresholding_ratio": 0.995, 10 | "clip_sample": False, 11 | "num_train_timesteps": 1000, 12 | "prediction_type": "epsilon", # "v_prediction", 13 | "timestep_spacing": "linspace", 14 | "set_alpha_to_one": False, 15 | # "skip_prk_steps": True, 16 | # "steps_offset": 1, 17 | # "trained_betas": None 18 | } 19 | scheduler = DDIMScheduler(**scheduler_params) 20 | 21 | pipeline = StableDiffusionXLPipeline.from_pretrained( 22 | "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True, scheduler=scheduler 23 | ).to("cuda") 24 | 25 | prompt = "A gundam robot holding a sword with angel wings, detailed, 8k" 26 | image = pipeline(prompt=prompt).images[0] 27 | image.save("./test_sdxl.png") --------------------------------------------------------------------------------