├── .editorconfig ├── .github └── workflows │ └── pre-commit.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── DOCUMENTATION.md ├── LICENSE ├── README.md ├── assets ├── Interactive3d.jpg ├── arc.png ├── config.json ├── interactive3d.png └── results.png ├── configs ├── control4d-static.yaml ├── dreamfusion-if.yaml ├── dreamfusion-sd.yaml ├── experimental │ ├── co3d-imagecondition.yaml │ ├── imagecondition.yaml │ ├── imagecondition_zero123nerf.yaml │ └── imagecondition_zero123nerf_refine.yaml ├── fantasia3d-texture.yaml ├── fantasia3d.yaml ├── fit_gs.yaml ├── gaussian_splatting.yaml ├── geo_refine.yaml ├── gradio │ ├── dreamfusion-if.yaml │ ├── dreamfusion-sd.yaml │ ├── fantasia3d.yaml │ ├── latentnerf.yaml │ ├── sjc.yaml │ └── textmesh-if.yaml ├── instructnerf2nerf.yaml ├── interested_refine.yaml ├── interested_refine_pixart.yaml ├── latentnerf-refine.yaml ├── latentnerf.yaml ├── magic123-coarse-sd.yaml ├── magic3d-coarse-if.yaml ├── magic3d-coarse-sd.yaml ├── magic3d-refine-sd.yaml ├── mvdream-sd21-gaussian.yaml ├── mvdream-sd21-shading.yaml ├── mvdream-sd21.yaml ├── post_geo_refine.yaml ├── prolificdreamer-geometry-from.yaml ├── prolificdreamer-geometry.yaml ├── prolificdreamer-patch.yaml ├── prolificdreamer-scene.yaml ├── prolificdreamer-texture.yaml ├── prolificdreamer.yaml ├── sjc.yaml ├── sketchshape-refine.yaml ├── sketchshape.yaml ├── textmesh-if.yaml ├── zero123-geometry.yaml ├── zero123.yaml └── zero123_64.yaml ├── docker ├── Dockerfile └── compose.yaml ├── docs └── installation.md ├── extern ├── MVDream │ ├── .gitignore │ ├── LICENSE-CODE │ ├── README.md │ ├── mvdream │ │ ├── __init__.py │ │ ├── camera_utils.py │ │ ├── configs │ │ │ ├── sd-v1.yaml │ │ │ └── sd-v2-base.yaml │ │ ├── ldm │ │ │ ├── __init__.py │ │ │ ├── interface.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── autoencoder.py │ │ │ │ └── diffusion │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── ddim.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── attention.py │ │ │ │ ├── diffusionmodules │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── model.py │ │ │ │ │ ├── openaimodel.py │ │ │ │ │ └── util.py │ │ │ │ ├── distributions │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── distributions.py │ │ │ │ ├── ema.py │ │ │ │ └── encoders │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── modules.py │ │ │ └── util.py │ │ └── model_zoo.py │ ├── requirements.txt │ ├── scripts │ │ ├── gradio_app.py │ │ └── t2i.py │ └── setup.py ├── ldm_zero123 │ ├── extras.py │ ├── guidance.py │ ├── lr_scheduler.py │ ├── models │ │ ├── autoencoder.py │ │ └── diffusion │ │ │ ├── __init__.py │ │ │ ├── classifier.py │ │ │ ├── ddim.py │ │ │ ├── ddpm.py │ │ │ ├── plms.py │ │ │ └── sampling_util.py │ ├── modules │ │ ├── attention.py │ │ ├── diffusionmodules │ │ │ ├── __init__.py │ │ │ ├── model.py │ │ │ ├── openaimodel.py │ │ │ └── util.py │ │ ├── distributions │ │ │ ├── __init__.py │ │ │ └── distributions.py │ │ ├── ema.py │ │ ├── encoders │ │ │ ├── __init__.py │ │ │ └── modules.py │ │ ├── evaluate │ │ │ ├── adm_evaluator.py │ │ │ ├── evaluate_perceptualsim.py │ │ │ ├── frechet_video_distance.py │ │ │ ├── ssim.py │ │ │ └── torch_frechet_video_distance.py │ │ ├── image_degradation │ │ │ ├── __init__.py │ │ │ ├── bsrgan.py │ │ │ ├── bsrgan_light.py │ │ │ └── utils_image.py │ │ ├── losses │ │ │ ├── __init__.py │ │ │ ├── contperceptual.py │ │ │ └── vqperceptual.py │ │ └── x_transformer.py │ ├── thirdp │ │ └── psp │ │ │ ├── helpers.py │ │ │ ├── id_loss.py │ │ │ └── model_irse.py │ └── util.py └── zero123.py ├── gradio_app.py ├── keyboard.py ├── launch.py ├── 
requirements-dev.txt ├── requirements.txt ├── threestudio ├── __init__.py ├── data │ ├── __init__.py │ ├── co3d.py │ ├── edit_multiview.py │ ├── edit_multiview_gs.py │ ├── image.py │ ├── multiview.py │ └── uncond.py ├── models │ ├── __init__.py │ ├── background │ │ ├── __init__.py │ │ ├── base.py │ │ ├── neural_environment_map_background.py │ │ ├── solid_color_background.py │ │ └── textured_background.py │ ├── exporters │ │ ├── __init__.py │ │ ├── base.py │ │ └── mesh_exporter.py │ ├── geometry │ │ ├── __init__.py │ │ ├── base.py │ │ ├── gaussian.py │ │ ├── implicit_sdf.py │ │ ├── implicit_volume.py │ │ ├── implicit_volume_edit.py │ │ ├── tetrahedra_sdf_grid.py │ │ └── volume_grid.py │ ├── guidance │ │ ├── __init__.py │ │ ├── controlnet_guidance.py │ │ ├── deep_floyd_guidance.py │ │ ├── deep_floyd_guidance_stage2.py │ │ ├── instructpix2pix_guidance.py │ │ ├── multiview_diffusion_guidance.py │ │ ├── pixart_guidance.py │ │ ├── stable_diffusion_guidance.py │ │ ├── stable_diffusion_unified_guidance.py │ │ ├── stable_diffusion_vsd_guidance.py │ │ ├── zero123_guidance.py │ │ └── zero123_unified_guidance.py │ ├── isosurface.py │ ├── materials │ │ ├── __init__.py │ │ ├── base.py │ │ ├── diffuse_with_point_light_material.py │ │ ├── hybrid_rgb_latent_material.py │ │ ├── neural_radiance_material.py │ │ ├── no_material.py │ │ ├── no_material_backup.py │ │ ├── pbr_material.py │ │ └── sd_latent_adapter_material.py │ ├── mesh.py │ ├── networks.py │ ├── prompt_processors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── deepfloyd_prompt_processor.py │ │ ├── dummy_prompt_processor.py │ │ ├── pixart_prompt_processor.py │ │ └── stable_diffusion_prompt_processor.py │ └── renderers │ │ ├── __init__.py │ │ ├── base.py │ │ ├── deferred_volume_renderer.py │ │ ├── diff_gaussian_rasterizer.py │ │ ├── gan_volume_renderer.py │ │ ├── gsgen_renderer.py │ │ ├── magic123_renderer.py │ │ ├── nerf_volume_renderer.py │ │ ├── neus_volume_renderer.py │ │ ├── nvdiff_rasterizer.py │ │ ├── patch_renderer.py │ │ └── threestudio_renderer.py ├── scripts │ ├── make_training_vid.py │ ├── run_gaussian.py │ ├── run_zero123.sh │ ├── run_zero123_comparison.sh │ ├── run_zero123_phase.sh │ ├── run_zero123_phase2.sh │ ├── run_zero123_sbatch.py │ ├── zero123_demo.py │ └── zero123_sbatch.sh ├── systems │ ├── __init__.py │ ├── base.py │ ├── control4d_multiview.py │ ├── dreamfusion.py │ ├── fantasia3d.py │ ├── gaussian_splatting.py │ ├── imagedreamfusion.py │ ├── instructnerf2nerf.py │ ├── interactive3d.py │ ├── latentnerf.py │ ├── magic123.py │ ├── magic3d.py │ ├── optimizers.py │ ├── prolificdreamer.py │ ├── sjc.py │ ├── textmesh.py │ ├── utils.py │ └── zero123.py └── utils │ ├── GAN │ ├── attention.py │ ├── discriminator.py │ ├── distribution.py │ ├── loss.py │ ├── mobilenet.py │ ├── network_util.py │ ├── util.py │ └── vae.py │ ├── __init__.py │ ├── base.py │ ├── callbacks.py │ ├── config.py │ ├── mesh.py │ ├── misc.py │ ├── ops.py │ ├── perceptual │ ├── __init__.py │ ├── perceptual.py │ └── utils.py │ ├── rasterize.py │ ├── saving.py │ └── typing.py └── utils ├── region_select_tool.py ├── test_pixart.py └── test_sdxl.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*.py] 4 | charset = utf-8 5 | trim_trailing_whitespace = true 6 | end_of_line = lf 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 4 10 | 11 | [*.md] 12 | trim_trailing_whitespace = false 13 | -------------------------------------------------------------------------------- 
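The tree above lists threestudio-style experiment configs under `configs/`, which `launch.py` consumes; they rely on OmegaConf features such as `???` mandatory fields and `${...}` interpolation (e.g. `${rmspace:${system.prompt_processor.prompt},_}`). The sketch below is a minimal, illustrative way such a config might be loaded and overridden from the command line; the `rmspace` resolver body, the chosen config path, and the example prompt are assumptions for illustration, not the project's actual launch code.

```python
# Minimal sketch (assumption: not the repo's actual launch.py logic) of loading
# one of the YAML configs under configs/ with OmegaConf.
from omegaconf import OmegaConf

# Stand-in resolver for tags like "${rmspace:${system.prompt_processor.prompt},_}";
# threestudio registers its own resolvers, this lambda is only illustrative.
OmegaConf.register_new_resolver(
    "rmspace", lambda s, sub: s.replace(" ", sub), replace=True
)

# Load the experiment config; fields set to "???" (e.g. system.prompt_processor.prompt)
# remain mandatory until an override supplies them.
cfg = OmegaConf.load("configs/dreamfusion-sd.yaml")

# Dot-list overrides, roughly what a CLI invocation along the lines of
#   python launch.py --config configs/dreamfusion-sd.yaml --train \
#       system.prompt_processor.prompt="a DSLR photo of a hamburger"
# would pass through.
overrides = OmegaConf.from_dotlist(
    ["system.prompt_processor.prompt=a DSLR photo of a hamburger"]
)
cfg = OmegaConf.merge(cfg, overrides)

# Interpolations resolve lazily on access.
print(cfg.tag)                             # -> "a_DSLR_photo_of_a_hamburger"
print(cfg.checkpoint.every_n_train_steps)  # -> same value as cfg.trainer.max_steps
```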
/.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | on: [push, pull_request] 3 | 4 | concurrency: 5 | group: ${{ github.workflow }}-${{ github.ref }} 6 | cancel-in-progress: true 7 | 8 | jobs: 9 | pre-commit: 10 | runs-on: ubuntu-22.04 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Set up Python 3.8 14 | uses: actions/setup-python@v4 15 | with: 16 | python-version: '3.8' 17 | - name: Install pre-commit 18 | run: | 19 | pip install pre-commit 20 | pre-commit install 21 | - name: Run pre-commit 22 | run: pre-commit run --all-files 23 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.4.0 7 | hooks: 8 | - id: trailing-whitespace 9 | - id: check-ast 10 | - id: check-merge-conflict 11 | - id: check-yaml 12 | - id: end-of-file-fixer 13 | - id: trailing-whitespace 14 | args: [--markdown-linebreak-ext=md] 15 | 16 | - repo: https://github.com/psf/black 17 | rev: 23.3.0 18 | hooks: 19 | - id: black 20 | language_version: python3.8 21 | 22 | - repo: https://github.com/pycqa/isort 23 | rev: 5.12.0 24 | hooks: 25 | - id: isort 26 | exclude: README.md 27 | args: ["--profile", "black"] 28 | 29 | # temporarily disable static type checking 30 | # - repo: https://github.com/pre-commit/mirrors-mypy 31 | # rev: v1.2.0 32 | # hooks: 33 | # - id: mypy 34 | # args: ["--ignore-missing-imports", "--scripts-are-modules", "--pretty"] 35 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | disable=R,C 2 | 3 | [TYPECHECK] 4 | # List of members which are set dynamically and missed by pylint inference 5 | # system, and so shouldn't trigger E1101 when accessed. Python regular 6 | # expressions are accepted. 
7 | generated-members=numpy.*,torch.*,cv2.* 8 | -------------------------------------------------------------------------------- /assets/Interactive3d.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/assets/Interactive3d.jpg -------------------------------------------------------------------------------- /assets/arc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/assets/arc.png -------------------------------------------------------------------------------- /assets/config.json: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /assets/interactive3d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/assets/interactive3d.png -------------------------------------------------------------------------------- /assets/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/assets/results.png -------------------------------------------------------------------------------- /configs/control4d-static.yaml: -------------------------------------------------------------------------------- 1 | name: "control4d-static" 2 | tag: "${basename:${data.dataroot}}_${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "multiview-camera-datamodule" 7 | data: 8 | train_downsample_resolution: 2 9 | eval_downsample_resolution: 2 10 | dataroot: ??? 11 | 12 | system_type: "control4d-multiview-system" 13 | system: 14 | start_editing_step: 2000 15 | 16 | geometry_type: "implicit-volume" 17 | geometry: 18 | radius: 2. 19 | n_feature_dims: 11 20 | normal_type: analytic 21 | pos_encoding_config: 22 | otype: HashGrid 23 | n_levels: 16 24 | n_features_per_level: 2 25 | log2_hashmap_size: 19 26 | base_resolution: 16 27 | per_level_scale: 1.4472692374403782 # max resolution 4096 28 | density_bias: "blob_magic3d" 29 | density_activation: softplus 30 | density_blob_scale: 10. 31 | density_blob_std: 0.5 32 | isosurface_resolution: 128 33 | isosurface_threshold: auto 34 | isosurface_coarse_to_fine: true 35 | 36 | material_type: "hybrid-rgb-latent-material" 37 | material: 38 | n_output_dims: 11 39 | requires_normal: true 40 | 41 | background_type: "solid-color-background" 42 | background: 43 | n_output_dims: 11 44 | color: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 45 | 46 | renderer_type: "gan-volume-renderer" 47 | renderer: 48 | base_renderer_type: "nerf-volume-renderer" 49 | base_renderer: 50 | radius: ${system.geometry.radius} 51 | num_samples_per_ray: 512 52 | 53 | guidance_type: "stable-diffusion-controlnet-guidance" 54 | guidance: 55 | control_type: "normal" 56 | min_step_percent: 0.05 57 | max_step_percent: 0.8 58 | condition_scale: 1.0 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 63 | 64 | loggers: 65 | wandb: 66 | enable: false 67 | project: 'threestudio' 68 | 69 | loss: 70 | lambda_sds: 0. 
71 | lambda_orient: [0, 10.0, 1000., 5000.0] 72 | lambda_sparsity: 1.0 73 | lambda_opaque: 1.0 74 | lambda_l1: 10. 75 | lambda_p: 10. 76 | lambda_kl: 0.000001 77 | lambda_G: 0.01 78 | lambda_D: 1. 79 | optimizer: 80 | name: Adam 81 | args: 82 | lr: 0.01 83 | betas: [0.9, 0.99] 84 | eps: 1.e-15 85 | params: 86 | geometry: 87 | lr: 0.01 88 | background: 89 | lr: 0.001 90 | renderer.generator: 91 | lr: 0.0001 92 | renderer.local_encoder: 93 | lr: 0.0001 94 | renderer.global_encoder: 95 | lr: 0.0001 96 | optimizer_dis: 97 | name: Adam 98 | args: 99 | lr: 0.01 100 | betas: [0.9, 0.99] 101 | eps: 1.e-15 102 | params: 103 | renderer.discriminator: 104 | lr: 0.00001 105 | 106 | trainer: 107 | max_steps: 50000 108 | log_every_n_steps: 1 109 | num_sanity_val_steps: 0 110 | val_check_interval: 200 111 | enable_progress_bar: true 112 | precision: 16-mixed 113 | 114 | checkpoint: 115 | save_last: true 116 | save_top_k: -1 117 | every_n_train_steps: ${trainer.max_steps} 118 | -------------------------------------------------------------------------------- /configs/dreamfusion-if.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.447269237440378 # max resolution 4096 47 | start_level: 8 # resolution ~200 48 | start_step: 2000 49 | update_steps: 500 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: scale_-11_01 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: scale_-11_01 59 | 60 | renderer_type: "nerf-volume-renderer" 61 | renderer: 62 | radius: ${system.geometry.radius} 63 | num_samples_per_ray: 512 64 | 65 | prompt_processor_type: "deep-floyd-prompt-processor" 66 | prompt_processor: 67 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/.cache/huggingface/hub/models--DeepFloyd--IF-I-XL-v1.0/snapshots/c03d510e9b75bce9f9db5bb85148c1402ad7e694" # "DeepFloyd/IF-I-XL-v1.0" 68 | prompt: ??? 
69 | 70 | guidance_type: "deep-floyd-guidance" 71 | guidance: 72 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/.cache/huggingface/hub/models--DeepFloyd--IF-I-XL-v1.0/snapshots/c03d510e9b75bce9f9db5bb85148c1402ad7e694" # "DeepFloyd/IF-I-XL-v1.0" 73 | guidance_scale: 20. 74 | weighting_strategy: sds 75 | min_step_percent: 0.02 76 | max_step_percent: 0.98 77 | 78 | loggers: 79 | wandb: 80 | enable: false 81 | project: 'threestudio' 82 | name: None 83 | 84 | loss: 85 | lambda_sds: 1. 86 | lambda_orient: [0, 10., 1000., 5000] 87 | lambda_sparsity: 1. 88 | lambda_opaque: 0.0 89 | optimizer: 90 | name: Adam 91 | args: 92 | lr: 0.01 93 | betas: [0.9, 0.99] 94 | eps: 1.e-15 95 | params: 96 | geometry: 97 | lr: 0.01 98 | background: 99 | lr: 0.001 100 | 101 | trainer: 102 | max_steps: 10000 103 | log_every_n_steps: 1 104 | num_sanity_val_steps: 0 105 | val_check_interval: 200 106 | enable_progress_bar: true 107 | precision: 16-mixed 108 | 109 | checkpoint: 110 | save_last: true # save at each validation time 111 | save_top_k: -1 112 | every_n_train_steps: ${trainer.max_steps} 113 | -------------------------------------------------------------------------------- /configs/dreamfusion-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.447269237440378 # max resolution 4096 47 | start_level: 8 # resolution ~200 48 | start_step: 2000 49 | update_steps: 500 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: sigmoid 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: sigmoid 59 | 60 | renderer_type: "nerf-volume-renderer" 61 | renderer: 62 | radius: ${system.geometry.radius} 63 | num_samples_per_ray: 512 64 | 65 | prompt_processor_type: "stable-diffusion-prompt-processor" 66 | prompt_processor: 67 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 68 | prompt: ??? 
69 | 70 | guidance_type: "stable-diffusion-guidance" 71 | guidance: 72 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 73 | guidance_scale: 100. 74 | weighting_strategy: sds 75 | min_step_percent: 0.02 76 | max_step_percent: 0.98 77 | 78 | loggers: 79 | wandb: 80 | enable: false 81 | project: "threestudio" 82 | name: None 83 | 84 | loss: 85 | lambda_sds: 1. 86 | lambda_orient: [0, 10., 1000., 5000] 87 | lambda_sparsity: 1. 88 | lambda_opaque: 0. 89 | optimizer: 90 | name: Adam 91 | args: 92 | lr: 0.01 93 | betas: [0.9, 0.99] 94 | eps: 1.e-15 95 | params: 96 | geometry: 97 | lr: 0.01 98 | background: 99 | lr: 0.001 100 | 101 | trainer: 102 | max_steps: 10000 103 | log_every_n_steps: 1 104 | num_sanity_val_steps: 0 105 | val_check_interval: 200 106 | enable_progress_bar: true 107 | precision: 16-mixed 108 | 109 | checkpoint: 110 | save_last: true # save at each validation time 111 | save_top_k: -1 112 | every_n_train_steps: ${trainer.max_steps} 113 | -------------------------------------------------------------------------------- /configs/experimental/co3d-imagecondition.yaml: -------------------------------------------------------------------------------- 1 | name: "co3d-imagecondition" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "co3d-datamodule" 7 | data: 8 | root_dir: ??? 9 | height: 256 10 | width: 256 11 | scale_radius: 3.0 12 | load_preprocessed: false 13 | cam_scale_factor: 0.95 # inherited from plenoxels 14 | max_num_frames: 300 # use less frames for debugging 15 | v2_mode: true 16 | use_mask: true 17 | box_crop: true 18 | box_crop_mask_thr: 0.4 19 | box_crop_context: 0.1 # The amount of additional padding added to each dimention of the cropping bounding box, relative to vox size. 20 | train_num_rays: 4096 21 | train_split: "train" 22 | val_split: "val" 23 | test_split: "test" 24 | render_path: "circle" 25 | train_views: [0, 50, 100] 26 | random_camera: 27 | eval_height: 256 28 | eval_width: 256 29 | eval_elevation_deg: 0. 30 | eval_camera_distance: 1.2 31 | eval_fovy_deg: 60. 32 | 33 | system_type: "image-condition-dreamfusion-system" 34 | system: 35 | geometry_type: "implicit-volume" 36 | geometry: 37 | isosurface_method: "mc-cpu" 38 | isosurface_resolution: 128 39 | isosurface_threshold: 0.0 40 | normal_type: "finite_difference" 41 | finite_difference_normal_eps: 0.004 42 | n_feature_dims: 32 43 | mlp_network_config: 44 | otype: "VanillaMLP" 45 | activation: "ReLU" 46 | output_activation: "none" 47 | n_neurons: 64 48 | n_hidden_layers: 2 49 | 50 | material_type: "diffuse-with-point-light-material" 51 | material: 52 | diffuse_prob: 1.0 53 | textureless_prob: 0.2 54 | ambient_light_color: [1.0, 1.0, 1.0] 55 | diffuse_light_color: [0.0, 0.0, 0.0] 56 | ambient_only_steps: ${system.freq.ref_only_steps} 57 | 58 | background_type: "neural-environment-map-background" 59 | background: 60 | dir_encoding_config: 61 | otype: ProgressiveBandFrequency 62 | n_frequencies: 6 63 | mlp_network_config: 64 | otype: VanillaMLP 65 | n_neurons: 32 66 | n_hidden_layers: 1 67 | activation: "ReLU" 68 | 69 | renderer_type: "nerf-volume-renderer" 70 | renderer: 71 | num_samples_per_ray: 512 72 | 73 | prompt_processor_type: "stable-diffusion-prompt-processor" 74 | prompt_processor: 75 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 76 | prompt: ??? 
77 | 78 | guidance_type: "stable-diffusion-guidance" 79 | guidance: 80 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 81 | guidance_scale: 100. 82 | weighting_strategy: sds 83 | 84 | freq: 85 | n_ref: 2 86 | ref_only_steps: 1000 87 | 88 | loggers: 89 | wandb: 90 | enable: false 91 | project: 'threestudio' 92 | name: None 93 | 94 | loss: 95 | lambda_sds: 0.1 96 | lambda_rgb: 10. 97 | lambda_mask: 1. 98 | lambda_depth: 0. 99 | # lambda_depth: [0.0, 0.0, 1.0, 10000] 100 | lambda_normal_smooth: 0.0 101 | lambda_orient: 1.0 102 | # lambda_orient: [1000, 0.0, 10, 6000] 103 | lambda_sparsity: 0.0 104 | lambda_opaque: 0.01 105 | optimizer: 106 | name: Adan 107 | args: 108 | eps: 1.0e-8 109 | weight_decay: 2.0e-5 110 | max_grad_norm: 5.0 111 | foreach: False 112 | params: 113 | geometry.encoding: 114 | lr: 0.05 115 | geometry.network: 116 | lr: 0.005 117 | background.network: 118 | lr: 0.005 119 | 120 | trainer: 121 | max_steps: 10000 122 | log_every_n_steps: 1 123 | num_sanity_val_steps: 0 124 | val_check_interval: 500 125 | limit_val_batches: 6 126 | enable_progress_bar: true 127 | precision: 16-mixed 128 | 129 | checkpoint: 130 | save_last: true # save at each validation time 131 | save_top_k: -1 132 | every_n_train_steps: ${trainer.max_steps} 133 | -------------------------------------------------------------------------------- /configs/fantasia3d-texture.yaml: -------------------------------------------------------------------------------- 1 | name: "fantasia3d-texture" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [3, 3] 12 | fovy_range: [25, 45] 13 | camera_perturb: 0. 14 | center_perturb: 0. 15 | up_perturb: 0. 16 | elevation_range: [-10, 45] 17 | azimuth_range: [-180, 180] 18 | batch_uniform_azimuth: true 19 | eval_camera_distance: 3. 20 | eval_fovy_deg: 45. 21 | 22 | system_type: "fantasia3d-system" 23 | system: 24 | # do texture training 25 | texture: true 26 | geometry_convert_from: ??? 27 | geometry_convert_inherit_texture: false 28 | geometry_type: "tetrahedra-sdf-grid" 29 | geometry: 30 | radius: 1.0 # consistent with coarse 31 | isosurface_resolution: 128 32 | isosurface_deformable_grid: true 33 | pos_encoding_config: 34 | otype: HashGrid 35 | n_levels: 16 36 | n_features_per_level: 2 37 | log2_hashmap_size: 19 38 | base_resolution: 16 39 | per_level_scale: 1.4472692374403782 # max resolution 4096 40 | n_feature_dims: 8 # albedo3 + roughness1 + metallic1 + bump3 41 | fix_geometry: true 42 | 43 | material_type: "pbr-material" 44 | material: 45 | material_activation: sigmoid 46 | environment_texture: "load/lights/mud_road_puresky_1k.hdr" 47 | environment_scale: 2.0 48 | min_metallic: 0.0 49 | max_metallic: 0.9 50 | min_roughness: 0.08 51 | max_roughness: 0.9 52 | use_bump: true 53 | 54 | background_type: "solid-color-background" 55 | 56 | renderer_type: "nvdiff-rasterizer" 57 | renderer: 58 | context_type: cuda 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 63 | prompt: ??? 
64 | 65 | guidance_type: "stable-diffusion-guidance" 66 | guidance: 67 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 68 | guidance_scale: 100 69 | weighting_strategy: sds 70 | min_step_percent: 0.02 71 | max_step_percent: 0.50 72 | 73 | loggers: 74 | wandb: 75 | enable: false 76 | project: "threestudio" 77 | 78 | loss: 79 | lambda_sds: 1. 80 | lambda_normal_consistency: 0. 81 | 82 | optimizer: 83 | name: AdamW 84 | args: 85 | lr: 0.01 86 | betas: [0.9, 0.99] 87 | eps: 1.e-15 88 | 89 | trainer: 90 | max_steps: 5000 91 | log_every_n_steps: 1 92 | num_sanity_val_steps: 1 93 | val_check_interval: 500 94 | enable_progress_bar: true 95 | precision: 16-mixed 96 | 97 | checkpoint: 98 | save_last: true # save at each validation time 99 | save_top_k: -1 100 | every_n_train_steps: ${trainer.max_steps} 101 | -------------------------------------------------------------------------------- /configs/fantasia3d.yaml: -------------------------------------------------------------------------------- 1 | name: "fantasia3d" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [3, 3] 12 | fovy_range: [25, 45] 13 | camera_perturb: 0. 14 | center_perturb: 0. 15 | up_perturb: 0. 16 | elevation_range: [-10, 45] 17 | azimuth_range: [-180, 180] 18 | batch_uniform_azimuth: true 19 | eval_camera_distance: 3. 20 | eval_fovy_deg: 45. 21 | 22 | system_type: "fantasia3d-system" 23 | system: 24 | latent_steps: 1000 25 | geometry_type: "implicit-sdf" 26 | geometry: 27 | radius: 1.0 28 | n_feature_dims: 0 29 | isosurface_resolution: 128 30 | isosurface_deformable_grid: true 31 | isosurface_coarse_to_fine: false 32 | 33 | # initialize SDF by optimization 34 | shape_init: sphere 35 | shape_init_params: 0.5 36 | 37 | # or you can initialize SDF using a guide mesh 38 | # shape_init: mesh:load/shapes/human.obj 39 | # shape_init_params: 0.9 40 | # shape_init_mesh_up: +y 41 | # shape_init_mesh_front: +z 42 | 43 | # an alternative initialization implementation: 44 | # you can initialize SDF to sphere/ellipsoid by adding a bias value 45 | # which leads to more smooth initialized shape 46 | # sdf_bias: sphere 47 | # sdf_bias_params: 0.5 48 | # DO NOT use the two initialization methods together 49 | 50 | material_type: "no-material" # unused 51 | material: 52 | n_output_dims: 0 53 | 54 | background_type: "solid-color-background" # unused 55 | 56 | renderer_type: "nvdiff-rasterizer" 57 | renderer: 58 | context_type: cuda 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 63 | prompt: ??? 64 | 65 | guidance_type: "stable-diffusion-guidance" 66 | guidance: 67 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 68 | guidance_scale: 100. 69 | max_step_percent: 0.5 70 | weighting_strategy: fantasia3d 71 | 72 | loggers: 73 | wandb: 74 | enable: false 75 | project: 'threestudio' 76 | name: None 77 | 78 | loss: 79 | lambda_sds: 1. 80 | lambda_normal_consistency: 0. 
81 | 82 | optimizer: 83 | name: AdamW 84 | args: 85 | lr: 0.001 86 | betas: [0.9, 0.99] 87 | eps: 1.e-15 88 | 89 | trainer: 90 | max_steps: 10000 91 | log_every_n_steps: 1 92 | num_sanity_val_steps: 1 93 | val_check_interval: 500 94 | enable_progress_bar: true 95 | precision: 16-mixed 96 | 97 | checkpoint: 98 | save_last: true # save at each validation time 99 | save_top_k: -1 100 | every_n_train_steps: ${trainer.max_steps} 101 | -------------------------------------------------------------------------------- /configs/gaussian_splatting.yaml: -------------------------------------------------------------------------------- 1 | name: "gs-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs_gs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 4 9 | width: 512 10 | height: 512 11 | camera_distance_range: [2.5, 2.5] 12 | fovy_range: [60, 70] 13 | elevation_range: [0, 30] # [-20, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.5 16 | eval_fovy_deg: 70 17 | near_far: [0.01, 100] 18 | 19 | system_type: "gaussian-splatting-system" 20 | system: 21 | invert_bg_prob: 0.0 22 | 23 | geometry_type: "gaussian" 24 | geometry: 25 | position_lr_init: 0.005 26 | position_lr_final: 0.00003 27 | position_lr_delay_mult: 0.02 28 | position_lr_max_steps: ${trainer.max_steps} 29 | scale_lr_init: 0.003 30 | scale_lr_final: 0.001 31 | scale_lr_max_steps: ${trainer.max_steps} 32 | feature_lr: 0.01 33 | opacity_lr: 0.003 34 | scaling_lr: 0.003 35 | rotation_lr: 0.003 36 | densification_interval: 1000 37 | prune_interval: 500 38 | opacity_reset_interval: 100000 39 | densify_from_iter: 1000 40 | densify_until_iter: 10000 41 | prune_from_iter: 500 42 | prune_until_iter: ${trainer.max_steps} 43 | # prune_until_iter: 0 44 | densify_grad_threshold: 0.02 45 | min_opac_prune: 0.05 46 | split_thresh: 0.02 47 | radii2d_thresh: 1000 48 | init_num_pts: 4096 49 | pc_init_radius: 0.8 50 | opacity_init: 0.8 51 | scales_init: 0.02 # 0.04 # ? 0.02 52 | # mesh init 53 | init: true 54 | type: mesh 55 | mesh: debug_data/sample_128.ply 56 | rotate_xy: true 57 | flip_z: true 58 | flip_x: true 59 | prompt: a human face 60 | num_points: 4096 61 | mean_std: 0.8 62 | svec_val: 0.02 63 | alpha_val: 0.8 64 | random_color: true 65 | facex: true 66 | 67 | renderer_type: "diff-gaussian-rasterizer" 68 | renderer: 69 | debug: false 70 | 71 | material_type: "no-material" # unused 72 | material: 73 | n_output_dims: 0 74 | 75 | background_type: "solid-color-background" # unused 76 | 77 | prompt_processor_type: "stable-diffusion-prompt-processor" 78 | prompt_processor: 79 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 80 | prompt: ??? 
81 | 82 | guidance_type: "stable-diffusion-guidance" 83 | guidance: 84 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 85 | guidance_scale: 100.0 86 | weighting_strategy: sds 87 | min_step_percent: 0.02 88 | max_step_percent: [2000, 0.98, 0.5, 2001] 89 | 90 | loggers: 91 | wandb: 92 | enable: false 93 | project: 'threestudio' 94 | name: None 95 | 96 | loss: 97 | lambda_sds: 0.1 98 | lambda_position: 0.0 99 | lambda_opacity: 0.0 100 | 101 | trainer: 102 | max_steps: 15000 103 | log_every_n_steps: 1 104 | num_sanity_val_steps: 0 105 | val_check_interval: 100 106 | enable_progress_bar: true 107 | precision: 32-true 108 | 109 | checkpoint: 110 | save_last: true # save at each validation time 111 | save_top_k: -1 112 | every_n_train_steps: ${trainer.max_steps} -------------------------------------------------------------------------------- /configs/gradio/dreamfusion-if.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.381912879967776 # max resolution 2048 47 | start_level: 10 # resolution ~300 48 | start_step: 2000 49 | update_steps: 400 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: scale_-11_01 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: scale_-11_01 59 | random_aug: true 60 | 61 | renderer_type: "nerf-volume-renderer" 62 | renderer: 63 | radius: ${system.geometry.radius} 64 | num_samples_per_ray: 512 65 | 66 | prompt_processor_type: "deep-floyd-prompt-processor" 67 | prompt_processor: 68 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 69 | prompt: ??? 70 | 71 | guidance_type: "deep-floyd-guidance" 72 | guidance: 73 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 74 | guidance_scale: 20. 75 | weighting_strategy: sds 76 | min_step_percent: 0.02 77 | max_step_percent: 0.98 78 | 79 | exporter_type: "mesh-exporter" 80 | exporter: 81 | fmt: obj 82 | save_uv: false 83 | context_type: cuda 84 | 85 | loggers: 86 | wandb: 87 | enable: false 88 | project: "threestudio" 89 | name: None 90 | 91 | loss: 92 | lambda_sds: 1. 
93 | lambda_orient: [0, 10., 1000., 5000] 94 | lambda_sparsity: 1. 95 | lambda_opaque: 0.0 96 | optimizer: 97 | name: Adam 98 | args: 99 | lr: 0.01 100 | betas: [0.9, 0.99] 101 | eps: 1.e-15 102 | params: 103 | geometry: 104 | lr: 0.01 105 | background: 106 | lr: 0.001 107 | 108 | trainer: 109 | max_steps: 5000 110 | log_every_n_steps: 1 111 | num_sanity_val_steps: 0 112 | val_check_interval: 100 113 | enable_progress_bar: true 114 | precision: 16-mixed 115 | 116 | checkpoint: 117 | save_last: false 118 | save_top_k: -1 119 | every_n_train_steps: 0 # do not save checkpoints during training 120 | -------------------------------------------------------------------------------- /configs/gradio/dreamfusion-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.381912879967776 # max resolution 2048 47 | start_level: 10 # resolution ~300 48 | start_step: 2000 49 | update_steps: 400 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: sigmoid 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: sigmoid 59 | random_aug: true 60 | 61 | renderer_type: "nerf-volume-renderer" 62 | renderer: 63 | radius: ${system.geometry.radius} 64 | num_samples_per_ray: 512 65 | 66 | prompt_processor_type: "stable-diffusion-prompt-processor" 67 | prompt_processor: 68 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 69 | prompt: ??? 70 | 71 | guidance_type: "stable-diffusion-guidance" 72 | guidance: 73 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 74 | guidance_scale: 100. 75 | weighting_strategy: sds 76 | min_step_percent: 0.02 77 | max_step_percent: 0.98 78 | grad_clip: [0, 0.5, 2.0, 5000] 79 | 80 | exporter_type: "mesh-exporter" 81 | exporter: 82 | fmt: obj 83 | save_uv: false 84 | context_type: cuda 85 | 86 | loggers: 87 | wandb: 88 | enable: false 89 | project: "threestudio" 90 | name: None 91 | 92 | loss: 93 | lambda_sds: 1. 94 | lambda_orient: [0, 10., 1000., 5000] 95 | lambda_sparsity: 1. 96 | lambda_opaque: 0. 
97 | optimizer: 98 | name: Adam 99 | args: 100 | lr: 0.01 101 | betas: [0.9, 0.99] 102 | eps: 1.e-15 103 | params: 104 | geometry: 105 | lr: 0.01 106 | background: 107 | lr: 0.001 108 | 109 | trainer: 110 | max_steps: 5000 111 | log_every_n_steps: 1 112 | num_sanity_val_steps: 0 113 | val_check_interval: 100 114 | enable_progress_bar: true 115 | precision: 16-mixed 116 | 117 | checkpoint: 118 | save_last: false 119 | save_top_k: -1 120 | every_n_train_steps: 0 # do not save checkpoints during training 121 | -------------------------------------------------------------------------------- /configs/gradio/fantasia3d.yaml: -------------------------------------------------------------------------------- 1 | name: "fantasia3d" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [3, 3] 12 | fovy_range: [25, 45] 13 | camera_perturb: 0. 14 | center_perturb: 0. 15 | up_perturb: 0. 16 | elevation_range: [-10, 45] 17 | azimuth_range: [-180, 180] 18 | batch_uniform_azimuth: true 19 | eval_camera_distance: 3. 20 | eval_fovy_deg: 45. 21 | 22 | system_type: "fantasia3d-system" 23 | system: 24 | latent_steps: 1000 25 | geometry_type: "implicit-sdf" 26 | geometry: 27 | radius: 1.0 28 | n_feature_dims: 0 29 | isosurface_resolution: 128 30 | isosurface_deformable_grid: true 31 | isosurface_coarse_to_fine: false 32 | 33 | # initialize SDF by optimization 34 | shape_init: sphere 35 | shape_init_params: 0.5 36 | 37 | # or you can initialize SDF using a guide mesh 38 | # shape_init: mesh:load/shapes/human.obj 39 | # shape_init_params: 0.9 40 | # shape_init_mesh_up: +y 41 | # shape_init_mesh_front: +z 42 | 43 | # an alternative initialization implementation: 44 | # you can initialize SDF to sphere/ellipsoid by adding a bias value 45 | # which leads to more smooth initialized shape 46 | # sdf_bias: sphere 47 | # sdf_bias_params: 0.5 48 | # DO NOT use the two initialization methods together 49 | 50 | material_type: "no-material" # unused 51 | material: 52 | n_output_dims: 0 53 | 54 | background_type: "solid-color-background" # unused 55 | 56 | renderer_type: "nvdiff-rasterizer" 57 | renderer: 58 | context_type: cuda 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 63 | prompt: ??? 64 | 65 | guidance_type: "stable-diffusion-guidance" 66 | guidance: 67 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 68 | guidance_scale: 100. 69 | max_step_percent: 0.5 70 | weighting_strategy: fantasia3d 71 | 72 | exporter_type: "mesh-exporter" 73 | exporter: 74 | fmt: obj 75 | save_uv: false 76 | save_texture: false 77 | context_type: cuda 78 | 79 | loggers: 80 | wandb: 81 | enable: false 82 | project: "threestudio" 83 | name: None 84 | 85 | loss: 86 | lambda_sds: 1. 87 | lambda_normal_consistency: 0. 
88 | 89 | optimizer: 90 | name: AdamW 91 | args: 92 | lr: 0.001 93 | betas: [0.9, 0.99] 94 | eps: 1.e-15 95 | 96 | trainer: 97 | max_steps: 5000 98 | log_every_n_steps: 1 99 | num_sanity_val_steps: 1 100 | val_check_interval: 200 101 | enable_progress_bar: true 102 | precision: 16-mixed 103 | 104 | checkpoint: 105 | save_last: false 106 | save_top_k: -1 107 | every_n_train_steps: 0 # do not save checkpoints during training 108 | -------------------------------------------------------------------------------- /configs/gradio/latentnerf.yaml: -------------------------------------------------------------------------------- 1 | name: "latentnerf" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | geometry_type: "implicit-volume" 13 | geometry: 14 | n_feature_dims: 4 15 | normal_type: null 16 | 17 | density_bias: "blob_dreamfusion" 18 | density_activation: trunc_exp 19 | density_blob_scale: 5. 20 | density_blob_std: 0.2 21 | 22 | pos_encoding_config: 23 | otype: HashGrid 24 | n_levels: 16 25 | n_features_per_level: 2 26 | log2_hashmap_size: 19 27 | base_resolution: 16 28 | per_level_scale: 1.381912879967776 # max resolution 2048 29 | 30 | material_type: "no-material" 31 | material: 32 | n_output_dims: 4 33 | color_activation: none 34 | 35 | background_type: "neural-environment-map-background" 36 | background: 37 | n_output_dims: 4 38 | color_activation: none 39 | 40 | renderer_type: "nerf-volume-renderer" 41 | renderer: 42 | num_samples_per_ray: 512 43 | 44 | prompt_processor_type: "stable-diffusion-prompt-processor" 45 | prompt_processor: 46 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 47 | prompt: ??? 48 | 49 | guidance_type: "stable-diffusion-guidance" 50 | guidance: 51 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 52 | guidance_scale: 100. 53 | weighting_strategy: sds 54 | grad_clip: [0, 2.0, 8.0, 5000] 55 | 56 | exporter_type: "dummy-exporter" 57 | 58 | loggers: 59 | wandb: 60 | enable: false 61 | project: "threestudio" 62 | name: None 63 | 64 | loss: 65 | lambda_sds: 1. 
66 | lambda_sparsity: 5.e-4 67 | lambda_opaque: 0.0 68 | lambda_orient: 0.0 69 | optimizer: 70 | name: Adam 71 | args: 72 | lr: 0.01 73 | betas: [0.9, 0.99] 74 | eps: 1.e-15 75 | scheduler: 76 | name: SequentialLR 77 | interval: step 78 | warmup_steps: 100 79 | milestones: 80 | - ${system.scheduler.warmup_steps} 81 | schedulers: 82 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 83 | args: 84 | start_factor: 0.1 85 | end_factor: 1.0 86 | total_iters: ${system.scheduler.warmup_steps} 87 | - name: ExponentialLR 88 | args: 89 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 90 | 91 | trainer: 92 | max_steps: 5000 93 | log_every_n_steps: 1 94 | num_sanity_val_steps: 0 95 | val_check_interval: 200 96 | enable_progress_bar: true 97 | precision: 16-mixed 98 | 99 | checkpoint: 100 | save_last: false 101 | save_top_k: -1 102 | every_n_train_steps: 0 # do not save checkpoints during training 103 | -------------------------------------------------------------------------------- /configs/gradio/sjc.yaml: -------------------------------------------------------------------------------- 1 | name: sjc 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: random-camera-datamodule 7 | data: 8 | camera_distance_range: [1.50, 1.50] 9 | elevation_range: [-10, 45] 10 | camera_perturb: 0.0 11 | center_perturb: 0.0 12 | up_perturb: 0.0 13 | light_position_perturb: 0.0 14 | eval_elevation_deg: 20.0 15 | 16 | system_type: sjc-system 17 | system: 18 | subpixel_rendering: false 19 | 20 | geometry_type: volume-grid 21 | geometry: 22 | normal_type: null 23 | grid_size: [100, 100, 100] 24 | density_bias: -1.0 25 | n_feature_dims: 4 26 | 27 | material_type: no-material 28 | material: 29 | n_output_dims: 4 30 | color_activation: none 31 | 32 | background_type: textured-background 33 | background: 34 | n_output_dims: 4 35 | color_activation: none 36 | height: 4 37 | width: 4 38 | 39 | renderer_type: nerf-volume-renderer 40 | renderer: 41 | num_samples_per_ray: 512 42 | grid_prune: false 43 | 44 | prompt_processor_type: stable-diffusion-prompt-processor 45 | prompt_processor: 46 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 47 | prompt: ??? 48 | view_dependent_prompt_front: true 49 | 50 | guidance_type: stable-diffusion-guidance 51 | guidance: 52 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 53 | guidance_scale: 100. 54 | use_sjc: true 55 | var_red: true 56 | min_step_percent: 0.01 57 | max_step_percent: 0.97 58 | grad_clip: [0, 2.0, 8.0, 5000] 59 | 60 | exporter_type: "dummy-exporter" 61 | 62 | loggers: 63 | wandb: 64 | enable: false 65 | project: "threestudio" 66 | name: None 67 | 68 | loss: 69 | lambda_sds: 1. 
70 | center_ratio: 0.78125 # = 50 / 64 71 | lambda_depth: 0 # or try 10 72 | lambda_emptiness: [5000, 1.e+4, 2.e+5, 5001] 73 | emptiness_scale: 10 74 | 75 | optimizer: 76 | name: Adamax 77 | args: 78 | lr: 0.05 79 | params: 80 | geometry: 81 | lr: 0.05 82 | background: 83 | lr: 0.0001 # maybe 0.001/0.01 is better 84 | 85 | trainer: 86 | max_steps: 5000 87 | log_every_n_steps: 1 88 | num_sanity_val_steps: 0 89 | val_check_interval: 200 90 | enable_progress_bar: true 91 | precision: 16-mixed 92 | 93 | checkpoint: 94 | save_last: false 95 | save_top_k: -1 96 | every_n_train_steps: 0 # do not save checkpoints during training 97 | -------------------------------------------------------------------------------- /configs/gradio/textmesh-if.yaml: -------------------------------------------------------------------------------- 1 | name: "textmesh-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "textmesh-system" 19 | system: 20 | geometry_type: "implicit-sdf" 21 | geometry: 22 | radius: 2.0 23 | normal_type: finite_difference 24 | # progressive eps from Neuralangelo 25 | finite_difference_normal_eps: progressive 26 | 27 | sdf_bias: sphere 28 | sdf_bias_params: 0.5 29 | 30 | # coarse to fine hash grid encoding 31 | pos_encoding_config: 32 | otype: ProgressiveBandHashGrid 33 | n_levels: 16 34 | n_features_per_level: 2 35 | log2_hashmap_size: 19 36 | base_resolution: 16 37 | per_level_scale: 1.381912879967776 # max resolution 2048 38 | start_level: 10 # resolution ~300 39 | start_step: 2000 40 | update_steps: 400 41 | 42 | material_type: "diffuse-with-point-light-material" 43 | material: 44 | ambient_only_steps: 2001 45 | albedo_activation: sigmoid 46 | 47 | background_type: "neural-environment-map-background" 48 | background: 49 | color_activation: sigmoid 50 | random_aug: true 51 | 52 | renderer_type: "neus-volume-renderer" 53 | renderer: 54 | radius: ${system.geometry.radius} 55 | num_samples_per_ray: 512 56 | cos_anneal_end_steps: ${trainer.max_steps} 57 | eval_chunk_size: 8192 58 | 59 | prompt_processor_type: "deep-floyd-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 62 | prompt: ??? 63 | 64 | guidance_type: "deep-floyd-guidance" 65 | guidance: 66 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 67 | guidance_scale: 20. 68 | weighting_strategy: sds 69 | min_step_percent: 0.02 70 | max_step_percent: 0.98 71 | 72 | exporter_type: "mesh-exporter" 73 | exporter: 74 | fmt: obj 75 | save_uv: false 76 | context_type: cuda 77 | 78 | loss: 79 | lambda_sds: 1. 80 | lambda_orient: 0.0 81 | lambda_sparsity: 0.0 82 | lambda_opaque: 0.0 83 | lambda_eikonal: 1000. 
84 | optimizer: 85 | name: Adam 86 | args: 87 | betas: [0.9, 0.99] 88 | eps: 1.e-15 89 | params: 90 | geometry.encoding: 91 | lr: 0.01 92 | geometry.sdf_network: 93 | lr: 0.001 94 | geometry.feature_network: 95 | lr: 0.001 96 | background: 97 | lr: 0.001 98 | renderer: 99 | lr: 0.001 100 | 101 | trainer: 102 | max_steps: 5000 103 | log_every_n_steps: 1 104 | num_sanity_val_steps: 0 105 | val_check_interval: 100 106 | enable_progress_bar: true 107 | precision: 16-mixed 108 | 109 | checkpoint: 110 | save_last: false 111 | save_top_k: -1 112 | every_n_train_steps: 0 # do not save checkpoints during training 113 | -------------------------------------------------------------------------------- /configs/instructnerf2nerf.yaml: -------------------------------------------------------------------------------- 1 | name: "instructnerf2nerf" 2 | tag: "${basename:${data.dataroot}}_${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "multiview-camera-datamodule" 7 | data: 8 | train_downsample_resolution: 2 9 | eval_downsample_resolution: 2 10 | dataroot: ??? 11 | 12 | system_type: "instructnerf2nerf-system" 13 | system: 14 | start_editing_step: 600 15 | per_editing_step: 10 16 | 17 | geometry_type: "implicit-volume" 18 | geometry: 19 | radius: 1. 20 | normal_type: analytic 21 | 22 | pos_encoding_config: 23 | otype: HashGrid 24 | n_levels: 16 25 | n_features_per_level: 2 26 | log2_hashmap_size: 19 27 | base_resolution: 16 28 | per_level_scale: 1.4472692374403782 # max resolution 4096 29 | 30 | density_bias: "blob_magic3d" 31 | density_activation: softplus 32 | density_blob_scale: 10. 33 | density_blob_std: 0.5 34 | 35 | 36 | material_type: "diffuse-with-point-light-material" 37 | material: 38 | ambient_only_steps: 9999999 39 | albedo_activation: sigmoid 40 | 41 | background_type: "neural-environment-map-background" 42 | background: 43 | color_activation: sigmoid 44 | random_aug: false 45 | 46 | renderer_type: "patch-renderer" 47 | renderer: 48 | base_renderer_type: "nerf-volume-renderer" 49 | base_renderer: 50 | radius: ${system.geometry.radius} 51 | num_samples_per_ray: 384 52 | patch_size: 128 53 | 54 | guidance_type: "stable-diffusion-instructpix2pix-guidance" 55 | guidance: 56 | min_step_percent: 0.02 57 | max_step_percent: 0.98 58 | 59 | prompt_processor_type: "stable-diffusion-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 62 | prompt: "Turn him into Elon Musk" 63 | 64 | loggers: 65 | wandb: 66 | enable: false 67 | project: 'threestudio' 68 | 69 | loss: 70 | lambda_sds: 0. 71 | lambda_orient: [0, 10.0, 1000., 5000.0] 72 | lambda_sparsity: 1.0 73 | lambda_opaque: 1.0 74 | lambda_l1: 10. 75 | lambda_p: 10. 
76 | optimizer: 77 | name: Adam 78 | args: 79 | lr: 0.01 80 | betas: [0.9, 0.99] 81 | eps: 1.e-15 82 | params: 83 | geometry: 84 | lr: 0.01 85 | background: 86 | lr: 0.001 87 | 88 | trainer: 89 | max_steps: 20000 90 | log_every_n_steps: 1 91 | num_sanity_val_steps: 0 92 | val_check_interval: 600 93 | enable_progress_bar: true 94 | precision: 16-mixed 95 | 96 | checkpoint: 97 | save_last: true 98 | save_top_k: -1 99 | every_n_train_steps: ${trainer.max_steps} 100 | -------------------------------------------------------------------------------- /configs/latentnerf-refine.yaml: -------------------------------------------------------------------------------- 1 | name: "latentnerf-refine" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | refinement: true 13 | weights: ??? 14 | weights_ignore_modules: ["material", "background"] 15 | 16 | geometry_type: "implicit-volume" 17 | geometry: 18 | n_feature_dims: 4 19 | normal_type: null 20 | 21 | density_bias: "blob_dreamfusion" 22 | density_activation: trunc_exp 23 | density_blob_scale: 5. 24 | density_blob_std: 0.2 25 | 26 | material_type: "sd-latent-adapter-material" 27 | 28 | background_type: "neural-environment-map-background" 29 | 30 | renderer_type: "nerf-volume-renderer" 31 | renderer: 32 | num_samples_per_ray: 512 33 | 34 | prompt_processor_type: "stable-diffusion-prompt-processor" 35 | prompt_processor: 36 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 37 | prompt: ??? 38 | 39 | guidance_type: "stable-diffusion-guidance" 40 | guidance: 41 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 42 | guidance_scale: 100. 43 | weighting_strategy: sds 44 | 45 | loggers: 46 | wandb: 47 | enable: false 48 | project: "threestudio" 49 | name: None 50 | 51 | loss: 52 | lambda_sds: 1. 
53 | lambda_sparsity: 5.e-4 54 | lambda_opaque: 0.0 55 | lambda_orient: 0.0 56 | optimizer: 57 | name: Adam 58 | args: 59 | lr: 0.01 60 | betas: [0.9, 0.99] 61 | eps: 1.e-15 62 | scheduler: 63 | name: SequentialLR 64 | interval: step 65 | warmup_steps: 100 66 | milestones: 67 | - ${system.scheduler.warmup_steps} 68 | schedulers: 69 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 70 | args: 71 | start_factor: 0.1 72 | end_factor: 1.0 73 | total_iters: ${system.scheduler.warmup_steps} 74 | - name: ExponentialLR 75 | args: 76 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 77 | 78 | trainer: 79 | max_steps: 10000 80 | log_every_n_steps: 1 81 | num_sanity_val_steps: 1 82 | val_check_interval: 200 83 | enable_progress_bar: true 84 | precision: 16-mixed 85 | 86 | checkpoint: 87 | save_last: true # save at each validation time 88 | save_top_k: -1 89 | every_n_train_steps: ${trainer.max_steps} 90 | -------------------------------------------------------------------------------- /configs/latentnerf.yaml: -------------------------------------------------------------------------------- 1 | name: "latentnerf" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | geometry_type: "implicit-volume" 13 | geometry: 14 | n_feature_dims: 4 15 | normal_type: null 16 | 17 | density_bias: "blob_dreamfusion" 18 | density_activation: trunc_exp 19 | density_blob_scale: 5. 20 | density_blob_std: 0.2 21 | 22 | material_type: "no-material" 23 | material: 24 | n_output_dims: 4 25 | color_activation: none 26 | 27 | background_type: "neural-environment-map-background" 28 | background: 29 | n_output_dims: 4 30 | color_activation: none 31 | 32 | renderer_type: "nerf-volume-renderer" 33 | renderer: 34 | num_samples_per_ray: 512 35 | 36 | prompt_processor_type: "stable-diffusion-prompt-processor" 37 | prompt_processor: 38 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 39 | prompt: ??? 40 | 41 | guidance_type: "stable-diffusion-guidance" 42 | guidance: 43 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 44 | guidance_scale: 100. 45 | weighting_strategy: sds 46 | 47 | loggers: 48 | wandb: 49 | enable: false 50 | project: "threestudio" 51 | name: None 52 | 53 | loss: 54 | lambda_sds: 1. 
55 | lambda_sparsity: 5.e-4 56 | lambda_opaque: 0.0 57 | lambda_orient: 0.0 58 | optimizer: 59 | name: Adam 60 | args: 61 | lr: 0.01 62 | betas: [0.9, 0.99] 63 | eps: 1.e-15 64 | scheduler: 65 | name: SequentialLR 66 | interval: step 67 | warmup_steps: 100 68 | milestones: 69 | - ${system.scheduler.warmup_steps} 70 | schedulers: 71 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 72 | args: 73 | start_factor: 0.1 74 | end_factor: 1.0 75 | total_iters: ${system.scheduler.warmup_steps} 76 | - name: ExponentialLR 77 | args: 78 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 79 | 80 | trainer: 81 | max_steps: 10000 82 | log_every_n_steps: 1 83 | num_sanity_val_steps: 0 84 | val_check_interval: 200 85 | enable_progress_bar: true 86 | precision: 16-mixed 87 | 88 | checkpoint: 89 | save_last: true # save at each validation time 90 | save_top_k: -1 91 | every_n_train_steps: ${trainer.max_steps} 92 | -------------------------------------------------------------------------------- /configs/magic3d-coarse-if.yaml: -------------------------------------------------------------------------------- 1 | name: "magic3d-coarse-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | width: 64 9 | height: 64 10 | camera_distance_range: [1.5, 2.0] 11 | light_sample_strategy: "magic3d" 12 | eval_camera_distance: 2.0 13 | eval_fovy_deg: 70. 14 | 15 | system_type: "magic3d-system" 16 | system: 17 | geometry_type: "implicit-volume" 18 | geometry: 19 | radius: 2. 20 | normal_type: analytic 21 | pos_encoding_config: 22 | otype: HashGrid 23 | n_levels: 16 24 | n_features_per_level: 2 25 | log2_hashmap_size: 19 26 | base_resolution: 16 27 | per_level_scale: 1.4472692374403782 # max resolution 4096 28 | density_bias: "blob_magic3d" 29 | density_activation: softplus 30 | density_blob_scale: 10. 31 | density_blob_std: 0.5 32 | isosurface_resolution: 128 33 | isosurface_threshold: auto 34 | isosurface_coarse_to_fine: true 35 | 36 | material_type: "diffuse-with-point-light-material" 37 | material: 38 | ambient_only_steps: 2001 39 | soft_shading: true 40 | 41 | background_type: "neural-environment-map-background" 42 | 43 | renderer_type: "nerf-volume-renderer" 44 | renderer: 45 | radius: ${system.geometry.radius} 46 | num_samples_per_ray: 512 47 | 48 | prompt_processor_type: "deep-floyd-prompt-processor" 49 | prompt_processor: 50 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 51 | prompt: ??? 52 | 53 | guidance_type: "deep-floyd-guidance" 54 | guidance: 55 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 56 | weighting_strategy: uniform 57 | guidance_scale: 20. 58 | min_step_percent: 0.02 59 | max_step_percent: 0.98 60 | 61 | loggers: 62 | wandb: 63 | enable: false 64 | project: 'threestudio' 65 | name: None 66 | 67 | loss: 68 | lambda_sds: 1. 69 | lambda_orient: [0, 10., 1000., 5000] 70 | lambda_sparsity: 1. 71 | lambda_opaque: 0. 
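# NOTE: list-valued loss weights such as lambda_orient above appear to use threestudio's
# scheduled-value convention (start_step, start_value, end_value, end_step), the same format
# documented for min_step_percent/max_step_percent in the MVDream configs further down:
# read this way, the orientation loss weight would ramp from 10. at step 0 to 1000. at step 5000.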
72 | optimizer: 73 | name: Adam 74 | args: 75 | lr: 0.01 76 | betas: [0.9, 0.99] 77 | eps: 1.e-15 78 | params: 79 | geometry: 80 | lr: 0.01 81 | background: 82 | lr: 0.001 83 | 84 | trainer: 85 | max_steps: 10000 86 | log_every_n_steps: 1 87 | num_sanity_val_steps: 0 88 | val_check_interval: 200 89 | enable_progress_bar: true 90 | precision: 16-mixed 91 | 92 | checkpoint: 93 | save_last: true 94 | save_top_k: -1 95 | every_n_train_steps: ${trainer.max_steps} 96 | -------------------------------------------------------------------------------- /configs/magic3d-coarse-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "magic3d-coarse-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | width: 64 9 | height: 64 10 | camera_distance_range: [1.5, 2.0] 11 | elevation_range: [-10, 45] 12 | light_sample_strategy: "magic3d" 13 | eval_camera_distance: 2.0 14 | eval_fovy_deg: 70. 15 | 16 | system_type: "magic3d-system" 17 | system: 18 | geometry_type: "implicit-volume" 19 | geometry: 20 | radius: 2. 21 | normal_type: analytic 22 | pos_encoding_config: 23 | otype: HashGrid 24 | n_levels: 16 25 | n_features_per_level: 2 26 | log2_hashmap_size: 19 27 | base_resolution: 16 28 | per_level_scale: 1.4472692374403782 # max resolution 4096 29 | density_bias: "blob_magic3d" 30 | density_activation: softplus 31 | density_blob_scale: 10. 32 | density_blob_std: 0.5 33 | isosurface_resolution: 128 34 | isosurface_threshold: auto 35 | isosurface_coarse_to_fine: true 36 | 37 | material_type: "diffuse-with-point-light-material" 38 | material: 39 | ambient_only_steps: 2001 40 | soft_shading: true 41 | 42 | background_type: "neural-environment-map-background" 43 | 44 | renderer_type: "nerf-volume-renderer" 45 | renderer: 46 | radius: ${system.geometry.radius} 47 | num_samples_per_ray: 512 48 | 49 | prompt_processor_type: "stable-diffusion-prompt-processor" 50 | prompt_processor: 51 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 52 | prompt: ??? 53 | 54 | guidance_type: "stable-diffusion-guidance" 55 | guidance: 56 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 57 | weighting_strategy: uniform 58 | guidance_scale: 100. 59 | min_step_percent: 0.02 60 | max_step_percent: 0.98 61 | 62 | loggers: 63 | wandb: 64 | enable: false 65 | project: "threestudio" 66 | name: None 67 | 68 | loss: 69 | lambda_sds: 1. 70 | lambda_orient: [0, 10., 1000., 5000] 71 | lambda_sparsity: 1. 72 | lambda_opaque: 0. 
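# NOTE on the hash-grid encoding used throughout these configs: per_level_scale is the growth
# factor between levels, per_level_scale = (max_resolution / base_resolution) ** (1 / (n_levels - 1)).
# For example, (4096 / 16) ** (1 / 15) = 1.4472692374403782, matching the "max resolution 4096"
# comment above, and (2048 / 16) ** (1 / 15) = 1.381912879967776 in the textmesh config below.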
73 | optimizer: 74 | name: Adam 75 | args: 76 | lr: 0.01 77 | betas: [0.9, 0.99] 78 | eps: 1.e-15 79 | params: 80 | geometry: 81 | lr: 0.01 82 | background: 83 | lr: 0.001 84 | 85 | trainer: 86 | max_steps: 10000 87 | log_every_n_steps: 1 88 | num_sanity_val_steps: 0 89 | val_check_interval: 200 90 | enable_progress_bar: true 91 | precision: 16-mixed 92 | 93 | checkpoint: 94 | save_last: true 95 | save_top_k: -1 96 | every_n_train_steps: ${trainer.max_steps} 97 | -------------------------------------------------------------------------------- /configs/magic3d-refine-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "magic3d-refine-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | width: 512 9 | height: 512 10 | camera_distance_range: [1.5, 2.0] 11 | elevation_range: [-10, 45] 12 | light_sample_strategy: "magic3d" 13 | fovy_range: [30, 45] 14 | eval_camera_distance: 2.0 15 | eval_fovy_deg: 70. 16 | 17 | system_type: "magic3d-system" 18 | system: 19 | refinement: true 20 | geometry_convert_from: ??? 21 | geometry_convert_inherit_texture: true 22 | geometry_type: "tetrahedra-sdf-grid" 23 | geometry: 24 | radius: 2.0 # consistent with coarse 25 | isosurface_resolution: 128 26 | isosurface_deformable_grid: true 27 | pos_encoding_config: # consistent with coarse, no progressive band 28 | otype: HashGrid 29 | n_levels: 16 30 | n_features_per_level: 2 31 | log2_hashmap_size: 19 32 | base_resolution: 16 33 | per_level_scale: 1.4472692374403782 # max resolution 4096 34 | fix_geometry: false # optimize grid sdf and deformation 35 | 36 | material_type: "diffuse-with-point-light-material" 37 | material: 38 | ambient_only_steps: 0 39 | soft_shading: true 40 | 41 | background_type: "neural-environment-map-background" 42 | 43 | renderer_type: "nvdiff-rasterizer" 44 | renderer: 45 | context_type: cuda # gl 46 | 47 | prompt_processor_type: "stable-diffusion-prompt-processor" 48 | prompt_processor: 49 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 50 | prompt: ??? 51 | 52 | guidance_type: "stable-diffusion-guidance" 53 | guidance: 54 | pretrained_model_name_or_path: "/nvme/lihe/workspace/weights/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 55 | weighting_strategy: sds 56 | guidance_scale: 100. 57 | min_step_percent: 0.02 58 | max_step_percent: 0.5 59 | 60 | loggers: 61 | wandb: 62 | enable: false 63 | project: "threestudio" 64 | name: None 65 | 66 | loss: 67 | lambda_sds: 1. 68 | lambda_normal_consistency: 10000. 
69 | 70 | optimizer: 71 | name: Adam 72 | args: 73 | lr: 0.01 74 | betas: [0.9, 0.99] 75 | eps: 1.e-15 76 | 77 | trainer: 78 | max_steps: 5000 79 | log_every_n_steps: 1 80 | num_sanity_val_steps: 1 81 | val_check_interval: 100 82 | enable_progress_bar: true 83 | precision: 16-mixed 84 | 85 | checkpoint: 86 | save_last: true 87 | save_top_k: -1 88 | every_n_train_steps: ${trainer.max_steps} 89 | -------------------------------------------------------------------------------- /configs/mvdream-sd21-shading.yaml: -------------------------------------------------------------------------------- 1 | name: "mvdream-sd21-rescale0.5-shading" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-multiview-camera-datamodule" 7 | data: 8 | batch_size: [8,4] # must be dividable by n_view 9 | n_view: 4 10 | # 0-4999: 64x64, >=5000: 256x256 11 | width: [64, 256] 12 | height: [64, 256] 13 | resolution_milestones: [5000] 14 | camera_distance_range: [0.8, 1.0] # relative 15 | fovy_range: [15, 60] 16 | elevation_range: [0, 30] 17 | camera_perturb: 0. 18 | center_perturb: 0. 19 | up_perturb: 0. 20 | n_val_views: 4 21 | eval_camera_distance: 3.0 22 | eval_fovy_deg: 40. 23 | 24 | system_type: "mvdream-system" 25 | system: 26 | geometry_type: "implicit-volume" 27 | geometry: 28 | radius: 1.0 29 | normal_type: "analytic" 30 | 31 | density_bias: "blob_magic3d" 32 | density_activation: softplus 33 | density_blob_scale: 10. 34 | density_blob_std: 0.5 35 | 36 | pos_encoding_config: 37 | otype: HashGrid 38 | n_levels: 16 39 | n_features_per_level: 2 40 | log2_hashmap_size: 19 41 | base_resolution: 16 42 | per_level_scale: 1.447269237440378 # max resolution 4096 43 | 44 | material_type: "diffuse-with-point-light-material" 45 | material: 46 | ambient_only_steps: 5000 47 | textureless_prob: 0.5 48 | ambient_light_color: [1.0, 1.0, 1.0] 49 | diffuse_light_color: [0.0, 0.0, 0.0] 50 | soft_shading: true 51 | albedo_activation: sigmoid 52 | 53 | background_type: "neural-environment-map-background" 54 | background: 55 | color_activation: sigmoid 56 | random_aug: true 57 | share_aug_bg: true 58 | 59 | renderer_type: "nerf-volume-renderer" 60 | renderer: 61 | radius: ${system.geometry.radius} 62 | num_samples_per_ray: 512 63 | 64 | prompt_processor_type: "stable-diffusion-prompt-processor" 65 | prompt_processor: 66 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 67 | prompt: ??? 68 | negative_prompt: "ugly, bad anatomy, blurry, pixelated obscure, unnatural colors, poor lighting, dull, and unclear, cropped, lowres, low quality, artifacts, duplicate, morbid, mutilated, poorly drawn face, deformed, dehydrated, bad proportions" 69 | front_threshold: 30. 70 | back_threshold: 30. 71 | 72 | guidance_type: "multiview-diffusion-guidance" 73 | guidance: 74 | model_name: "sd-v2.1-base-4view" 75 | ckpt_path: null # path to a pre-downloaded checkpoint file (null for loading from URL) 76 | guidance_scale: 50.0 77 | min_step_percent: [0, 0.98, 0.02, 8000] # (start_iter, start_val, end_val, end_iter) 78 | max_step_percent: [0, 0.98, 0.50, 8000] 79 | recon_loss: true 80 | recon_std_rescale: 0.5 81 | 82 | loggers: 83 | wandb: 84 | enable: false 85 | project: "threestudio" 86 | 87 | loss: 88 | lambda_sds: 1. 89 | lambda_orient: [0, 10., 1000., 5000] 90 | lambda_sparsity: 0. 91 | lambda_opaque: 0. 92 | lambda_z_variance: 0. 
93 | optimizer: 94 | name: AdamW 95 | args: 96 | betas: [0.9, 0.99] 97 | eps: 1.e-15 98 | params: 99 | geometry.encoding: 100 | lr: 0.01 101 | geometry.density_network: 102 | lr: 0.001 103 | geometry.feature_network: 104 | lr: 0.001 105 | background: 106 | lr: 0.001 107 | 108 | trainer: 109 | max_steps: 10000 110 | log_every_n_steps: 1 111 | num_sanity_val_steps: 0 112 | val_check_interval: 200 113 | enable_progress_bar: true 114 | precision: 16-mixed 115 | 116 | checkpoint: 117 | save_last: true 118 | save_top_k: -1 119 | every_n_train_steps: ${trainer.max_steps} 120 | -------------------------------------------------------------------------------- /configs/mvdream-sd21.yaml: -------------------------------------------------------------------------------- 1 | name: "mvdream-sd21-rescale0.5" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs2" 4 | seed: 0 5 | 6 | data_type: "random-multiview-camera-datamodule" 7 | data: 8 | batch_size: [8,4] # must be dividable by n_view 9 | n_view: 4 10 | # 0-4999: 64x64, >=5000: 256x256 11 | width: [64, 256] 12 | height: [64, 256] 13 | resolution_milestones: [5000] 14 | camera_distance_range: [0.8, 1.0] # relative 15 | fovy_range: [15, 60] 16 | elevation_range: [0, 30] 17 | camera_perturb: 0. 18 | center_perturb: 0. 19 | up_perturb: 0. 20 | n_val_views: 4 21 | eval_camera_distance: 3.0 22 | eval_fovy_deg: 40. 23 | 24 | system_type: "mvdream-system" 25 | system: 26 | geometry_type: "implicit-volume" 27 | geometry: 28 | radius: 1.0 29 | normal_type: null 30 | 31 | density_bias: "blob_magic3d" 32 | density_activation: softplus 33 | density_blob_scale: 10. 34 | density_blob_std: 0.5 35 | 36 | pos_encoding_config: 37 | otype: HashGrid 38 | n_levels: 16 39 | n_features_per_level: 2 40 | log2_hashmap_size: 19 41 | base_resolution: 16 42 | per_level_scale: 1.447269237440378 # max resolution 4096 43 | 44 | material_type: "no-material" 45 | material: 46 | n_output_dims: 3 47 | color_activation: sigmoid 48 | 49 | background_type: "neural-environment-map-background" 50 | background: 51 | color_activation: sigmoid 52 | random_aug: true 53 | share_aug_bg: true 54 | 55 | renderer_type: "nerf-volume-renderer" 56 | renderer: 57 | radius: ${system.geometry.radius} 58 | num_samples_per_ray: 512 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 63 | prompt: ??? 64 | negative_prompt: "ugly, bad anatomy, blurry, pixelated obscure, unnatural colors, poor lighting, dull, and unclear, cropped, lowres, low quality, artifacts, duplicate, morbid, mutilated, poorly drawn face, deformed, dehydrated, bad proportions" 65 | front_threshold: 30. 66 | back_threshold: 30. 67 | 68 | guidance_type: "multiview-diffusion-guidance" 69 | guidance: 70 | model_name: "sd-v2.1-base-4view" 71 | ckpt_path: null # path to a pre-downloaded checkpoint file (null for loading from URL) 72 | guidance_scale: 50.0 73 | min_step_percent: [0, 0.98, 0.02, 8000] # (start_iter, start_val, end_val, end_iter) 74 | max_step_percent: [0, 0.98, 0.50, 8000] 75 | recon_loss: true 76 | recon_std_rescale: 0.5 77 | 78 | loggers: 79 | wandb: 80 | enable: false 81 | project: "threestudio" 82 | 83 | loss: 84 | lambda_sds: 1. 85 | lambda_orient: 0. 86 | lambda_sparsity: 0. 87 | lambda_opaque: 0. 88 | lambda_z_variance: 0. 
89 | optimizer: 90 | name: AdamW 91 | args: 92 | betas: [0.9, 0.99] 93 | eps: 1.e-15 94 | params: 95 | geometry.encoding: 96 | lr: 0.01 97 | geometry.density_network: 98 | lr: 0.001 99 | geometry.feature_network: 100 | lr: 0.001 101 | background: 102 | lr: 0.001 103 | 104 | trainer: 105 | max_steps: 10000 106 | log_every_n_steps: 1 107 | num_sanity_val_steps: 0 108 | val_check_interval: 200 109 | enable_progress_bar: true 110 | precision: 16-mixed 111 | 112 | checkpoint: 113 | save_last: true 114 | save_top_k: -1 115 | every_n_train_steps: ${trainer.max_steps} 116 | -------------------------------------------------------------------------------- /configs/prolificdreamer-geometry-from.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer-geometry" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [1.0, 1.5] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | camera_perturb: 0. 15 | center_perturb: 0. 16 | up_perturb: 0. 17 | eval_camera_distance: 1.5 18 | eval_fovy_deg: 70. 19 | 20 | system_type: "prolificdreamer-system" 21 | system: 22 | stage: geometry 23 | geometry_convert_from: ??? 24 | geometry_type: "tetrahedra-sdf-grid" 25 | geometry: 26 | radius: 1.0 # consistent with coarse 27 | isosurface_resolution: 512 # 400 # 128 28 | isosurface_deformable_grid: true 29 | geometry_only: true 30 | 31 | material_type: "no-material" # unused 32 | material: 33 | n_output_dims: 0 34 | 35 | background_type: "solid-color-background" # unused 36 | 37 | renderer_type: "nvdiff-rasterizer" 38 | renderer: 39 | context_type: cuda # gl 40 | 41 | prompt_processor_type: "stable-diffusion-prompt-processor" 42 | prompt_processor: 43 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 44 | prompt: lib:michelangelo_dog 45 | 46 | guidance_type: "stable-diffusion-guidance" 47 | guidance: 48 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 49 | guidance_scale: 100. 50 | min_step_percent: 0.02 51 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 52 | weighting_strategy: sds 53 | 54 | loggers: 55 | wandb: 56 | enable: false 57 | project: "threestudio" 58 | name: None 59 | 60 | loss: 61 | lambda_sds: 1. 62 | lambda_normal_consistency: 10000. 63 | lambda_laplacian_smoothness: 10000. 
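# NOTE: lambda_normal_consistency and lambda_laplacian_smoothness are standard mesh regularizers
# on the surface extracted from the tetrahedra SDF grid: the first penalizes differences between
# the normals of adjacent faces, the second penalizes each vertex's deviation from the mean of its
# neighbors. Values marked "???" (e.g. geometry_convert_from above) are OmegaConf mandatory fields
# and must be supplied at launch time, typically as a command-line override such as
# system.geometry_convert_from=path/to/coarse-stage/ckpts/last.ckpt (illustrative path).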
64 | 65 | optimizer: 66 | name: Adam 67 | args: 68 | lr: 0.005 69 | betas: [0.9, 0.99] 70 | eps: 1.e-15 71 | 72 | trainer: 73 | max_steps: 15000 74 | log_every_n_steps: 1 75 | num_sanity_val_steps: 1 76 | val_check_interval: 200 77 | enable_progress_bar: true 78 | precision: 32 79 | 80 | checkpoint: 81 | save_last: true 82 | save_top_k: -1 83 | every_n_train_steps: ${trainer.max_steps} 84 | -------------------------------------------------------------------------------- /configs/prolificdreamer-geometry.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer-geometry" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [1.0, 1.5] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | camera_perturb: 0. 15 | center_perturb: 0. 16 | up_perturb: 0. 17 | eval_camera_distance: 1.5 18 | eval_fovy_deg: 70. 19 | 20 | system_type: "prolificdreamer-system" 21 | system: 22 | stage: geometry 23 | geometry_convert_from: ??? 24 | geometry_type: "tetrahedra-sdf-grid" 25 | geometry: 26 | radius: 1.0 # consistent with coarse 27 | isosurface_resolution: 128 28 | isosurface_deformable_grid: true 29 | geometry_only: true 30 | 31 | material_type: "no-material" # unused 32 | material: 33 | n_output_dims: 0 34 | 35 | background_type: "solid-color-background" # unused 36 | 37 | renderer_type: "nvdiff-rasterizer" 38 | renderer: 39 | context_type: cuda # gl 40 | 41 | prompt_processor_type: "stable-diffusion-prompt-processor" 42 | prompt_processor: 43 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 44 | prompt: lib:michelangelo_dog 45 | 46 | guidance_type: "stable-diffusion-guidance" 47 | guidance: 48 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 49 | guidance_scale: 100. 50 | min_step_percent: 0.02 51 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 52 | weighting_strategy: sds 53 | 54 | loggers: 55 | wandb: 56 | enable: false 57 | project: "threestudio" 58 | name: None 59 | 60 | loss: 61 | lambda_sds: 1. 62 | lambda_normal_consistency: 10000. 63 | lambda_laplacian_smoothness: 10000. 64 | 65 | optimizer: 66 | name: Adam 67 | args: 68 | lr: 0.005 69 | betas: [0.9, 0.99] 70 | eps: 1.e-15 71 | 72 | trainer: 73 | max_steps: 15000 74 | log_every_n_steps: 1 75 | num_sanity_val_steps: 1 76 | val_check_interval: 200 77 | enable_progress_bar: true 78 | precision: 32 79 | 80 | checkpoint: 81 | save_last: true 82 | save_top_k: -1 83 | every_n_train_steps: ${trainer.max_steps} 84 | -------------------------------------------------------------------------------- /configs/prolificdreamer-patch.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer-patch" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [1.0, 1.5] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | camera_perturb: 0. 15 | center_perturb: 0. 16 | up_perturb: 0. 17 | eval_camera_distance: 1.5 18 | eval_fovy_deg: 70. 
19 | 20 | system_type: "prolificdreamer-system" 21 | system: 22 | stage: coarse 23 | geometry_type: "implicit-volume" 24 | geometry: 25 | radius: 1.0 26 | normal_type: null 27 | 28 | density_bias: "blob_magic3d" 29 | density_activation: softplus 30 | density_blob_scale: 10. 31 | density_blob_std: 0.5 32 | 33 | pos_encoding_config: 34 | otype: HashGrid 35 | n_levels: 16 36 | n_features_per_level: 2 37 | log2_hashmap_size: 19 38 | base_resolution: 16 39 | per_level_scale: 1.447269237440378 # max resolution 4096 40 | 41 | material_type: "no-material" 42 | material: 43 | n_output_dims: 3 44 | color_activation: sigmoid 45 | 46 | background_type: "neural-environment-map-background" 47 | background: 48 | color_activation: sigmoid 49 | random_aug: true 50 | 51 | renderer_type: "patch-renderer" 52 | renderer: 53 | base_renderer_type: "nerf-volume-renderer" 54 | base_renderer: 55 | radius: ${system.geometry.radius} 56 | num_samples_per_ray: 512 57 | patch_size: 128 58 | 59 | prompt_processor_type: "stable-diffusion-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 62 | prompt: ??? 63 | front_threshold: 30. 64 | back_threshold: 30. 65 | 66 | guidance_type: "stable-diffusion-vsd-guidance" 67 | guidance: 68 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 69 | pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1" 70 | guidance_scale: 7.5 71 | min_step_percent: 0.02 72 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 73 | 74 | loggers: 75 | wandb: 76 | enable: false 77 | project: "threestudio" 78 | 79 | loss: 80 | lambda_vsd: 1. 81 | lambda_lora: 1. 82 | lambda_orient: 0. 83 | lambda_sparsity: 10. 84 | lambda_opaque: [10000, 0.0, 1000.0, 10001] 85 | lambda_z_variance: 0. 86 | optimizer: 87 | name: AdamW 88 | args: 89 | betas: [0.9, 0.99] 90 | eps: 1.e-15 91 | params: 92 | geometry.encoding: 93 | lr: 0.01 94 | geometry.density_network: 95 | lr: 0.001 96 | geometry.feature_network: 97 | lr: 0.001 98 | background: 99 | lr: 0.001 100 | guidance: 101 | lr: 0.0001 102 | 103 | trainer: 104 | max_steps: 25000 105 | log_every_n_steps: 1 106 | num_sanity_val_steps: 0 107 | val_check_interval: 200 108 | enable_progress_bar: true 109 | precision: 32 110 | 111 | checkpoint: 112 | save_last: true 113 | save_top_k: -1 114 | every_n_train_steps: ${trainer.max_steps} 115 | -------------------------------------------------------------------------------- /configs/prolificdreamer-scene.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: [1, 1] 9 | # 0-4999: 64x64, >=5000: 512x512 10 | # this drastically reduces VRAM usage as empty space is pruned in early training 11 | width: [64, 512] 12 | height: [64, 512] 13 | resolution_milestones: [5000] 14 | camera_distance_range: [0.1, 2.3] 15 | fovy_range: [40, 70] 16 | elevation_range: [-10, 45] 17 | camera_perturb: 0. 18 | center_perturb: 0. 19 | up_perturb: 0. 20 | eval_camera_distance: 2.0 21 | eval_fovy_deg: 70. 22 | 23 | system_type: "prolificdreamer-system" 24 | system: 25 | stage: coarse 26 | geometry_type: "implicit-volume" 27 | geometry: 28 | radius: 5.0 29 | normal_type: null 30 | 31 | density_bias: "blob_magic3d" 32 | density_activation: softplus 33 | density_blob_scale: -10. 
34 | density_blob_std: 2.5 35 | 36 | pos_encoding_config: 37 | otype: HashGrid 38 | n_levels: 16 39 | n_features_per_level: 2 40 | log2_hashmap_size: 19 41 | base_resolution: 16 42 | per_level_scale: 1.447269237440378 # max resolution 4096 43 | 44 | material_type: "no-material" 45 | material: 46 | n_output_dims: 3 47 | color_activation: sigmoid 48 | 49 | background_type: "neural-environment-map-background" 50 | background: 51 | color_activation: sigmoid 52 | 53 | renderer_type: "nerf-volume-renderer" 54 | renderer: 55 | radius: ${system.geometry.radius} 56 | num_samples_per_ray: 512 57 | 58 | prompt_processor_type: "stable-diffusion-prompt-processor" 59 | prompt_processor: 60 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 61 | prompt: ??? 62 | 63 | guidance_type: "stable-diffusion-vsd-guidance" 64 | guidance: 65 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 66 | pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1" 67 | guidance_scale: 7.5 68 | min_step_percent: 0.02 69 | max_step_percent: [10000, 0.98, 0.5, 10001] # annealed to 0.5 after 10000 steps 70 | view_dependent_prompting: false 71 | 72 | loggers: 73 | wandb: 74 | enable: false 75 | project: "threestudio" 76 | name: None 77 | 78 | loss: 79 | lambda_vsd: 1. 80 | lambda_lora: 1. 81 | lambda_orient: 0. 82 | lambda_sparsity: 0. 83 | lambda_opaque: 0. 84 | lambda_z_variance: 1. 85 | optimizer: 86 | name: AdamW 87 | args: 88 | betas: [0.9, 0.99] 89 | eps: 1.e-15 90 | params: 91 | geometry.encoding: 92 | lr: 0.01 93 | geometry.density_network: 94 | lr: 0.001 95 | geometry.feature_network: 96 | lr: 0.001 97 | background: 98 | lr: 0.001 99 | guidance: 100 | lr: 0.0001 101 | 102 | trainer: 103 | max_steps: 25000 104 | log_every_n_steps: 1 105 | num_sanity_val_steps: 0 106 | val_check_interval: 200 107 | enable_progress_bar: true 108 | precision: 32 109 | 110 | checkpoint: 111 | save_last: true 112 | save_top_k: -1 113 | every_n_train_steps: ${trainer.max_steps} 114 | -------------------------------------------------------------------------------- /configs/prolificdreamer-texture.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer-texture" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [1.0, 1.5] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | camera_perturb: 0. 15 | center_perturb: 0. 16 | up_perturb: 0. 17 | eval_camera_distance: 1.5 18 | eval_fovy_deg: 70. 19 | 20 | system_type: "prolificdreamer-system" 21 | system: 22 | stage: texture 23 | geometry_convert_from: ??? 
24 | geometry_type: "tetrahedra-sdf-grid" 25 | geometry: 26 | radius: 1.0 # consistent with last stage 27 | isosurface_resolution: 400 # 128 # consistent with last stage 28 | isosurface_deformable_grid: true 29 | isosurface_remove_outliers: true 30 | pos_encoding_config: 31 | otype: HashGrid 32 | n_levels: 16 33 | n_features_per_level: 2 34 | log2_hashmap_size: 19 35 | base_resolution: 16 36 | per_level_scale: 1.447269237440378 # max resolution 4096 37 | fix_geometry: true 38 | 39 | material_type: "no-material" 40 | material: 41 | n_output_dims: 3 42 | color_activation: sigmoid 43 | 44 | background_type: "neural-environment-map-background" 45 | background: 46 | color_activation: sigmoid 47 | 48 | renderer_type: "nvdiff-rasterizer" 49 | renderer: 50 | context_type: cuda # gl 51 | 52 | prompt_processor_type: "stable-diffusion-prompt-processor" 53 | prompt_processor: 54 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 55 | prompt: ??? 56 | front_threshold: 30. 57 | back_threshold: 30. 58 | 59 | guidance_type: "stable-diffusion-vsd-guidance" 60 | guidance: 61 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 62 | pretrained_model_name_or_path_lora: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-lora" # "stabilityai/stable-diffusion-2-1" 63 | guidance_scale: 7.5 64 | min_step_percent: 0.02 65 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 66 | 67 | loggers: 68 | wandb: 69 | enable: false 70 | project: "threestudio" 71 | name: None 72 | 73 | loss: 74 | lambda_vsd: 1. 75 | lambda_lora: 1. 76 | optimizer: 77 | name: AdamW 78 | args: 79 | betas: [0.9, 0.99] 80 | eps: 1.e-15 81 | params: 82 | geometry.encoding: 83 | lr: 0.01 84 | geometry.feature_network: 85 | lr: 0.001 86 | background: 87 | lr: 0.001 88 | guidance: 89 | lr: 0.0001 90 | 91 | trainer: 92 | max_steps: 30000 93 | log_every_n_steps: 1 94 | num_sanity_val_steps: 1 95 | val_check_interval: 200 96 | enable_progress_bar: true 97 | precision: 32 98 | 99 | checkpoint: 100 | save_last: true 101 | save_top_k: -1 102 | every_n_train_steps: ${trainer.max_steps} 103 | -------------------------------------------------------------------------------- /configs/prolificdreamer.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: [1, 1] 9 | # 0-4999: 64x64, >=5000: 512x512 10 | # this drastically reduces VRAM usage as empty space is pruned in early training 11 | width: [64, 512] 12 | height: [64, 512] 13 | resolution_milestones: [5000] 14 | camera_distance_range: [1.0, 1.5] 15 | fovy_range: [40, 70] 16 | elevation_range: [-10, 45] 17 | camera_perturb: 0. 18 | center_perturb: 0. 19 | up_perturb: 0. 20 | eval_camera_distance: 1.5 21 | eval_fovy_deg: 70. 22 | 23 | system_type: "prolificdreamer-system" 24 | system: 25 | stage: coarse 26 | geometry_type: "implicit-volume" 27 | geometry: 28 | radius: 1.0 29 | normal_type: null 30 | 31 | density_bias: "blob_magic3d" 32 | density_activation: softplus 33 | density_blob_scale: 10. 
34 | density_blob_std: 0.5 35 | 36 | pos_encoding_config: 37 | otype: HashGrid 38 | n_levels: 16 39 | n_features_per_level: 2 40 | log2_hashmap_size: 19 41 | base_resolution: 16 42 | per_level_scale: 1.447269237440378 # max resolution 4096 43 | 44 | material_type: "no-material" 45 | material: 46 | n_output_dims: 3 47 | color_activation: sigmoid 48 | 49 | background_type: "neural-environment-map-background" 50 | background: 51 | color_activation: sigmoid 52 | random_aug: true 53 | 54 | renderer_type: "nerf-volume-renderer" 55 | renderer: 56 | radius: ${system.geometry.radius} 57 | num_samples_per_ray: 512 58 | 59 | prompt_processor_type: "stable-diffusion-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 62 | prompt: ??? 63 | front_threshold: 30. 64 | back_threshold: 30. 65 | 66 | guidance_type: "stable-diffusion-vsd-guidance" 67 | guidance: 68 | pretrained_model_name_or_path: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-2.1-base" # "stabilityai/stable-diffusion-2-1-base" 69 | pretrained_model_name_or_path_lora: "/cpfs01/shared/Gveval3/gongkaixiong/dlh/sd_weights/sd-lora" # "stabilityai/stable-diffusion-2-1" 70 | guidance_scale: 7.5 71 | min_step_percent: 0.02 72 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 73 | 74 | loggers: 75 | wandb: 76 | enable: false 77 | project: "threestudio" 78 | name: None 79 | 80 | loss: 81 | lambda_vsd: 1. 82 | lambda_lora: 1. 83 | lambda_orient: 0. 84 | lambda_sparsity: 10. 85 | lambda_opaque: [10000, 0.0, 1000.0, 10001] 86 | lambda_z_variance: 0. 87 | optimizer: 88 | name: AdamW 89 | args: 90 | betas: [0.9, 0.99] 91 | eps: 1.e-15 92 | params: 93 | geometry.encoding: 94 | lr: 0.01 95 | geometry.density_network: 96 | lr: 0.001 97 | geometry.feature_network: 98 | lr: 0.001 99 | background: 100 | lr: 0.001 101 | guidance: 102 | lr: 0.0001 103 | 104 | trainer: 105 | max_steps: 25000 106 | log_every_n_steps: 1 107 | num_sanity_val_steps: 0 108 | val_check_interval: 200 109 | enable_progress_bar: true 110 | precision: 32 111 | 112 | checkpoint: 113 | save_last: true 114 | save_top_k: -1 115 | every_n_train_steps: ${trainer.max_steps} 116 | -------------------------------------------------------------------------------- /configs/sjc.yaml: -------------------------------------------------------------------------------- 1 | name: sjc 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: outputs 4 | seed: 0 5 | 6 | data_type: random-camera-datamodule 7 | data: 8 | camera_distance_range: [1.50, 1.50] 9 | elevation_range: [-10, 45] 10 | camera_perturb: 0.0 11 | center_perturb: 0.0 12 | up_perturb: 0.0 13 | light_position_perturb: 0.0 14 | eval_elevation_deg: 20.0 15 | 16 | system_type: sjc-system 17 | system: 18 | geometry_type: volume-grid 19 | geometry: 20 | normal_type: null 21 | grid_size: [100, 100, 100] 22 | density_bias: -1.0 23 | n_feature_dims: 4 24 | 25 | material_type: no-material 26 | material: 27 | n_output_dims: 4 28 | color_activation: none 29 | 30 | background_type: textured-background 31 | background: 32 | n_output_dims: 4 33 | color_activation: none 34 | height: 4 35 | width: 4 36 | 37 | renderer_type: nerf-volume-renderer 38 | renderer: 39 | num_samples_per_ray: 512 40 | grid_prune: false 41 | 42 | prompt_processor_type: stable-diffusion-prompt-processor 43 | prompt_processor: 44 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 45 | 
prompt: ??? 46 | view_dependent_prompt_front: true 47 | 48 | guidance_type: stable-diffusion-guidance 49 | guidance: 50 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 51 | guidance_scale: 100. 52 | use_sjc: true 53 | var_red: true 54 | min_step_percent: 0.01 55 | max_step_percent: 0.97 56 | 57 | loggers: 58 | wandb: 59 | enable: false 60 | project: "threestudio" 61 | name: None 62 | 63 | loss: 64 | lambda_sds: 1. 65 | center_ratio: 0.78125 # = 50 / 64 66 | lambda_depth: 0 # or try 10 67 | lambda_emptiness: [5000, 1.e+4, 2.e+5, 5001] 68 | emptiness_scale: 10 69 | 70 | optimizer: 71 | name: Adamax 72 | args: 73 | lr: 0.05 74 | params: 75 | geometry: 76 | lr: 0.05 77 | background: 78 | lr: 0.0001 # maybe 0.001/0.01 is better 79 | 80 | trainer: 81 | max_steps: 10000 82 | log_every_n_steps: 1 83 | num_sanity_val_steps: 0 84 | val_check_interval: 200 85 | enable_progress_bar: true 86 | precision: 16-mixed 87 | 88 | checkpoint: 89 | save_last: true # save at each validation tim 90 | save_top_k: -1 91 | every_n_train_steps: ${trainer.max_steps} 92 | -------------------------------------------------------------------------------- /configs/sketchshape-refine.yaml: -------------------------------------------------------------------------------- 1 | name: "sketchshape-refine" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | refinement: true 13 | weights: ??? 14 | weights_ignore_modules: ["material", "background"] 15 | guide_shape: ??? 16 | 17 | geometry_type: "implicit-volume" 18 | geometry: 19 | n_feature_dims: 4 20 | normal_type: null 21 | 22 | material_type: "sd-latent-adapter-material" 23 | 24 | background_type: "neural-environment-map-background" 25 | 26 | renderer_type: "nerf-volume-renderer" 27 | renderer: 28 | num_samples_per_ray: 512 29 | 30 | prompt_processor_type: "stable-diffusion-prompt-processor" 31 | prompt_processor: 32 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 33 | prompt: ??? 34 | 35 | guidance_type: "stable-diffusion-guidance" 36 | guidance: 37 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 38 | guidance_scale: 100. 39 | weighting_strategy: sds 40 | 41 | loggers: 42 | wandb: 43 | enable: false 44 | project: "threestudio" 45 | name: None 46 | 47 | loss: 48 | lambda_sds: 1. 49 | lambda_sparsity: 0.0 50 | lambda_shape: 1. 
51 | lambda_opaque: 0.0 52 | lambda_orient: 0.0 53 | optimizer: 54 | name: Adam 55 | args: 56 | lr: 0.01 57 | betas: [0.9, 0.99] 58 | eps: 1.e-15 59 | scheduler: 60 | name: SequentialLR 61 | interval: step 62 | warmup_steps: 100 63 | milestones: 64 | - ${system.scheduler.warmup_steps} 65 | schedulers: 66 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 67 | args: 68 | start_factor: 0.1 69 | end_factor: 1.0 70 | total_iters: ${system.scheduler.warmup_steps} 71 | - name: ExponentialLR 72 | args: 73 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 74 | 75 | trainer: 76 | max_steps: 10000 77 | log_every_n_steps: 1 78 | num_sanity_val_steps: 1 79 | val_check_interval: 200 80 | enable_progress_bar: true 81 | precision: 16-mixed 82 | 83 | checkpoint: 84 | save_last: true # save at each validation time 85 | save_top_k: -1 86 | every_n_train_steps: ${trainer.max_steps} 87 | -------------------------------------------------------------------------------- /configs/sketchshape.yaml: -------------------------------------------------------------------------------- 1 | name: "sketchshape" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | guide_shape: ??? 13 | 14 | geometry_type: "implicit-volume" 15 | geometry: 16 | n_feature_dims: 4 17 | normal_type: null 18 | 19 | material_type: "no-material" 20 | material: 21 | n_output_dims: 4 22 | color_activation: none 23 | 24 | background_type: "neural-environment-map-background" 25 | background: 26 | n_output_dims: 4 27 | color_activation: none 28 | 29 | renderer_type: "nerf-volume-renderer" 30 | renderer: 31 | num_samples_per_ray: 512 32 | 33 | prompt_processor_type: "stable-diffusion-prompt-processor" 34 | prompt_processor: 35 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 36 | prompt: ??? 37 | 38 | guidance_type: "stable-diffusion-guidance" 39 | guidance: 40 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 41 | guidance_scale: 100. 
42 | weighting_strategy: sds 43 | 44 | loggers: 45 | wandb: 46 | enable: false 47 | project: "threestudio" 48 | name: None 49 | 50 | loss: 51 | lambda_sds: 1.0 52 | lambda_sparsity: 0.0 53 | lambda_shape: 1.0 54 | lambda_opaque: 0.0 55 | lambda_orient: 0.0 56 | optimizer: 57 | name: Adam 58 | args: 59 | lr: 0.01 60 | betas: [0.9, 0.99] 61 | eps: 1.e-15 62 | scheduler: 63 | name: SequentialLR 64 | interval: step 65 | warmup_steps: 100 66 | milestones: 67 | - ${system.scheduler.warmup_steps} 68 | schedulers: 69 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 70 | args: 71 | start_factor: 0.1 72 | end_factor: 1.0 73 | total_iters: ${system.scheduler.warmup_steps} 74 | - name: ExponentialLR 75 | args: 76 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 77 | 78 | trainer: 79 | max_steps: 10000 80 | log_every_n_steps: 1 81 | num_sanity_val_steps: 0 82 | val_check_interval: 200 83 | enable_progress_bar: true 84 | precision: 16-mixed 85 | 86 | checkpoint: 87 | save_last: true # save at each validation time 88 | save_top_k: -1 89 | every_n_train_steps: ${trainer.max_steps} 90 | -------------------------------------------------------------------------------- /configs/textmesh-if.yaml: -------------------------------------------------------------------------------- 1 | name: "textmesh-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "textmesh-system" 19 | system: 20 | geometry_type: "implicit-sdf" 21 | geometry: 22 | radius: 2.0 23 | normal_type: finite_difference 24 | # progressive eps from Neuralangelo 25 | finite_difference_normal_eps: progressive 26 | 27 | sdf_bias: sphere 28 | sdf_bias_params: 0.5 29 | 30 | # coarse to fine hash grid encoding 31 | pos_encoding_config: 32 | otype: ProgressiveBandHashGrid 33 | n_levels: 16 34 | n_features_per_level: 2 35 | log2_hashmap_size: 19 36 | base_resolution: 16 37 | per_level_scale: 1.381912879967776 # max resolution 2048 38 | start_level: 8 # resolution ~200 39 | start_step: 2000 40 | update_steps: 500 41 | 42 | material_type: "diffuse-with-point-light-material" 43 | material: 44 | ambient_only_steps: 2001 45 | albedo_activation: sigmoid 46 | 47 | background_type: "neural-environment-map-background" 48 | background: 49 | color_activation: sigmoid 50 | 51 | renderer_type: "neus-volume-renderer" 52 | renderer: 53 | radius: ${system.geometry.radius} 54 | num_samples_per_ray: 512 55 | cos_anneal_end_steps: ${trainer.max_steps} 56 | eval_chunk_size: 8192 57 | 58 | prompt_processor_type: "deep-floyd-prompt-processor" 59 | prompt_processor: 60 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 61 | prompt: ??? 62 | 63 | guidance_type: "deep-floyd-guidance" 64 | guidance: 65 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 66 | guidance_scale: 20. 67 | weighting_strategy: sds 68 | min_step_percent: 0.02 69 | max_step_percent: 0.98 70 | 71 | loss: 72 | lambda_sds: 1. 73 | lambda_orient: 0.0 74 | lambda_sparsity: 0.0 75 | lambda_opaque: 0.0 76 | lambda_eikonal: 1000. 
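# NOTE: the eikonal term is only meaningful for SDF-based geometry such as the "implicit-sdf"
# type used here; it regularizes the signed distance field so that ||grad sdf(x)|| stays close
# to 1, roughly mean((||grad sdf(x)|| - 1)^2) over sampled points.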
77 | optimizer: 78 | name: Adam 79 | args: 80 | betas: [0.9, 0.99] 81 | eps: 1.e-15 82 | params: 83 | geometry.encoding: 84 | lr: 0.01 85 | geometry.sdf_network: 86 | lr: 0.001 87 | geometry.feature_network: 88 | lr: 0.001 89 | background: 90 | lr: 0.001 91 | renderer: 92 | lr: 0.001 93 | 94 | trainer: 95 | max_steps: 10000 96 | log_every_n_steps: 1 97 | num_sanity_val_steps: 0 98 | val_check_interval: 200 99 | enable_progress_bar: true 100 | precision: 16-mixed 101 | 102 | checkpoint: 103 | save_last: true # save at each validation time 104 | save_top_k: -1 105 | every_n_train_steps: ${trainer.max_steps} 106 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Reference: 2 | # https://github.com/cvpaperchallenge/Ascender 3 | # https://github.com/nerfstudio-project/nerfstudio 4 | 5 | FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 6 | 7 | ARG USER_NAME=dreamer 8 | ARG GROUP_NAME=dreamers 9 | ARG UID=1000 10 | ARG GID=1000 11 | 12 | # Set compute capability for nerfacc and tiny-cuda-nn 13 | # See https://developer.nvidia.com/cuda-gpus and limit number to speed-up build 14 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX" 15 | ENV TCNN_CUDA_ARCHITECTURES=90;89;86;80;75;70;61;60 16 | # Speed-up build for RTX 30xx 17 | # ENV TORCH_CUDA_ARCH_LIST="8.6" 18 | # ENV TCNN_CUDA_ARCHITECTURES=86 19 | # Speed-up build for RTX 40xx 20 | # ENV TORCH_CUDA_ARCH_LIST="8.9" 21 | # ENV TCNN_CUDA_ARCHITECTURES=89 22 | 23 | ENV CUDA_HOME=/usr/local/cuda 24 | ENV PATH=${CUDA_HOME}/bin:/home/${USER_NAME}/.local/bin:${PATH} 25 | ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 26 | ENV LIBRARY_PATH=${CUDA_HOME}/lib64/stubs:${LIBRARY_PATH} 27 | 28 | # apt install by root user 29 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 30 | build-essential \ 31 | curl \ 32 | git \ 33 | libegl1-mesa-dev \ 34 | libgl1-mesa-dev \ 35 | libgles2-mesa-dev \ 36 | libglib2.0-0 \ 37 | libsm6 \ 38 | libxext6 \ 39 | libxrender1 \ 40 | python-is-python3 \ 41 | python3.10-dev \ 42 | python3-pip \ 43 | wget \ 44 | && rm -rf /var/lib/apt/lists/* 45 | 46 | # Change user to non-root user 47 | RUN groupadd -g ${GID} ${GROUP_NAME} \ 48 | && useradd -ms /bin/sh -u ${UID} -g ${GID} ${USER_NAME} 49 | USER ${USER_NAME} 50 | 51 | RUN pip install --upgrade pip setuptools ninja 52 | RUN pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 --index-url https://download.pytorch.org/whl/cu118 53 | # Install nerfacc and tiny-cuda-nn before installing requirements.txt 54 | # because these two installations are time consuming and error prone 55 | RUN pip install git+https://github.com/KAIR-BAIR/nerfacc.git@v0.5.2 56 | RUN pip install git+https://github.com/NVlabs/tiny-cuda-nn.git#subdirectory=bindings/torch 57 | 58 | COPY requirements.txt /tmp 59 | RUN cd /tmp && pip install -r requirements.txt 60 | WORKDIR /home/${USER_NAME}/threestudio 61 | -------------------------------------------------------------------------------- /docker/compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | threestudio: 3 | build: 4 | context: ../ 5 | dockerfile: docker/Dockerfile 6 | args: 7 | # you can set environment variables, otherwise default values will be used 8 | USER_NAME: ${HOST_USER_NAME:-dreamer} # export HOST_USER_NAME=$USER 9 | GROUP_NAME: ${HOST_GROUP_NAME:-dreamers} 10 | UID: ${HOST_UID:-1000} # 
export HOST_UID=$(id -u) 11 | GID: ${HOST_GID:-1000} # export HOST_GID=$(id -g) 12 | shm_size: '4gb' 13 | environment: 14 | NVIDIA_DISABLE_REQUIRE: 1 # avoid wrong `nvidia-container-cli: requirement error` 15 | tty: true 16 | volumes: 17 | - ../:/home/${HOST_USER_NAME:-dreamer}/threestudio 18 | deploy: 19 | resources: 20 | reservations: 21 | devices: 22 | - driver: nvidia 23 | capabilities: [gpu] 24 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Prerequisite 4 | 5 | - NVIDIA GPU with at least 6GB VRAM. The more memory you have, the more methods and higher resolutions you can try. 6 | - [NVIDIA Driver](https://www.nvidia.com/Download/index.aspx) whose version is higher than the [Minimum Required Driver Version](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html) of CUDA Toolkit you want to use. 7 | 8 | ## Install CUDA Toolkit 9 | 10 | You can skip this step if you have installed sufficiently new version or you use Docker. 11 | 12 | Install [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive). 13 | 14 | - Example for Ubuntu 22.04: 15 | - Run [command for CUDA 11.8 Ubuntu 22.04](https://developer.nvidia.com/cuda-11-8-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=deb_local) 16 | - Example for Ubuntu on WSL2: 17 | - `sudo apt-key del 7fa2af80` 18 | - Run [command for CUDA 11.8 WSL-Ubuntu](https://developer.nvidia.com/cuda-11-8-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=WSL-Ubuntu&target_version=2.0&target_type=deb_local) 19 | 20 | ## Install threestudio via Docker 21 | 22 | 1. [Install Docker Engine](https://docs.docker.com/engine/install/). 23 | This document assumes you [install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/). 24 | 2. [Create `docker` group](https://docs.docker.com/engine/install/linux-postinstall/). 25 | Otherwise, you need to type `sudo docker` instead of `docker`. 26 | 3. [Install NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#setting-up-nvidia-container-toolkit). 27 | 4. If you use WSL2, [enable systemd](https://learn.microsoft.com/en-us/windows/wsl/wsl-config#systemd-support). 28 | 5. Edit [Dockerfile](../docker/Dockerfile) for your GPU to speed-up build. 29 | The default Dockerfile takes into account many types of GPUs. 30 | 6. Run Docker via `docker compose`. 31 | 32 | ```bash 33 | cd docker/ 34 | docker compose build # build Docker image 35 | docker compose up -d # create and start a container in background 36 | docker compose exec threestudio bash # run bash in the container 37 | 38 | # Enjoy threestudio! 39 | 40 | exit # or Ctrl+D 41 | docker compose stop # stop the container 42 | docker compose start # start the container 43 | docker compose down # stop and remove the container 44 | ``` 45 | 46 | Note: The current Dockerfile will cause errors when using the OpenGL-based rasterizer of nvdiffrast. 47 | You can use the CUDA-based rasterizer by adding commands or editing configs. 48 | 49 | - `system.renderer.context_type=cuda` for training 50 | - `system.exporter.context_type=cuda` for exporting meshes 51 | 52 | [This comment by the nvdiffrast author](https://github.com/NVlabs/nvdiffrast/issues/94#issuecomment-1288566038) could be a guide to resolve this limitation. 
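As a concrete (illustrative) example, assuming the usual threestudio `launch.py` invocation with dotted command-line overrides, switching to the CUDA rasterizer for a training run would look like the following; the config file, prompt, and GPU index are placeholders, adjust them to your setup:

```bash
# pass the override on the command line instead of editing the YAML config
python launch.py --config configs/magic3d-refine-sd.yaml --train --gpu 0 \
    system.prompt_processor.prompt="a delicious hamburger" \
    system.renderer.context_type=cuda
```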
53 | -------------------------------------------------------------------------------- /extern/MVDream/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | */__pycache__/ 6 | 7 | # dataset-related, pre-trained models, 8 | vae_models/vqgan 9 | vae_models/*.gz 10 | vae_models/*.pt 11 | vae_models/*vqgan 12 | *.pt 13 | *.pth 14 | 15 | # log files 16 | log/*.log 17 | out* 18 | test_results 19 | err* 20 | 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | pip-wheel-metadata/ 40 | share/python-wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .nox/ 60 | .coverage 61 | .coverage.* 62 | .cache 63 | nosetests.xml 64 | coverage.xml 65 | *.cover 66 | *.py,cover 67 | .hypothesis/ 68 | .pytest_cache/ 69 | 70 | # Translations 71 | *.mo 72 | *.pot 73 | 74 | # Django stuff: 75 | *.log 76 | local_settings.py 77 | db.sqlite3 78 | db.sqlite3-journal 79 | 80 | # Flask stuff: 81 | instance/ 82 | .webassets-cache 83 | 84 | # Scrapy stuff: 85 | .scrapy 86 | 87 | # Sphinx documentation 88 | docs/_build/ 89 | 90 | # PyBuilder 91 | target/ 92 | 93 | # Jupyter Notebook 94 | .ipynb_checkpoints 95 | 96 | # IPython 97 | profile_default/ 98 | ipython_config.py 99 | 100 | # pyenv 101 | .python-version 102 | 103 | # pipenv 104 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 105 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 106 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 107 | # install all needed dependencies. 108 | #Pipfile.lock 109 | 110 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 111 | __pypackages__/ 112 | 113 | # Celery stuff 114 | celerybeat-schedule 115 | celerybeat.pid 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # Environments 121 | .env 122 | .venv 123 | env/ 124 | venv/ 125 | ENV/ 126 | env.bak/ 127 | venv.bak/ 128 | 129 | # Spyder project settings 130 | .spyderproject 131 | .spyproject 132 | 133 | # Rope project settings 134 | .ropeproject 135 | 136 | # mkdocs documentation 137 | /site 138 | 139 | # mypy 140 | .mypy_cache/ 141 | .dmypy.json 142 | dmypy.json 143 | 144 | # Pyre type checker 145 | .pyre/ 146 | 147 | *.zip 148 | *.pkl 149 | *.csv 150 | *.ckpt 151 | *.parquet 152 | 153 | *.whl 154 | *.th 155 | *.onnx -------------------------------------------------------------------------------- /extern/MVDream/LICENSE-CODE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 ByteDance 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /extern/MVDream/mvdream/__init__.py: -------------------------------------------------------------------------------- 1 | from .model_zoo import build_model -------------------------------------------------------------------------------- /extern/MVDream/mvdream/camera_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def create_camera_to_world_matrix(elevation, azimuth): 6 | elevation = np.radians(elevation) 7 | azimuth = np.radians(azimuth) 8 | # Convert elevation and azimuth angles to Cartesian coordinates on a unit sphere 9 | x = np.cos(elevation) * np.sin(azimuth) 10 | y = np.sin(elevation) 11 | z = np.cos(elevation) * np.cos(azimuth) 12 | 13 | # Calculate camera position, target, and up vectors 14 | camera_pos = np.array([x, y, z]) 15 | target = np.array([0, 0, 0]) 16 | up = np.array([0, 1, 0]) 17 | 18 | # Construct view matrix 19 | forward = target - camera_pos 20 | forward /= np.linalg.norm(forward) 21 | right = np.cross(forward, up) 22 | right /= np.linalg.norm(right) 23 | new_up = np.cross(right, forward) 24 | new_up /= np.linalg.norm(new_up) 25 | cam2world = np.eye(4) 26 | cam2world[:3, :3] = np.array([right, new_up, -forward]).T 27 | cam2world[:3, 3] = camera_pos 28 | return cam2world 29 | 30 | 31 | def convert_opengl_to_blender(camera_matrix): 32 | if isinstance(camera_matrix, np.ndarray): 33 | # Construct transformation matrix to convert from OpenGL space to Blender space 34 | flip_yz = np.array([[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) 35 | camera_matrix_blender = np.dot(flip_yz, camera_matrix) 36 | else: 37 | # Construct transformation matrix to convert from OpenGL space to Blender space 38 | flip_yz = torch.tensor([[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) 39 | if camera_matrix.ndim == 3: 40 | flip_yz = flip_yz.unsqueeze(0) 41 | camera_matrix_blender = torch.matmul(flip_yz.to(camera_matrix), camera_matrix) 42 | return camera_matrix_blender 43 | 44 | 45 | def normalize_camera(camera_matrix): 46 | ''' normalize the camera location onto a unit-sphere''' 47 | if isinstance(camera_matrix, np.ndarray): 48 | camera_matrix = camera_matrix.reshape(-1,4,4) 49 | translation = camera_matrix[:,:3,3] 50 | translation = translation / (np.linalg.norm(translation, axis=1, keepdims=True) + 1e-8) 51 | camera_matrix[:,:3,3] = translation 52 | else: 53 | camera_matrix = camera_matrix.reshape(-1,4,4) 54 | translation = camera_matrix[:,:3,3] 55 | translation = translation / (torch.norm(translation, dim=1, keepdim=True) + 1e-8) 56 | camera_matrix[:,:3,3] = translation 57 | return camera_matrix.reshape(-1,16) 58 | 59 | 60 | def get_camera(num_frames, elevation=15, azimuth_start=0, azimuth_span=360, blender_coord=True): 61 | angle_gap = azimuth_span / num_frames 62 | cameras = [] 63 | for azimuth in np.arange(azimuth_start, azimuth_span+azimuth_start, angle_gap): 64 | camera_matrix = create_camera_to_world_matrix(elevation, azimuth) 65 | if blender_coord: 66 | camera_matrix = convert_opengl_to_blender(camera_matrix) 67 | cameras.append(camera_matrix.flatten()) 68 | return torch.tensor(np.stack(cameras, 0)).float() -------------------------------------------------------------------------------- /extern/MVDream/mvdream/configs/sd-v1.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | target: mvdream.ldm.interface.LatentDiffusionInterface 3 | 
params: 4 | linear_start: 0.00085 5 | linear_end: 0.0120 6 | timesteps: 1000 7 | scale_factor: 0.18215 8 | parameterization: "eps" 9 | 10 | unet_config: 11 | target: mvdream.ldm.modules.diffusionmodules.openaimodel.MultiViewUNetModel 12 | params: 13 | image_size: 32 # unused 14 | in_channels: 4 15 | out_channels: 4 16 | model_channels: 320 17 | attention_resolutions: [ 4, 2, 1 ] 18 | num_res_blocks: 2 19 | channel_mult: [ 1, 2, 4, 4 ] 20 | num_heads: 8 21 | use_spatial_transformer: True 22 | transformer_depth: 1 23 | context_dim: 768 24 | use_checkpoint: False 25 | legacy: False 26 | camera_dim: 16 27 | 28 | first_stage_config: 29 | target: mvdream.ldm.models.autoencoder.AutoencoderKL 30 | params: 31 | embed_dim: 4 32 | monitor: val/rec_loss 33 | ddconfig: 34 | double_z: true 35 | z_channels: 4 36 | resolution: 256 37 | in_channels: 3 38 | out_ch: 3 39 | ch: 128 40 | ch_mult: 41 | - 1 42 | - 2 43 | - 4 44 | - 4 45 | num_res_blocks: 2 46 | attn_resolutions: [] 47 | dropout: 0.0 48 | lossconfig: 49 | target: torch.nn.Identity 50 | 51 | cond_stage_config: 52 | target: mvdream.ldm.modules.encoders.modules.FrozenCLIPEmbedder 53 | -------------------------------------------------------------------------------- /extern/MVDream/mvdream/configs/sd-v2-base.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | target: mvdream.ldm.interface.LatentDiffusionInterface 3 | params: 4 | linear_start: 0.00085 5 | linear_end: 0.0120 6 | timesteps: 1000 7 | scale_factor: 0.18215 8 | parameterization: "eps" 9 | 10 | unet_config: 11 | target: mvdream.ldm.modules.diffusionmodules.openaimodel.MultiViewUNetModel 12 | params: 13 | image_size: 32 # unused 14 | in_channels: 4 15 | out_channels: 4 16 | model_channels: 320 17 | attention_resolutions: [ 4, 2, 1 ] 18 | num_res_blocks: 2 19 | channel_mult: [ 1, 2, 4, 4 ] 20 | num_head_channels: 64 # need to fix for flash-attn 21 | use_spatial_transformer: True 22 | use_linear_in_transformer: True 23 | transformer_depth: 1 24 | context_dim: 1024 25 | use_checkpoint: False 26 | legacy: False 27 | camera_dim: 16 28 | 29 | first_stage_config: 30 | target: mvdream.ldm.models.autoencoder.AutoencoderKL 31 | params: 32 | embed_dim: 4 33 | monitor: val/rec_loss 34 | ddconfig: 35 | #attn_type: "vanilla-xformers" 36 | double_z: true 37 | z_channels: 4 38 | resolution: 256 39 | in_channels: 3 40 | out_ch: 3 41 | ch: 128 42 | ch_mult: 43 | - 1 44 | - 2 45 | - 4 46 | - 4 47 | num_res_blocks: 2 48 | attn_resolutions: [] 49 | dropout: 0.0 50 | lossconfig: 51 | target: torch.nn.Identity 52 | 53 | cond_stage_config: 54 | target: mvdream.ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 55 | params: 56 | freeze: True 57 | layer: "penultimate" -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/MVDream/mvdream/ldm/__init__.py -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/MVDream/mvdream/ldm/models/__init__.py -------------------------------------------------------------------------------- 
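Both configs above set camera_dim: 16, which is consistent with the flattened 4x4 camera-to-world matrices produced by camera_utils.get_camera shown earlier. A minimal usage sketch (illustration only, not part of the repository; it assumes the mvdream package under extern/MVDream has been installed via its setup.py so that the import path resolves):

from mvdream.camera_utils import get_camera

# Four evenly spaced azimuths (0, 90, 180, 270 degrees) at 15 degrees elevation.
# blender_coord=True applies the OpenGL-to-Blender axis flip (the function's default).
cameras = get_camera(num_frames=4, elevation=15, azimuth_start=0,
                     azimuth_span=360, blender_coord=True)
print(cameras.shape)  # torch.Size([4, 16]); each row is a flattened cam2world matrix

model_zoo.build_model below follows the same pattern for the configs themselves: OmegaConf.load on one of these YAML files, then instantiate_from_config(config.model).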
/extern/MVDream/mvdream/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/MVDream/mvdream/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/MVDream/mvdream/ldm/modules/__init__.py -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/MVDream/mvdream/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/MVDream/mvdream/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to(device=self.parameters.device) 34 | 35 | def sample(self): 36 | x = self.mean + self.std * torch.randn(self.mean.shape).to(device=self.parameters.device) 37 | return x 38 | 39 | def kl(self, other=None): 40 | if self.deterministic: 41 | return torch.Tensor([0.]) 42 | else: 43 | if other is None: 44 | return 0.5 * torch.sum(torch.pow(self.mean, 2) 45 | + self.var - 1.0 - self.logvar, 46 | dim=[1, 2, 3]) 47 | else: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean - other.mean, 2) / other.var 50 | + self.var / other.var - 1.0 - self.logvar + other.logvar, 51 | dim=[1, 2, 3]) 52 | 53 | def nll(self, sample, dims=[1,2,3]): 54 | if self.deterministic: 55 | return torch.Tensor([0.]) 56 | logtwopi = np.log(2.0 * np.pi) 57 | return 0.5 * torch.sum( 58 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 59 | dim=dims) 60 | 61 | def mode(self): 62 | return self.mean 63 | 64 | 65 | def normal_kl(mean1, logvar1, mean2, logvar2): 66 | """ 67 | source: 
https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 68 | Compute the KL divergence between two gaussians. 69 | Shapes are automatically broadcasted, so batches can be compared to 70 | scalars, among other use cases. 71 | """ 72 | tensor = None 73 | for obj in (mean1, logvar1, mean2, logvar2): 74 | if isinstance(obj, torch.Tensor): 75 | tensor = obj 76 | break 77 | assert tensor is not None, "at least one argument must be a Tensor" 78 | 79 | # Force variances to be Tensors. Broadcasting helps convert scalars to 80 | # Tensors, but it does not work for torch.exp(). 81 | logvar1, logvar2 = [ 82 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 83 | for x in (logvar1, logvar2) 84 | ] 85 | 86 | return 0.5 * ( 87 | -1.0 88 | + logvar2 89 | - logvar1 90 | + torch.exp(logvar1 - logvar2) 91 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 92 | ) 93 | -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates 14 | else torch.tensor(-1, dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | # remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.', '') 20 | self.m_name2s_name.update({name: s_name}) 21 | self.register_buffer(s_name, p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def reset_num_updates(self): 26 | del self.num_updates 27 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int)) 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 47 | else: 48 | assert not key in self.m_name2s_name 49 | 50 | def copy_to(self, model): 51 | m_param = dict(model.named_parameters()) 52 | shadow_params = dict(self.named_buffers()) 53 | for key in m_param: 54 | if m_param[key].requires_grad: 55 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 56 | else: 57 | assert not key in self.m_name2s_name 58 | 59 | def store(self, parameters): 60 | """ 61 | Save the current parameters for restoring later. 62 | Args: 63 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 64 | temporarily stored. 65 | """ 66 | self.collected_params = [param.clone() for param in parameters] 67 | 68 | def restore(self, parameters): 69 | """ 70 | Restore the parameters stored with the `store` method. 
71 | Useful to validate the model with EMA parameters without affecting the 72 | original optimization process. Store the parameters before the 73 | `copy_to` method. After validation (or model saving), use this to 74 | restore the former parameters. 75 | Args: 76 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 77 | updated with the stored parameters. 78 | """ 79 | for c_param, param in zip(self.collected_params, parameters): 80 | param.data.copy_(c_param.data) -------------------------------------------------------------------------------- /extern/MVDream/mvdream/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/MVDream/mvdream/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /extern/MVDream/mvdream/model_zoo.py: -------------------------------------------------------------------------------- 1 | ''' Utiliy functions to load pre-trained models more easily ''' 2 | import os 3 | import pkg_resources 4 | from omegaconf import OmegaConf 5 | 6 | import torch 7 | from huggingface_hub import hf_hub_download 8 | 9 | from mvdream.ldm.util import instantiate_from_config 10 | 11 | 12 | PRETRAINED_MODELS = { 13 | "sd-v2.1-base-4view": { 14 | "config": "sd-v2-base.yaml", 15 | "repo_id": "MVDream/MVDream", 16 | "filename": "sd-v2.1-base-4view.pt" 17 | }, 18 | "sd-v1.5-4view": { 19 | "config": "sd-v1.yaml", 20 | "repo_id": "MVDream/MVDream", 21 | "filename": "sd-v1.5-4view.pt" 22 | } 23 | } 24 | 25 | 26 | def get_config_file(config_path): 27 | cfg_file = pkg_resources.resource_filename( 28 | "mvdream", os.path.join("configs", config_path) 29 | ) 30 | if not os.path.exists(cfg_file): 31 | raise RuntimeError(f"Config {config_path} not available!") 32 | return cfg_file 33 | 34 | 35 | def build_model(model_name, ckpt_path=None, cache_dir=None): 36 | print("========building model=======") 37 | print(model_name, ckpt_path, cache_dir) 38 | if not model_name in PRETRAINED_MODELS: 39 | raise RuntimeError( 40 | f"Model name {model_name} is not a pre-trained model. 
Available models are:\n- " + \ 41 | "\n- ".join(PRETRAINED_MODELS.keys()) 42 | ) 43 | model_info = PRETRAINED_MODELS[model_name] 44 | 45 | # Instiantiate the model 46 | print(f"Loading model from config: {model_info['config']}") 47 | config_file = get_config_file(model_info["config"]) 48 | print("++++++++++++++++++++++ 1") 49 | config = OmegaConf.load(config_file) 50 | print("++++++++++++++++++++++ 2") 51 | model = instantiate_from_config(config.model) 52 | 53 | print("++++++++++++++++++++++") 54 | 55 | # Load pre-trained checkpoint from huggingface 56 | if not ckpt_path: 57 | ckpt_path = hf_hub_download( 58 | repo_id=model_info["repo_id"], 59 | filename=model_info["filename"], 60 | cache_dir=cache_dir 61 | ) 62 | print(f"Loading model from cache file: {ckpt_path}") 63 | model.load_state_dict(torch.load(ckpt_path, map_location="cpu")) 64 | return model 65 | -------------------------------------------------------------------------------- /extern/MVDream/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | imageio 3 | imageio-ffmpeg 4 | omegaconf 5 | einops 6 | transformers==4.27.1 7 | open-clip-torch==2.7.0 8 | gradio>=3.13.2 9 | xformers==0.0.16 10 | -------------------------------------------------------------------------------- /extern/MVDream/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='mvdream', 5 | version='0.0.1', 6 | description='Multi-view Diffusion Models', 7 | author="ByteDance", 8 | packages=find_packages(), 9 | package_data={"mvdream": ["configs/*.yaml"]} , 10 | install_requires=[ 11 | 'torch', 12 | 'numpy', 13 | 'tqdm', 14 | 'omegaconf', 15 | 'einops', 16 | 'huggingface_hub', 17 | "transformers", 18 | "open-clip-torch", 19 | ], 20 | ) 21 | -------------------------------------------------------------------------------- /extern/ldm_zero123/extras.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from contextlib import contextmanager 3 | from pathlib import Path 4 | 5 | import torch 6 | from omegaconf import OmegaConf 7 | 8 | from extern.ldm_zero123.util import instantiate_from_config 9 | 10 | 11 | @contextmanager 12 | def all_logging_disabled(highest_level=logging.CRITICAL): 13 | """ 14 | A context manager that will prevent any logging messages 15 | triggered during the body from being processed. 16 | 17 | :param highest_level: the maximum logging level in use. 18 | This would only need to be changed if a custom level greater than CRITICAL 19 | is defined. 20 | 21 | https://gist.github.com/simon-weber/7853144 22 | """ 23 | # two kind-of hacks here: 24 | # * can't get the highest logging level in effect => delegate to the user 25 | # * can't get the current module-level override => use an undocumented 26 | # (but non-private!) 
interface 27 | 28 | previous_level = logging.root.manager.disable 29 | 30 | logging.disable(highest_level) 31 | 32 | try: 33 | yield 34 | finally: 35 | logging.disable(previous_level) 36 | 37 | 38 | def load_training_dir(train_dir, device, epoch="last"): 39 | """Load a checkpoint and config from training directory""" 40 | train_dir = Path(train_dir) 41 | ckpt = list(train_dir.rglob(f"*{epoch}.ckpt")) 42 | assert len(ckpt) == 1, f"found {len(ckpt)} matching ckpt files" 43 | config = list(train_dir.rglob(f"*-project.yaml")) 44 | assert len(ckpt) > 0, f"didn't find any config in {train_dir}" 45 | if len(config) > 1: 46 | print(f"found {len(config)} matching config files") 47 | config = sorted(config)[-1] 48 | print(f"selecting {config}") 49 | else: 50 | config = config[0] 51 | 52 | config = OmegaConf.load(config) 53 | return load_model_from_config(config, ckpt[0], device) 54 | 55 | 56 | def load_model_from_config(config, ckpt, device="cpu", verbose=False): 57 | """Loads a model from config and a ckpt 58 | if config is a path will use omegaconf to load 59 | """ 60 | if isinstance(config, (str, Path)): 61 | config = OmegaConf.load(config) 62 | 63 | with all_logging_disabled(): 64 | print(f"Loading model from {ckpt}") 65 | pl_sd = torch.load(ckpt, map_location="cpu") 66 | global_step = pl_sd["global_step"] 67 | sd = pl_sd["state_dict"] 68 | model = instantiate_from_config(config.model) 69 | m, u = model.load_state_dict(sd, strict=False) 70 | if len(m) > 0 and verbose: 71 | print("missing keys:") 72 | print(m) 73 | if len(u) > 0 and verbose: 74 | print("unexpected keys:") 75 | model.to(device) 76 | model.eval() 77 | model.cond_stage_model.device = device 78 | return model 79 | -------------------------------------------------------------------------------- /extern/ldm_zero123/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/ldm_zero123/models/diffusion/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/models/diffusion/sampling_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def append_dims(x, target_dims): 6 | """Appends dimensions to the end of a tensor until it has target_dims dimensions. 7 | From https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/utils.py""" 8 | dims_to_append = target_dims - x.ndim 9 | if dims_to_append < 0: 10 | raise ValueError( 11 | f"input has {x.ndim} dims but target_dims is {target_dims}, which is less" 12 | ) 13 | return x[(...,) + (None,) * dims_to_append] 14 | 15 | 16 | def renorm_thresholding(x0, value): 17 | # renorm 18 | pred_max = x0.max() 19 | pred_min = x0.min() 20 | pred_x0 = (x0 - pred_min) / (pred_max - pred_min) # 0 ... 1 21 | pred_x0 = 2 * pred_x0 - 1.0 # -1 ... 1 22 | 23 | s = torch.quantile(rearrange(pred_x0, "b ... 
-> b (...)").abs(), value, dim=-1) 24 | s.clamp_(min=1.0) 25 | s = s.view(-1, *((1,) * (pred_x0.ndim - 1))) 26 | 27 | # clip by threshold 28 | # pred_x0 = pred_x0.clamp(-s, s) / s # needs newer pytorch # TODO bring back to pure-gpu with min/max 29 | 30 | # temporary hack: numpy on cpu 31 | pred_x0 = ( 32 | np.clip(pred_x0.cpu().numpy(), -s.cpu().numpy(), s.cpu().numpy()) 33 | / s.cpu().numpy() 34 | ) 35 | pred_x0 = torch.tensor(pred_x0).to(self.model.device) 36 | 37 | # re.renorm 38 | pred_x0 = (pred_x0 + 1.0) / 2.0 # 0 ... 1 39 | pred_x0 = (pred_max - pred_min) * pred_x0 + pred_min # orig range 40 | return pred_x0 41 | 42 | 43 | def norm_thresholding(x0, value): 44 | s = append_dims(x0.pow(2).flatten(1).mean(1).sqrt().clamp(min=value), x0.ndim) 45 | return x0 * (value / s) 46 | 47 | 48 | def spatial_norm_thresholding(x0, value): 49 | # b c h w 50 | s = x0.pow(2).mean(1, keepdim=True).sqrt().clamp(min=value) 51 | return x0 * (value / s) 52 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/ldm_zero123/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/ldm_zero123/modules/distributions/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to( 34 | device=self.parameters.device 35 | ) 36 | 37 | def sample(self): 38 | x = self.mean + self.std * torch.randn(self.mean.shape).to( 39 | device=self.parameters.device 40 | ) 41 | return x 42 | 43 | def kl(self, other=None): 44 | if self.deterministic: 45 | return torch.Tensor([0.0]) 46 | else: 47 | if other is None: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, 50 | dim=[1, 2, 3], 51 | ) 52 | else: 53 | return 0.5 * torch.sum( 54 | torch.pow(self.mean - other.mean, 2) / other.var 55 | + self.var / other.var 56 | - 1.0 57 | - self.logvar 58 | + other.logvar, 59 | dim=[1, 2, 3], 60 | ) 61 | 62 | def nll(self, sample, dims=[1, 2, 3]): 63 | if self.deterministic: 64 | return 
torch.Tensor([0.0]) 65 | logtwopi = np.log(2.0 * np.pi) 66 | return 0.5 * torch.sum( 67 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 68 | dim=dims, 69 | ) 70 | 71 | def mode(self): 72 | return self.mean 73 | 74 | 75 | def normal_kl(mean1, logvar1, mean2, logvar2): 76 | """ 77 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 78 | Compute the KL divergence between two gaussians. 79 | Shapes are automatically broadcasted, so batches can be compared to 80 | scalars, among other use cases. 81 | """ 82 | tensor = None 83 | for obj in (mean1, logvar1, mean2, logvar2): 84 | if isinstance(obj, torch.Tensor): 85 | tensor = obj 86 | break 87 | assert tensor is not None, "at least one argument must be a Tensor" 88 | 89 | # Force variances to be Tensors. Broadcasting helps convert scalars to 90 | # Tensors, but it does not work for torch.exp(). 91 | logvar1, logvar2 = [ 92 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 93 | for x in (logvar1, logvar2) 94 | ] 95 | 96 | return 0.5 * ( 97 | -1.0 98 | + logvar2 99 | - logvar1 100 | + torch.exp(logvar1 - logvar2) 101 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 102 | ) 103 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError("Decay must be between 0 and 1") 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer("decay", torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer( 14 | "num_updates", 15 | torch.tensor(0, dtype=torch.int) 16 | if use_num_upates 17 | else torch.tensor(-1, dtype=torch.int), 18 | ) 19 | 20 | for name, p in model.named_parameters(): 21 | if p.requires_grad: 22 | # remove as '.'-character is not allowed in buffers 23 | s_name = name.replace(".", "") 24 | self.m_name2s_name.update({name: s_name}) 25 | self.register_buffer(s_name, p.clone().detach().data) 26 | 27 | self.collected_params = [] 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_( 47 | one_minus_decay * (shadow_params[sname] - m_param[key]) 48 | ) 49 | else: 50 | assert not key in self.m_name2s_name 51 | 52 | def copy_to(self, model): 53 | m_param = dict(model.named_parameters()) 54 | shadow_params = dict(self.named_buffers()) 55 | for key in m_param: 56 | if m_param[key].requires_grad: 57 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 58 | else: 59 | assert not key in self.m_name2s_name 60 | 61 | def store(self, parameters): 62 | """ 63 | Save the current parameters for restoring later. 64 | Args: 65 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 66 | temporarily stored. 
67 | """ 68 | self.collected_params = [param.clone() for param in parameters] 69 | 70 | def restore(self, parameters): 71 | """ 72 | Restore the parameters stored with the `store` method. 73 | Useful to validate the model with EMA parameters without affecting the 74 | original optimization process. Store the parameters before the 75 | `copy_to` method. After validation (or model saving), use this to 76 | restore the former parameters. 77 | Args: 78 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 79 | updated with the stored parameters. 80 | """ 81 | for c_param, param in zip(self.collected_params, parameters): 82 | param.data.copy_(c_param.data) 83 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/interactive-3d/interactive3d/11519c0d8a489cf19efa1ec3b8fa0a058e7ed45a/extern/ldm_zero123/modules/encoders/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/image_degradation/__init__.py: -------------------------------------------------------------------------------- 1 | from extern.ldm_zero123.modules.image_degradation.bsrgan import ( 2 | degradation_bsrgan_variant as degradation_fn_bsr, 3 | ) 4 | from extern.ldm_zero123.modules.image_degradation.bsrgan_light import ( 5 | degradation_bsrgan_variant as degradation_fn_bsr_light, 6 | ) 7 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from extern.ldm_zero123.modules.losses.contperceptual import LPIPSWithDiscriminator 2 | -------------------------------------------------------------------------------- /extern/ldm_zero123/thirdp/psp/id_loss.py: -------------------------------------------------------------------------------- 1 | # https://github.com/eladrich/pixel2style2pixel 2 | import torch 3 | from torch import nn 4 | 5 | from extern.ldm_zero123.thirdp.psp.model_irse import Backbone 6 | 7 | 8 | class IDFeatures(nn.Module): 9 | def __init__(self, model_path): 10 | super(IDFeatures, self).__init__() 11 | print("Loading ResNet ArcFace") 12 | self.facenet = Backbone( 13 | input_size=112, num_layers=50, drop_ratio=0.6, mode="ir_se" 14 | ) 15 | self.facenet.load_state_dict(torch.load(model_path, map_location="cpu")) 16 | self.face_pool = torch.nn.AdaptiveAvgPool2d((112, 112)) 17 | self.facenet.eval() 18 | 19 | def forward(self, x, crop=False): 20 | # Not sure of the image range here 21 | if crop: 22 | x = torch.nn.functional.interpolate(x, (256, 256), mode="area") 23 | x = x[:, :, 35:223, 32:220] 24 | x = self.face_pool(x) 25 | x_feats = self.facenet(x) 26 | return x_feats 27 | -------------------------------------------------------------------------------- /extern/ldm_zero123/thirdp/psp/model_irse.py: -------------------------------------------------------------------------------- 1 | # https://github.com/eladrich/pixel2style2pixel 2 | 3 | from torch.nn import ( 4 | BatchNorm1d, 5 | BatchNorm2d, 6 | Conv2d, 7 | Dropout, 8 | Linear, 9 | Module, 10 | PReLU, 11 | Sequential, 12 | ) 13 | 14 | from extern.ldm_zero123.thirdp.psp.helpers import ( 15 | Flatten, 16 | bottleneck_IR, 17 | bottleneck_IR_SE, 18 | get_blocks, 19 | l2_norm, 20 | ) 21 | 22 | """ 23 | Modified Backbone implementation from 
[TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch) 24 | """ 25 | 26 | 27 | class Backbone(Module): 28 | def __init__(self, input_size, num_layers, mode="ir", drop_ratio=0.4, affine=True): 29 | super(Backbone, self).__init__() 30 | assert input_size in [112, 224], "input_size should be 112 or 224" 31 | assert num_layers in [50, 100, 152], "num_layers should be 50, 100 or 152" 32 | assert mode in ["ir", "ir_se"], "mode should be ir or ir_se" 33 | blocks = get_blocks(num_layers) 34 | if mode == "ir": 35 | unit_module = bottleneck_IR 36 | elif mode == "ir_se": 37 | unit_module = bottleneck_IR_SE 38 | self.input_layer = Sequential( 39 | Conv2d(3, 64, (3, 3), 1, 1, bias=False), BatchNorm2d(64), PReLU(64) 40 | ) 41 | if input_size == 112: 42 | self.output_layer = Sequential( 43 | BatchNorm2d(512), 44 | Dropout(drop_ratio), 45 | Flatten(), 46 | Linear(512 * 7 * 7, 512), 47 | BatchNorm1d(512, affine=affine), 48 | ) 49 | else: 50 | self.output_layer = Sequential( 51 | BatchNorm2d(512), 52 | Dropout(drop_ratio), 53 | Flatten(), 54 | Linear(512 * 14 * 14, 512), 55 | BatchNorm1d(512, affine=affine), 56 | ) 57 | 58 | modules = [] 59 | for block in blocks: 60 | for bottleneck in block: 61 | modules.append( 62 | unit_module( 63 | bottleneck.in_channel, bottleneck.depth, bottleneck.stride 64 | ) 65 | ) 66 | self.body = Sequential(*modules) 67 | 68 | def forward(self, x): 69 | x = self.input_layer(x) 70 | x = self.body(x) 71 | x = self.output_layer(x) 72 | return l2_norm(x) 73 | 74 | 75 | def IR_50(input_size): 76 | """Constructs a ir-50 model.""" 77 | model = Backbone(input_size, num_layers=50, mode="ir", drop_ratio=0.4, affine=False) 78 | return model 79 | 80 | 81 | def IR_101(input_size): 82 | """Constructs a ir-101 model.""" 83 | model = Backbone( 84 | input_size, num_layers=100, mode="ir", drop_ratio=0.4, affine=False 85 | ) 86 | return model 87 | 88 | 89 | def IR_152(input_size): 90 | """Constructs a ir-152 model.""" 91 | model = Backbone( 92 | input_size, num_layers=152, mode="ir", drop_ratio=0.4, affine=False 93 | ) 94 | return model 95 | 96 | 97 | def IR_SE_50(input_size): 98 | """Constructs a ir_se-50 model.""" 99 | model = Backbone( 100 | input_size, num_layers=50, mode="ir_se", drop_ratio=0.4, affine=False 101 | ) 102 | return model 103 | 104 | 105 | def IR_SE_101(input_size): 106 | """Constructs a ir_se-101 model.""" 107 | model = Backbone( 108 | input_size, num_layers=100, mode="ir_se", drop_ratio=0.4, affine=False 109 | ) 110 | return model 111 | 112 | 113 | def IR_SE_152(input_size): 114 | """Constructs a ir_se-152 model.""" 115 | model = Backbone( 116 | input_size, num_layers=152, mode="ir_se", drop_ratio=0.4, affine=False 117 | ) 118 | return model 119 | -------------------------------------------------------------------------------- /keyboard.py: -------------------------------------------------------------------------------- 1 | import curses 2 | 3 | def interactive_mode(stdscr): 4 | a = 0 5 | b = 0 6 | 7 | curses.noecho() 8 | curses.cbreak() 9 | stdscr.keypad(1) 10 | 11 | stdscr.addstr(0, 10, "Press 'a', 'b' or 'q'...") 12 | stdscr.refresh() 13 | 14 | while True: 15 | key = stdscr.getch() 16 | if key == ord('a'): 17 | a += 1 18 | stdscr.addstr(1, 10, f"Variable 'a' incremented. Value: {a} ") # Added spaces to clear previous message 19 | stdscr.refresh() 20 | elif key == ord('b'): 21 | b += 1 22 | stdscr.addstr(1, 10, f"Variable 'b' incremented. Value: {b} ") 23 | stdscr.refresh() 24 | elif key == ord('q'): 25 | stdscr.addstr(1, 10, "Exiting interactive mode. 
") 26 | stdscr.refresh() 27 | break 28 | else: 29 | stdscr.addstr(1, 10, f"Unknown key: {chr(key)}. Press 'a', 'b' or 'q'... ") 30 | stdscr.refresh() 31 | 32 | # Run the function 33 | curses.wrapper(interactive_mode) 34 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | mypy 3 | pylint 4 | pre-commit 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=2.0.1 2 | boto3 3 | lightning==2.0.0 4 | omegaconf==2.3.0 5 | jaxtyping 6 | typeguard 7 | git+https://github.com/KAIR-BAIR/nerfacc.git@v0.5.2 8 | git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch 9 | diffusers 10 | transformers 11 | accelerate 12 | opencv-python 13 | tensorboard 14 | matplotlib 15 | imageio>=2.28.0 16 | imageio[ffmpeg] 17 | git+https://github.com/NVlabs/nvdiffrast.git 18 | libigl 19 | xatlas 20 | trimesh[easy] 21 | networkx 22 | pysdf 23 | PyMCubes 24 | wandb 25 | gradio 26 | git+https://github.com/ashawkey/envlight.git 27 | torchmetrics 28 | 29 | # deepfloyd 30 | xformers 31 | bitsandbytes 32 | sentencepiece 33 | safetensors 34 | huggingface_hub 35 | 36 | # for zero123 37 | einops 38 | kornia 39 | taming-transformers-rom1504 40 | git+https://github.com/openai/CLIP.git 41 | 42 | #controlnet 43 | controlnet_aux 44 | 45 | # mvdream 46 | open-clip-torch==2.7.0 47 | git+https://github.com/bytedance/MVDream 48 | 49 | git+https://github.com/graphdeco-inria/diff-gaussian-rasterization.git -------------------------------------------------------------------------------- /threestudio/__init__.py: -------------------------------------------------------------------------------- 1 | __modules__ = {} 2 | 3 | 4 | def register(name): 5 | def decorator(cls): 6 | __modules__[name] = cls 7 | return cls 8 | 9 | return decorator 10 | 11 | 12 | def find(name): 13 | return __modules__[name] 14 | 15 | 16 | ### grammar sugar for logging utilities ### 17 | import logging 18 | 19 | logger = logging.getLogger("pytorch_lightning") 20 | 21 | from pytorch_lightning.utilities.rank_zero import ( 22 | rank_zero_debug, 23 | rank_zero_info, 24 | rank_zero_only, 25 | ) 26 | 27 | debug = rank_zero_debug 28 | info = rank_zero_info 29 | 30 | 31 | @rank_zero_only 32 | def warn(*args, **kwargs): 33 | logger.warn(*args, **kwargs) 34 | 35 | 36 | from . import data, models, systems 37 | -------------------------------------------------------------------------------- /threestudio/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import co3d, image, multiview, uncond, edit_multiview, edit_multiview_gs 2 | -------------------------------------------------------------------------------- /threestudio/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | background, 3 | exporters, 4 | geometry, 5 | guidance, 6 | materials, 7 | prompt_processors, 8 | renderers, 9 | ) 10 | -------------------------------------------------------------------------------- /threestudio/models/background/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | base, 3 | neural_environment_map_background, 4 | solid_color_background, 5 | textured_background, 6 | ) 7 | -------------------------------------------------------------------------------- /threestudio/models/background/base.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.utils.base import BaseModule 10 | from threestudio.utils.typing import * 11 | 12 | 13 | class BaseBackground(BaseModule): 14 | @dataclass 15 | class Config(BaseModule.Config): 16 | pass 17 | 18 | cfg: Config 19 | 20 | def configure(self): 21 | pass 22 | 23 | def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]: 24 | raise NotImplementedError 25 | -------------------------------------------------------------------------------- /threestudio/models/background/neural_environment_map_background.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.background.base import BaseBackground 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("neural-environment-map-background") 16 | class NeuralEnvironmentMapBackground(BaseBackground): 17 | @dataclass 18 | class Config(BaseBackground.Config): 19 | n_output_dims: int = 3 20 | color_activation: str = "sigmoid" 21 | dir_encoding_config: dict = field( 22 | default_factory=lambda: {"otype": "SphericalHarmonics", "degree": 3} 23 | ) 24 | mlp_network_config: dict = field( 25 | default_factory=lambda: { 26 | "otype": "VanillaMLP", 27 | "activation": "ReLU", 28 | "n_neurons": 16, 29 | "n_hidden_layers": 2, 30 | } 31 | ) 32 | random_aug: bool = False 33 | random_aug_prob: float = 0.5 34 | share_aug_bg: bool = False 35 | eval_color: Optional[Tuple[float, float, float]] = None 36 | 37 | cfg: Config 38 | 39 | def configure(self) -> None: 40 | self.encoding = get_encoding(3, self.cfg.dir_encoding_config) 41 | self.network = get_mlp( 42 | self.encoding.n_output_dims, 43 | self.cfg.n_output_dims, 44 | self.cfg.mlp_network_config, 45 | ) 46 | self.cur_bg = None 47 | 48 | def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]: 49 | if not self.training and self.cfg.eval_color is not None: 50 | return torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to( 51 | dirs 52 | ) * torch.as_tensor(self.cfg.eval_color).to(dirs) 53 | # viewdirs must be normalized before passing to this function 54 | dirs = (dirs + 1.0) / 2.0 # (-1, 1) => (0, 1) 55 | dirs_embd = self.encoding(dirs.view(-1, 3)) 56 | color = self.network(dirs_embd).view(*dirs.shape[:-1], self.cfg.n_output_dims) 57 | color = get_activation(self.cfg.color_activation)(color) 58 | if ( 59 | self.training 60 | and self.cfg.random_aug 61 | and random.random() < self.cfg.random_aug_prob 62 | ): 63 | # use random background color with probability random_aug_prob 64 | n_color = 1 if self.cfg.share_aug_bg else dirs.shape[0] 65 | color = color * 0 + ( # prevent checking for unused parameters in DDP 66 | torch.rand(n_color, 1, 1, self.cfg.n_output_dims) 67 | .to(dirs) 68 | 
.expand(*dirs.shape[:-1], -1) 69 | ) 70 | self.cur_bg = color 71 | return color 72 | -------------------------------------------------------------------------------- /threestudio/models/background/solid_color_background.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.background.base import BaseBackground 10 | from threestudio.utils.typing import * 11 | 12 | 13 | @threestudio.register("solid-color-background") 14 | class SolidColorBackground(BaseBackground): 15 | @dataclass 16 | class Config(BaseBackground.Config): 17 | n_output_dims: int = 3 18 | color: Tuple = (1.0, 1.0, 1.0) 19 | learned: bool = False 20 | random_aug: bool = False 21 | random_aug_prob: float = 0.5 22 | 23 | cfg: Config 24 | 25 | def configure(self) -> None: 26 | self.env_color: Float[Tensor, "Nc"] 27 | if self.cfg.learned: 28 | self.env_color = nn.Parameter( 29 | torch.as_tensor(self.cfg.color, dtype=torch.float32) 30 | ) 31 | else: 32 | self.register_buffer( 33 | "env_color", torch.as_tensor(self.cfg.color, dtype=torch.float32) 34 | ) 35 | 36 | def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]: 37 | color = ( 38 | torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to(dirs) 39 | * self.env_color 40 | ) 41 | if ( 42 | self.training 43 | and self.cfg.random_aug 44 | and random.random() < self.cfg.random_aug_prob 45 | ): 46 | # use random background color with probability random_aug_prob 47 | color = color * 0 + ( # prevent checking for unused parameters in DDP 48 | torch.rand(dirs.shape[0], 1, 1, self.cfg.n_output_dims) 49 | .to(dirs) 50 | .expand(*dirs.shape[:-1], -1) 51 | ) 52 | return color 53 | -------------------------------------------------------------------------------- /threestudio/models/background/textured_background.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | import threestudio 8 | from threestudio.models.background.base import BaseBackground 9 | from threestudio.utils.ops import get_activation 10 | from threestudio.utils.typing import * 11 | 12 | 13 | @threestudio.register("textured-background") 14 | class TexturedBackground(BaseBackground): 15 | @dataclass 16 | class Config(BaseBackground.Config): 17 | n_output_dims: int = 3 18 | height: int = 64 19 | width: int = 64 20 | color_activation: str = "sigmoid" 21 | 22 | cfg: Config 23 | 24 | def configure(self) -> None: 25 | self.texture = nn.Parameter( 26 | torch.randn((1, self.cfg.n_output_dims, self.cfg.height, self.cfg.width)) 27 | ) 28 | 29 | def spherical_xyz_to_uv(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B 2"]: 30 | x, y, z = dirs[..., 0], dirs[..., 1], dirs[..., 2] 31 | xy = (x**2 + y**2) ** 0.5 32 | u = torch.atan2(xy, z) / torch.pi 33 | v = torch.atan2(y, x) / (torch.pi * 2) + 0.5 34 | uv = torch.stack([u, v], -1) 35 | return uv 36 | 37 | def forward(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B Nc"]: 38 | dirs_shape = dirs.shape[:-1] 39 | uv = self.spherical_xyz_to_uv(dirs.reshape(-1, dirs.shape[-1])) 40 | uv = 2 * uv - 1 # rescale to [-1, 1] for grid_sample 41 | uv = uv.reshape(1, -1, 1, 2) 42 | color = ( 43 | F.grid_sample( 44 | self.texture, 45 | uv, 46 | mode="bilinear", 47 | 
padding_mode="reflection", 48 | align_corners=False, 49 | ) 50 | .reshape(self.cfg.n_output_dims, -1) 51 | .T.reshape(*dirs_shape, self.cfg.n_output_dims) 52 | ) 53 | color = get_activation(self.cfg.color_activation)(color) 54 | return color 55 | -------------------------------------------------------------------------------- /threestudio/models/exporters/__init__.py: -------------------------------------------------------------------------------- 1 | from . import base, mesh_exporter 2 | -------------------------------------------------------------------------------- /threestudio/models/exporters/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import threestudio 4 | from threestudio.models.background.base import BaseBackground 5 | from threestudio.models.geometry.base import BaseImplicitGeometry 6 | from threestudio.models.materials.base import BaseMaterial 7 | from threestudio.utils.base import BaseObject 8 | from threestudio.utils.typing import * 9 | 10 | 11 | @dataclass 12 | class ExporterOutput: 13 | save_name: str 14 | save_type: str 15 | params: Dict[str, Any] 16 | 17 | 18 | class Exporter(BaseObject): 19 | @dataclass 20 | class Config(BaseObject.Config): 21 | save_video: bool = False 22 | 23 | cfg: Config 24 | 25 | def configure( 26 | self, 27 | geometry: BaseImplicitGeometry, 28 | material: BaseMaterial, 29 | background: BaseBackground, 30 | ) -> None: 31 | @dataclass 32 | class SubModules: 33 | geometry: BaseImplicitGeometry 34 | material: BaseMaterial 35 | background: BaseBackground 36 | 37 | self.sub_modules = SubModules(geometry, material, background) 38 | 39 | @property 40 | def geometry(self) -> BaseImplicitGeometry: 41 | return self.sub_modules.geometry 42 | 43 | @property 44 | def material(self) -> BaseMaterial: 45 | return self.sub_modules.material 46 | 47 | @property 48 | def background(self) -> BaseBackground: 49 | return self.sub_modules.background 50 | 51 | def __call__(self, *args, **kwargs) -> List[ExporterOutput]: 52 | raise NotImplementedError 53 | 54 | 55 | @threestudio.register("dummy-exporter") 56 | class DummyExporter(Exporter): 57 | def __call__(self, *args, **kwargs) -> List[ExporterOutput]: 58 | # DummyExporter does not export anything 59 | return [] 60 | -------------------------------------------------------------------------------- /threestudio/models/geometry/__init__.py: -------------------------------------------------------------------------------- 1 | from . import base, implicit_sdf, implicit_volume, tetrahedra_sdf_grid, volume_grid, gaussian, implicit_volume_edit 2 | -------------------------------------------------------------------------------- /threestudio/models/guidance/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | controlnet_guidance, 3 | deep_floyd_guidance, 4 | instructpix2pix_guidance, 5 | stable_diffusion_guidance, 6 | stable_diffusion_vsd_guidance, 7 | zero123_guidance, 8 | multiview_diffusion_guidance, 9 | stable_diffusion_unified_guidance, 10 | # zero123_unified_guidance, 11 | deep_floyd_guidance_stage2, 12 | pixart_guidance, 13 | ) 14 | -------------------------------------------------------------------------------- /threestudio/models/materials/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | base, 3 | diffuse_with_point_light_material, 4 | hybrid_rgb_latent_material, 5 | neural_radiance_material, 6 | no_material, 7 | pbr_material, 8 | sd_latent_adapter_material, 9 | ) 10 | -------------------------------------------------------------------------------- /threestudio/models/materials/base.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.utils.base import BaseModule 10 | from threestudio.utils.typing import * 11 | 12 | 13 | class BaseMaterial(BaseModule): 14 | @dataclass 15 | class Config(BaseModule.Config): 16 | pass 17 | 18 | cfg: Config 19 | requires_normal: bool = False 20 | requires_tangent: bool = False 21 | 22 | def configure(self): 23 | pass 24 | 25 | def forward(self, *args, **kwargs) -> Float[Tensor, "*B 3"]: 26 | raise NotImplementedError 27 | 28 | def export(self, *args, **kwargs) -> Dict[str, Any]: 29 | return {} 30 | -------------------------------------------------------------------------------- /threestudio/models/materials/hybrid_rgb_latent_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import dot, get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("hybrid-rgb-latent-material") 16 | class HybridRGBLatentMaterial(BaseMaterial): 17 | @dataclass 18 | class Config(BaseMaterial.Config): 19 | n_output_dims: int = 3 20 | color_activation: str = "sigmoid" 21 | requires_normal: bool = True 22 | 23 | cfg: Config 24 | 25 | def configure(self) -> None: 26 | self.requires_normal = self.cfg.requires_normal 27 | 28 | def forward( 29 | self, features: Float[Tensor, "B ... Nf"], **kwargs 30 | ) -> Float[Tensor, "B ... Nc"]: 31 | assert ( 32 | features.shape[-1] == self.cfg.n_output_dims 33 | ), f"Expected {self.cfg.n_output_dims} output dims, only got {features.shape[-1]} dims input." 
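        # Only the first three feature channels are treated as RGB and squashed by the
        # configured activation below; any remaining channels pass through unchanged
        # as latent features (hence the "hybrid" RGB + latent material).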
34 | color = features 35 | color[..., :3] = get_activation(self.cfg.color_activation)(color[..., :3]) 36 | return color 37 | -------------------------------------------------------------------------------- /threestudio/models/materials/neural_radiance_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import dot, get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("neural-radiance-material") 16 | class NeuralRadianceMaterial(BaseMaterial): 17 | @dataclass 18 | class Config(BaseMaterial.Config): 19 | input_feature_dims: int = 8 20 | color_activation: str = "sigmoid" 21 | dir_encoding_config: dict = field( 22 | default_factory=lambda: {"otype": "SphericalHarmonics", "degree": 3} 23 | ) 24 | mlp_network_config: dict = field( 25 | default_factory=lambda: { 26 | "otype": "FullyFusedMLP", 27 | "activation": "ReLU", 28 | "n_neurons": 16, 29 | "n_hidden_layers": 2, 30 | } 31 | ) 32 | 33 | cfg: Config 34 | 35 | def configure(self) -> None: 36 | self.encoding = get_encoding(3, self.cfg.dir_encoding_config) 37 | self.n_input_dims = self.cfg.input_feature_dims + self.encoding.n_output_dims # type: ignore 38 | self.network = get_mlp(self.n_input_dims, 3, self.cfg.mlp_network_config) 39 | 40 | def forward( 41 | self, 42 | features: Float[Tensor, "*B Nf"], 43 | viewdirs: Float[Tensor, "*B 3"], 44 | **kwargs, 45 | ) -> Float[Tensor, "*B 3"]: 46 | # viewdirs and normals must be normalized before passing to this function 47 | viewdirs = (viewdirs + 1.0) / 2.0 # (-1, 1) => (0, 1) 48 | viewdirs_embd = self.encoding(viewdirs.view(-1, 3)) 49 | network_inp = torch.cat( 50 | [features.view(-1, features.shape[-1]), viewdirs_embd], dim=-1 51 | ) 52 | color = self.network(network_inp).view(*features.shape[:-1], 3) 53 | color = get_activation(self.cfg.color_activation)(color) 54 | return color 55 | -------------------------------------------------------------------------------- /threestudio/models/materials/no_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import dot, get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("no-material") 16 | class NoMaterial(BaseMaterial): 17 | @dataclass 18 | class Config(BaseMaterial.Config): 19 | n_output_dims: int = 3 20 | color_activation: str = "sigmoid" 21 | input_feature_dims: Optional[int] = None 22 | mlp_network_config: Optional[dict] = None 23 | requires_normal: bool = False 24 | 25 | cfg: Config 26 | 27 | def configure(self) -> None: 28 | self.use_network = False 29 | if ( 30 | self.cfg.input_feature_dims is not None 31 | and self.cfg.mlp_network_config is not None 32 | ): 33 | self.network = get_mlp( 34 | self.cfg.input_feature_dims, 35 | self.cfg.n_output_dims, 36 | self.cfg.mlp_network_config, 37 | ) 38 | self.use_network = True 39 | 
self.requires_normal = self.cfg.requires_normal 40 | 41 | def forward( 42 | self, features: Float[Tensor, "B ... Nf"], **kwargs 43 | ) -> Float[Tensor, "B ... Nc"]: 44 | if not self.use_network: 45 | assert ( 46 | features.shape[-1] == self.cfg.n_output_dims 47 | ), f"Expected {self.cfg.n_output_dims} output dims, only got {features.shape[-1]} dims input." 48 | color = get_activation(self.cfg.color_activation)(features) 49 | else: 50 | color = self.network(features.view(-1, features.shape[-1])).view( 51 | *features.shape[:-1], self.cfg.n_output_dims 52 | ) 53 | color = get_activation(self.cfg.color_activation)(color) 54 | return color 55 | 56 | def export(self, features: Float[Tensor, "*N Nf"], **kwargs) -> Dict[str, Any]: 57 | color = self(features, **kwargs).clamp(0, 1) 58 | assert color.shape[-1] >= 3, "Output color must have at least 3 channels" 59 | if color.shape[-1] > 3: 60 | threestudio.warn( 61 | "Output color has >3 channels, treating the first 3 as RGB" 62 | ) 63 | return {"albedo": color[..., :3]} 64 | -------------------------------------------------------------------------------- /threestudio/models/materials/no_material_backup.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import dot, get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("no-material") 16 | class NoMaterial(BaseMaterial): 17 | @dataclass 18 | class Config(BaseMaterial.Config): 19 | n_output_dims: int = 3 20 | color_activation: str = "sigmoid" 21 | input_feature_dims: Optional[int] = None 22 | mlp_network_config: Optional[dict] = None 23 | 24 | cfg: Config 25 | 26 | def configure(self) -> None: 27 | self.use_network = False 28 | if ( 29 | self.cfg.input_feature_dims is not None 30 | and self.cfg.mlp_network_config is not None 31 | ): 32 | self.network = get_mlp( 33 | self.cfg.input_feature_dims, 34 | self.cfg.n_output_dims, 35 | self.cfg.mlp_network_config, 36 | ) 37 | self.use_network = True 38 | 39 | def forward( 40 | self, features: Float[Tensor, "B ... Nf"], **kwargs 41 | ) -> Float[Tensor, "B ... Nc"]: 42 | if not self.use_network: 43 | assert ( 44 | features.shape[-1] == self.cfg.n_output_dims 45 | ), f"Expected {self.cfg.n_output_dims} output dims, only got {features.shape[-1]} dims input." 
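            # Without an MLP head, the geometry features are used directly as the
            # output color channels and only mapped through the activation function.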
46 | color = get_activation(self.cfg.color_activation)(features) 47 | else: 48 | color = self.network(features.view(-1, features.shape[-1])).view( 49 | *features.shape[:-1], self.cfg.n_output_dims 50 | ) 51 | color = get_activation(self.cfg.color_activation)(color) 52 | return color 53 | 54 | def export(self, features: Float[Tensor, "*N Nf"], **kwargs) -> Dict[str, Any]: 55 | color = self(features, **kwargs).clamp(0, 1) 56 | assert color.shape[-1] >= 3, "Output color must have at least 3 channels" 57 | if color.shape[-1] > 3: 58 | threestudio.warn( 59 | "Output color has >3 channels, treating the first 3 as RGB" 60 | ) 61 | return {"albedo": color[..., :3]} 62 | -------------------------------------------------------------------------------- /threestudio/models/materials/sd_latent_adapter_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.utils.typing import * 11 | 12 | 13 | @threestudio.register("sd-latent-adapter-material") 14 | class StableDiffusionLatentAdapterMaterial(BaseMaterial): 15 | @dataclass 16 | class Config(BaseMaterial.Config): 17 | pass 18 | 19 | cfg: Config 20 | 21 | def configure(self) -> None: 22 | adapter = nn.Parameter( 23 | torch.as_tensor( 24 | [ 25 | # R G B 26 | [0.298, 0.207, 0.208], # L1 27 | [0.187, 0.286, 0.173], # L2 28 | [-0.158, 0.189, 0.264], # L3 29 | [-0.184, -0.271, -0.473], # L4 30 | ] 31 | ) 32 | ) 33 | self.register_parameter("adapter", adapter) 34 | 35 | def forward( 36 | self, features: Float[Tensor, "B ... 4"], **kwargs 37 | ) -> Float[Tensor, "B ... 3"]: 38 | assert features.shape[-1] == 4 39 | color = features @ self.adapter 40 | color = (color + 1) / 2 41 | color = color.clamp(0.0, 1.0) 42 | return color 43 | -------------------------------------------------------------------------------- /threestudio/models/prompt_processors/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | base, 3 | deepfloyd_prompt_processor, 4 | dummy_prompt_processor, 5 | stable_diffusion_prompt_processor, 6 | pixart_prompt_processor, 7 | ) 8 | -------------------------------------------------------------------------------- /threestudio/models/prompt_processors/deepfloyd_prompt_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from dataclasses import dataclass 4 | 5 | import torch 6 | import torch.nn as nn 7 | from diffusers import IFPipeline 8 | from transformers import T5EncoderModel, T5Tokenizer 9 | 10 | import threestudio 11 | from threestudio.models.prompt_processors.base import PromptProcessor, hash_prompt 12 | from threestudio.utils.misc import cleanup 13 | from threestudio.utils.typing import * 14 | 15 | 16 | @threestudio.register("deep-floyd-prompt-processor") 17 | class DeepFloydPromptProcessor(PromptProcessor): 18 | @dataclass 19 | class Config(PromptProcessor.Config): 20 | pretrained_model_name_or_path: str = "DeepFloyd/IF-I-XL-v1.0" 21 | 22 | cfg: Config 23 | 24 | ### these functions are unused, kept for debugging ### 25 | def configure_text_encoder(self) -> None: 26 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 27 | self.text_encoder = T5EncoderModel.from_pretrained( 28 | self.cfg.pretrained_model_name_or_path, 29 | subfolder="text_encoder", 30 | load_in_8bit=True, 31 | variant="8bit", 32 | device_map="auto", 33 | ) # FIXME: behavior of auto device map in multi-GPU training 34 | self.pipe = IFPipeline.from_pretrained( 35 | self.cfg.pretrained_model_name_or_path, 36 | text_encoder=self.text_encoder, # pass the previously instantiated 8bit text encoder 37 | unet=None, 38 | local_files_only=True 39 | ) 40 | 41 | def destroy_text_encoder(self) -> None: 42 | del self.text_encoder 43 | del self.pipe 44 | cleanup() 45 | 46 | def get_text_embeddings( 47 | self, prompt: Union[str, List[str]], negative_prompt: Union[str, List[str]] 48 | ) -> Tuple[Float[Tensor, "B 77 4096"], Float[Tensor, "B 77 4096"]]: 49 | text_embeddings, uncond_text_embeddings = self.pipe.encode_prompt( 50 | prompt=prompt, negative_prompt=negative_prompt, device=self.device 51 | ) 52 | return text_embeddings, uncond_text_embeddings 53 | 54 | ### 55 | 56 | @staticmethod 57 | def spawn_func(pretrained_model_name_or_path, prompts, cache_dir): 58 | max_length = 77 59 | tokenizer = T5Tokenizer.from_pretrained( 60 | pretrained_model_name_or_path, subfolder="tokenizer" 61 | ) 62 | print("==== pretrained_model_name_or_path ===", pretrained_model_name_or_path) 63 | text_encoder = T5EncoderModel.from_pretrained( 64 | pretrained_model_name_or_path, 65 | subfolder="text_encoder", 66 | torch_dtype=torch.float16, # suppress warning 67 | load_in_8bit=True, 68 | variant="8bit", 69 | device_map="auto", 70 | ) 71 | with torch.no_grad(): 72 | text_inputs = tokenizer( 73 | prompts, 74 | padding="max_length", 75 | max_length=max_length, 76 | truncation=True, 77 | add_special_tokens=True, 78 | return_tensors="pt", 79 | ) 80 | text_input_ids = text_inputs.input_ids 81 | attention_mask = text_inputs.attention_mask 82 | text_embeddings = text_encoder( 83 | text_input_ids, 84 | attention_mask=attention_mask, 85 | ) 86 | text_embeddings = text_embeddings[0] 87 | 88 | for prompt, embedding in zip(prompts, text_embeddings): 89 | torch.save( 90 | embedding, 91 | os.path.join( 92 | cache_dir, 93 | f"{hash_prompt(pretrained_model_name_or_path, prompt)}.pt", 94 | ), 95 | ) 96 | 97 | del text_encoder 98 | 
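A minimal sketch, assuming only the cache layout used by spawn_func above, of how a saved embedding can be read back for a given prompt; load_cached_embedding is a hypothetical helper, not part of this module:

    import os
    import torch
    from threestudio.models.prompt_processors.base import hash_prompt

    def load_cached_embedding(cache_dir, pretrained_model_name_or_path, prompt):
        # spawn_func stores one tensor per prompt (77 tokens x 4096 dims,
        # per the annotations in get_text_embeddings) under a hash of
        # (model name, prompt)
        path = os.path.join(
            cache_dir, f"{hash_prompt(pretrained_model_name_or_path, prompt)}.pt"
        )
        return torch.load(path, map_location="cpu")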
-------------------------------------------------------------------------------- /threestudio/models/prompt_processors/dummy_prompt_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from dataclasses import dataclass 4 | 5 | import threestudio 6 | from threestudio.models.prompt_processors.base import PromptProcessor, hash_prompt 7 | from threestudio.utils.misc import cleanup 8 | from threestudio.utils.typing import * 9 | 10 | 11 | @threestudio.register("dummy-prompt-processor") 12 | class DummyPromptProcessor(PromptProcessor): 13 | @dataclass 14 | class Config(PromptProcessor.Config): 15 | pretrained_model_name_or_path: str = "" 16 | prompt: str = "" 17 | 18 | cfg: Config 19 | -------------------------------------------------------------------------------- /threestudio/models/renderers/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | base, 3 | deferred_volume_renderer, 4 | gan_volume_renderer, 5 | nerf_volume_renderer, 6 | neus_volume_renderer, 7 | nvdiff_rasterizer, 8 | patch_renderer, 9 | diff_gaussian_rasterizer, 10 | gsgen_renderer, 11 | magic123_renderer, 12 | threestudio_renderer, 13 | ) 14 | -------------------------------------------------------------------------------- /threestudio/models/renderers/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import nerfacc 4 | import torch 5 | import torch.nn.functional as F 6 | 7 | import threestudio 8 | from threestudio.models.background.base import BaseBackground 9 | from threestudio.models.geometry.base import BaseImplicitGeometry 10 | from threestudio.models.materials.base import BaseMaterial 11 | from threestudio.utils.base import BaseModule 12 | from threestudio.utils.typing import * 13 | 14 | 15 | class Renderer(BaseModule): 16 | @dataclass 17 | class Config(BaseModule.Config): 18 | radius: float = 1.0 19 | 20 | cfg: Config 21 | 22 | def configure( 23 | self, 24 | geometry: BaseImplicitGeometry, 25 | material: BaseMaterial, 26 | background: BaseBackground, 27 | ) -> None: 28 | # keep references to submodules using namedtuple, avoid being registered as modules 29 | @dataclass 30 | class SubModules: 31 | geometry: BaseImplicitGeometry 32 | material: BaseMaterial 33 | background: BaseBackground 34 | 35 | self.sub_modules = SubModules(geometry, material, background) 36 | 37 | # set up bounding box 38 | self.bbox: Float[Tensor, "2 3"] 39 | self.register_buffer( 40 | "bbox", 41 | torch.as_tensor( 42 | [ 43 | [-self.cfg.radius, -self.cfg.radius, -self.cfg.radius], 44 | [self.cfg.radius, self.cfg.radius, self.cfg.radius], 45 | ], 46 | dtype=torch.float32, 47 | ), 48 | ) 49 | 50 | def forward(self, *args, **kwargs) -> Dict[str, Any]: 51 | raise NotImplementedError 52 | 53 | @property 54 | def geometry(self) -> BaseImplicitGeometry: 55 | return self.sub_modules.geometry 56 | 57 | @property 58 | def material(self) -> BaseMaterial: 59 | return self.sub_modules.material 60 | 61 | @property 62 | def background(self) -> BaseBackground: 63 | return self.sub_modules.background 64 | 65 | def set_geometry(self, geometry: BaseImplicitGeometry) -> None: 66 | self.sub_modules.geometry = geometry 67 | 68 | def set_material(self, material: BaseMaterial) -> None: 69 | self.sub_modules.material = material 70 | 71 | def set_background(self, background: BaseBackground) -> None: 72 | self.sub_modules.background = background 73 | 74 | 
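# Note on the SubModules wrapper used in Renderer.configure(): assigning
# geometry/material/background directly as attributes of this nn.Module would
# register them as child modules, duplicating their parameters in the
# renderer's state_dict and optimizer parameter groups. Holding them inside a
# plain dataclass keeps them as references only, so the owning system stays
# responsible for their parameters and checkpointing.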
75 | class VolumeRenderer(Renderer): 76 | pass 77 | 78 | 79 | class Rasterizer(Renderer): 80 | pass 81 | -------------------------------------------------------------------------------- /threestudio/models/renderers/deferred_volume_renderer.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | import threestudio 7 | from threestudio.models.renderers.base import VolumeRenderer 8 | 9 | 10 | class DeferredVolumeRenderer(VolumeRenderer): 11 | pass 12 | -------------------------------------------------------------------------------- /threestudio/models/renderers/gsgen_renderer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import sys 4 | import threestudio 5 | from threestudio.utils.typing import * 6 | from gs.gaussian_splatting import GaussianSplattingRenderer 7 | 8 | import numpy as np 9 | 10 | from torchvision.utils import save_image 11 | import pdb 12 | from omegaconf import OmegaConf 13 | 14 | @threestudio.register("gs-renderer") 15 | class GSRenderer(nn.Module): 16 | def __init__(self, ckpt=None): 17 | super(GSRenderer, self).__init__() 18 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 19 | if ckpt is None: 20 | ckpt = "path_to_your_ckpt.pt" 21 | ckpt = torch.load(ckpt, map_location="cpu") 22 | cfg = OmegaConf.create(ckpt["cfg"]) 23 | self.renderer = GaussianSplattingRenderer.load( 24 | cfg.renderer, ckpt["params"] 25 | ).to(device) 26 | cfg.type = 'fixed' 27 | cfg.color = [0.5, 0.5, 0.5] 28 | cfg.random_aug = False 29 | cfg.random_aug_prob = 0. 30 | self.renderer.setup_bg(cfg) 31 | print("===== [NOTE]: set gs bg color to 0.5 ====== ") 32 | self.cfg = cfg 33 | 34 | def forward( 35 | self, 36 | sampled_cameras, 37 | cam_id=None, 38 | gt_img=None, 39 | **kwargs 40 | ) -> Dict[str, Float[Tensor, "..."]]: 41 | with torch.cuda.amp.autocast(enabled=False): 42 | # rotate 43 | c2w = sampled_cameras['c2w'] 44 | c2w = torch.cat( 45 | [c2w, torch.zeros_like(c2w[:, :1])], dim=1 46 | ) 47 | c2w[:, 3, 3] = 1.0 48 | trans = torch.zeros_like(c2w).type(c2w.dtype) # b, 4, 4 49 | trans[:, 3, 3] = 1.0 50 | 51 | # trans[:, 0, 0] = -1. 52 | # trans[:, 1, 1] = -1. 53 | # trans[:, 2, 2] = 1. 54 | 55 | trans[:, 0, 0] = 0. 56 | trans[:, 0, 1] = -1. 57 | trans[:, 1, 0] = 1. 58 | trans[:, 1, 1] = 0. 59 | trans[:, 2, 2] = 1. 
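# The block above builds a homogeneous transform whose rotation part is
# [[0, -1, 0], [1, 0, 0], [0, 0, 1]], i.e. a 90-degree rotation about the
# world z axis; left-multiplying c2w by it (below) rotates the sampled camera
# poses in azimuth, presumably to reconcile the gsgen camera convention with
# the one used here. The commented-out block is an alternative that flips the
# sign of the x and y axes instead.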
60 | 61 | c2w = torch.matmul(trans, c2w) 62 | sampled_cameras['c2w'] = c2w[:, :3].cuda() 63 | out = self.renderer(sampled_cameras, self.cfg.use_bg, self.cfg.rgb_only) 64 | comp_rgb = out["rgb"] 65 | # save_image(out['rgb'].permute(0, 3, 1, 2), f'debug_data/gsgen_gt.png') 66 | 67 | return {"comp_rgb": comp_rgb} -------------------------------------------------------------------------------- /threestudio/models/renderers/magic123_renderer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import sys 4 | import threestudio 5 | from threestudio.utils.typing import * 6 | 7 | from threestudio.utils.config import ExperimentConfig, load_config 8 | import numpy as np 9 | from torchvision.utils import save_image 10 | import pdb 11 | from omegaconf import OmegaConf 12 | from dataclasses import dataclass, field 13 | 14 | @threestudio.register("magic123-renderer") 15 | class Magic123Renderer(nn.Module): 16 | def __init__(self, ckpt=None): 17 | super(Magic123Renderer, self).__init__() 18 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 19 | ckpt = "path_to_your_checkpoint.ckpt" if ckpt is None else ckpt 20 | cfg_path = "parsed.yaml" 21 | # parse YAML config to OmegaConf 22 | cfg: ExperimentConfig 23 | # cfg = load_config(args.config, cli_args=extras, n_gpus=n_gpus) 24 | cfg = load_config(cfg_path) 25 | self.cfg = cfg.system 26 | ckpt = torch.load(ckpt, map_location="cpu") 27 | self.geometry = threestudio.find(self.cfg.geometry_type)(self.cfg.geometry) 28 | self.material = threestudio.find(self.cfg.material_type)(self.cfg.material) if self.cfg.material_type != 'none' else None 29 | self.background = threestudio.find(self.cfg.background_type)( 30 | self.cfg.background 31 | ) if self.cfg.background_type != 'none' else None 32 | self.renderer = threestudio.find(self.cfg.renderer_type)( 33 | self.cfg.renderer, 34 | geometry=self.geometry, 35 | material=self.material, 36 | background=self.background, 37 | ) 38 | ckpt['state_dict']['background.env_color'] = torch.tensor([0.5, 0.5, 0.5]) 39 | 40 | self.load_state_dict(ckpt['state_dict'], strict=True) 41 | self.geometry.encoding.encoding.disable_mask = True 42 | 43 | 44 | def forward( 45 | self, 46 | **kwargs 47 | ) -> Dict[str, Float[Tensor, "..."]]: 48 | # with torch.no_grad(): 49 | out = self.renderer(**kwargs) 50 | comp_rgb = out["comp_rgb"] 51 | save_image(out['comp_rgb'].permute(0, 3, 1, 2), f'debug_data/magic123_gt.png') 52 | 53 | return {"comp_rgb": comp_rgb} -------------------------------------------------------------------------------- /threestudio/models/renderers/threestudio_renderer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import sys 4 | import threestudio 5 | from threestudio.utils.typing import * 6 | from threestudio.utils.config import ExperimentConfig, load_config 7 | from torchvision.utils import save_image 8 | 9 | @threestudio.register("threestudio-renderer") 10 | class ThreestudioRenderer(nn.Module): 11 | def __init__(self, ckpt=None): 12 | super(ThreestudioRenderer, self).__init__() 13 | ckpt_root = 'path_to_your_ckpt_root' 14 | ckpt = ckpt_root + '/ckpts/last.ckpt' 15 | ckpt = torch.load(ckpt, map_location="cpu") 16 | 17 | cfg_path = ckpt_root + '/configs/parsed.yaml' 18 | cfg = load_config(cfg_path) 19 | self.cfg = cfg.system 20 | 21 | self.configure() 22 | 23 | self.load_state_dict(ckpt['state_dict'], strict=False) 24 | print(f"==== loaded state 
dict from {ckpt_root} ====") 25 | 26 | 27 | def configure(self) -> None: 28 | 29 | self.geometry = threestudio.find(self.cfg.geometry_type)(self.cfg.geometry) 30 | 31 | self.material = threestudio.find(self.cfg.material_type)(self.cfg.material) 32 | self.background = threestudio.find(self.cfg.background_type)( 33 | self.cfg.background 34 | ) 35 | self.renderer = threestudio.find(self.cfg.renderer_type)( 36 | self.cfg.renderer, 37 | geometry=self.geometry, 38 | material=self.material, 39 | background=self.background, 40 | ) 41 | 42 | def forward( 43 | self, 44 | bg_color=None, 45 | force_shading=None, 46 | **kwargs 47 | ) -> Dict[str, Float[Tensor, "..."]]: 48 | with torch.cuda.amp.autocast(enabled=False): 49 | out = self.renderer(bg_color=bg_color, force_shading=force_shading, **kwargs) 50 | comp_rgb = out["comp_rgb"] 51 | save_image(out['comp_rgb'].permute(0, 3, 1, 2), f'debug_data/three_gt.png') 52 | 53 | return {"comp_rgb": comp_rgb} -------------------------------------------------------------------------------- /threestudio/scripts/make_training_vid.py: -------------------------------------------------------------------------------- 1 | # make_training_vid("outputs/zero123/64_teddy_rgba.png@20230627-195615", frames_per_vid=30, fps=20, max_iters=200) 2 | import argparse 3 | import glob 4 | import os 5 | 6 | import imageio 7 | import numpy as np 8 | from PIL import Image, ImageDraw 9 | from tqdm import tqdm 10 | 11 | 12 | def draw_text_in_image(img, texts): 13 | img = Image.fromarray(img) 14 | draw = ImageDraw.Draw(img) 15 | black, white = (0, 0, 0), (255, 255, 255) 16 | for i, text in enumerate(texts): 17 | draw.text((2, (img.size[1] // len(texts)) * i + 1), f"{text}", white) 18 | draw.text((0, (img.size[1] // len(texts)) * i + 1), f"{text}", white) 19 | draw.text((2, (img.size[1] // len(texts)) * i - 1), f"{text}", white) 20 | draw.text((0, (img.size[1] // len(texts)) * i - 1), f"{text}", white) 21 | draw.text((1, (img.size[1] // len(texts)) * i), f"{text}", black) 22 | return np.asarray(img) 23 | 24 | 25 | def make_training_vid(exp, frames_per_vid=1, fps=3, max_iters=None, max_vids=None): 26 | # exp = "/admin/home-vikram/git/threestudio/outputs/zero123/64_teddy_rgba.png@20230627-195615" 27 | files = glob.glob(os.path.join(exp, "save", "*.mp4")) 28 | if os.path.join(exp, "save", "training_vid.mp4") in files: 29 | files.remove(os.path.join(exp, "save", "training_vid.mp4")) 30 | its = [int(os.path.basename(file).split("-")[0].split("it")[-1]) for file in files] 31 | it_sort = np.argsort(its) 32 | files = list(np.array(files)[it_sort]) 33 | its = list(np.array(its)[it_sort]) 34 | max_vids = max_iters // its[0] if max_iters is not None else max_vids 35 | files, its = files[:max_vids], its[:max_vids] 36 | frames, i = [], 0 37 | for it, file in tqdm(zip(its, files), total=len(files)): 38 | vid = imageio.mimread(file) 39 | for _ in range(frames_per_vid): 40 | frame = vid[i % len(vid)] 41 | frame = draw_text_in_image(frame, [str(it)]) 42 | frames.append(frame) 43 | i += 1 44 | # Save 45 | imageio.mimwrite(os.path.join(exp, "save", "training_vid.mp4"), frames, fps=fps) 46 | 47 | 48 | def join(file1, file2, name): 49 | # file1 = "/admin/home-vikram/git/threestudio/outputs/zero123/OLD_64_dragon2_rgba.png@20230629-023028/save/it200-val.mp4" 50 | # file2 = "/admin/home-vikram/git/threestudio/outputs/zero123/64_dragon2_rgba.png@20230628-152734/save/it200-val.mp4" 51 | vid1 = imageio.mimread(file1) 52 | vid2 = imageio.mimread(file2) 53 | frames = [] 54 | for f1, f2 in zip(vid1, vid2): 55 | 
frames.append( 56 | np.concatenate([f1[:, : f1.shape[0]], f2[:, : f2.shape[0]]], axis=1) 57 | ) 58 | imageio.mimwrite(name, frames) 59 | 60 | 61 | if __name__ == "__main__": 62 | parser = argparse.ArgumentParser() 63 | parser.add_argument("--exp", help="directory of experiment") 64 | parser.add_argument( 65 | "--frames_per_vid", type=int, default=1, help="# of frames from each val vid" 66 | ) 67 | parser.add_argument("--fps", type=int, help="max # of iters to save") 68 | parser.add_argument("--max_iters", type=int, help="max # of iters to save") 69 | parser.add_argument( 70 | "--max_vids", 71 | type=int, 72 | help="max # of val videos to save. Will be overridden by max_iters", 73 | ) 74 | args = parser.parse_args() 75 | make_training_vid( 76 | args.exp, args.frames_per_vid, args.fps, args.max_iters, args.max_vids 77 | ) 78 | -------------------------------------------------------------------------------- /threestudio/scripts/run_gaussian.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | prompt_list = [ 4 | "a delicious hamburger", 5 | "A DSLR photo of a roast turkey on a platter", 6 | "A high quality photo of a dragon", 7 | "A DSLR photo of a bald eagle", 8 | "A bunch of blue rose, highly detailed", 9 | "A 3D model of an adorable cottage with a thatched roof", 10 | "A high quality photo of a furry corgi", 11 | "A DSLR photo of a panda", 12 | "a DSLR photo of a cat lying on its side batting at a ball of yarn", 13 | "a beautiful dress made out of fruit, on a mannequin. Studio lighting, high quality, high resolution", 14 | "a DSLR photo of a corgi wearing a beret and holding a baguette, standing up on two hind legs", 15 | "a zoomed out DSLR photo of a stack of pancakes", 16 | "a zoomed out DSLR photo of a baby bunny sitting on top of a stack of pancakes", 17 | ] 18 | negative_prompt = "oversaturated color, ugly, tiling, low quality, noise, ugly pattern" 19 | 20 | gpu_id = 0 21 | max_steps = 10 22 | val_check = 1 23 | out_name = "gsgen_baseline" 24 | for prompt in prompt_list: 25 | print(f"Running model on device {gpu_id}: ", prompt) 26 | command = [ 27 | "python", "launch.py", 28 | "--config", "configs/gaussian_splatting.yaml", 29 | "--train", 30 | f"system.prompt_processor.prompt={prompt}", 31 | f"system.prompt_processor.negative_prompt={negative_prompt}", 32 | f"name={out_name}", 33 | "--gpu", f"{gpu_id}" 34 | ] 35 | subprocess.run(command) 36 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123.sh: -------------------------------------------------------------------------------- 1 | NAME="dragon2" 2 | 3 | # Phase 1 - 64x64 4 | python launch.py --config configs/zero123.yaml --train --gpu 7 data.image_path=./load/images/${NAME}_rgba.png use_timestamp=False name=${NAME} tag=Phase1 # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase1 5 | 6 | # Phase 1.5 - 512 refine 7 | python launch.py --config configs/zero123-geometry.yaml --train --gpu 4 data.image_path=./load/images/${NAME}_rgba.png system.geometry_convert_from=./outputs/${NAME}/Phase1/ckpts/last.ckpt use_timestamp=False name=${NAME} tag=Phase1p5 # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase1p5 8 | 9 | # Phase 2 - dreamfusion 10 | python launch.py --config configs/experimental/imagecondition_zero123nerf.yaml --train --gpu 5 
data.image_path=./load/images/${NAME}_rgba.png system.prompt_processor.prompt="A 3D model of a friendly dragon" system.weights="/admin/home-vikram/git/threestudio/outputs/${NAME}/Phase1/ckpts/last.ckpt" name=${NAME} tag=Phase2 # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase2 11 | 12 | # Phase 2 - SDF + dreamfusion 13 | python launch.py --config configs/experimental/imagecondition_zero123nerf_refine.yaml --train --gpu 5 data.image_path=./load/images/${NAME}_rgba.png system.prompt_processor.prompt="A 3D model of a friendly dragon" system.geometry_convert_from="/admin/home-vikram/git/threestudio/outputs/${NAME}/Phase1/ckpts/last.ckpt" name=${NAME} tag=Phase2_refine # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase2_refine 14 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_comparison.sh: -------------------------------------------------------------------------------- 1 | # with standard zero123 2 | threestudio/scripts/run_zero123_phase.sh 6 anya_front 105000 0 3 | 4 | # with zero123XL (not released yet!) 5 | threestudio/scripts/run_zero123_phase.sh 1 anya_front XL_20230604 0 6 | threestudio/scripts/run_zero123_phase.sh 2 baby_phoenix_on_ice XL_20230604 20 7 | threestudio/scripts/run_zero123_phase.sh 3 beach_house_1 XL_20230604 50 8 | threestudio/scripts/run_zero123_phase.sh 4 bollywood_actress XL_20230604 0 9 | threestudio/scripts/run_zero123_phase.sh 5 beach_house_2 XL_20230604 30 10 | threestudio/scripts/run_zero123_phase.sh 6 hamburger XL_20230604 10 11 | threestudio/scripts/run_zero123_phase.sh 7 cactus XL_20230604 8 12 | threestudio/scripts/run_zero123_phase.sh 0 catstatue XL_20230604 50 13 | threestudio/scripts/run_zero123_phase.sh 1 church_ruins XL_20230604 0 14 | threestudio/scripts/run_zero123_phase.sh 2 firekeeper XL_20230604 10 15 | threestudio/scripts/run_zero123_phase.sh 3 futuristic_car XL_20230604 20 16 | threestudio/scripts/run_zero123_phase.sh 4 mona_lisa XL_20230604 10 17 | threestudio/scripts/run_zero123_phase.sh 5 teddy XL_20230604 20 18 | 19 | # set guidance_eval to 0, to greatly speed up training 20 | threestudio/scripts/run_zero123_phase.sh 7 anya_front XL_20230604 0 system.freq.guidance_eval=0 21 | 22 | # disable wandb for faster training (or if you don't want to use it) 23 | threestudio/scripts/run_zero123_phase.sh 7 anya_front XL_20230604 0 system.loggers.wandb.enable=false system.freq.guidance_eval=0 24 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_phase.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=$1 # e.g. 0 3 | IMAGE_PREFIX=$2 # e.g. "anya_front" 4 | ZERO123_PREFIX=$3 # e.g. "zero123-xl" 5 | ELEVATION=$4 # e.g. 0 6 | REST=${@:5:99} # e.g. 
"system.guidance.min_step_percent=0.1 system.guidance.max_step_percent=0.9" 7 | 8 | # change this config if you don't use wandb or want to speed up training 9 | python launch.py --config configs/zero123.yaml --train --gpu $GPU_ID system.loggers.wandb.enable=true system.loggers.wandb.project="claforte-noise_atten" \ 10 | system.loggers.wandb.name="${IMAGE_PREFIX}_zero123_${ZERO123_PREFIX}...fov20_${REST}" \ 11 | data.image_path=./load/images/${IMAGE_PREFIX}_rgba.png system.freq.guidance_eval=37 \ 12 | system.guidance.pretrained_model_name_or_path="./load/zero123/${ZERO123_PREFIX}.ckpt" \ 13 | system.guidance.cond_elevation_deg=$ELEVATION \ 14 | ${REST} 15 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_phase2.sh: -------------------------------------------------------------------------------- 1 | # Reconstruct Anya using latest Zero123XL, in <2000 steps. 2 | python launch.py --config configs/zero123.yaml --train --gpu 0 system.loggers.wandb.enable=true system.loggers.wandb.project="voletiv-anya-new" system.loggers.wandb.name="claforte_params" data.image_path=./load/images/anya_front_rgba.png system.freq.ref_or_zero123="accumulate" system.freq.guidance_eval=13 system.guidance.pretrained_model_name_or_path="./load/zero123/zero123-xl.ckpt" 3 | 4 | # PHASE 2 5 | python launch.py --config configs/experimental/imagecondition_zero123nerf.yaml --train --gpu 0 system.prompt_processor.prompt="A DSLR 3D photo of a cute anime schoolgirl stands proudly with her arms in the air, pink hair ( unreal engine 5 trending on Artstation Ghibli 4k )" system.weights=outputs/zero123/128_anya_front_rgba.png@20230623-145711/ckpts/last.ckpt system.freq.guidance_eval=13 system.loggers.wandb.enable=true system.loggers.wandb.project="voletiv-anya-new" data.image_path=./load/images/anya_front_rgba.png system.loggers.wandb.name="anya" data.random_camera.progressive_until=500 6 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_sbatch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | files = [ 5 | "~/git/threestudio/load/images/dog1_rgba.png", 6 | "~/git/threestudio/load/images/dragon2_rgba.png", 7 | ] 8 | 9 | for file in files: 10 | name = os.path.basename(file).split("_rgba.png")[0] 11 | with open( 12 | os.path.expanduser("~/git/threestudio/threestudio/scripts/zero123_sbatch.sh"), 13 | "w", 14 | ) as f: 15 | f.write("#!/bin/bash\n") 16 | f.write(f"#SBATCH --job-name=vikky_{name}\n") 17 | f.write("#SBATCH --account=mod3d\n") 18 | f.write("#SBATCH --partition=g40\n") 19 | f.write("#SBATCH --gpus=1\n") 20 | f.write("#SBATCH --time=0-00:07:00\n") 21 | f.write("conda activate three\n") 22 | f.write("cd ~/git/threestudio/\n") 23 | f.write(f"NAME={name}\n") 24 | # Phase 1 25 | f.write( 26 | "python launch.py --config configs/zero123.yaml --train data.image_path=./load/images/${NAME}_rgba.png use_timestamp=true name=${NAME} tag=Phase1 system.loggers.wandb.enable=false system.loggers.wandb.project='zero123' system.loggers.wandb.name=${NAME}_Phase1\n" 27 | ) 28 | # # Phase 1.5 29 | # f.write( 30 | # "python launch.py --config configs/zero123-geometry.yaml --train data.image_path=./load/images/${NAME}_rgba.png system.geometry_convert_from=./outputs/${NAME}/Phase1/ckpts/last.ckpt use_timestamp=False name=${NAME} tag=Phase1p5 system.loggers.wandb.enable=true system.loggers.wandb.project='zero123' 
system.loggers.wandb.name=${NAME}_Phase1p5\n" 31 | # ) 32 | os.system("sbatch ~/git/threestudio/threestudio/scripts/zero123_sbatch.sh") 33 | time.sleep(1) 34 | -------------------------------------------------------------------------------- /threestudio/scripts/zero123_demo.py: -------------------------------------------------------------------------------- 1 | # 1. Generate using StableDiffusionXL https://clipdrop.co/stable-diffusion 2 | 3 | # 2. Remove background https://clipdrop.co/remove-background 4 | 5 | # 3. Resize to 512x512 https://www.iloveimg.com/resize-image 6 | 7 | # (OPTIONAL) 8 | # 4. Estimate depth and normal https://omnidata.vision/demo/ (I used Omnidata Normal (with X-TC & 3DCC), and MiDaS Depth) 9 | 10 | 11 | # (OPTIONAL) 12 | # 5. Convert depth image from RGB to greyscale 13 | def depth_rgb_to_grey(depth_filename): 14 | # depth_filename = "image_depth.png" 15 | import cv2 16 | import numpy as np 17 | 18 | # import shutil 19 | # shutil.copyfile(depth_filename, depth_filename.replace("_depth", "_depth_orig")) 20 | depth = cv2.imread(depth_filename) 21 | depth = cv2.cvtColor(depth, cv2.COLOR_BGR2GRAY) 22 | mask = ( 23 | cv2.resize( 24 | cv2.imread(depth_filename.replace("_depth", "_rgba"), cv2.IMREAD_UNCHANGED)[ 25 | :, :, -1 26 | ], 27 | depth.shape, 28 | ) 29 | > 0 30 | ) 31 | # depth[mask] = (depth[mask] - depth.min()) / (depth.max() - depth.min() + 1e-9) 32 | depth = (depth - depth.min()) / (depth.max() - depth.min() + 1e-9) 33 | depth[~mask] = 0 34 | depth = (depth * 255).astype(np.uint8) 35 | cv2.imwrite(depth_filename, depth) 36 | 37 | 38 | # (OPTIONAL) 39 | # 6. Mask normal 40 | def normal_mask(normal_filename): 41 | # filename = "image_normal.png" 42 | import cv2 43 | 44 | # import shutil 45 | # shutil.copyfile(normal_filename, normal_filename.replace("_normal", "_normal_orig")) 46 | normal = cv2.imread(normal_filename) 47 | mask = ( 48 | cv2.resize( 49 | cv2.imread( 50 | normal_filename.replace("_normal", "_rgba"), cv2.IMREAD_UNCHANGED 51 | )[:, :, -1], 52 | normal.shape[:2], 53 | ) 54 | > 0 55 | ) 56 | normal[~mask] = 0 57 | cv2.imwrite(normal_filename, normal) 58 | 59 | 60 | # 5. Run Zero123 61 | # python launch.py --config configs/zero123.yaml --train data.image_path=./load/images/grootplant_rgba.png 62 | -------------------------------------------------------------------------------- /threestudio/scripts/zero123_sbatch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=vikky 3 | #SBATCH --account=mod3d 4 | #SBATCH --partition=g40 5 | #SBATCH --gpus=1 6 | #SBATCH --time=0-00:07:00 7 | conda activate three 8 | cd ~/git/threestudio/ 9 | NAME="dog1" 10 | python launch.py --config configs/zero123.yaml --train data.image_path=./load/images/${NAME}_rgba.png use_timestamp=False name=${NAME} tag=Phase1 system.loggers.wandb.enable=true system.loggers.wandb.project='zero123' system.loggers.wandb.name=${NAME}_Phase1 11 | -------------------------------------------------------------------------------- /threestudio/systems/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | control4d_multiview, 3 | dreamfusion, 4 | fantasia3d, 5 | imagedreamfusion, 6 | instructnerf2nerf, 7 | interactive3d, 8 | latentnerf, 9 | magic3d, 10 | prolificdreamer, 11 | sjc, 12 | textmesh, 13 | zero123, 14 | gaussian_splatting, 15 | magic123, 16 | ) 17 | -------------------------------------------------------------------------------- /threestudio/systems/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import warnings 3 | from bisect import bisect_right 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch.optim import lr_scheduler 8 | 9 | import threestudio 10 | 11 | 12 | def get_scheduler(name): 13 | if hasattr(lr_scheduler, name): 14 | return getattr(lr_scheduler, name) 15 | else: 16 | raise NotImplementedError 17 | 18 | 19 | def getattr_recursive(m, attr): 20 | for name in attr.split("."): 21 | m = getattr(m, name) 22 | return m 23 | 24 | 25 | def get_parameters(model, name): 26 | module = getattr_recursive(model, name) 27 | if isinstance(module, nn.Module): 28 | return module.parameters() 29 | elif isinstance(module, nn.Parameter): 30 | return module 31 | return [] 32 | 33 | 34 | def parse_optimizer(config, model): 35 | if hasattr(config, "params"): 36 | params = [ 37 | {"params": get_parameters(model, name), "name": name, **args} 38 | for name, args in config.params.items() 39 | ] 40 | threestudio.debug(f"Specify optimizer params: {config.params}") 41 | # print("=====params===") 42 | # print(params) 43 | # print(" len paprams ++++++++: ", len(params)) 44 | else: 45 | params = model.parameters() 46 | if config.name in ["FusedAdam"]: 47 | import apex 48 | 49 | optim = getattr(apex.optimizers, config.name)(params, **config.args) 50 | elif config.name in ["Adan"]: 51 | from threestudio.systems import optimizers 52 | 53 | optim = getattr(optimizers, config.name)(params, **config.args) 54 | else: 55 | optim = getattr(torch.optim, config.name)(params, **config.args) 56 | # print("===========++++++++++", optim.state_dict()) 57 | return optim 58 | 59 | 60 | def parse_scheduler(config, optimizer): 61 | interval = config.get("interval", "epoch") 62 | assert interval in ["epoch", "step"] 63 | if config.name == "SequentialLR": 64 | scheduler = { 65 | "scheduler": lr_scheduler.SequentialLR( 66 | optimizer, 67 | [ 68 | parse_scheduler(conf, optimizer)["scheduler"] 69 | for conf in config.schedulers 70 | ], 71 | milestones=config.milestones, 72 | ), 73 | "interval": interval, 74 | } 75 | elif config.name == "ChainedScheduler": 76 | scheduler = { 77 | "scheduler": lr_scheduler.ChainedScheduler( 78 | [ 79 | parse_scheduler(conf, optimizer)["scheduler"] 80 | for conf in config.schedulers 81 | ] 82 | ), 83 | "interval": interval, 84 | } 85 | else: 86 | scheduler = { 87 | "scheduler": get_scheduler(config.name)(optimizer, **config.args), 88 | "interval": interval, 89 | } 90 | return scheduler 91 | -------------------------------------------------------------------------------- /threestudio/utils/GAN/distribution.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class 
DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to( 34 | device=self.parameters.device 35 | ) 36 | 37 | def sample(self): 38 | x = self.mean + self.std * torch.randn(self.mean.shape).to( 39 | device=self.parameters.device 40 | ) 41 | return x 42 | 43 | def kl(self, other=None): 44 | if self.deterministic: 45 | return torch.Tensor([0.0]) 46 | else: 47 | if other is None: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, 50 | dim=[1, 2, 3], 51 | ) 52 | else: 53 | return 0.5 * torch.sum( 54 | torch.pow(self.mean - other.mean, 2) / other.var 55 | + self.var / other.var 56 | - 1.0 57 | - self.logvar 58 | + other.logvar, 59 | dim=[1, 2, 3], 60 | ) 61 | 62 | def nll(self, sample, dims=[1, 2, 3]): 63 | if self.deterministic: 64 | return torch.Tensor([0.0]) 65 | logtwopi = np.log(2.0 * np.pi) 66 | return 0.5 * torch.sum( 67 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 68 | dim=dims, 69 | ) 70 | 71 | def mode(self): 72 | return self.mean 73 | 74 | 75 | def normal_kl(mean1, logvar1, mean2, logvar2): 76 | """ 77 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 78 | Compute the KL divergence between two gaussians. 79 | Shapes are automatically broadcasted, so batches can be compared to 80 | scalars, among other use cases. 81 | """ 82 | tensor = None 83 | for obj in (mean1, logvar1, mean2, logvar2): 84 | if isinstance(obj, torch.Tensor): 85 | tensor = obj 86 | break 87 | assert tensor is not None, "at least one argument must be a Tensor" 88 | 89 | # Force variances to be Tensors. Broadcasting helps convert scalars to 90 | # Tensors, but it does not work for torch.exp(). 
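# Closed-form KL between two diagonal Gaussians, which the expression below
# evaluates elementwise with logvar = log(sigma^2):
#   KL(N(mu1, var1) || N(mu2, var2))
#     = 0.5 * ( log(var2 / var1) + var1 / var2 + (mu1 - mu2)^2 / var2 - 1 )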
91 | logvar1, logvar2 = [ 92 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 93 | for x in (logvar1, logvar2) 94 | ] 95 | 96 | return 0.5 * ( 97 | -1.0 98 | + logvar2 99 | - logvar1 100 | + torch.exp(logvar1 - logvar2) 101 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 102 | ) 103 | -------------------------------------------------------------------------------- /threestudio/utils/GAN/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def generator_loss(discriminator, inputs, reconstructions, cond=None): 6 | if cond is None: 7 | logits_fake = discriminator(reconstructions.contiguous()) 8 | else: 9 | logits_fake = discriminator( 10 | torch.cat((reconstructions.contiguous(), cond), dim=1) 11 | ) 12 | g_loss = -torch.mean(logits_fake) 13 | return g_loss 14 | 15 | 16 | def hinge_d_loss(logits_real, logits_fake): 17 | loss_real = torch.mean(F.relu(1.0 - logits_real)) 18 | loss_fake = torch.mean(F.relu(1.0 + logits_fake)) 19 | d_loss = 0.5 * (loss_real + loss_fake) 20 | return d_loss 21 | 22 | 23 | def discriminator_loss(discriminator, inputs, reconstructions, cond=None): 24 | if cond is None: 25 | logits_real = discriminator(inputs.contiguous().detach()) 26 | logits_fake = discriminator(reconstructions.contiguous().detach()) 27 | else: 28 | logits_real = discriminator( 29 | torch.cat((inputs.contiguous().detach(), cond), dim=1) 30 | ) 31 | logits_fake = discriminator( 32 | torch.cat((reconstructions.contiguous().detach(), cond), dim=1) 33 | ) 34 | d_loss = hinge_d_loss(logits_real, logits_fake).mean() 35 | return d_loss 36 | -------------------------------------------------------------------------------- /threestudio/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import base 2 | -------------------------------------------------------------------------------- /threestudio/utils/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from threestudio.utils.config import parse_structured 7 | from threestudio.utils.misc import get_device, load_module_weights 8 | from threestudio.utils.typing import * 9 | 10 | 11 | class Configurable: 12 | @dataclass 13 | class Config: 14 | pass 15 | 16 | def __init__(self, cfg: Optional[dict] = None) -> None: 17 | super().__init__() 18 | self.cfg = parse_structured(self.Config, cfg) 19 | 20 | 21 | class Updateable: 22 | def do_update_step( 23 | self, epoch: int, global_step: int, on_load_weights: bool = False 24 | ): 25 | for attr in self.__dir__(): 26 | if attr.startswith("_"): 27 | continue 28 | try: 29 | module = getattr(self, attr) 30 | except: 31 | continue # ignore attributes like property, which can't be retrived using getattr? 
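# Recurse into any Updateable attribute first, then run this object's own
# update_step, so per-step schedules propagate through nested modules.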
32 | if isinstance(module, Updateable): 33 | module.do_update_step( 34 | epoch, global_step, on_load_weights=on_load_weights 35 | ) 36 | self.update_step(epoch, global_step, on_load_weights=on_load_weights) 37 | 38 | def update_step(self, epoch: int, global_step: int, on_load_weights: bool = False): 39 | # override this method to implement custom update logic 40 | # if on_load_weights is True, you should be careful doing things related to model evaluations, 41 | # as the models and tensors are not guarenteed to be on the same device 42 | pass 43 | 44 | 45 | def update_if_possible(module: Any, epoch: int, global_step: int) -> None: 46 | if isinstance(module, Updateable): 47 | module.do_update_step(epoch, global_step) 48 | 49 | 50 | class BaseObject(Updateable): 51 | @dataclass 52 | class Config: 53 | pass 54 | 55 | cfg: Config # add this to every subclass of BaseObject to enable static type checking 56 | 57 | def __init__( 58 | self, cfg: Optional[Union[dict, DictConfig]] = None, *args, **kwargs 59 | ) -> None: 60 | super().__init__() 61 | self.cfg = parse_structured(self.Config, cfg) 62 | self.device = get_device() 63 | self.configure(*args, **kwargs) 64 | 65 | def configure(self, *args, **kwargs) -> None: 66 | pass 67 | 68 | 69 | class BaseModule(nn.Module, Updateable): 70 | @dataclass 71 | class Config: 72 | weights: Optional[str] = None 73 | 74 | cfg: Config # add this to every subclass of BaseModule to enable static type checking 75 | 76 | def __init__( 77 | self, cfg: Optional[Union[dict, DictConfig]] = None, *args, **kwargs 78 | ) -> None: 79 | super().__init__() 80 | self.cfg = parse_structured(self.Config, cfg) 81 | self.device = get_device() 82 | self.configure(*args, **kwargs) 83 | if self.cfg.weights is not None: 84 | # format: path/to/weights:module_name 85 | weights_path, module_name = self.cfg.weights.split(":") 86 | state_dict, epoch, global_step = load_module_weights( 87 | weights_path, module_name=module_name, map_location="cpu" 88 | ) 89 | self.load_state_dict(state_dict) 90 | self.do_update_step( 91 | epoch, global_step, on_load_weights=True 92 | ) # restore states 93 | # dummy tensor to indicate model state 94 | self._dummy: Float[Tensor, "..."] 95 | self.register_buffer("_dummy", torch.zeros(0).float(), persistent=False) 96 | 97 | def configure(self, *args, **kwargs) -> None: 98 | pass 99 | -------------------------------------------------------------------------------- /threestudio/utils/mesh.py: -------------------------------------------------------------------------------- 1 | import trimesh 2 | from pathlib import Path 3 | import torch 4 | import numpy as np 5 | from vedo import Mesh 6 | 7 | 8 | def as_mesh(scene_or_mesh): 9 | """ 10 | Convert a possible scene to a mesh. 11 | 12 | If conversion occurs, the returned mesh has only vertex and face data. 
13 | 14 | reference: https://github.com/mikedh/trimesh/issues/507#issuecomment-514973337 15 | """ 16 | if isinstance(scene_or_mesh, trimesh.Scene): 17 | if len(scene_or_mesh.geometry) == 0: 18 | mesh = None # empty scene 19 | else: 20 | # we lose texture information here 21 | mesh = trimesh.util.concatenate( 22 | tuple( 23 | trimesh.Trimesh(vertices=g.vertices, faces=g.faces) 24 | for g in scene_or_mesh.geometry.values() 25 | ) 26 | ) 27 | else: 28 | assert isinstance(scene_or_mesh, trimesh.Trimesh) 29 | mesh = scene_or_mesh 30 | return mesh 31 | 32 | 33 | def load_mesh_obj(obj_file, texture_file=None): 34 | mesh = Mesh(str(obj_file)) 35 | if texture_file is not None: 36 | mesh.texture(texture_file) 37 | 38 | xyz = mesh.points() 39 | rgb = mesh.pointcolors.astype(np.float32) / 255.0 40 | 41 | return torch.from_numpy(xyz), torch.from_numpy(rgb) 42 | 43 | 44 | def load_mesh_as_pcd(mesh_file, texture_file): 45 | mesh_file = Path(mesh_file) 46 | if mesh_file.suffix == ".obj": 47 | return load_mesh_obj(mesh_file, texture_file) 48 | else: 49 | raise NotImplementedError(f"Unknown mesh file {mesh_file}") 50 | 51 | 52 | def load_mesh_as_pcd_trimesh(mesh_file, num_points): 53 | mesh = as_mesh(trimesh.load_mesh(mesh_file)) 54 | n = num_points 55 | points = [] 56 | while n > 0: 57 | p, _ = trimesh.sample.sample_surface_even(mesh, n) 58 | n -= p.shape[0] 59 | if n >= 0: 60 | points.append(p) 61 | else: 62 | points.append(p[:n]) 63 | if len(points) > 1: 64 | points = np.concatenate(points, axis=0) 65 | else: 66 | points = points[0] 67 | points = torch.from_numpy(points.astype(np.float32)) 68 | 69 | return points, torch.rand_like(points) 70 | -------------------------------------------------------------------------------- /threestudio/utils/perceptual/__init__.py: -------------------------------------------------------------------------------- 1 | from .perceptual import PerceptualLoss 2 | -------------------------------------------------------------------------------- /threestudio/utils/rasterize.py: -------------------------------------------------------------------------------- 1 | import nvdiffrast.torch as dr 2 | import torch 3 | 4 | from threestudio.utils.typing import * 5 | 6 | 7 | class NVDiffRasterizerContext: 8 | def __init__(self, context_type: str, device: torch.device) -> None: 9 | self.device = device 10 | self.ctx = self.initialize_context(context_type, device) 11 | 12 | def initialize_context( 13 | self, context_type: str, device: torch.device 14 | ) -> Union[dr.RasterizeGLContext, dr.RasterizeCudaContext]: 15 | if context_type == "gl": 16 | return dr.RasterizeGLContext(device=device) 17 | elif context_type == "cuda": 18 | return dr.RasterizeCudaContext(device=device) 19 | else: 20 | raise ValueError(f"Unknown rasterizer context type: {context_type}") 21 | 22 | def vertex_transform( 23 | self, verts: Float[Tensor, "Nv 3"], mvp_mtx: Float[Tensor, "B 4 4"] 24 | ) -> Float[Tensor, "B Nv 4"]: 25 | verts_homo = torch.cat( 26 | [verts, torch.ones([verts.shape[0], 1]).to(verts)], dim=-1 27 | ) 28 | return torch.matmul(verts_homo, mvp_mtx.permute(0, 2, 1)) 29 | 30 | def rasterize( 31 | self, 32 | pos: Float[Tensor, "B Nv 4"], 33 | tri: Integer[Tensor, "Nf 3"], 34 | resolution: Union[int, Tuple[int, int]], 35 | ): 36 | # rasterize in instance mode (single topology) 37 | return dr.rasterize(self.ctx, pos.float(), tri.int(), resolution, grad_db=True) 38 | 39 | def rasterize_one( 40 | self, 41 | pos: Float[Tensor, "Nv 4"], 42 | tri: Integer[Tensor, "Nf 3"], 43 | resolution: Union[int, Tuple[int, 
int]], 44 | ): 45 | # rasterize one single mesh under a single viewpoint 46 | rast, rast_db = self.rasterize(pos[None, ...], tri, resolution) 47 | return rast[0], rast_db[0] 48 | 49 | def antialias( 50 | self, 51 | color: Float[Tensor, "B H W C"], 52 | rast: Float[Tensor, "B H W 4"], 53 | pos: Float[Tensor, "B Nv 4"], 54 | tri: Integer[Tensor, "Nf 3"], 55 | ) -> Float[Tensor, "B H W C"]: 56 | return dr.antialias(color.float(), rast, pos.float(), tri.int()) 57 | 58 | def interpolate( 59 | self, 60 | attr: Float[Tensor, "B Nv C"], 61 | rast: Float[Tensor, "B H W 4"], 62 | tri: Integer[Tensor, "Nf 3"], 63 | rast_db=None, 64 | diff_attrs=None, 65 | ) -> Float[Tensor, "B H W C"]: 66 | return dr.interpolate( 67 | attr.float(), rast, tri.int(), rast_db=rast_db, diff_attrs=diff_attrs 68 | ) 69 | 70 | def interpolate_one( 71 | self, 72 | attr: Float[Tensor, "Nv C"], 73 | rast: Float[Tensor, "B H W 4"], 74 | tri: Integer[Tensor, "Nf 3"], 75 | rast_db=None, 76 | diff_attrs=None, 77 | ) -> Float[Tensor, "B H W C"]: 78 | return self.interpolate(attr[None, ...], rast, tri, rast_db, diff_attrs) 79 | -------------------------------------------------------------------------------- /threestudio/utils/typing.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains type annotations for the project, using 3 | 1. Python type hints (https://docs.python.org/3/library/typing.html) for Python objects 4 | 2. jaxtyping (https://github.com/google/jaxtyping/blob/main/API.md) for PyTorch tensors 5 | 6 | Two types of typing checking can be used: 7 | 1. Static type checking with mypy (install with pip and enabled as the default linter in VSCode) 8 | 2. Runtime type checking with typeguard (install with pip and triggered at runtime, mainly for tensor dtype and shape checking) 9 | """ 10 | 11 | # Basic types 12 | from typing import ( 13 | Any, 14 | Callable, 15 | Dict, 16 | Iterable, 17 | List, 18 | Literal, 19 | NamedTuple, 20 | NewType, 21 | Optional, 22 | Sized, 23 | Tuple, 24 | Type, 25 | TypeVar, 26 | Union, 27 | ) 28 | 29 | # Tensor dtype 30 | # for jaxtyping usage, see https://github.com/google/jaxtyping/blob/main/API.md 31 | from jaxtyping import Bool, Complex, Float, Inexact, Int, Integer, Num, Shaped, UInt 32 | 33 | # Config type 34 | from omegaconf import DictConfig 35 | 36 | # PyTorch Tensor type 37 | from torch import Tensor 38 | 39 | # Runtime type checking decorator 40 | from typeguard import typechecked as typechecker 41 | -------------------------------------------------------------------------------- /utils/test_pixart.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from diffusers import PixArtAlphaPipeline, ConsistencyDecoderVAE, AutoencoderKL 3 | from diffusers import DDIMScheduler, DDPMScheduler, StableDiffusionPipeline 4 | 5 | scheduler_params = { 6 | "beta_end": 0.02, # 0.012, 7 | "beta_schedule": "linear", # "scaled_linear", 8 | "beta_start": 0.0001, # 0.00085, 9 | "dynamic_thresholding_ratio": 0.995, 10 | "clip_sample": False, 11 | "num_train_timesteps": 1000, 12 | "prediction_type": "epsilon", # "v_prediction", 13 | "timestep_spacing": "linspace", 14 | "set_alpha_to_one": False, 15 | # "skip_prk_steps": True, 16 | # "steps_offset": 1, 17 | # "trained_betas": None 18 | } 19 | scheduler = DDIMScheduler(**scheduler_params) 20 | 21 | pipe = PixArtAlphaPipeline.from_pretrained("PixArt-alpha/PixArt-XL-2-512x512", torch_dtype=torch.float16, use_safetensors=True, scheduler=scheduler) 22 | 
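# Passing scheduler= here overrides the scheduler bundled with the PixArt-alpha
# checkpoint; the inline "# ..." values above appear to be the Stable-Diffusion
# style settings (scaled_linear betas, v_prediction) that were swapped out for
# a plain linear schedule with epsilon prediction.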
23 | # Enable memory optimizations. 24 | pipe.enable_model_cpu_offload() 25 | 26 | prompt = "A gundam robot holding a sword with angel wings" 27 | image = pipe(prompt, num_inference_steps=50).images[0] 28 | image.save("./test_pixart.png") -------------------------------------------------------------------------------- /utils/test_sdxl.py: -------------------------------------------------------------------------------- 1 | from diffusers import StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline 2 | from diffusers import DDIMScheduler 3 | import torch 4 | 5 | scheduler_params = { 6 | "beta_end": 0.012, 7 | "beta_schedule": "scaled_linear", 8 | "beta_start": 0.00085, 9 | # "dynamic_thresholding_ratio": 0.995, 10 | "clip_sample": False, 11 | "num_train_timesteps": 1000, 12 | "prediction_type": "epsilon", # "v_prediction", 13 | "timestep_spacing": "linspace", 14 | "set_alpha_to_one": False, 15 | # "skip_prk_steps": True, 16 | # "steps_offset": 1, 17 | # "trained_betas": None 18 | } 19 | scheduler = DDIMScheduler(**scheduler_params) 20 | 21 | pipeline = StableDiffusionXLPipeline.from_pretrained( 22 | "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True, scheduler=scheduler 23 | ).to("cuda") 24 | 25 | prompt = "A gundam robot holding a sword with angel wings, detailed, 8k" 26 | image = pipeline(prompt=prompt).images[0] 27 | image.save("./test_sdxl.png") --------------------------------------------------------------------------------