├── .editorconfig ├── .gitattributes ├── .github └── workflows │ └── pre-commit.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── 2dplayground.ipynb ├── DOCUMENTATION.md ├── LICENSE ├── README.md ├── configs ├── control4d-static.yaml ├── debugging │ ├── controlnet-canny.yaml │ ├── controlnet-normal.yaml │ ├── instructpix2pix.yaml │ └── stablediffusion.yaml ├── dreamfusion-if.yaml ├── dreamfusion-sd.yaml ├── experimental │ ├── co3d-imagecondition.yaml │ ├── imagecondition.yaml │ ├── imagecondition_zero123nerf.yaml │ └── imagecondition_zero123nerf_refine.yaml ├── fantasia3d-texture.yaml ├── fantasia3d.yaml ├── gradio │ ├── dreamfusion-if.yaml │ ├── dreamfusion-sd.yaml │ ├── fantasia3d.yaml │ ├── latentnerf.yaml │ ├── sjc.yaml │ └── textmesh-if.yaml ├── imagedream-sd21-shading.yaml ├── instructnerf2nerf.yaml ├── latentnerf-refine.yaml ├── latentnerf.yaml ├── magic3d-coarse-if.yaml ├── magic3d-coarse-sd.yaml ├── magic3d-refine-sd.yaml ├── mvdream-sd21-shading.yaml ├── mvdream-sd21.yaml ├── prolificdreamer-geometry.yaml ├── prolificdreamer-patch.yaml ├── prolificdreamer-scene.yaml ├── prolificdreamer-texture.yaml ├── prolificdreamer.yaml ├── sjc.yaml ├── sketchshape-refine.yaml ├── sketchshape.yaml ├── textmesh-if.yaml ├── zero123-geometry.yaml ├── zero123.yaml └── zero123_64.yaml ├── docker ├── Dockerfile └── compose.yaml ├── docs └── installation.md ├── extern ├── ImageDream │ ├── .gitignore │ ├── LICENSE-CODE │ ├── README.md │ ├── __init__.py │ ├── assets │ │ └── astronaut.png │ ├── imagedream │ │ ├── __init__.py │ │ ├── camera_utils.py │ │ ├── configs │ │ │ ├── sd_v2_base_ipmv.yaml │ │ │ └── sd_v2_base_ipmv_local.yaml │ │ ├── ldm │ │ │ ├── __init__.py │ │ │ ├── interface.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── autoencoder.py │ │ │ │ └── diffusion │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── ddim.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── attention.py │ │ │ │ ├── diffusionmodules │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── adaptors.py │ │ │ │ │ ├── model.py │ │ │ │ │ ├── openaimodel.py │ │ │ │ │ └── util.py │ │ │ │ ├── distributions │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── distributions.py │ │ │ │ ├── ema.py │ │ │ │ └── encoders │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── modules.py │ │ │ └── util.py │ │ └── model_zoo.py │ ├── requirements.txt │ ├── scripts │ │ ├── demo.py │ │ ├── demo.sh │ │ └── gradio_app.py │ └── setup.py └── ldm_zero123 │ ├── extras.py │ ├── guidance.py │ ├── lr_scheduler.py │ ├── models │ ├── autoencoder.py │ └── diffusion │ │ ├── __init__.py │ │ ├── classifier.py │ │ ├── ddim.py │ │ ├── ddpm.py │ │ ├── plms.py │ │ └── sampling_util.py │ ├── modules │ ├── attention.py │ ├── diffusionmodules │ │ ├── __init__.py │ │ ├── model.py │ │ ├── openaimodel.py │ │ └── util.py │ ├── distributions │ │ ├── __init__.py │ │ └── distributions.py │ ├── ema.py │ ├── encoders │ │ ├── __init__.py │ │ └── modules.py │ ├── evaluate │ │ ├── adm_evaluator.py │ │ ├── evaluate_perceptualsim.py │ │ ├── frechet_video_distance.py │ │ ├── ssim.py │ │ └── torch_frechet_video_distance.py │ ├── image_degradation │ │ ├── __init__.py │ │ ├── bsrgan.py │ │ ├── bsrgan_light.py │ │ ├── utils │ │ │ └── test.png │ │ └── utils_image.py │ ├── losses │ │ ├── __init__.py │ │ ├── contperceptual.py │ │ └── vqperceptual.py │ └── x_transformer.py │ ├── thirdp │ └── psp │ │ ├── helpers.py │ │ ├── id_loss.py │ │ └── model_irse.py │ └── util.py ├── gradio_app.py ├── launch.py ├── load ├── images │ ├── anya_front.png │ ├── anya_front_depth.png │ ├── anya_front_normal.png │ ├── 
anya_front_rgba.png │ ├── baby_phoenix_on_ice.png │ ├── baby_phoenix_on_ice_depth.png │ ├── baby_phoenix_on_ice_normal.png │ ├── baby_phoenix_on_ice_rgba.png │ ├── beach_house_1.png │ ├── beach_house_1_depth.png │ ├── beach_house_1_normal.png │ ├── beach_house_1_rgba.png │ ├── beach_house_2.png │ ├── beach_house_2_depth.png │ ├── beach_house_2_normal.png │ ├── beach_house_2_rgba.png │ ├── bollywood_actress.png │ ├── bollywood_actress_depth.png │ ├── bollywood_actress_normal.png │ ├── bollywood_actress_rgba.png │ ├── cactus.png │ ├── cactus_depth.png │ ├── cactus_normal.png │ ├── cactus_rgba.png │ ├── catstatue.png │ ├── catstatue_depth.png │ ├── catstatue_normal.png │ ├── catstatue_rgba.png │ ├── church_ruins.png │ ├── church_ruins_depth.png │ ├── church_ruins_normal.png │ ├── church_ruins_rgba.png │ ├── dog1.png │ ├── dragon2_rgba.png │ ├── firekeeper.jpg │ ├── firekeeper_depth.png │ ├── firekeeper_normal.png │ ├── firekeeper_rgba.png │ ├── futuristic_car.png │ ├── futuristic_car_depth.png │ ├── futuristic_car_normal.png │ ├── futuristic_car_rgba.png │ ├── grootplant_rgba.png │ ├── hamburger.png │ ├── hamburger_depth.png │ ├── hamburger_rgba.png │ ├── mona_lisa.png │ ├── mona_lisa_depth.png │ ├── mona_lisa_normal.png │ ├── mona_lisa_rgba.png │ ├── robot_rgba.png │ ├── teddy.png │ ├── teddy_depth.png │ ├── teddy_normal.png │ ├── teddy_rgba.png │ └── thorhammer_rgba.png ├── lights │ ├── LICENSE.txt │ ├── bsdf_256_256.bin │ └── mud_road_puresky_1k.hdr ├── make_prompt_library.py ├── prompt_library.json ├── shapes │ ├── README.md │ ├── animal.obj │ ├── blub.obj │ ├── cabin.obj │ ├── env_sphere.obj │ ├── hand_prismatic.obj │ ├── human.obj │ ├── nascar.obj │ ├── potion.obj │ └── teddy.obj ├── tets │ ├── 128_tets.npz │ ├── 32_tets.npz │ ├── 64_tets.npz │ └── generate_tets.py └── zero123 │ ├── download.sh │ └── sd-objaverse-finetune-c_concat-256.yaml ├── requirements-dev.txt ├── requirements.txt ├── threestudio.ipynb └── threestudio ├── __init__.py ├── data ├── __init__.py ├── co3d.py ├── image.py ├── multiview.py ├── random_multiview.py └── uncond.py ├── models ├── __init__.py ├── background │ ├── __init__.py │ ├── base.py │ ├── neural_environment_map_background.py │ ├── solid_color_background.py │ └── textured_background.py ├── exporters │ ├── __init__.py │ ├── base.py │ └── mesh_exporter.py ├── geometry │ ├── __init__.py │ ├── base.py │ ├── implicit_sdf.py │ ├── implicit_volume.py │ ├── tetrahedra_sdf_grid.py │ └── volume_grid.py ├── guidance │ ├── __init__.py │ ├── controlnet_guidance.py │ ├── deep_floyd_guidance.py │ ├── instructpix2pix_guidance.py │ ├── multiview_diffusion_guidance.py │ ├── stable_diffusion_guidance.py │ ├── stable_diffusion_vsd_guidance.py │ └── zero123_guidance.py ├── isosurface.py ├── materials │ ├── __init__.py │ ├── base.py │ ├── diffuse_with_point_light_material.py │ ├── hybrid_rgb_latent_material.py │ ├── neural_radiance_material.py │ ├── no_material.py │ ├── pbr_material.py │ └── sd_latent_adapter_material.py ├── mesh.py ├── networks.py ├── prompt_processors │ ├── __init__.py │ ├── base.py │ ├── deepfloyd_prompt_processor.py │ ├── dummy_prompt_processor.py │ └── stable_diffusion_prompt_processor.py └── renderers │ ├── __init__.py │ ├── base.py │ ├── deferred_volume_renderer.py │ ├── gan_volume_renderer.py │ ├── nerf_volume_renderer.py │ ├── neus_volume_renderer.py │ ├── nvdiff_rasterizer.py │ └── patch_renderer.py ├── scripts ├── make_training_vid.py ├── run_imagedream.sh ├── run_zero123.sh ├── run_zero123_comparison.sh ├── run_zero123_phase.sh ├── 
run_zero123_phase2.sh ├── run_zero123_sbatch.py ├── zero123_demo.py └── zero123_sbatch.sh ├── systems ├── __init__.py ├── base.py ├── control4d_multiview.py ├── dreamfusion.py ├── fantasia3d.py ├── imagedream.py ├── imagedreamfusion.py ├── instructnerf2nerf.py ├── latentnerf.py ├── magic3d.py ├── mvdream.py ├── optimizers.py ├── prolificdreamer.py ├── sjc.py ├── textmesh.py ├── utils.py └── zero123.py └── utils ├── GAN ├── attention.py ├── discriminator.py ├── distribution.py ├── loss.py ├── mobilenet.py ├── network_util.py ├── util.py └── vae.py ├── __init__.py ├── base.py ├── callbacks.py ├── config.py ├── misc.py ├── ops.py ├── perceptual ├── __init__.py ├── perceptual.py └── utils.py ├── rasterize.py ├── saving.py └── typing.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*.py] 4 | charset = utf-8 5 | trim_trailing_whitespace = true 6 | end_of_line = lf 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 4 10 | 11 | [*.md] 12 | trim_trailing_whitespace = false 13 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.extension filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | on: [push, pull_request] 3 | 4 | concurrency: 5 | group: ${{ github.workflow }}-${{ github.ref }} 6 | cancel-in-progress: true 7 | 8 | jobs: 9 | pre-commit: 10 | runs-on: ubuntu-22.04 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Set up Python 3.8 14 | uses: actions/setup-python@v4 15 | with: 16 | python-version: '3.8' 17 | - name: Install pre-commit 18 | run: | 19 | pip install pre-commit 20 | pre-commit install 21 | - name: Run pre-commit 22 | run: pre-commit run --all-files 23 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.4.0 7 | hooks: 8 | - id: trailing-whitespace 9 | - id: check-ast 10 | - id: check-merge-conflict 11 | - id: check-yaml 12 | - id: end-of-file-fixer 13 | - id: trailing-whitespace 14 | args: [--markdown-linebreak-ext=md] 15 | 16 | - repo: https://github.com/psf/black 17 | rev: 23.3.0 18 | hooks: 19 | - id: black 20 | language_version: python3.8 21 | 22 | - repo: https://github.com/pycqa/isort 23 | rev: 5.12.0 24 | hooks: 25 | - id: isort 26 | exclude: README.md 27 | args: ["--profile", "black"] 28 | 29 | # temporarily disable static type checking 30 | # - repo: https://github.com/pre-commit/mirrors-mypy 31 | # rev: v1.2.0 32 | # hooks: 33 | # - id: mypy 34 | # args: ["--ignore-missing-imports", "--scripts-are-modules", "--pretty"] 35 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | disable=R,C 2 | 3 | [TYPECHECK] 4 | # List of members which are set dynamically and missed by pylint inference 5 | # system, and so shouldn't trigger E1101 when accessed. Python regular 6 | # expressions are accepted. 
7 | generated-members=numpy.*,torch.*,cv2.* 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ImageDream Reconstruction 2 | Peng Wang, Yichun Shi 3 | 4 | [Project Page](https://image-dream.github.io/) | [Paper](https://arxiv.org/abs/2312.02201) | [Demo]() 5 | 6 | [imagedream-threestudio-teaser](https://github.com/bytedance/ImageDream/assets/146033206/bcf67b1a-38f9-42cf-81df-b8b2f4fa007f) 7 | 8 | ## Installation 9 | 10 | This part is the same as the original [MVDream-threestudio](https://github.com/bytedance/MVDream-threestudio). Skip it if you have already installed the environment. 11 | 12 | 13 | ## Quickstart 14 | Clone the model card from the [Hugging Face ImageDream Model Page](https://huggingface.co/Peng-Wang/ImageDream/) into ```./extern/ImageDream/release_models/``` 15 | 16 | In the paper, we use the configuration with soft shading, which in most cases requires an A100 GPU to compute normals: 17 | ```sh 18 | export PYTHONPATH=$PYTHONPATH:./extern/ImageDream 19 | image_file="./extern/ImageDream/assets/astronaut.png" 20 | ckpt_file="./extern/ImageDream/release_models/ImageDream/sd-v2.1-base-4view-ipmv.pt" 21 | cfg_file="./extern/ImageDream/imagedream/configs/sd_v2_base_ipmv.yaml" 22 | method="imagedream-sd21-shading" 23 | python3 launch.py \ 24 | --config configs/$method.yaml --train --gpu 0 \ 25 | name="imagedream-sd21-shading" tag="astronaut" \ 26 | system.prompt_processor.prompt="an astronaut riding a horse" \ 27 | system.prompt_processor.image_path="${image_file}" \ 28 | system.guidance.ckpt_path="${ckpt_file}" \ 29 | system.guidance.config_path="${cfg_file}" 30 | ``` 31 | 32 | ***For the diffusion-only model, refer to the subdirectory*** ```./extern/ImageDream/``` 33 | ***Check*** ```./threestudio/scripts/run_imagedream.sh``` ***for a bash example.*** 34 | 35 | 36 | ## Credits 37 | - This code is forked from [threestudio](https://github.com/threestudio-project/threestudio) and [MVDream](https://github.com/bytedance/MVDream-threestudio) for SDS and 3D generation. 38 | 39 | ## Tips 40 | 1. Place the object in the center of the image and do not make it too large or too small. 41 | 2. If the object is cut off by the image edge, enlarge the elevation and fov ranges in the config, e.g. ```[0, 30]```; otherwise, do image outpainting and follow tip 1. 42 | 3. Check the results with the ImageDream diffusion model before running 3D generation, to save time. 43 | 44 | ## Pre-computed Results 45 | - There is some randomness in the diffusion model, and baseline results are time-consuming to reproduce.
We put our pre-computed results for reproducing Tab.1 in the paper in a [hugging face dataset card](https://huggingface.co/datasets/Peng-Wang/ImageDream) 46 | 47 | 48 | ## Citing 49 | If you find ImageDream helpful, please consider citing: 50 | 51 | ``` bibtex 52 | @article{wang2023imagedream, 53 | title={ImageDream: Image-Prompt Multi-view Diffusion for 3D Generation}, 54 | author={Wang, Peng and Shi, Yichun}, 55 | journal={arXiv preprint arXiv:2312.02201}, 56 | year={2023} 57 | } 58 | ``` 59 | -------------------------------------------------------------------------------- /configs/control4d-static.yaml: -------------------------------------------------------------------------------- 1 | name: "control4d-static" 2 | tag: "${basename:${data.dataroot}}_${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "multiview-camera-datamodule" 7 | data: 8 | train_downsample_resolution: 2 9 | eval_downsample_resolution: 2 10 | dataroot: ??? 11 | 12 | system_type: "control4d-multiview-system" 13 | system: 14 | start_editing_step: 2000 15 | 16 | geometry_type: "implicit-volume" 17 | geometry: 18 | radius: 2. 19 | n_feature_dims: 11 20 | normal_type: analytic 21 | pos_encoding_config: 22 | otype: HashGrid 23 | n_levels: 16 24 | n_features_per_level: 2 25 | log2_hashmap_size: 19 26 | base_resolution: 16 27 | per_level_scale: 1.4472692374403782 # max resolution 4096 28 | density_bias: "blob_magic3d" 29 | density_activation: softplus 30 | density_blob_scale: 10. 31 | density_blob_std: 0.5 32 | isosurface_resolution: 128 33 | isosurface_threshold: auto 34 | isosurface_coarse_to_fine: true 35 | 36 | material_type: "hybrid-rgb-latent-material" 37 | material: 38 | n_output_dims: 11 39 | requires_normal: true 40 | 41 | background_type: "solid-color-background" 42 | background: 43 | n_output_dims: 11 44 | color: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 45 | 46 | renderer_type: "gan-volume-renderer" 47 | renderer: 48 | base_renderer_type: "nerf-volume-renderer" 49 | base_renderer: 50 | radius: ${system.geometry.radius} 51 | num_samples_per_ray: 512 52 | 53 | guidance_type: "stable-diffusion-controlnet-guidance" 54 | guidance: 55 | control_type: "normal" 56 | min_step_percent: 0.05 57 | max_step_percent: 0.8 58 | condition_scale: 1.0 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 63 | 64 | loggers: 65 | wandb: 66 | enable: false 67 | project: 'threestudio' 68 | 69 | loss: 70 | lambda_sds: 0. 71 | lambda_orient: [0, 10.0, 1000., 5000.0] 72 | lambda_sparsity: 1.0 73 | lambda_opaque: 1.0 74 | lambda_l1: 10. 75 | lambda_p: 10. 76 | lambda_kl: 0.000001 77 | lambda_G: 0.01 78 | lambda_D: 1. 
79 | optimizer: 80 | name: Adam 81 | args: 82 | lr: 0.01 83 | betas: [0.9, 0.99] 84 | eps: 1.e-15 85 | params: 86 | geometry: 87 | lr: 0.01 88 | background: 89 | lr: 0.001 90 | renderer.generator: 91 | lr: 0.0001 92 | renderer.local_encoder: 93 | lr: 0.0001 94 | renderer.global_encoder: 95 | lr: 0.0001 96 | optimizer_dis: 97 | name: Adam 98 | args: 99 | lr: 0.01 100 | betas: [0.9, 0.99] 101 | eps: 1.e-15 102 | params: 103 | renderer.discriminator: 104 | lr: 0.00001 105 | 106 | trainer: 107 | max_steps: 50000 108 | log_every_n_steps: 1 109 | num_sanity_val_steps: 0 110 | val_check_interval: 200 111 | enable_progress_bar: true 112 | precision: 16-mixed 113 | 114 | checkpoint: 115 | save_last: true 116 | save_top_k: -1 117 | every_n_train_steps: ${trainer.max_steps} 118 | -------------------------------------------------------------------------------- /configs/debugging/controlnet-canny.yaml: -------------------------------------------------------------------------------- 1 | system: 2 | guidance_type: "controlnet-guidance" 3 | guidance: 4 | control_type: "canny" 5 | min_step_percent: 0.8 6 | max_step_percent: 0.98 7 | 8 | prompt_processor_type: "stable-diffusion-prompt-processor" 9 | prompt_processor: 10 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 11 | prompt: "Elon Musk, RAW photo, (high detailed skin:1.2), 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3" 12 | # negative_prompt: "(overexposed, underexposed, out of focus, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck" 13 | negative_prompt: "(overexposed, underexposed, out of focus, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4)" 14 | -------------------------------------------------------------------------------- /configs/debugging/controlnet-normal.yaml: -------------------------------------------------------------------------------- 1 | system: 2 | guidance_type: "controlnet-guidance" 3 | guidance: 4 | control_type: "normal" 5 | min_step_percent: 0.05 6 | max_step_percent: 0.8 7 | 8 | prompt_processor_type: "stable-diffusion-prompt-processor" 9 | prompt_processor: 10 | pretrained_model_name_or_path: "SG161222/Realistic_Vision_V2.0" 11 | prompt: "Elon Musk, RAW photo, (high detailed skin:1.2), 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3" 12 | negative_prompt: "(overexposed, underexposed, out of focus, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4)" 13 | -------------------------------------------------------------------------------- /configs/debugging/instructpix2pix.yaml: -------------------------------------------------------------------------------- 1 | system: 2 | guidance_type: "instructpix2pix-guidance" 3 | guidance: 4 | min_step_percent: 0.8 5 | max_step_percent: 0.98 6 | 7 | prompt_processor_type: "stable-diffusion-prompt-processor" 8 | prompt_processor: 9 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 10 | prompt: "Turn him into Elon Musk" 11 | 
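Note on the method configs that follow (e.g. `configs/dreamfusion-if.yaml` below): required fields such as `system.prompt_processor.prompt` are marked with `???` and must be supplied at launch time. As a minimal usage sketch, assuming the same `launch.py` interface shown in the README Quickstart above (the prompt string here is only a placeholder):

```sh
# Minimal sketch: fill in a required (???) field of a method config from the command line.
# Assumes the launch.py flags used in the README Quickstart; the prompt below is a placeholder.
python3 launch.py \
  --config configs/dreamfusion-if.yaml --train --gpu 0 \
  system.prompt_processor.prompt="a DSLR photo of a hamburger"
```

Other `???` fields (e.g. `dataroot` in the multiview configs, or `geometry_convert_from` in the refine-stage configs) would be overridden the same way, using their dotted config paths.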
-------------------------------------------------------------------------------- /configs/debugging/stablediffusion.yaml: -------------------------------------------------------------------------------- 1 | system: 2 | prompt_processor_type: "stable-diffusion-prompt-processor" 3 | prompt_processor: 4 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 5 | prompt: "A cute panda" 6 | front_threshold: 30. 7 | back_threshold: 30. 8 | 9 | guidance_type: "stable-diffusion-vsd-guidance" 10 | guidance: 11 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 12 | pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1" 13 | guidance_scale: 7.5 14 | min_step_percent: 0.02 15 | max_step_percent: 0.98 16 | max_step_percent_annealed: 0.5 17 | anneal_start_step: 5000 18 | -------------------------------------------------------------------------------- /configs/dreamfusion-if.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.447269237440378 # max resolution 4096 47 | start_level: 8 # resolution ~200 48 | start_step: 2000 49 | update_steps: 500 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: scale_-11_01 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: scale_-11_01 59 | 60 | renderer_type: "nerf-volume-renderer" 61 | renderer: 62 | radius: ${system.geometry.radius} 63 | num_samples_per_ray: 512 64 | 65 | prompt_processor_type: "deep-floyd-prompt-processor" 66 | prompt_processor: 67 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 68 | prompt: ??? 69 | 70 | guidance_type: "deep-floyd-guidance" 71 | guidance: 72 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 73 | guidance_scale: 20. 74 | weighting_strategy: sds 75 | min_step_percent: 0.02 76 | max_step_percent: 0.98 77 | 78 | loggers: 79 | wandb: 80 | enable: false 81 | project: 'threestudio' 82 | name: None 83 | 84 | loss: 85 | lambda_sds: 1. 86 | lambda_orient: [0, 10., 1000., 5000] 87 | lambda_sparsity: 1. 
88 | lambda_opaque: 0.0 89 | optimizer: 90 | name: Adam 91 | args: 92 | lr: 0.01 93 | betas: [0.9, 0.99] 94 | eps: 1.e-15 95 | params: 96 | geometry: 97 | lr: 0.01 98 | background: 99 | lr: 0.001 100 | 101 | trainer: 102 | max_steps: 10000 103 | log_every_n_steps: 1 104 | num_sanity_val_steps: 0 105 | val_check_interval: 200 106 | enable_progress_bar: true 107 | precision: 16-mixed 108 | 109 | checkpoint: 110 | save_last: true # save at each validation time 111 | save_top_k: -1 112 | every_n_train_steps: ${trainer.max_steps} 113 | -------------------------------------------------------------------------------- /configs/dreamfusion-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.447269237440378 # max resolution 4096 47 | start_level: 8 # resolution ~200 48 | start_step: 2000 49 | update_steps: 500 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: sigmoid 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: sigmoid 59 | 60 | renderer_type: "nerf-volume-renderer" 61 | renderer: 62 | radius: ${system.geometry.radius} 63 | num_samples_per_ray: 512 64 | 65 | prompt_processor_type: "stable-diffusion-prompt-processor" 66 | prompt_processor: 67 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 68 | prompt: ??? 69 | 70 | guidance_type: "stable-diffusion-guidance" 71 | guidance: 72 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 73 | guidance_scale: 100. 74 | weighting_strategy: sds 75 | min_step_percent: 0.02 76 | max_step_percent: 0.98 77 | 78 | loggers: 79 | wandb: 80 | enable: false 81 | project: "threestudio" 82 | name: None 83 | 84 | loss: 85 | lambda_sds: 1. 86 | lambda_orient: [0, 10., 1000., 5000] 87 | lambda_sparsity: 1. 88 | lambda_opaque: 0. 
89 | optimizer: 90 | name: Adam 91 | args: 92 | lr: 0.01 93 | betas: [0.9, 0.99] 94 | eps: 1.e-15 95 | params: 96 | geometry: 97 | lr: 0.01 98 | background: 99 | lr: 0.001 100 | 101 | trainer: 102 | max_steps: 10000 103 | log_every_n_steps: 1 104 | num_sanity_val_steps: 0 105 | val_check_interval: 200 106 | enable_progress_bar: true 107 | precision: 16-mixed 108 | 109 | checkpoint: 110 | save_last: true # save at each validation time 111 | save_top_k: -1 112 | every_n_train_steps: ${trainer.max_steps} 113 | -------------------------------------------------------------------------------- /configs/fantasia3d-texture.yaml: -------------------------------------------------------------------------------- 1 | name: "fantasia3d-texture" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [3, 3] 12 | fovy_range: [25, 45] 13 | camera_perturb: 0. 14 | center_perturb: 0. 15 | up_perturb: 0. 16 | elevation_range: [-10, 45] 17 | azimuth_range: [-180, 180] 18 | batch_uniform_azimuth: true 19 | eval_camera_distance: 3. 20 | eval_fovy_deg: 45. 21 | 22 | system_type: "fantasia3d-system" 23 | system: 24 | # do texture training 25 | texture: true 26 | geometry_convert_from: ??? 27 | geometry_convert_inherit_texture: false 28 | geometry_type: "tetrahedra-sdf-grid" 29 | geometry: 30 | radius: 1.0 # consistent with coarse 31 | isosurface_resolution: 128 32 | isosurface_deformable_grid: true 33 | pos_encoding_config: 34 | otype: HashGrid 35 | n_levels: 16 36 | n_features_per_level: 2 37 | log2_hashmap_size: 19 38 | base_resolution: 16 39 | per_level_scale: 1.4472692374403782 # max resolution 4096 40 | n_feature_dims: 8 # albedo3 + roughness1 + metallic1 + bump3 41 | fix_geometry: true 42 | 43 | material_type: "pbr-material" 44 | material: 45 | material_activation: sigmoid 46 | environment_texture: "load/lights/mud_road_puresky_1k.hdr" 47 | environment_scale: 2.0 48 | min_metallic: 0.0 49 | max_metallic: 0.9 50 | min_roughness: 0.08 51 | max_roughness: 0.9 52 | use_bump: true 53 | 54 | background_type: "solid-color-background" 55 | 56 | renderer_type: "nvdiff-rasterizer" 57 | 58 | prompt_processor_type: "stable-diffusion-prompt-processor" 59 | prompt_processor: 60 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 61 | prompt: ??? 62 | 63 | guidance_type: "stable-diffusion-guidance" 64 | guidance: 65 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 66 | guidance_scale: 100 67 | weighting_strategy: sds 68 | min_step_percent: 0.02 69 | max_step_percent: 0.50 70 | 71 | loggers: 72 | wandb: 73 | enable: false 74 | project: "threestudio" 75 | 76 | loss: 77 | lambda_sds: 1. 78 | lambda_normal_consistency: 0. 
79 | 80 | optimizer: 81 | name: AdamW 82 | args: 83 | lr: 0.01 84 | betas: [0.9, 0.99] 85 | eps: 1.e-15 86 | 87 | trainer: 88 | max_steps: 5000 89 | log_every_n_steps: 1 90 | num_sanity_val_steps: 1 91 | val_check_interval: 500 92 | enable_progress_bar: true 93 | precision: 16-mixed 94 | 95 | checkpoint: 96 | save_last: true # save at each validation time 97 | save_top_k: -1 98 | every_n_train_steps: ${trainer.max_steps} 99 | -------------------------------------------------------------------------------- /configs/fantasia3d.yaml: -------------------------------------------------------------------------------- 1 | name: "fantasia3d" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [3, 3] 12 | fovy_range: [25, 45] 13 | camera_perturb: 0. 14 | center_perturb: 0. 15 | up_perturb: 0. 16 | elevation_range: [-10, 45] 17 | azimuth_range: [-180, 180] 18 | batch_uniform_azimuth: true 19 | eval_camera_distance: 3. 20 | eval_fovy_deg: 45. 21 | 22 | system_type: "fantasia3d-system" 23 | system: 24 | latent_steps: 1000 25 | geometry_type: "implicit-sdf" 26 | geometry: 27 | radius: 1.0 28 | n_feature_dims: 0 29 | isosurface_resolution: 128 30 | isosurface_deformable_grid: true 31 | isosurface_coarse_to_fine: false 32 | 33 | # initialize SDF by optimization 34 | shape_init: sphere 35 | shape_init_params: 0.5 36 | 37 | # or you can initialize SDF using a guide mesh 38 | # shape_init: mesh:load/shapes/human.obj 39 | # shape_init_params: 0.9 40 | # shape_init_mesh_up: +y 41 | # shape_init_mesh_front: +z 42 | 43 | # an alternative initialization implementation: 44 | # you can initialize SDF to sphere/ellipsoid by adding a bias value 45 | # which leads to more smooth initialized shape 46 | # sdf_bias: sphere 47 | # sdf_bias_params: 0.5 48 | # DO NOT use the two initialization methods together 49 | 50 | material_type: "no-material" # unused 51 | material: 52 | n_output_dims: 0 53 | 54 | background_type: "solid-color-background" # unused 55 | 56 | renderer_type: "nvdiff-rasterizer" 57 | 58 | prompt_processor_type: "stable-diffusion-prompt-processor" 59 | prompt_processor: 60 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 61 | prompt: ??? 62 | 63 | guidance_type: "stable-diffusion-guidance" 64 | guidance: 65 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 66 | guidance_scale: 100. 67 | max_step_percent: 0.5 68 | weighting_strategy: fantasia3d 69 | 70 | loggers: 71 | wandb: 72 | enable: false 73 | project: 'threestudio' 74 | name: None 75 | 76 | loss: 77 | lambda_sds: 1. 78 | lambda_normal_consistency: 0. 
79 | 80 | optimizer: 81 | name: AdamW 82 | args: 83 | lr: 0.001 84 | betas: [0.9, 0.99] 85 | eps: 1.e-15 86 | 87 | trainer: 88 | max_steps: 10000 89 | log_every_n_steps: 1 90 | num_sanity_val_steps: 1 91 | val_check_interval: 500 92 | enable_progress_bar: true 93 | precision: 16-mixed 94 | 95 | checkpoint: 96 | save_last: true # save at each validation time 97 | save_top_k: -1 98 | every_n_train_steps: ${trainer.max_steps} 99 | -------------------------------------------------------------------------------- /configs/gradio/dreamfusion-if.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.381912879967776 # max resolution 2048 47 | start_level: 10 # resolution ~300 48 | start_step: 2000 49 | update_steps: 400 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: scale_-11_01 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: scale_-11_01 59 | random_aug: true 60 | 61 | renderer_type: "nerf-volume-renderer" 62 | renderer: 63 | radius: ${system.geometry.radius} 64 | num_samples_per_ray: 512 65 | 66 | prompt_processor_type: "deep-floyd-prompt-processor" 67 | prompt_processor: 68 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 69 | prompt: ??? 70 | 71 | guidance_type: "deep-floyd-guidance" 72 | guidance: 73 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 74 | guidance_scale: 20. 75 | weighting_strategy: sds 76 | min_step_percent: 0.02 77 | max_step_percent: 0.98 78 | 79 | exporter_type: "mesh-exporter" 80 | exporter: 81 | fmt: obj 82 | save_uv: false 83 | context_type: cuda 84 | 85 | loggers: 86 | wandb: 87 | enable: false 88 | project: "threestudio" 89 | name: None 90 | 91 | loss: 92 | lambda_sds: 1. 93 | lambda_orient: [0, 10., 1000., 5000] 94 | lambda_sparsity: 1. 
95 | lambda_opaque: 0.0 96 | optimizer: 97 | name: Adam 98 | args: 99 | lr: 0.01 100 | betas: [0.9, 0.99] 101 | eps: 1.e-15 102 | params: 103 | geometry: 104 | lr: 0.01 105 | background: 106 | lr: 0.001 107 | 108 | trainer: 109 | max_steps: 5000 110 | log_every_n_steps: 1 111 | num_sanity_val_steps: 0 112 | val_check_interval: 100 113 | enable_progress_bar: true 114 | precision: 16-mixed 115 | 116 | checkpoint: 117 | save_last: false 118 | save_top_k: -1 119 | every_n_train_steps: 0 # do not save checkpoints during training 120 | -------------------------------------------------------------------------------- /configs/gradio/dreamfusion-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.381912879967776 # max resolution 2048 47 | start_level: 10 # resolution ~300 48 | start_step: 2000 49 | update_steps: 400 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: sigmoid 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: sigmoid 59 | random_aug: true 60 | 61 | renderer_type: "nerf-volume-renderer" 62 | renderer: 63 | radius: ${system.geometry.radius} 64 | num_samples_per_ray: 512 65 | 66 | prompt_processor_type: "stable-diffusion-prompt-processor" 67 | prompt_processor: 68 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 69 | prompt: ??? 70 | 71 | guidance_type: "stable-diffusion-guidance" 72 | guidance: 73 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 74 | guidance_scale: 100. 75 | weighting_strategy: sds 76 | min_step_percent: 0.02 77 | max_step_percent: 0.98 78 | grad_clip: [0, 0.5, 2.0, 5000] 79 | 80 | exporter_type: "mesh-exporter" 81 | exporter: 82 | fmt: obj 83 | save_uv: false 84 | context_type: cuda 85 | 86 | loggers: 87 | wandb: 88 | enable: false 89 | project: "threestudio" 90 | name: None 91 | 92 | loss: 93 | lambda_sds: 1. 94 | lambda_orient: [0, 10., 1000., 5000] 95 | lambda_sparsity: 1. 96 | lambda_opaque: 0. 
97 | optimizer: 98 | name: Adam 99 | args: 100 | lr: 0.01 101 | betas: [0.9, 0.99] 102 | eps: 1.e-15 103 | params: 104 | geometry: 105 | lr: 0.01 106 | background: 107 | lr: 0.001 108 | 109 | trainer: 110 | max_steps: 5000 111 | log_every_n_steps: 1 112 | num_sanity_val_steps: 0 113 | val_check_interval: 100 114 | enable_progress_bar: true 115 | precision: 16-mixed 116 | 117 | checkpoint: 118 | save_last: false 119 | save_top_k: -1 120 | every_n_train_steps: 0 # do not save checkpoints during training 121 | -------------------------------------------------------------------------------- /configs/gradio/fantasia3d.yaml: -------------------------------------------------------------------------------- 1 | name: "fantasia3d" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [3, 3] 12 | fovy_range: [25, 45] 13 | camera_perturb: 0. 14 | center_perturb: 0. 15 | up_perturb: 0. 16 | elevation_range: [-10, 45] 17 | azimuth_range: [-180, 180] 18 | batch_uniform_azimuth: true 19 | eval_camera_distance: 3. 20 | eval_fovy_deg: 45. 21 | 22 | system_type: "fantasia3d-system" 23 | system: 24 | latent_steps: 1000 25 | geometry_type: "implicit-sdf" 26 | geometry: 27 | radius: 1.0 28 | n_feature_dims: 0 29 | isosurface_resolution: 128 30 | isosurface_deformable_grid: true 31 | isosurface_coarse_to_fine: false 32 | 33 | # initialize SDF by optimization 34 | shape_init: sphere 35 | shape_init_params: 0.5 36 | 37 | # or you can initialize SDF using a guide mesh 38 | # shape_init: mesh:load/shapes/human.obj 39 | # shape_init_params: 0.9 40 | # shape_init_mesh_up: +y 41 | # shape_init_mesh_front: +z 42 | 43 | # an alternative initialization implementation: 44 | # you can initialize SDF to sphere/ellipsoid by adding a bias value 45 | # which leads to more smooth initialized shape 46 | # sdf_bias: sphere 47 | # sdf_bias_params: 0.5 48 | # DO NOT use the two initialization methods together 49 | 50 | material_type: "no-material" # unused 51 | material: 52 | n_output_dims: 0 53 | 54 | background_type: "solid-color-background" # unused 55 | 56 | renderer_type: "nvdiff-rasterizer" 57 | renderer: 58 | context_type: cuda 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 63 | prompt: ??? 64 | 65 | guidance_type: "stable-diffusion-guidance" 66 | guidance: 67 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 68 | guidance_scale: 100. 69 | max_step_percent: 0.5 70 | weighting_strategy: fantasia3d 71 | 72 | exporter_type: "mesh-exporter" 73 | exporter: 74 | fmt: obj 75 | save_uv: false 76 | save_texture: false 77 | context_type: cuda 78 | 79 | loggers: 80 | wandb: 81 | enable: false 82 | project: "threestudio" 83 | name: None 84 | 85 | loss: 86 | lambda_sds: 1. 87 | lambda_normal_consistency: 0. 
88 | 89 | optimizer: 90 | name: AdamW 91 | args: 92 | lr: 0.001 93 | betas: [0.9, 0.99] 94 | eps: 1.e-15 95 | 96 | trainer: 97 | max_steps: 5000 98 | log_every_n_steps: 1 99 | num_sanity_val_steps: 1 100 | val_check_interval: 200 101 | enable_progress_bar: true 102 | precision: 16-mixed 103 | 104 | checkpoint: 105 | save_last: false 106 | save_top_k: -1 107 | every_n_train_steps: 0 # do not save checkpoints during training 108 | -------------------------------------------------------------------------------- /configs/gradio/latentnerf.yaml: -------------------------------------------------------------------------------- 1 | name: "latentnerf" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | geometry_type: "implicit-volume" 13 | geometry: 14 | n_feature_dims: 4 15 | normal_type: null 16 | 17 | density_bias: "blob_dreamfusion" 18 | density_activation: trunc_exp 19 | density_blob_scale: 5. 20 | density_blob_std: 0.2 21 | 22 | pos_encoding_config: 23 | otype: HashGrid 24 | n_levels: 16 25 | n_features_per_level: 2 26 | log2_hashmap_size: 19 27 | base_resolution: 16 28 | per_level_scale: 1.381912879967776 # max resolution 2048 29 | 30 | material_type: "no-material" 31 | material: 32 | n_output_dims: 4 33 | color_activation: none 34 | 35 | background_type: "neural-environment-map-background" 36 | background: 37 | n_output_dims: 4 38 | color_activation: none 39 | 40 | renderer_type: "nerf-volume-renderer" 41 | renderer: 42 | num_samples_per_ray: 512 43 | 44 | prompt_processor_type: "stable-diffusion-prompt-processor" 45 | prompt_processor: 46 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 47 | prompt: ??? 48 | 49 | guidance_type: "stable-diffusion-guidance" 50 | guidance: 51 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 52 | guidance_scale: 100. 53 | weighting_strategy: sds 54 | grad_clip: [0, 2.0, 8.0, 5000] 55 | 56 | exporter_type: "dummy-exporter" 57 | 58 | loggers: 59 | wandb: 60 | enable: false 61 | project: "threestudio" 62 | name: None 63 | 64 | loss: 65 | lambda_sds: 1. 
66 | lambda_sparsity: 5.e-4 67 | lambda_opaque: 0.0 68 | lambda_orient: 0.0 69 | optimizer: 70 | name: Adam 71 | args: 72 | lr: 0.01 73 | betas: [0.9, 0.99] 74 | eps: 1.e-15 75 | scheduler: 76 | name: SequentialLR 77 | interval: step 78 | warmup_steps: 100 79 | milestones: 80 | - ${system.scheduler.warmup_steps} 81 | schedulers: 82 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 83 | args: 84 | start_factor: 0.1 85 | end_factor: 1.0 86 | total_iters: ${system.scheduler.warmup_steps} 87 | - name: ExponentialLR 88 | args: 89 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 90 | 91 | trainer: 92 | max_steps: 5000 93 | log_every_n_steps: 1 94 | num_sanity_val_steps: 0 95 | val_check_interval: 200 96 | enable_progress_bar: true 97 | precision: 16-mixed 98 | 99 | checkpoint: 100 | save_last: false 101 | save_top_k: -1 102 | every_n_train_steps: 0 # do not save checkpoints during training 103 | -------------------------------------------------------------------------------- /configs/gradio/sjc.yaml: -------------------------------------------------------------------------------- 1 | name: sjc 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: random-camera-datamodule 7 | data: 8 | camera_distance_range: [1.50, 1.50] 9 | elevation_range: [-10, 45] 10 | camera_perturb: 0.0 11 | center_perturb: 0.0 12 | up_perturb: 0.0 13 | light_position_perturb: 0.0 14 | eval_elevation_deg: 20.0 15 | 16 | system_type: sjc-system 17 | system: 18 | subpixel_rendering: false 19 | 20 | geometry_type: volume-grid 21 | geometry: 22 | normal_type: null 23 | grid_size: [100, 100, 100] 24 | density_bias: -1.0 25 | n_feature_dims: 4 26 | 27 | material_type: no-material 28 | material: 29 | n_output_dims: 4 30 | color_activation: none 31 | 32 | background_type: textured-background 33 | background: 34 | n_output_dims: 4 35 | color_activation: none 36 | height: 4 37 | width: 4 38 | 39 | renderer_type: nerf-volume-renderer 40 | renderer: 41 | num_samples_per_ray: 512 42 | grid_prune: false 43 | 44 | prompt_processor_type: stable-diffusion-prompt-processor 45 | prompt_processor: 46 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 47 | prompt: ??? 48 | view_dependent_prompt_front: true 49 | 50 | guidance_type: stable-diffusion-guidance 51 | guidance: 52 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 53 | guidance_scale: 100. 54 | use_sjc: true 55 | var_red: true 56 | min_step_percent: 0.01 57 | max_step_percent: 0.97 58 | grad_clip: [0, 2.0, 8.0, 5000] 59 | 60 | exporter_type: "dummy-exporter" 61 | 62 | loggers: 63 | wandb: 64 | enable: false 65 | project: "threestudio" 66 | name: None 67 | 68 | loss: 69 | lambda_sds: 1. 
70 | center_ratio: 0.78125 # = 50 / 64 71 | lambda_depth: 0 # or try 10 72 | lambda_emptiness: [5000, 1.e+4, 2.e+5, 5001] 73 | emptiness_scale: 10 74 | 75 | optimizer: 76 | name: Adamax 77 | args: 78 | lr: 0.05 79 | params: 80 | geometry: 81 | lr: 0.05 82 | background: 83 | lr: 0.0001 # maybe 0.001/0.01 is better 84 | 85 | trainer: 86 | max_steps: 5000 87 | log_every_n_steps: 1 88 | num_sanity_val_steps: 0 89 | val_check_interval: 200 90 | enable_progress_bar: true 91 | precision: 16-mixed 92 | 93 | checkpoint: 94 | save_last: false 95 | save_top_k: -1 96 | every_n_train_steps: 0 # do not save checkpoints during training 97 | -------------------------------------------------------------------------------- /configs/gradio/textmesh-if.yaml: -------------------------------------------------------------------------------- 1 | name: "textmesh-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "textmesh-system" 19 | system: 20 | geometry_type: "implicit-sdf" 21 | geometry: 22 | radius: 2.0 23 | normal_type: finite_difference 24 | # progressive eps from Neuralangelo 25 | finite_difference_normal_eps: progressive 26 | 27 | sdf_bias: sphere 28 | sdf_bias_params: 0.5 29 | 30 | # coarse to fine hash grid encoding 31 | pos_encoding_config: 32 | otype: ProgressiveBandHashGrid 33 | n_levels: 16 34 | n_features_per_level: 2 35 | log2_hashmap_size: 19 36 | base_resolution: 16 37 | per_level_scale: 1.381912879967776 # max resolution 2048 38 | start_level: 10 # resolution ~300 39 | start_step: 2000 40 | update_steps: 400 41 | 42 | material_type: "diffuse-with-point-light-material" 43 | material: 44 | ambient_only_steps: 2001 45 | albedo_activation: sigmoid 46 | 47 | background_type: "neural-environment-map-background" 48 | background: 49 | color_activation: sigmoid 50 | random_aug: true 51 | 52 | renderer_type: "neus-volume-renderer" 53 | renderer: 54 | radius: ${system.geometry.radius} 55 | num_samples_per_ray: 512 56 | cos_anneal_end_steps: ${trainer.max_steps} 57 | eval_chunk_size: 8192 58 | 59 | prompt_processor_type: "deep-floyd-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 62 | prompt: ??? 63 | 64 | guidance_type: "deep-floyd-guidance" 65 | guidance: 66 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 67 | guidance_scale: 20. 68 | weighting_strategy: sds 69 | min_step_percent: 0.02 70 | max_step_percent: 0.98 71 | 72 | exporter_type: "mesh-exporter" 73 | exporter: 74 | fmt: obj 75 | save_uv: false 76 | context_type: cuda 77 | 78 | loss: 79 | lambda_sds: 1. 80 | lambda_orient: 0.0 81 | lambda_sparsity: 0.0 82 | lambda_opaque: 0.0 83 | lambda_eikonal: 1000. 
84 | optimizer: 85 | name: Adam 86 | args: 87 | betas: [0.9, 0.99] 88 | eps: 1.e-15 89 | params: 90 | geometry.encoding: 91 | lr: 0.01 92 | geometry.sdf_network: 93 | lr: 0.001 94 | geometry.feature_network: 95 | lr: 0.001 96 | background: 97 | lr: 0.001 98 | renderer: 99 | lr: 0.001 100 | 101 | trainer: 102 | max_steps: 5000 103 | log_every_n_steps: 1 104 | num_sanity_val_steps: 0 105 | val_check_interval: 100 106 | enable_progress_bar: true 107 | precision: 16-mixed 108 | 109 | checkpoint: 110 | save_last: false 111 | save_top_k: -1 112 | every_n_train_steps: 0 # do not save checkpoints during training 113 | -------------------------------------------------------------------------------- /configs/instructnerf2nerf.yaml: -------------------------------------------------------------------------------- 1 | name: "instructnerf2nerf" 2 | tag: "${basename:${data.dataroot}}_${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "multiview-camera-datamodule" 7 | data: 8 | train_downsample_resolution: 2 9 | eval_downsample_resolution: 2 10 | dataroot: ??? 11 | 12 | system_type: "instructnerf2nerf-system" 13 | system: 14 | start_editing_step: 600 15 | per_editing_step: 10 16 | 17 | geometry_type: "implicit-volume" 18 | geometry: 19 | radius: 1. 20 | normal_type: analytic 21 | 22 | pos_encoding_config: 23 | otype: HashGrid 24 | n_levels: 16 25 | n_features_per_level: 2 26 | log2_hashmap_size: 19 27 | base_resolution: 16 28 | per_level_scale: 1.4472692374403782 # max resolution 4096 29 | 30 | density_bias: "blob_magic3d" 31 | density_activation: softplus 32 | density_blob_scale: 10. 33 | density_blob_std: 0.5 34 | 35 | 36 | material_type: "diffuse-with-point-light-material" 37 | material: 38 | ambient_only_steps: 9999999 39 | albedo_activation: sigmoid 40 | 41 | background_type: "neural-environment-map-background" 42 | background: 43 | color_activation: sigmoid 44 | random_aug: false 45 | 46 | renderer_type: "patch-renderer" 47 | renderer: 48 | base_renderer_type: "nerf-volume-renderer" 49 | base_renderer: 50 | radius: ${system.geometry.radius} 51 | num_samples_per_ray: 384 52 | patch_size: 128 53 | 54 | guidance_type: "stable-diffusion-instructpix2pix-guidance" 55 | guidance: 56 | min_step_percent: 0.02 57 | max_step_percent: 0.98 58 | 59 | prompt_processor_type: "stable-diffusion-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 62 | prompt: "Turn him into Elon Musk" 63 | 64 | loggers: 65 | wandb: 66 | enable: false 67 | project: 'threestudio' 68 | 69 | loss: 70 | lambda_sds: 0. 71 | lambda_orient: [0, 10.0, 1000., 5000.0] 72 | lambda_sparsity: 1.0 73 | lambda_opaque: 1.0 74 | lambda_l1: 10. 75 | lambda_p: 10. 
76 | optimizer: 77 | name: Adam 78 | args: 79 | lr: 0.01 80 | betas: [0.9, 0.99] 81 | eps: 1.e-15 82 | params: 83 | geometry: 84 | lr: 0.01 85 | background: 86 | lr: 0.001 87 | 88 | trainer: 89 | max_steps: 20000 90 | log_every_n_steps: 1 91 | num_sanity_val_steps: 0 92 | val_check_interval: 600 93 | enable_progress_bar: true 94 | precision: 16-mixed 95 | 96 | checkpoint: 97 | save_last: true 98 | save_top_k: -1 99 | every_n_train_steps: ${trainer.max_steps} 100 | -------------------------------------------------------------------------------- /configs/latentnerf-refine.yaml: -------------------------------------------------------------------------------- 1 | name: "latentnerf-refine" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | refinement: true 13 | weights: ??? 14 | weights_ignore_modules: ["material", "background"] 15 | 16 | geometry_type: "implicit-volume" 17 | geometry: 18 | n_feature_dims: 4 19 | normal_type: null 20 | 21 | density_bias: "blob_dreamfusion" 22 | density_activation: trunc_exp 23 | density_blob_scale: 5. 24 | density_blob_std: 0.2 25 | 26 | material_type: "sd-latent-adapter-material" 27 | 28 | background_type: "neural-environment-map-background" 29 | 30 | renderer_type: "nerf-volume-renderer" 31 | renderer: 32 | num_samples_per_ray: 512 33 | 34 | prompt_processor_type: "stable-diffusion-prompt-processor" 35 | prompt_processor: 36 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 37 | prompt: ??? 38 | 39 | guidance_type: "stable-diffusion-guidance" 40 | guidance: 41 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 42 | guidance_scale: 100. 43 | weighting_strategy: sds 44 | 45 | loggers: 46 | wandb: 47 | enable: false 48 | project: "threestudio" 49 | name: None 50 | 51 | loss: 52 | lambda_sds: 1. 
53 | lambda_sparsity: 5.e-4 54 | lambda_opaque: 0.0 55 | lambda_orient: 0.0 56 | optimizer: 57 | name: Adam 58 | args: 59 | lr: 0.01 60 | betas: [0.9, 0.99] 61 | eps: 1.e-15 62 | scheduler: 63 | name: SequentialLR 64 | interval: step 65 | warmup_steps: 100 66 | milestones: 67 | - ${system.scheduler.warmup_steps} 68 | schedulers: 69 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 70 | args: 71 | start_factor: 0.1 72 | end_factor: 1.0 73 | total_iters: ${system.scheduler.warmup_steps} 74 | - name: ExponentialLR 75 | args: 76 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 77 | 78 | trainer: 79 | max_steps: 10000 80 | log_every_n_steps: 1 81 | num_sanity_val_steps: 1 82 | val_check_interval: 200 83 | enable_progress_bar: true 84 | precision: 16-mixed 85 | 86 | checkpoint: 87 | save_last: true # save at each validation time 88 | save_top_k: -1 89 | every_n_train_steps: ${trainer.max_steps} 90 | -------------------------------------------------------------------------------- /configs/latentnerf.yaml: -------------------------------------------------------------------------------- 1 | name: "latentnerf" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | geometry_type: "implicit-volume" 13 | geometry: 14 | n_feature_dims: 4 15 | normal_type: null 16 | 17 | density_bias: "blob_dreamfusion" 18 | density_activation: trunc_exp 19 | density_blob_scale: 5. 20 | density_blob_std: 0.2 21 | 22 | material_type: "no-material" 23 | material: 24 | n_output_dims: 4 25 | color_activation: none 26 | 27 | background_type: "neural-environment-map-background" 28 | background: 29 | n_output_dims: 4 30 | color_activation: none 31 | 32 | renderer_type: "nerf-volume-renderer" 33 | renderer: 34 | num_samples_per_ray: 512 35 | 36 | prompt_processor_type: "stable-diffusion-prompt-processor" 37 | prompt_processor: 38 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 39 | prompt: ??? 40 | 41 | guidance_type: "stable-diffusion-guidance" 42 | guidance: 43 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 44 | guidance_scale: 100. 45 | weighting_strategy: sds 46 | 47 | loggers: 48 | wandb: 49 | enable: false 50 | project: "threestudio" 51 | name: None 52 | 53 | loss: 54 | lambda_sds: 1. 
55 | lambda_sparsity: 5.e-4 56 | lambda_opaque: 0.0 57 | lambda_orient: 0.0 58 | optimizer: 59 | name: Adam 60 | args: 61 | lr: 0.01 62 | betas: [0.9, 0.99] 63 | eps: 1.e-15 64 | scheduler: 65 | name: SequentialLR 66 | interval: step 67 | warmup_steps: 100 68 | milestones: 69 | - ${system.scheduler.warmup_steps} 70 | schedulers: 71 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 72 | args: 73 | start_factor: 0.1 74 | end_factor: 1.0 75 | total_iters: ${system.scheduler.warmup_steps} 76 | - name: ExponentialLR 77 | args: 78 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 79 | 80 | trainer: 81 | max_steps: 10000 82 | log_every_n_steps: 1 83 | num_sanity_val_steps: 0 84 | val_check_interval: 200 85 | enable_progress_bar: true 86 | precision: 16-mixed 87 | 88 | checkpoint: 89 | save_last: true # save at each validation time 90 | save_top_k: -1 91 | every_n_train_steps: ${trainer.max_steps} 92 | -------------------------------------------------------------------------------- /configs/magic3d-coarse-if.yaml: -------------------------------------------------------------------------------- 1 | name: "magic3d-coarse-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | width: 64 9 | height: 64 10 | camera_distance_range: [1.5, 2.0] 11 | light_sample_strategy: "magic3d" 12 | eval_camera_distance: 2.0 13 | eval_fovy_deg: 70. 14 | 15 | system_type: "magic3d-system" 16 | system: 17 | geometry_type: "implicit-volume" 18 | geometry: 19 | radius: 2. 20 | normal_type: analytic 21 | pos_encoding_config: 22 | otype: HashGrid 23 | n_levels: 16 24 | n_features_per_level: 2 25 | log2_hashmap_size: 19 26 | base_resolution: 16 27 | per_level_scale: 1.4472692374403782 # max resolution 4096 28 | density_bias: "blob_magic3d" 29 | density_activation: softplus 30 | density_blob_scale: 10. 31 | density_blob_std: 0.5 32 | isosurface_resolution: 128 33 | isosurface_threshold: auto 34 | isosurface_coarse_to_fine: true 35 | 36 | material_type: "diffuse-with-point-light-material" 37 | material: 38 | ambient_only_steps: 2001 39 | soft_shading: true 40 | 41 | background_type: "neural-environment-map-background" 42 | 43 | renderer_type: "nerf-volume-renderer" 44 | renderer: 45 | radius: ${system.geometry.radius} 46 | num_samples_per_ray: 512 47 | 48 | prompt_processor_type: "deep-floyd-prompt-processor" 49 | prompt_processor: 50 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 51 | prompt: ??? 52 | 53 | guidance_type: "deep-floyd-guidance" 54 | guidance: 55 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 56 | weighting_strategy: uniform 57 | guidance_scale: 20. 58 | min_step_percent: 0.02 59 | max_step_percent: 0.98 60 | 61 | loggers: 62 | wandb: 63 | enable: false 64 | project: 'threestudio' 65 | name: None 66 | 67 | loss: 68 | lambda_sds: 1. 69 | lambda_orient: [0, 10., 1000., 5000] 70 | lambda_sparsity: 1. 71 | lambda_opaque: 0. 
72 | optimizer: 73 | name: Adam 74 | args: 75 | lr: 0.01 76 | betas: [0.9, 0.99] 77 | eps: 1.e-15 78 | params: 79 | geometry: 80 | lr: 0.01 81 | background: 82 | lr: 0.001 83 | 84 | trainer: 85 | max_steps: 10000 86 | log_every_n_steps: 1 87 | num_sanity_val_steps: 0 88 | val_check_interval: 200 89 | enable_progress_bar: true 90 | precision: 16-mixed 91 | 92 | checkpoint: 93 | save_last: true 94 | save_top_k: -1 95 | every_n_train_steps: ${trainer.max_steps} 96 | -------------------------------------------------------------------------------- /configs/magic3d-coarse-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "magic3d-coarse-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | width: 64 9 | height: 64 10 | camera_distance_range: [1.5, 2.0] 11 | elevation_range: [-10, 45] 12 | light_sample_strategy: "magic3d" 13 | eval_camera_distance: 2.0 14 | eval_fovy_deg: 70. 15 | 16 | system_type: "magic3d-system" 17 | system: 18 | geometry_type: "implicit-volume" 19 | geometry: 20 | radius: 2. 21 | normal_type: analytic 22 | pos_encoding_config: 23 | otype: HashGrid 24 | n_levels: 16 25 | n_features_per_level: 2 26 | log2_hashmap_size: 19 27 | base_resolution: 16 28 | per_level_scale: 1.4472692374403782 # max resolution 4096 29 | density_bias: "blob_magic3d" 30 | density_activation: softplus 31 | density_blob_scale: 10. 32 | density_blob_std: 0.5 33 | isosurface_resolution: 128 34 | isosurface_threshold: auto 35 | isosurface_coarse_to_fine: true 36 | 37 | material_type: "diffuse-with-point-light-material" 38 | material: 39 | ambient_only_steps: 2001 40 | soft_shading: true 41 | 42 | background_type: "neural-environment-map-background" 43 | 44 | renderer_type: "nerf-volume-renderer" 45 | renderer: 46 | radius: ${system.geometry.radius} 47 | num_samples_per_ray: 512 48 | 49 | prompt_processor_type: "stable-diffusion-prompt-processor" 50 | prompt_processor: 51 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 52 | prompt: ??? 53 | 54 | guidance_type: "stable-diffusion-guidance" 55 | guidance: 56 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 57 | weighting_strategy: uniform 58 | guidance_scale: 100. 59 | min_step_percent: 0.02 60 | max_step_percent: 0.98 61 | 62 | loggers: 63 | wandb: 64 | enable: false 65 | project: "threestudio" 66 | name: None 67 | 68 | loss: 69 | lambda_sds: 1. 70 | lambda_orient: [0, 10., 1000., 5000] 71 | lambda_sparsity: 1. 72 | lambda_opaque: 0. 
73 | optimizer: 74 | name: Adam 75 | args: 76 | lr: 0.01 77 | betas: [0.9, 0.99] 78 | eps: 1.e-15 79 | params: 80 | geometry: 81 | lr: 0.01 82 | background: 83 | lr: 0.001 84 | 85 | trainer: 86 | max_steps: 10000 87 | log_every_n_steps: 1 88 | num_sanity_val_steps: 0 89 | val_check_interval: 200 90 | enable_progress_bar: true 91 | precision: 16-mixed 92 | 93 | checkpoint: 94 | save_last: true 95 | save_top_k: -1 96 | every_n_train_steps: ${trainer.max_steps} 97 | -------------------------------------------------------------------------------- /configs/magic3d-refine-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "magic3d-refine-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | width: 512 9 | height: 512 10 | camera_distance_range: [1.5, 2.0] 11 | elevation_range: [-10, 45] 12 | light_sample_strategy: "magic3d" 13 | fovy_range: [30, 45] 14 | eval_camera_distance: 2.0 15 | eval_fovy_deg: 70. 16 | 17 | system_type: "magic3d-system" 18 | system: 19 | refinement: true 20 | geometry_convert_from: ??? 21 | geometry_convert_inherit_texture: true 22 | geometry_type: "tetrahedra-sdf-grid" 23 | geometry: 24 | radius: 2.0 # consistent with coarse 25 | isosurface_resolution: 128 26 | isosurface_deformable_grid: true 27 | pos_encoding_config: # consistent with coarse, no progressive band 28 | otype: HashGrid 29 | n_levels: 16 30 | n_features_per_level: 2 31 | log2_hashmap_size: 19 32 | base_resolution: 16 33 | per_level_scale: 1.4472692374403782 # max resolution 4096 34 | fix_geometry: false # optimize grid sdf and deformation 35 | 36 | material_type: "diffuse-with-point-light-material" 37 | material: 38 | ambient_only_steps: 0 39 | soft_shading: true 40 | 41 | background_type: "neural-environment-map-background" 42 | 43 | renderer_type: "nvdiff-rasterizer" 44 | renderer: 45 | context_type: gl 46 | 47 | prompt_processor_type: "stable-diffusion-prompt-processor" 48 | prompt_processor: 49 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 50 | prompt: ??? 51 | 52 | guidance_type: "stable-diffusion-guidance" 53 | guidance: 54 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 55 | weighting_strategy: sds 56 | guidance_scale: 100. 57 | min_step_percent: 0.02 58 | max_step_percent: 0.5 59 | 60 | loggers: 61 | wandb: 62 | enable: false 63 | project: "threestudio" 64 | name: None 65 | 66 | loss: 67 | lambda_sds: 1. 68 | lambda_normal_consistency: 10000. 
69 | 70 | optimizer: 71 | name: Adam 72 | args: 73 | lr: 0.01 74 | betas: [0.9, 0.99] 75 | eps: 1.e-15 76 | 77 | trainer: 78 | max_steps: 5000 79 | log_every_n_steps: 1 80 | num_sanity_val_steps: 1 81 | val_check_interval: 100 82 | enable_progress_bar: true 83 | precision: 16-mixed 84 | 85 | checkpoint: 86 | save_last: true 87 | save_top_k: -1 88 | every_n_train_steps: ${trainer.max_steps} 89 | -------------------------------------------------------------------------------- /configs/mvdream-sd21.yaml: -------------------------------------------------------------------------------- 1 | name: "mvdream-sd21-rescale0.5" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-multiview-camera-datamodule" 7 | data: 8 | batch_size: [8,4] # must be dividable by n_view 9 | n_view: 4 10 | # 0-4999: 64x64, >=5000: 256x256 11 | width: [64, 256] 12 | height: [64, 256] 13 | resolution_milestones: [5000] 14 | camera_distance_range: [0.8, 1.0] # relative 15 | fovy_range: [15, 60] 16 | elevation_range: [0, 30] 17 | camera_perturb: 0. 18 | center_perturb: 0. 19 | up_perturb: 0. 20 | n_val_views: 4 21 | eval_camera_distance: 3.0 22 | eval_fovy_deg: 40. 23 | 24 | system_type: "mvdream-system" 25 | system: 26 | geometry_type: "implicit-volume" 27 | geometry: 28 | radius: 1.0 29 | normal_type: null 30 | 31 | density_bias: "blob_magic3d" 32 | density_activation: softplus 33 | density_blob_scale: 10. 34 | density_blob_std: 0.5 35 | 36 | pos_encoding_config: 37 | otype: HashGrid 38 | n_levels: 16 39 | n_features_per_level: 2 40 | log2_hashmap_size: 19 41 | base_resolution: 16 42 | per_level_scale: 1.447269237440378 # max resolution 4096 43 | 44 | material_type: "no-material" 45 | material: 46 | n_output_dims: 3 47 | color_activation: sigmoid 48 | 49 | background_type: "neural-environment-map-background" 50 | background: 51 | color_activation: sigmoid 52 | random_aug: true 53 | share_aug_bg: true 54 | 55 | renderer_type: "nerf-volume-renderer" 56 | renderer: 57 | radius: ${system.geometry.radius} 58 | num_samples_per_ray: 512 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 63 | prompt: ??? 64 | negative_prompt: "ugly, bad anatomy, blurry, pixelated obscure, unnatural colors, poor lighting, dull, and unclear, cropped, lowres, low quality, artifacts, duplicate, morbid, mutilated, poorly drawn face, deformed, dehydrated, bad proportions" 65 | front_threshold: 30. 66 | back_threshold: 30. 67 | 68 | guidance_type: "multiview-diffusion-guidance" 69 | guidance: 70 | model_name: "sd-v2.1-base-4view" 71 | ckpt_path: null # path to a pre-downloaded checkpoint file (null for loading from URL) 72 | guidance_scale: 50.0 73 | min_step_percent: [0, 0.98, 0.02, 8000] # (start_iter, start_val, end_val, end_iter) 74 | max_step_percent: [0, 0.98, 0.50, 8000] 75 | recon_loss: true 76 | recon_std_rescale: 0.5 77 | 78 | loggers: 79 | wandb: 80 | enable: false 81 | project: "threestudio" 82 | 83 | loss: 84 | lambda_sds: 1. 85 | lambda_orient: 0. 86 | lambda_sparsity: 0. 87 | lambda_opaque: 0. 88 | lambda_z_variance: 0. 
89 | optimizer: 90 | name: AdamW 91 | args: 92 | betas: [0.9, 0.99] 93 | eps: 1.e-15 94 | params: 95 | geometry.encoding: 96 | lr: 0.01 97 | geometry.density_network: 98 | lr: 0.001 99 | geometry.feature_network: 100 | lr: 0.001 101 | background: 102 | lr: 0.001 103 | 104 | trainer: 105 | max_steps: 10000 106 | log_every_n_steps: 1 107 | num_sanity_val_steps: 0 108 | val_check_interval: 200 109 | enable_progress_bar: true 110 | precision: 16-mixed 111 | 112 | checkpoint: 113 | save_last: true 114 | save_top_k: -1 115 | every_n_train_steps: ${trainer.max_steps} 116 | -------------------------------------------------------------------------------- /configs/prolificdreamer-geometry.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer-geometry" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [1.0, 1.5] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | camera_perturb: 0. 15 | center_perturb: 0. 16 | up_perturb: 0. 17 | eval_camera_distance: 1.5 18 | eval_fovy_deg: 70. 19 | 20 | system_type: "prolificdreamer-system" 21 | system: 22 | stage: geometry 23 | geometry_convert_from: ??? 24 | geometry_type: "tetrahedra-sdf-grid" 25 | geometry: 26 | radius: 1.0 # consistent with coarse 27 | isosurface_resolution: 128 28 | isosurface_deformable_grid: true 29 | geometry_only: true 30 | 31 | material_type: "no-material" # unused 32 | material: 33 | n_output_dims: 0 34 | 35 | background_type: "solid-color-background" # unused 36 | 37 | renderer_type: "nvdiff-rasterizer" 38 | renderer: 39 | context_type: gl 40 | 41 | prompt_processor_type: "stable-diffusion-prompt-processor" 42 | prompt_processor: 43 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 44 | prompt: lib:michelangelo_dog 45 | 46 | guidance_type: "stable-diffusion-guidance" 47 | guidance: 48 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 49 | guidance_scale: 100. 50 | min_step_percent: 0.02 51 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 52 | weighting_strategy: sds 53 | 54 | loggers: 55 | wandb: 56 | enable: false 57 | project: "threestudio" 58 | name: None 59 | 60 | loss: 61 | lambda_sds: 1. 62 | lambda_normal_consistency: 10000. 63 | lambda_laplacian_smoothness: 10000. 64 | 65 | optimizer: 66 | name: Adam 67 | args: 68 | lr: 0.005 69 | betas: [0.9, 0.99] 70 | eps: 1.e-15 71 | 72 | trainer: 73 | max_steps: 15000 74 | log_every_n_steps: 1 75 | num_sanity_val_steps: 1 76 | val_check_interval: 200 77 | enable_progress_bar: true 78 | precision: 32 79 | 80 | checkpoint: 81 | save_last: true 82 | save_top_k: -1 83 | every_n_train_steps: ${trainer.max_steps} 84 | -------------------------------------------------------------------------------- /configs/prolificdreamer-patch.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer-patch" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [1.0, 1.5] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | camera_perturb: 0. 15 | center_perturb: 0. 16 | up_perturb: 0. 17 | eval_camera_distance: 1.5 18 | eval_fovy_deg: 70. 
19 | 20 | system_type: "prolificdreamer-system" 21 | system: 22 | stage: coarse 23 | geometry_type: "implicit-volume" 24 | geometry: 25 | radius: 1.0 26 | normal_type: null 27 | 28 | density_bias: "blob_magic3d" 29 | density_activation: softplus 30 | density_blob_scale: 10. 31 | density_blob_std: 0.5 32 | 33 | pos_encoding_config: 34 | otype: HashGrid 35 | n_levels: 16 36 | n_features_per_level: 2 37 | log2_hashmap_size: 19 38 | base_resolution: 16 39 | per_level_scale: 1.447269237440378 # max resolution 4096 40 | 41 | material_type: "no-material" 42 | material: 43 | n_output_dims: 3 44 | color_activation: sigmoid 45 | 46 | background_type: "neural-environment-map-background" 47 | background: 48 | color_activation: sigmoid 49 | random_aug: true 50 | 51 | renderer_type: "patch-renderer" 52 | renderer: 53 | base_renderer_type: "nerf-volume-renderer" 54 | base_renderer: 55 | radius: ${system.geometry.radius} 56 | num_samples_per_ray: 512 57 | patch_size: 128 58 | 59 | prompt_processor_type: "stable-diffusion-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 62 | prompt: ??? 63 | front_threshold: 30. 64 | back_threshold: 30. 65 | 66 | guidance_type: "stable-diffusion-vsd-guidance" 67 | guidance: 68 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 69 | pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1" 70 | guidance_scale: 7.5 71 | min_step_percent: 0.02 72 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 73 | 74 | loggers: 75 | wandb: 76 | enable: false 77 | project: "threestudio" 78 | 79 | loss: 80 | lambda_vsd: 1. 81 | lambda_lora: 1. 82 | lambda_orient: 0. 83 | lambda_sparsity: 10. 84 | lambda_opaque: [10000, 0.0, 1000.0, 10001] 85 | lambda_z_variance: 0. 86 | optimizer: 87 | name: AdamW 88 | args: 89 | betas: [0.9, 0.99] 90 | eps: 1.e-15 91 | params: 92 | geometry.encoding: 93 | lr: 0.01 94 | geometry.density_network: 95 | lr: 0.001 96 | geometry.feature_network: 97 | lr: 0.001 98 | background: 99 | lr: 0.001 100 | guidance: 101 | lr: 0.0001 102 | 103 | trainer: 104 | max_steps: 25000 105 | log_every_n_steps: 1 106 | num_sanity_val_steps: 0 107 | val_check_interval: 200 108 | enable_progress_bar: true 109 | precision: 32 110 | 111 | checkpoint: 112 | save_last: true 113 | save_top_k: -1 114 | every_n_train_steps: ${trainer.max_steps} 115 | -------------------------------------------------------------------------------- /configs/prolificdreamer-scene.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: [1, 1] 9 | # 0-4999: 64x64, >=5000: 512x512 10 | # this drastically reduces VRAM usage as empty space is pruned in early training 11 | width: [64, 512] 12 | height: [64, 512] 13 | resolution_milestones: [5000] 14 | camera_distance_range: [0.1, 2.3] 15 | fovy_range: [40, 70] 16 | elevation_range: [-10, 45] 17 | camera_perturb: 0. 18 | center_perturb: 0. 19 | up_perturb: 0. 20 | eval_camera_distance: 2.0 21 | eval_fovy_deg: 70. 22 | 23 | system_type: "prolificdreamer-system" 24 | system: 25 | stage: coarse 26 | geometry_type: "implicit-volume" 27 | geometry: 28 | radius: 5.0 29 | normal_type: null 30 | 31 | density_bias: "blob_magic3d" 32 | density_activation: softplus 33 | density_blob_scale: -10. 
34 | density_blob_std: 2.5 35 | 36 | pos_encoding_config: 37 | otype: HashGrid 38 | n_levels: 16 39 | n_features_per_level: 2 40 | log2_hashmap_size: 19 41 | base_resolution: 16 42 | per_level_scale: 1.447269237440378 # max resolution 4096 43 | 44 | material_type: "no-material" 45 | material: 46 | n_output_dims: 3 47 | color_activation: sigmoid 48 | 49 | background_type: "neural-environment-map-background" 50 | background: 51 | color_activation: sigmoid 52 | 53 | renderer_type: "nerf-volume-renderer" 54 | renderer: 55 | radius: ${system.geometry.radius} 56 | num_samples_per_ray: 512 57 | 58 | prompt_processor_type: "stable-diffusion-prompt-processor" 59 | prompt_processor: 60 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 61 | prompt: ??? 62 | 63 | guidance_type: "stable-diffusion-vsd-guidance" 64 | guidance: 65 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 66 | pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1" 67 | guidance_scale: 7.5 68 | min_step_percent: 0.02 69 | max_step_percent: [10000, 0.98, 0.5, 10001] # annealed to 0.5 after 10000 steps 70 | view_dependent_prompting: false 71 | 72 | loggers: 73 | wandb: 74 | enable: false 75 | project: "threestudio" 76 | name: None 77 | 78 | loss: 79 | lambda_vsd: 1. 80 | lambda_lora: 1. 81 | lambda_orient: 0. 82 | lambda_sparsity: 0. 83 | lambda_opaque: 0. 84 | lambda_z_variance: 1. 85 | optimizer: 86 | name: AdamW 87 | args: 88 | betas: [0.9, 0.99] 89 | eps: 1.e-15 90 | params: 91 | geometry.encoding: 92 | lr: 0.01 93 | geometry.density_network: 94 | lr: 0.001 95 | geometry.feature_network: 96 | lr: 0.001 97 | background: 98 | lr: 0.001 99 | guidance: 100 | lr: 0.0001 101 | 102 | trainer: 103 | max_steps: 25000 104 | log_every_n_steps: 1 105 | num_sanity_val_steps: 0 106 | val_check_interval: 200 107 | enable_progress_bar: true 108 | precision: 32 109 | 110 | checkpoint: 111 | save_last: true 112 | save_top_k: -1 113 | every_n_train_steps: ${trainer.max_steps} 114 | -------------------------------------------------------------------------------- /configs/prolificdreamer-texture.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer-texture" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [1.0, 1.5] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | camera_perturb: 0. 15 | center_perturb: 0. 16 | up_perturb: 0. 17 | eval_camera_distance: 1.5 18 | eval_fovy_deg: 70. 19 | 20 | system_type: "prolificdreamer-system" 21 | system: 22 | stage: texture 23 | geometry_convert_from: ??? 
24 | geometry_type: "tetrahedra-sdf-grid" 25 | geometry: 26 | radius: 1.0 # consistent with last stage 27 | isosurface_resolution: 128 # consistent with last stage 28 | isosurface_deformable_grid: true 29 | isosurface_remove_outliers: true 30 | pos_encoding_config: 31 | otype: HashGrid 32 | n_levels: 16 33 | n_features_per_level: 2 34 | log2_hashmap_size: 19 35 | base_resolution: 16 36 | per_level_scale: 1.447269237440378 # max resolution 4096 37 | fix_geometry: true 38 | 39 | material_type: "no-material" 40 | material: 41 | n_output_dims: 3 42 | color_activation: sigmoid 43 | 44 | background_type: "neural-environment-map-background" 45 | background: 46 | color_activation: sigmoid 47 | 48 | renderer_type: "nvdiff-rasterizer" 49 | renderer: 50 | context_type: gl 51 | 52 | prompt_processor_type: "stable-diffusion-prompt-processor" 53 | prompt_processor: 54 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 55 | prompt: ??? 56 | front_threshold: 30. 57 | back_threshold: 30. 58 | 59 | guidance_type: "stable-diffusion-vsd-guidance" 60 | guidance: 61 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 62 | pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1" 63 | guidance_scale: 7.5 64 | min_step_percent: 0.02 65 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 66 | 67 | loggers: 68 | wandb: 69 | enable: false 70 | project: "threestudio" 71 | name: None 72 | 73 | loss: 74 | lambda_vsd: 1. 75 | lambda_lora: 1. 76 | optimizer: 77 | name: AdamW 78 | args: 79 | betas: [0.9, 0.99] 80 | eps: 1.e-15 81 | params: 82 | geometry.encoding: 83 | lr: 0.01 84 | geometry.feature_network: 85 | lr: 0.001 86 | background: 87 | lr: 0.001 88 | guidance: 89 | lr: 0.0001 90 | 91 | trainer: 92 | max_steps: 30000 93 | log_every_n_steps: 1 94 | num_sanity_val_steps: 1 95 | val_check_interval: 200 96 | enable_progress_bar: true 97 | precision: 32 98 | 99 | checkpoint: 100 | save_last: true 101 | save_top_k: -1 102 | every_n_train_steps: ${trainer.max_steps} 103 | -------------------------------------------------------------------------------- /configs/prolificdreamer.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: [1, 1] 9 | # 0-4999: 64x64, >=5000: 512x512 10 | # this drastically reduces VRAM usage as empty space is pruned in early training 11 | width: [64, 512] 12 | height: [64, 512] 13 | resolution_milestones: [5000] 14 | camera_distance_range: [1.0, 1.5] 15 | fovy_range: [40, 70] 16 | elevation_range: [-10, 45] 17 | camera_perturb: 0. 18 | center_perturb: 0. 19 | up_perturb: 0. 20 | eval_camera_distance: 1.5 21 | eval_fovy_deg: 70. 22 | 23 | system_type: "prolificdreamer-system" 24 | system: 25 | stage: coarse 26 | geometry_type: "implicit-volume" 27 | geometry: 28 | radius: 1.0 29 | normal_type: null 30 | 31 | density_bias: "blob_magic3d" 32 | density_activation: softplus 33 | density_blob_scale: 10. 
34 | density_blob_std: 0.5 35 | 36 | pos_encoding_config: 37 | otype: HashGrid 38 | n_levels: 16 39 | n_features_per_level: 2 40 | log2_hashmap_size: 19 41 | base_resolution: 16 42 | per_level_scale: 1.447269237440378 # max resolution 4096 43 | 44 | material_type: "no-material" 45 | material: 46 | n_output_dims: 3 47 | color_activation: sigmoid 48 | 49 | background_type: "neural-environment-map-background" 50 | background: 51 | color_activation: sigmoid 52 | random_aug: true 53 | 54 | renderer_type: "nerf-volume-renderer" 55 | renderer: 56 | radius: ${system.geometry.radius} 57 | num_samples_per_ray: 512 58 | 59 | prompt_processor_type: "stable-diffusion-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 62 | prompt: ??? 63 | front_threshold: 30. 64 | back_threshold: 30. 65 | 66 | guidance_type: "stable-diffusion-vsd-guidance" 67 | guidance: 68 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 69 | pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1" 70 | guidance_scale: 7.5 71 | min_step_percent: 0.02 72 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 73 | 74 | loggers: 75 | wandb: 76 | enable: false 77 | project: "threestudio" 78 | name: None 79 | 80 | loss: 81 | lambda_vsd: 1. 82 | lambda_lora: 1. 83 | lambda_orient: 0. 84 | lambda_sparsity: 10. 85 | lambda_opaque: [10000, 0.0, 1000.0, 10001] 86 | lambda_z_variance: 0. 87 | optimizer: 88 | name: AdamW 89 | args: 90 | betas: [0.9, 0.99] 91 | eps: 1.e-15 92 | params: 93 | geometry.encoding: 94 | lr: 0.01 95 | geometry.density_network: 96 | lr: 0.001 97 | geometry.feature_network: 98 | lr: 0.001 99 | background: 100 | lr: 0.001 101 | guidance: 102 | lr: 0.0001 103 | 104 | trainer: 105 | max_steps: 25000 106 | log_every_n_steps: 1 107 | num_sanity_val_steps: 0 108 | val_check_interval: 200 109 | enable_progress_bar: true 110 | precision: 32 111 | 112 | checkpoint: 113 | save_last: true 114 | save_top_k: -1 115 | every_n_train_steps: ${trainer.max_steps} 116 | -------------------------------------------------------------------------------- /configs/sjc.yaml: -------------------------------------------------------------------------------- 1 | name: sjc 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: outputs 4 | seed: 0 5 | 6 | data_type: random-camera-datamodule 7 | data: 8 | camera_distance_range: [1.50, 1.50] 9 | elevation_range: [-10, 45] 10 | camera_perturb: 0.0 11 | center_perturb: 0.0 12 | up_perturb: 0.0 13 | light_position_perturb: 0.0 14 | eval_elevation_deg: 20.0 15 | 16 | system_type: sjc-system 17 | system: 18 | geometry_type: volume-grid 19 | geometry: 20 | normal_type: null 21 | grid_size: [100, 100, 100] 22 | density_bias: -1.0 23 | n_feature_dims: 4 24 | 25 | material_type: no-material 26 | material: 27 | n_output_dims: 4 28 | color_activation: none 29 | 30 | background_type: textured-background 31 | background: 32 | n_output_dims: 4 33 | color_activation: none 34 | height: 4 35 | width: 4 36 | 37 | renderer_type: nerf-volume-renderer 38 | renderer: 39 | num_samples_per_ray: 512 40 | grid_prune: false 41 | 42 | prompt_processor_type: stable-diffusion-prompt-processor 43 | prompt_processor: 44 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 45 | prompt: ??? 
46 |   view_dependent_prompt_front: true 47 | 48 | guidance_type: stable-diffusion-guidance 49 | guidance: 50 |   pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 51 |   guidance_scale: 100. 52 |   use_sjc: true 53 |   var_red: true 54 |   min_step_percent: 0.01 55 |   max_step_percent: 0.97 56 | 57 | loggers: 58 |   wandb: 59 |     enable: false 60 |     project: "threestudio" 61 |     name: None 62 | 63 | loss: 64 |   lambda_sds: 1. 65 |   center_ratio: 0.78125 # = 50 / 64 66 |   lambda_depth: 0 # or try 10 67 |   lambda_emptiness: [5000, 1.e+4, 2.e+5, 5001] 68 |   emptiness_scale: 10 69 | 70 | optimizer: 71 |   name: Adamax 72 |   args: 73 |     lr: 0.05 74 |   params: 75 |     geometry: 76 |       lr: 0.05 77 |     background: 78 |       lr: 0.0001 # maybe 0.001/0.01 is better 79 | 80 | trainer: 81 |   max_steps: 10000 82 |   log_every_n_steps: 1 83 |   num_sanity_val_steps: 0 84 |   val_check_interval: 200 85 |   enable_progress_bar: true 86 |   precision: 16-mixed 87 | 88 | checkpoint: 89 |   save_last: true # save at each validation time 90 |   save_top_k: -1 91 |   every_n_train_steps: ${trainer.max_steps} 92 | -------------------------------------------------------------------------------- /configs/sketchshape-refine.yaml: -------------------------------------------------------------------------------- 1 | name: "sketchshape-refine" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 |   elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 |   refinement: true 13 |   weights: ??? 14 |   weights_ignore_modules: ["material", "background"] 15 |   guide_shape: ??? 16 | 17 |   geometry_type: "implicit-volume" 18 |   geometry: 19 |     n_feature_dims: 4 20 |     normal_type: null 21 | 22 |   material_type: "sd-latent-adapter-material" 23 | 24 |   background_type: "neural-environment-map-background" 25 | 26 |   renderer_type: "nerf-volume-renderer" 27 |   renderer: 28 |     num_samples_per_ray: 512 29 | 30 |   prompt_processor_type: "stable-diffusion-prompt-processor" 31 |   prompt_processor: 32 |     pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 33 |     prompt: ??? 34 | 35 |   guidance_type: "stable-diffusion-guidance" 36 |   guidance: 37 |     pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 38 |     guidance_scale: 100. 39 |     weighting_strategy: sds 40 | 41 |   loggers: 42 |     wandb: 43 |       enable: false 44 |       project: "threestudio" 45 |       name: None 46 | 47 |   loss: 48 |     lambda_sds: 1. 49 |     lambda_sparsity: 0.0 50 |     lambda_shape: 1.
51 | lambda_opaque: 0.0 52 | lambda_orient: 0.0 53 | optimizer: 54 | name: Adam 55 | args: 56 | lr: 0.01 57 | betas: [0.9, 0.99] 58 | eps: 1.e-15 59 | scheduler: 60 | name: SequentialLR 61 | interval: step 62 | warmup_steps: 100 63 | milestones: 64 | - ${system.scheduler.warmup_steps} 65 | schedulers: 66 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 67 | args: 68 | start_factor: 0.1 69 | end_factor: 1.0 70 | total_iters: ${system.scheduler.warmup_steps} 71 | - name: ExponentialLR 72 | args: 73 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 74 | 75 | trainer: 76 | max_steps: 10000 77 | log_every_n_steps: 1 78 | num_sanity_val_steps: 1 79 | val_check_interval: 200 80 | enable_progress_bar: true 81 | precision: 16-mixed 82 | 83 | checkpoint: 84 | save_last: true # save at each validation time 85 | save_top_k: -1 86 | every_n_train_steps: ${trainer.max_steps} 87 | -------------------------------------------------------------------------------- /configs/sketchshape.yaml: -------------------------------------------------------------------------------- 1 | name: "sketchshape" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | guide_shape: ??? 13 | 14 | geometry_type: "implicit-volume" 15 | geometry: 16 | n_feature_dims: 4 17 | normal_type: null 18 | 19 | material_type: "no-material" 20 | material: 21 | n_output_dims: 4 22 | color_activation: none 23 | 24 | background_type: "neural-environment-map-background" 25 | background: 26 | n_output_dims: 4 27 | color_activation: none 28 | 29 | renderer_type: "nerf-volume-renderer" 30 | renderer: 31 | num_samples_per_ray: 512 32 | 33 | prompt_processor_type: "stable-diffusion-prompt-processor" 34 | prompt_processor: 35 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 36 | prompt: ??? 37 | 38 | guidance_type: "stable-diffusion-guidance" 39 | guidance: 40 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 41 | guidance_scale: 100. 
42 | weighting_strategy: sds 43 | 44 | loggers: 45 | wandb: 46 | enable: false 47 | project: "threestudio" 48 | name: None 49 | 50 | loss: 51 | lambda_sds: 1.0 52 | lambda_sparsity: 0.0 53 | lambda_shape: 1.0 54 | lambda_opaque: 0.0 55 | lambda_orient: 0.0 56 | optimizer: 57 | name: Adam 58 | args: 59 | lr: 0.01 60 | betas: [0.9, 0.99] 61 | eps: 1.e-15 62 | scheduler: 63 | name: SequentialLR 64 | interval: step 65 | warmup_steps: 100 66 | milestones: 67 | - ${system.scheduler.warmup_steps} 68 | schedulers: 69 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 70 | args: 71 | start_factor: 0.1 72 | end_factor: 1.0 73 | total_iters: ${system.scheduler.warmup_steps} 74 | - name: ExponentialLR 75 | args: 76 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 77 | 78 | trainer: 79 | max_steps: 10000 80 | log_every_n_steps: 1 81 | num_sanity_val_steps: 0 82 | val_check_interval: 200 83 | enable_progress_bar: true 84 | precision: 16-mixed 85 | 86 | checkpoint: 87 | save_last: true # save at each validation time 88 | save_top_k: -1 89 | every_n_train_steps: ${trainer.max_steps} 90 | -------------------------------------------------------------------------------- /configs/textmesh-if.yaml: -------------------------------------------------------------------------------- 1 | name: "textmesh-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "textmesh-system" 19 | system: 20 | geometry_type: "implicit-sdf" 21 | geometry: 22 | radius: 2.0 23 | normal_type: finite_difference 24 | # progressive eps from Neuralangelo 25 | finite_difference_normal_eps: progressive 26 | 27 | sdf_bias: sphere 28 | sdf_bias_params: 0.5 29 | 30 | # coarse to fine hash grid encoding 31 | pos_encoding_config: 32 | otype: ProgressiveBandHashGrid 33 | n_levels: 16 34 | n_features_per_level: 2 35 | log2_hashmap_size: 19 36 | base_resolution: 16 37 | per_level_scale: 1.381912879967776 # max resolution 2048 38 | start_level: 8 # resolution ~200 39 | start_step: 2000 40 | update_steps: 500 41 | 42 | material_type: "diffuse-with-point-light-material" 43 | material: 44 | ambient_only_steps: 2001 45 | albedo_activation: sigmoid 46 | 47 | background_type: "neural-environment-map-background" 48 | background: 49 | color_activation: sigmoid 50 | 51 | renderer_type: "neus-volume-renderer" 52 | renderer: 53 | radius: ${system.geometry.radius} 54 | num_samples_per_ray: 512 55 | cos_anneal_end_steps: ${trainer.max_steps} 56 | eval_chunk_size: 8192 57 | 58 | prompt_processor_type: "deep-floyd-prompt-processor" 59 | prompt_processor: 60 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 61 | prompt: ??? 62 | 63 | guidance_type: "deep-floyd-guidance" 64 | guidance: 65 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 66 | guidance_scale: 20. 67 | weighting_strategy: sds 68 | min_step_percent: 0.02 69 | max_step_percent: 0.98 70 | 71 | loss: 72 | lambda_sds: 1. 73 | lambda_orient: 0.0 74 | lambda_sparsity: 0.0 75 | lambda_opaque: 0.0 76 | lambda_eikonal: 1000. 
77 | optimizer: 78 | name: Adam 79 | args: 80 | betas: [0.9, 0.99] 81 | eps: 1.e-15 82 | params: 83 | geometry.encoding: 84 | lr: 0.01 85 | geometry.sdf_network: 86 | lr: 0.001 87 | geometry.feature_network: 88 | lr: 0.001 89 | background: 90 | lr: 0.001 91 | renderer: 92 | lr: 0.001 93 | 94 | trainer: 95 | max_steps: 10000 96 | log_every_n_steps: 1 97 | num_sanity_val_steps: 0 98 | val_check_interval: 200 99 | enable_progress_bar: true 100 | precision: 16-mixed 101 | 102 | checkpoint: 103 | save_last: true # save at each validation time 104 | save_top_k: -1 105 | every_n_train_steps: ${trainer.max_steps} 106 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Reference: 2 | # https://github.com/cvpaperchallenge/Ascender 3 | # https://github.com/nerfstudio-project/nerfstudio 4 | 5 | FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 6 | 7 | ARG USER_NAME=dreamer 8 | ARG GROUP_NAME=dreamers 9 | ARG UID=1000 10 | ARG GID=1000 11 | 12 | # Set compute capability for nerfacc and tiny-cuda-nn 13 | # See https://developer.nvidia.com/cuda-gpus and limit number to speed-up build 14 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX" 15 | ENV TCNN_CUDA_ARCHITECTURES=90;89;86;80;75;70;61;60 16 | # Speed-up build for RTX 30xx 17 | # ENV TORCH_CUDA_ARCH_LIST="8.6" 18 | # ENV TCNN_CUDA_ARCHITECTURES=86 19 | # Speed-up build for RTX 40xx 20 | # ENV TORCH_CUDA_ARCH_LIST="8.9" 21 | # ENV TCNN_CUDA_ARCHITECTURES=89 22 | 23 | ENV CUDA_HOME=/usr/local/cuda 24 | ENV PATH=${CUDA_HOME}/bin:/home/${USER_NAME}/.local/bin:${PATH} 25 | ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 26 | ENV LIBRARY_PATH=${CUDA_HOME}/lib64/stubs:${LIBRARY_PATH} 27 | 28 | # apt install by root user 29 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 30 | build-essential \ 31 | curl \ 32 | git \ 33 | libegl1-mesa-dev \ 34 | libgl1-mesa-dev \ 35 | libgles2-mesa-dev \ 36 | libglib2.0-0 \ 37 | libsm6 \ 38 | libxext6 \ 39 | libxrender1 \ 40 | python-is-python3 \ 41 | python3.10-dev \ 42 | python3-pip \ 43 | wget \ 44 | && rm -rf /var/lib/apt/lists/* 45 | 46 | # Change user to non-root user 47 | RUN groupadd -g ${GID} ${GROUP_NAME} \ 48 | && useradd -ms /bin/sh -u ${UID} -g ${GID} ${USER_NAME} 49 | USER ${USER_NAME} 50 | 51 | RUN pip install --upgrade pip setuptools ninja 52 | RUN pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 --index-url https://download.pytorch.org/whl/cu118 53 | # Install nerfacc and tiny-cuda-nn before installing requirements.txt 54 | # because these two installations are time consuming and error prone 55 | RUN pip install git+https://github.com/KAIR-BAIR/nerfacc.git@v0.5.2 56 | RUN pip install git+https://github.com/NVlabs/tiny-cuda-nn.git#subdirectory=bindings/torch 57 | 58 | COPY requirements.txt /tmp 59 | RUN cd /tmp && pip install -r requirements.txt 60 | WORKDIR /home/${USER_NAME}/threestudio 61 | -------------------------------------------------------------------------------- /docker/compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | threestudio: 3 | build: 4 | context: ../ 5 | dockerfile: docker/Dockerfile 6 | args: 7 | # you can set environment variables, otherwise default values will be used 8 | USER_NAME: ${HOST_USER_NAME:-dreamer} # export HOST_USER_NAME=$USER 9 | GROUP_NAME: ${HOST_GROUP_NAME:-dreamers} 10 | UID: ${HOST_UID:-1000} # 
export HOST_UID=$(id -u) 11 |         GID: ${HOST_GID:-1000} # export HOST_GID=$(id -g) 12 |     shm_size: '4gb' 13 |     environment: 14 |       NVIDIA_DISABLE_REQUIRE: 1 # avoid wrong `nvidia-container-cli: requirement error` 15 |     tty: true 16 |     volumes: 17 |       - ../:/home/${HOST_USER_NAME:-dreamer}/threestudio 18 |     deploy: 19 |       resources: 20 |         reservations: 21 |           devices: 22 |             - driver: nvidia 23 |               capabilities: [gpu] 24 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Prerequisite 4 | 5 | - NVIDIA GPU with at least 6GB VRAM. The more memory you have, the more methods you can try and the higher the resolutions you can use. 6 | - [NVIDIA Driver](https://www.nvidia.com/Download/index.aspx) whose version is higher than the [Minimum Required Driver Version](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html) of the CUDA Toolkit you want to use. 7 | 8 | ## Install CUDA Toolkit 9 | 10 | You can skip this step if you have already installed a sufficiently new version or if you use Docker. 11 | 12 | Install [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive). 13 | 14 | - Example for Ubuntu 22.04: 15 |   - Run [command for CUDA 11.8 Ubuntu 22.04](https://developer.nvidia.com/cuda-11-8-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=deb_local) 16 | - Example for Ubuntu on WSL2: 17 |   - `sudo apt-key del 7fa2af80` 18 |   - Run [command for CUDA 11.8 WSL-Ubuntu](https://developer.nvidia.com/cuda-11-8-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=WSL-Ubuntu&target_version=2.0&target_type=deb_local) 19 | 20 | ## Install threestudio via Docker 21 | 22 | 1. [Install Docker Engine](https://docs.docker.com/engine/install/). 23 |    This document assumes you [install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/). 24 | 2. [Create `docker` group](https://docs.docker.com/engine/install/linux-postinstall/). 25 |    Otherwise, you need to type `sudo docker` instead of `docker`. 26 | 3. [Install NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#setting-up-nvidia-container-toolkit). 27 | 4. If you use WSL2, [enable systemd](https://learn.microsoft.com/en-us/windows/wsl/wsl-config#systemd-support). 28 | 5. Edit the [Dockerfile](../docker/Dockerfile) for your GPU to speed up the build. 29 |    The default Dockerfile takes into account many types of GPUs. 30 | 6. Run Docker via `docker compose`. 31 | 32 | ```bash 33 | cd docker/ 34 | docker compose build # build Docker image 35 | docker compose up -d # create and start a container in background 36 | docker compose exec threestudio bash # run bash in the container 37 | 38 | # Enjoy threestudio! 39 | 40 | exit # or Ctrl+D 41 | docker compose stop # stop the container 42 | docker compose start # start the container 43 | docker compose down # stop and remove the container 44 | ``` 45 | 46 | Note: The current Dockerfile will cause errors when using the OpenGL-based rasterizer of nvdiffrast. 47 | You can use the CUDA-based rasterizer instead by adding command-line overrides (see the example below) or editing the configs. 48 | 49 | - `system.renderer.context_type=cuda` for training 50 | - `system.exporter.context_type=cuda` for exporting meshes 51 | 52 | [This comment by the nvdiffrast author](https://github.com/NVlabs/nvdiffrast/issues/94#issuecomment-1288566038) could be a guide to resolving this limitation.
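For reference, a complete training command with this override might look like the following. This is a hedged sketch rather than a verbatim recipe: the `launch.py` flags (`--config`, `--train`, `--gpu`) and the dotted-key override syntax follow threestudio's usual command-line interface, which is not reproduced in this document, and the config path, checkpoint path, and prompt are placeholders.

```bash
# Hedged example: train a config that uses nvdiff-rasterizer (e.g.
# magic3d-refine-sd.yaml) while forcing the CUDA context instead of OpenGL.
# Flags and values are illustrative, not taken verbatim from this document.
python launch.py --config configs/magic3d-refine-sd.yaml --train --gpu 0 \
    system.geometry_convert_from=path/to/coarse/stage/checkpoint.ckpt \
    system.prompt_processor.prompt="a delicious hamburger" \
    system.renderer.context_type=cuda
```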
53 | -------------------------------------------------------------------------------- /extern/ImageDream/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | */__pycache__/ 6 | 7 | # dataset-related, pre-trained models, 8 | vae_models/vqgan 9 | vae_models/*.gz 10 | vae_models/*.pt 11 | vae_models/*vqgan 12 | *.pt 13 | *.pth 14 | 15 | # log files 16 | log/*.log 17 | out* 18 | test_results 19 | err* 20 | 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | pip-wheel-metadata/ 40 | share/python-wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .nox/ 60 | .coverage 61 | .coverage.* 62 | .cache 63 | nosetests.xml 64 | coverage.xml 65 | *.cover 66 | *.py,cover 67 | .hypothesis/ 68 | .pytest_cache/ 69 | 70 | # Translations 71 | *.mo 72 | *.pot 73 | 74 | # Django stuff: 75 | *.log 76 | local_settings.py 77 | db.sqlite3 78 | db.sqlite3-journal 79 | 80 | # Flask stuff: 81 | instance/ 82 | .webassets-cache 83 | 84 | # Scrapy stuff: 85 | .scrapy 86 | 87 | # Sphinx documentation 88 | docs/_build/ 89 | 90 | # PyBuilder 91 | target/ 92 | 93 | # Jupyter Notebook 94 | .ipynb_checkpoints 95 | 96 | # IPython 97 | profile_default/ 98 | ipython_config.py 99 | 100 | # pyenv 101 | .python-version 102 | 103 | # pipenv 104 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 105 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 106 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 107 | # install all needed dependencies. 108 | #Pipfile.lock 109 | 110 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 111 | __pypackages__/ 112 | 113 | # Celery stuff 114 | celerybeat-schedule 115 | celerybeat.pid 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # Environments 121 | .env 122 | .venv 123 | env/ 124 | venv/ 125 | ENV/ 126 | env.bak/ 127 | venv.bak/ 128 | 129 | # Spyder project settings 130 | .spyderproject 131 | .spyproject 132 | 133 | # Rope project settings 134 | .ropeproject 135 | 136 | # mkdocs documentation 137 | /site 138 | 139 | # mypy 140 | .mypy_cache/ 141 | .dmypy.json 142 | dmypy.json 143 | 144 | # Pyre type checker 145 | .pyre/ 146 | 147 | *.zip 148 | *.pkl 149 | *.csv 150 | *.ckpt 151 | *.parquet 152 | 153 | *.whl 154 | *.th 155 | *.onnx -------------------------------------------------------------------------------- /extern/ImageDream/LICENSE-CODE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 ByteDance 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /extern/ImageDream/README.md: -------------------------------------------------------------------------------- 1 | # ImageDream Diffusion 2 | Peng Wang, Yichun Shi 3 | 4 | | [Project Page](https://image-dream.github.io/) | [Paper](https://arxiv.org/abs/2312.02201) | [HuggingFace Demo]() | 5 | 6 | ## 7 | - **This repo inherits content from the [LDM]() and [MVDream]() repos, plus some adaptor modules from [IP-Adaptor]()** 8 | - **It only includes the diffusion model and 2D image generation. For 3D generation, please check [here](https://github.com/bytedance/ImageDream).** 9 | 10 | 11 | ## Installation 12 | Set up the environment for this repo as in [Stable-Diffusion](https://github.com/Stability-AI/stablediffusion). You can set up the environment by installing the given requirements: 13 | ``` bash 14 | pip install -r requirements.txt 15 | ``` 16 | 17 | To use ImageDream as a python module, you can install it by `pip install -e .` or: 18 | ```bash 19 | pip install git+https://github.com/bytedance/ImageDream/#subdirectory=extern/ImageDream 20 | ``` 21 | 22 | ## Image-to-Multi-View 23 | Clone the model card from the [Huggingface ImageDream Model Page](https://huggingface.co/Peng-Wang/ImageDream/) into ```./release_models/```. 24 | 25 | Place the object in the center of the RGBA image; a short description of the image is also necessary to obtain good results.
For the image-only case, one may run a simple caption model such as [Llava](https://llava.hliu.cc/) or [BLIP2](https://huggingface.co/spaces/Salesforce/BLIP2) to generate the description, which may give similar results. This also applies to 3D SDS. 26 | 27 | 28 | ``` bash 29 | export PYTHONPATH=$PYTHONPATH:./ 30 | python3 scripts/demo.py \ 31 |   --image "./assets/astronaut.png" \ 32 |   --text "an astronaut riding a horse" \ 33 |   --config_path "./imagedream/configs/sd_v2_base_ipmv.yaml" \ 34 |   --ckpt_path "./release_models/ImageDream/sd-v2.1-base-4view-ipmv.pt" \ 35 |   --mode "pixel" \ 36 |   --num_frames 5 37 | ``` 38 | 39 | Tips 40 | - The model is trained with the same elevation for the input image prompt and the synthesized views. Therefore, you may adjust the camera elevation in ```get_camera()``` for better results. In the paper, we adopt a unified elevation of 5 degrees. This also applies to threestudio fusion for better results. 41 | 42 | 43 | ## Acknowledgement 44 | This repository is heavily based on [Stable Diffusion](https://huggingface.co/stabilityai/stable-diffusion-2-1-base). We would like to thank the authors of these works for publicly releasing their code. 45 | 46 | ## Citation 47 | If you find ImageDream helpful, please consider citing: 48 | 49 | ``` bibtex 50 | @article{wang2023imagedream, 51 |   title={ImageDream: Image-Prompt Multi-view Diffusion for 3D Generation}, 52 |   author={Wang, Peng and Shi, Yichun}, 53 |   journal={arXiv preprint arXiv:2312.02201}, 54 |   year={2023} 55 | } 56 | ``` 57 | -------------------------------------------------------------------------------- /extern/ImageDream/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/__init__.py -------------------------------------------------------------------------------- /extern/ImageDream/assets/astronaut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/assets/astronaut.png -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/__init__.py: -------------------------------------------------------------------------------- 1 | from .model_zoo import build_model 2 | -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/camera_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def create_camera_to_world_matrix(elevation, azimuth): 6 |     elevation = np.radians(elevation) 7 |     azimuth = np.radians(azimuth) 8 |     # Convert elevation and azimuth angles to Cartesian coordinates on a unit sphere 9 |     x = np.cos(elevation) * np.sin(azimuth) 10 |     y = np.sin(elevation) 11 |     z = np.cos(elevation) * np.cos(azimuth) 12 | 13 |     # Calculate camera position, target, and up vectors 14 |     camera_pos = np.array([x, y, z]) 15 |     target = np.array([0, 0, 0]) 16 |     up = np.array([0, 1, 0]) 17 | 18 |     # Construct view matrix 19 |     forward = target - camera_pos 20 |     forward /= np.linalg.norm(forward) 21 |     right = np.cross(forward, up) 22 |     right /= np.linalg.norm(right) 23 |     new_up = np.cross(right, forward) 24 |     new_up /= np.linalg.norm(new_up) 25 |     cam2world = np.eye(4) 26 |     cam2world[:3, :3] = np.array([right, new_up, -forward]).T 27 |     cam2world[:3, 3] = camera_pos 28 |     return cam2world
29 | 30 | 31 | def convert_opengl_to_blender(camera_matrix): 32 | if isinstance(camera_matrix, np.ndarray): 33 | # Construct transformation matrix to convert from OpenGL space to Blender space 34 | flip_yz = np.array([[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) 35 | camera_matrix_blender = np.dot(flip_yz, camera_matrix) 36 | else: 37 | # Construct transformation matrix to convert from OpenGL space to Blender space 38 | flip_yz = torch.tensor( 39 | [[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]] 40 | ) 41 | if camera_matrix.ndim == 3: 42 | flip_yz = flip_yz.unsqueeze(0) 43 | camera_matrix_blender = torch.matmul(flip_yz.to(camera_matrix), camera_matrix) 44 | return camera_matrix_blender 45 | 46 | 47 | def normalize_camera(camera_matrix): 48 | """normalize the camera location onto a unit-sphere""" 49 | if isinstance(camera_matrix, np.ndarray): 50 | camera_matrix = camera_matrix.reshape(-1, 4, 4) 51 | translation = camera_matrix[:, :3, 3] 52 | translation = translation / ( 53 | np.linalg.norm(translation, axis=1, keepdims=True) + 1e-8 54 | ) 55 | camera_matrix[:, :3, 3] = translation 56 | else: 57 | camera_matrix = camera_matrix.reshape(-1, 4, 4) 58 | translation = camera_matrix[:, :3, 3] 59 | translation = translation / ( 60 | torch.norm(translation, dim=1, keepdim=True) + 1e-8 61 | ) 62 | camera_matrix[:, :3, 3] = translation 63 | return camera_matrix.reshape(-1, 16) 64 | 65 | 66 | def get_camera( 67 | num_frames, 68 | elevation=15, 69 | azimuth_start=0, 70 | azimuth_span=360, 71 | blender_coord=True, 72 | extra_view=False, 73 | ): 74 | angle_gap = azimuth_span / num_frames 75 | cameras = [] 76 | for azimuth in np.arange(azimuth_start, azimuth_span + azimuth_start, angle_gap): 77 | camera_matrix = create_camera_to_world_matrix(elevation, azimuth) 78 | if blender_coord: 79 | camera_matrix = convert_opengl_to_blender(camera_matrix) 80 | cameras.append(camera_matrix.flatten()) 81 | 82 | if extra_view: 83 | dim = len(cameras[0]) 84 | cameras.append(np.zeros(dim)) 85 | return torch.tensor(np.stack(cameras, 0)).float() 86 | -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/configs/sd_v2_base_ipmv.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | target: imagedream.ldm.interface.LatentDiffusionInterface 3 | params: 4 | linear_start: 0.00085 5 | linear_end: 0.0120 6 | timesteps: 1000 7 | scale_factor: 0.18215 8 | parameterization: "eps" 9 | 10 | unet_config: 11 | target: imagedream.ldm.modules.diffusionmodules.openaimodel.MultiViewUNetModel 12 | params: 13 | image_size: 32 # unused 14 | in_channels: 4 15 | out_channels: 4 16 | model_channels: 320 17 | attention_resolutions: [ 4, 2, 1 ] 18 | num_res_blocks: 2 19 | channel_mult: [ 1, 2, 4, 4 ] 20 | num_head_channels: 64 # need to fix for flash-attn 21 | use_spatial_transformer: True 22 | use_linear_in_transformer: True 23 | transformer_depth: 1 24 | context_dim: 1024 25 | use_checkpoint: False 26 | legacy: False 27 | camera_dim: 16 28 | with_ip: True 29 | ip_dim: 16 # ip token length 30 | ip_mode: "local_resample" 31 | 32 | vae_config: 33 | target: imagedream.ldm.models.autoencoder.AutoencoderKL 34 | params: 35 | embed_dim: 4 36 | monitor: val/rec_loss 37 | ddconfig: 38 | #attn_type: "vanilla-xformers" 39 | double_z: true 40 | z_channels: 4 41 | resolution: 256 42 | in_channels: 3 43 | out_ch: 3 44 | ch: 128 45 | ch_mult: 46 | - 1 47 | - 2 48 | - 4 49 | - 4 50 | num_res_blocks: 2 51 | attn_resolutions: [] 52 | 
dropout: 0.0 53 | lossconfig: 54 | target: torch.nn.Identity 55 | 56 | clip_config: 57 | target: imagedream.ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 58 | params: 59 | freeze: True 60 | layer: "penultimate" 61 | ip_mode: "local_resample" 62 | -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/configs/sd_v2_base_ipmv_local.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | target: imagedream.ldm.interface.LatentDiffusionInterface 3 | params: 4 | linear_start: 0.00085 5 | linear_end: 0.0120 6 | timesteps: 1000 7 | scale_factor: 0.18215 8 | parameterization: "eps" 9 | 10 | unet_config: 11 | target: imagedream.ldm.modules.diffusionmodules.openaimodel.MultiViewUNetModel 12 | params: 13 | image_size: 32 # unused 14 | in_channels: 4 15 | out_channels: 4 16 | model_channels: 320 17 | attention_resolutions: [ 4, 2, 1 ] 18 | num_res_blocks: 2 19 | channel_mult: [ 1, 2, 4, 4 ] 20 | num_head_channels: 64 # need to fix for flash-attn 21 | use_spatial_transformer: True 22 | use_linear_in_transformer: True 23 | transformer_depth: 1 24 | context_dim: 1024 25 | use_checkpoint: False 26 | legacy: False 27 | camera_dim: 16 28 | with_ip: True 29 | ip_dim: 16 # ip token length 30 | ip_mode: "local_resample" 31 | ip_weight: 1.0 # adjust for similarity to image 32 | 33 | vae_config: 34 | target: imagedream.ldm.models.autoencoder.AutoencoderKL 35 | params: 36 | embed_dim: 4 37 | monitor: val/rec_loss 38 | ddconfig: 39 | #attn_type: "vanilla-xformers" 40 | double_z: true 41 | z_channels: 4 42 | resolution: 256 43 | in_channels: 3 44 | out_ch: 3 45 | ch: 128 46 | ch_mult: 47 | - 1 48 | - 2 49 | - 4 50 | - 4 51 | num_res_blocks: 2 52 | attn_resolutions: [] 53 | dropout: 0.0 54 | lossconfig: 55 | target: torch.nn.Identity 56 | 57 | clip_config: 58 | target: imagedream.ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 59 | params: 60 | freeze: True 61 | layer: "penultimate" 62 | ip_mode: "local_resample" 63 | -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/imagedream/ldm/__init__.py -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/imagedream/ldm/models/__init__.py -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/imagedream/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/imagedream/ldm/modules/__init__.py 
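The `sd_v2_base_ipmv*.yaml` configs above are what `build_model` in `imagedream/model_zoo.py` (reproduced near the end of this listing) turns into a runnable diffusion model. Below is a minimal, hedged sketch of that path, assuming the checkpoint has already been downloaded to the location suggested in the ImageDream README.

```python
# Hedged sketch: build the ImageDream diffusion model from an explicit config
# and checkpoint, mirroring the (config_path, ckpt_path) branch of
# imagedream.model_zoo.build_model. The paths below are assumptions taken from
# the README's demo command, not values defined in this repository listing.
from imagedream.model_zoo import build_model

model = build_model(
    "sd-v2.1-base-4view-ipmv",  # entry name listed in PRETRAINED_MODELS
    config_path="./imagedream/configs/sd_v2_base_ipmv.yaml",
    ckpt_path="./release_models/ImageDream/sd-v2.1-base-4view-ipmv.pt",
)
model.eval()  # assumed to be a torch nn.Module (LatentDiffusionInterface); eval() for inference
```

With `config_path` and `ckpt_path` omitted, `build_model` presumably falls back to the `PRETRAINED_MODELS` table and `hf_hub_download`, as suggested by the imports in `model_zoo.py` below.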
-------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/imagedream/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/imagedream/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to( 34 | device=self.parameters.device 35 | ) 36 | 37 | def sample(self): 38 | x = self.mean + self.std * torch.randn(self.mean.shape).to( 39 | device=self.parameters.device 40 | ) 41 | return x 42 | 43 | def kl(self, other=None): 44 | if self.deterministic: 45 | return torch.Tensor([0.0]) 46 | else: 47 | if other is None: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, 50 | dim=[1, 2, 3], 51 | ) 52 | else: 53 | return 0.5 * torch.sum( 54 | torch.pow(self.mean - other.mean, 2) / other.var 55 | + self.var / other.var 56 | - 1.0 57 | - self.logvar 58 | + other.logvar, 59 | dim=[1, 2, 3], 60 | ) 61 | 62 | def nll(self, sample, dims=[1, 2, 3]): 63 | if self.deterministic: 64 | return torch.Tensor([0.0]) 65 | logtwopi = np.log(2.0 * np.pi) 66 | return 0.5 * torch.sum( 67 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 68 | dim=dims, 69 | ) 70 | 71 | def mode(self): 72 | return self.mean 73 | 74 | 75 | def normal_kl(mean1, logvar1, mean2, logvar2): 76 | """ 77 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 78 | Compute the KL divergence between two gaussians. 79 | Shapes are automatically broadcasted, so batches can be compared to 80 | scalars, among other use cases. 81 | """ 82 | tensor = None 83 | for obj in (mean1, logvar1, mean2, logvar2): 84 | if isinstance(obj, torch.Tensor): 85 | tensor = obj 86 | break 87 | assert tensor is not None, "at least one argument must be a Tensor" 88 | 89 | # Force variances to be Tensors. 
Broadcasting helps convert scalars to 90 | # Tensors, but it does not work for torch.exp(). 91 | logvar1, logvar2 = [ 92 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 93 | for x in (logvar1, logvar2) 94 | ] 95 | 96 | return 0.5 * ( 97 | -1.0 98 | + logvar2 99 | - logvar1 100 | + torch.exp(logvar1 - logvar2) 101 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 102 | ) 103 | -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError("Decay must be between 0 and 1") 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer("decay", torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer( 14 | "num_updates", 15 | torch.tensor(0, dtype=torch.int) 16 | if use_num_upates 17 | else torch.tensor(-1, dtype=torch.int), 18 | ) 19 | 20 | for name, p in model.named_parameters(): 21 | if p.requires_grad: 22 | # remove as '.'-character is not allowed in buffers 23 | s_name = name.replace(".", "") 24 | self.m_name2s_name.update({name: s_name}) 25 | self.register_buffer(s_name, p.clone().detach().data) 26 | 27 | self.collected_params = [] 28 | 29 | def reset_num_updates(self): 30 | del self.num_updates 31 | self.register_buffer("num_updates", torch.tensor(0, dtype=torch.int)) 32 | 33 | def forward(self, model): 34 | decay = self.decay 35 | 36 | if self.num_updates >= 0: 37 | self.num_updates += 1 38 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 39 | 40 | one_minus_decay = 1.0 - decay 41 | 42 | with torch.no_grad(): 43 | m_param = dict(model.named_parameters()) 44 | shadow_params = dict(self.named_buffers()) 45 | 46 | for key in m_param: 47 | if m_param[key].requires_grad: 48 | sname = self.m_name2s_name[key] 49 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 50 | shadow_params[sname].sub_( 51 | one_minus_decay * (shadow_params[sname] - m_param[key]) 52 | ) 53 | else: 54 | assert not key in self.m_name2s_name 55 | 56 | def copy_to(self, model): 57 | m_param = dict(model.named_parameters()) 58 | shadow_params = dict(self.named_buffers()) 59 | for key in m_param: 60 | if m_param[key].requires_grad: 61 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 62 | else: 63 | assert not key in self.m_name2s_name 64 | 65 | def store(self, parameters): 66 | """ 67 | Save the current parameters for restoring later. 68 | Args: 69 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 70 | temporarily stored. 71 | """ 72 | self.collected_params = [param.clone() for param in parameters] 73 | 74 | def restore(self, parameters): 75 | """ 76 | Restore the parameters stored with the `store` method. 77 | Useful to validate the model with EMA parameters without affecting the 78 | original optimization process. Store the parameters before the 79 | `copy_to` method. After validation (or model saving), use this to 80 | restore the former parameters. 81 | Args: 82 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 83 | updated with the stored parameters. 
84 | """ 85 | for c_param, param in zip(self.collected_params, parameters): 86 | param.data.copy_(c_param.data) 87 | -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/imagedream/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/model_zoo.py: -------------------------------------------------------------------------------- 1 | """ Utiliy functions to load pre-trained models more easily """ 2 | import os 3 | import pkg_resources 4 | from omegaconf import OmegaConf 5 | 6 | import torch 7 | from huggingface_hub import hf_hub_download 8 | 9 | from imagedream.ldm.util import instantiate_from_config 10 | 11 | 12 | PRETRAINED_MODELS = { 13 | "sd-v2.1-base-4view-ipmv": { 14 | "config": "sd_v2_base_ipmv.yaml", 15 | "repo_id": "Peng-Wang/ImageDream", 16 | "filename": "sd-v2.1-base-4view-ipmv.pt", 17 | }, 18 | "sd-v2.1-base-4view-ipmv-local": { 19 | "config": "sd_v2_base_ipmv_local.yaml", 20 | "repo_id": "Peng-Wang/ImageDream", 21 | "filename": "sd-v2.1-base-4view-ipmv-local.pt", 22 | }, 23 | } 24 | 25 | 26 | def get_config_file(config_path): 27 | cfg_file = pkg_resources.resource_filename( 28 | "imagedream", os.path.join("configs", config_path) 29 | ) 30 | if not os.path.exists(cfg_file): 31 | raise RuntimeError(f"Config {config_path} not available!") 32 | return cfg_file 33 | 34 | 35 | def build_model(model_name, config_path=None, ckpt_path=None, cache_dir=None): 36 | if (config_path is not None) and (ckpt_path is not None): 37 | config = OmegaConf.load(config_path) 38 | model = instantiate_from_config(config.model) 39 | model.load_state_dict(torch.load(ckpt_path, map_location="cpu"), strict=False) 40 | return model 41 | 42 | if not model_name in PRETRAINED_MODELS: 43 | raise RuntimeError( 44 | f"Model name {model_name} is not a pre-trained model. 
Available models are:\n- " 45 | + "\n- ".join(PRETRAINED_MODELS.keys()) 46 | ) 47 | model_info = PRETRAINED_MODELS[model_name] 48 | 49 | # Instiantiate the model 50 | print(f"Loading model from config: {model_info['config']}") 51 | config_file = get_config_file(model_info["config"]) 52 | config = OmegaConf.load(config_file) 53 | model = instantiate_from_config(config.model) 54 | 55 | # Load pre-trained checkpoint from huggingface 56 | if not ckpt_path: 57 | ckpt_path = hf_hub_download( 58 | repo_id=model_info["repo_id"], 59 | filename=model_info["filename"], 60 | cache_dir=cache_dir, 61 | ) 62 | print(f"Loading model from cache file: {ckpt_path}") 63 | model.load_state_dict(torch.load(ckpt_path, map_location="cpu"), strict=False) 64 | return model 65 | -------------------------------------------------------------------------------- /extern/ImageDream/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | imageio 3 | imageio-ffmpeg 4 | omegaconf 5 | einops 6 | transformers==4.27.1 7 | open-clip-torch==2.7.0 8 | gradio>=3.13.2 9 | xformers==0.0.16 10 | -------------------------------------------------------------------------------- /extern/ImageDream/scripts/demo.sh: -------------------------------------------------------------------------------- 1 | # Run this script under ImageDream/ 2 | export PYTHONPATH=$PYTHONPATH:./ 3 | 4 | # test pixel version 5 | python3 scripts/demo.py \ 6 | --image "./assets/astronaut.png" \ 7 | --text "an astronaut riding a horse" \ 8 | --config_path "./imagedream/configs/sd_v2_base_ipmv.yaml" \ 9 | --ckpt_path "./release_models/ImageDream/sd-v2.1-base-4view-ipmv.pt" \ 10 | --mode "pixel" \ 11 | --num_frames 5 12 | 13 | # test local version 14 | python3 scripts/demo.py \ 15 | --image "./assets/astronaut.png" \ 16 | --text "an astronaut riding a horse" \ 17 | --config_path "./imagedream/configs/sd_v2_base_ipmv_local.yaml" \ 18 | --ckpt_path "./release_models/ImageDream/sd-v2.1-base-4view-ipmv-local.pt" \ 19 | --mode "local" \ 20 | --num_frames 4 21 | -------------------------------------------------------------------------------- /extern/ImageDream/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="imagedream", 5 | version="0.0.1", 6 | description="Multi-view Diffusion Models", 7 | author="ByteDance", 8 | packages=find_packages(), 9 | package_data={"imagedream": ["configs/*.yaml"]}, 10 | install_requires=[ 11 | "torch", 12 | "numpy", 13 | "tqdm", 14 | "omegaconf", 15 | "einops", 16 | "huggingface_hub", 17 | "transformers", 18 | "open-clip-torch", 19 | ], 20 | ) 21 | -------------------------------------------------------------------------------- /extern/ldm_zero123/extras.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from contextlib import contextmanager 3 | from pathlib import Path 4 | 5 | import torch 6 | from omegaconf import OmegaConf 7 | 8 | from extern.ldm_zero123.util import instantiate_from_config 9 | 10 | 11 | @contextmanager 12 | def all_logging_disabled(highest_level=logging.CRITICAL): 13 | """ 14 | A context manager that will prevent any logging messages 15 | triggered during the body from being processed. 16 | 17 | :param highest_level: the maximum logging level in use. 18 | This would only need to be changed if a custom level greater than CRITICAL 19 | is defined. 
20 | 21 | https://gist.github.com/simon-weber/7853144 22 | """ 23 | # two kind-of hacks here: 24 | # * can't get the highest logging level in effect => delegate to the user 25 | # * can't get the current module-level override => use an undocumented 26 | # (but non-private!) interface 27 | 28 | previous_level = logging.root.manager.disable 29 | 30 | logging.disable(highest_level) 31 | 32 | try: 33 | yield 34 | finally: 35 | logging.disable(previous_level) 36 | 37 | 38 | def load_training_dir(train_dir, device, epoch="last"): 39 | """Load a checkpoint and config from training directory""" 40 | train_dir = Path(train_dir) 41 | ckpt = list(train_dir.rglob(f"*{epoch}.ckpt")) 42 | assert len(ckpt) == 1, f"found {len(ckpt)} matching ckpt files" 43 | config = list(train_dir.rglob(f"*-project.yaml")) 44 | assert len(ckpt) > 0, f"didn't find any config in {train_dir}" 45 | if len(config) > 1: 46 | print(f"found {len(config)} matching config files") 47 | config = sorted(config)[-1] 48 | print(f"selecting {config}") 49 | else: 50 | config = config[0] 51 | 52 | config = OmegaConf.load(config) 53 | return load_model_from_config(config, ckpt[0], device) 54 | 55 | 56 | def load_model_from_config(config, ckpt, device="cpu", verbose=False): 57 | """Loads a model from config and a ckpt 58 | if config is a path will use omegaconf to load 59 | """ 60 | if isinstance(config, (str, Path)): 61 | config = OmegaConf.load(config) 62 | 63 | with all_logging_disabled(): 64 | print(f"Loading model from {ckpt}") 65 | pl_sd = torch.load(ckpt, map_location="cpu") 66 | global_step = pl_sd["global_step"] 67 | sd = pl_sd["state_dict"] 68 | model = instantiate_from_config(config.model) 69 | m, u = model.load_state_dict(sd, strict=False) 70 | if len(m) > 0 and verbose: 71 | print("missing keys:") 72 | print(m) 73 | if len(u) > 0 and verbose: 74 | print("unexpected keys:") 75 | model.to(device) 76 | model.eval() 77 | model.cond_stage_model.device = device 78 | return model 79 | -------------------------------------------------------------------------------- /extern/ldm_zero123/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ldm_zero123/models/diffusion/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/models/diffusion/sampling_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def append_dims(x, target_dims): 6 | """Appends dimensions to the end of a tensor until it has target_dims dimensions. 7 | From https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/utils.py""" 8 | dims_to_append = target_dims - x.ndim 9 | if dims_to_append < 0: 10 | raise ValueError( 11 | f"input has {x.ndim} dims but target_dims is {target_dims}, which is less" 12 | ) 13 | return x[(...,) + (None,) * dims_to_append] 14 | 15 | 16 | def renorm_thresholding(x0, value): 17 | # renorm 18 | pred_max = x0.max() 19 | pred_min = x0.min() 20 | pred_x0 = (x0 - pred_min) / (pred_max - pred_min) # 0 ... 1 21 | pred_x0 = 2 * pred_x0 - 1.0 # -1 ... 1 22 | 23 | s = torch.quantile(rearrange(pred_x0, "b ... 
-> b (...)").abs(), value, dim=-1) 24 | s.clamp_(min=1.0) 25 | s = s.view(-1, *((1,) * (pred_x0.ndim - 1))) 26 | 27 | # clip by threshold 28 | # pred_x0 = pred_x0.clamp(-s, s) / s # needs newer pytorch # TODO bring back to pure-gpu with min/max 29 | 30 | # temporary hack: numpy on cpu 31 | pred_x0 = ( 32 | np.clip(pred_x0.cpu().numpy(), -s.cpu().numpy(), s.cpu().numpy()) 33 | / s.cpu().numpy() 34 | ) 35 | pred_x0 = torch.tensor(pred_x0).to(self.model.device) 36 | 37 | # re.renorm 38 | pred_x0 = (pred_x0 + 1.0) / 2.0 # 0 ... 1 39 | pred_x0 = (pred_max - pred_min) * pred_x0 + pred_min # orig range 40 | return pred_x0 41 | 42 | 43 | def norm_thresholding(x0, value): 44 | s = append_dims(x0.pow(2).flatten(1).mean(1).sqrt().clamp(min=value), x0.ndim) 45 | return x0 * (value / s) 46 | 47 | 48 | def spatial_norm_thresholding(x0, value): 49 | # b c h w 50 | s = x0.pow(2).mean(1, keepdim=True).sqrt().clamp(min=value) 51 | return x0 * (value / s) 52 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ldm_zero123/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ldm_zero123/modules/distributions/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to( 34 | device=self.parameters.device 35 | ) 36 | 37 | def sample(self): 38 | x = self.mean + self.std * torch.randn(self.mean.shape).to( 39 | device=self.parameters.device 40 | ) 41 | return x 42 | 43 | def kl(self, other=None): 44 | if self.deterministic: 45 | return torch.Tensor([0.0]) 46 | else: 47 | if other is None: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, 50 | dim=[1, 2, 3], 51 | ) 52 | else: 53 | return 0.5 * torch.sum( 54 | torch.pow(self.mean - other.mean, 2) / other.var 55 | + self.var / other.var 56 | - 1.0 57 | - self.logvar 58 | + other.logvar, 59 | dim=[1, 2, 3], 60 | ) 61 | 62 | def nll(self, sample, dims=[1, 2, 3]): 63 | if self.deterministic: 64 | return torch.Tensor([0.0]) 
65 | logtwopi = np.log(2.0 * np.pi) 66 | return 0.5 * torch.sum( 67 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 68 | dim=dims, 69 | ) 70 | 71 | def mode(self): 72 | return self.mean 73 | 74 | 75 | def normal_kl(mean1, logvar1, mean2, logvar2): 76 | """ 77 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 78 | Compute the KL divergence between two gaussians. 79 | Shapes are automatically broadcasted, so batches can be compared to 80 | scalars, among other use cases. 81 | """ 82 | tensor = None 83 | for obj in (mean1, logvar1, mean2, logvar2): 84 | if isinstance(obj, torch.Tensor): 85 | tensor = obj 86 | break 87 | assert tensor is not None, "at least one argument must be a Tensor" 88 | 89 | # Force variances to be Tensors. Broadcasting helps convert scalars to 90 | # Tensors, but it does not work for torch.exp(). 91 | logvar1, logvar2 = [ 92 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 93 | for x in (logvar1, logvar2) 94 | ] 95 | 96 | return 0.5 * ( 97 | -1.0 98 | + logvar2 99 | - logvar1 100 | + torch.exp(logvar1 - logvar2) 101 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 102 | ) 103 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError("Decay must be between 0 and 1") 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer("decay", torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer( 14 | "num_updates", 15 | torch.tensor(0, dtype=torch.int) 16 | if use_num_upates 17 | else torch.tensor(-1, dtype=torch.int), 18 | ) 19 | 20 | for name, p in model.named_parameters(): 21 | if p.requires_grad: 22 | # remove as '.'-character is not allowed in buffers 23 | s_name = name.replace(".", "") 24 | self.m_name2s_name.update({name: s_name}) 25 | self.register_buffer(s_name, p.clone().detach().data) 26 | 27 | self.collected_params = [] 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_( 47 | one_minus_decay * (shadow_params[sname] - m_param[key]) 48 | ) 49 | else: 50 | assert not key in self.m_name2s_name 51 | 52 | def copy_to(self, model): 53 | m_param = dict(model.named_parameters()) 54 | shadow_params = dict(self.named_buffers()) 55 | for key in m_param: 56 | if m_param[key].requires_grad: 57 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 58 | else: 59 | assert not key in self.m_name2s_name 60 | 61 | def store(self, parameters): 62 | """ 63 | Save the current parameters for restoring later. 64 | Args: 65 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 66 | temporarily stored. 
67 | """ 68 | self.collected_params = [param.clone() for param in parameters] 69 | 70 | def restore(self, parameters): 71 | """ 72 | Restore the parameters stored with the `store` method. 73 | Useful to validate the model with EMA parameters without affecting the 74 | original optimization process. Store the parameters before the 75 | `copy_to` method. After validation (or model saving), use this to 76 | restore the former parameters. 77 | Args: 78 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 79 | updated with the stored parameters. 80 | """ 81 | for c_param, param in zip(self.collected_params, parameters): 82 | param.data.copy_(c_param.data) 83 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ldm_zero123/modules/encoders/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/image_degradation/__init__.py: -------------------------------------------------------------------------------- 1 | from extern.ldm_zero123.modules.image_degradation.bsrgan import ( 2 | degradation_bsrgan_variant as degradation_fn_bsr, 3 | ) 4 | from extern.ldm_zero123.modules.image_degradation.bsrgan_light import ( 5 | degradation_bsrgan_variant as degradation_fn_bsr_light, 6 | ) 7 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/image_degradation/utils/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ldm_zero123/modules/image_degradation/utils/test.png -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from extern.ldm_zero123.modules.losses.contperceptual import LPIPSWithDiscriminator 2 | -------------------------------------------------------------------------------- /extern/ldm_zero123/thirdp/psp/id_loss.py: -------------------------------------------------------------------------------- 1 | # https://github.com/eladrich/pixel2style2pixel 2 | import torch 3 | from torch import nn 4 | 5 | from extern.ldm_zero123.thirdp.psp.model_irse import Backbone 6 | 7 | 8 | class IDFeatures(nn.Module): 9 | def __init__(self, model_path): 10 | super(IDFeatures, self).__init__() 11 | print("Loading ResNet ArcFace") 12 | self.facenet = Backbone( 13 | input_size=112, num_layers=50, drop_ratio=0.6, mode="ir_se" 14 | ) 15 | self.facenet.load_state_dict(torch.load(model_path, map_location="cpu")) 16 | self.face_pool = torch.nn.AdaptiveAvgPool2d((112, 112)) 17 | self.facenet.eval() 18 | 19 | def forward(self, x, crop=False): 20 | # Not sure of the image range here 21 | if crop: 22 | x = torch.nn.functional.interpolate(x, (256, 256), mode="area") 23 | x = x[:, :, 35:223, 32:220] 24 | x = self.face_pool(x) 25 | x_feats = self.facenet(x) 26 | return x_feats 27 | -------------------------------------------------------------------------------- /load/images/anya_front.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/anya_front.png -------------------------------------------------------------------------------- /load/images/anya_front_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/anya_front_depth.png -------------------------------------------------------------------------------- /load/images/anya_front_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/anya_front_normal.png -------------------------------------------------------------------------------- /load/images/anya_front_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/anya_front_rgba.png -------------------------------------------------------------------------------- /load/images/baby_phoenix_on_ice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/baby_phoenix_on_ice.png -------------------------------------------------------------------------------- /load/images/baby_phoenix_on_ice_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/baby_phoenix_on_ice_depth.png -------------------------------------------------------------------------------- /load/images/baby_phoenix_on_ice_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/baby_phoenix_on_ice_normal.png -------------------------------------------------------------------------------- /load/images/baby_phoenix_on_ice_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/baby_phoenix_on_ice_rgba.png -------------------------------------------------------------------------------- /load/images/beach_house_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_1.png -------------------------------------------------------------------------------- /load/images/beach_house_1_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_1_depth.png -------------------------------------------------------------------------------- /load/images/beach_house_1_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_1_normal.png -------------------------------------------------------------------------------- 
/load/images/beach_house_1_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_1_rgba.png -------------------------------------------------------------------------------- /load/images/beach_house_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_2.png -------------------------------------------------------------------------------- /load/images/beach_house_2_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_2_depth.png -------------------------------------------------------------------------------- /load/images/beach_house_2_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_2_normal.png -------------------------------------------------------------------------------- /load/images/beach_house_2_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_2_rgba.png -------------------------------------------------------------------------------- /load/images/bollywood_actress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/bollywood_actress.png -------------------------------------------------------------------------------- /load/images/bollywood_actress_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/bollywood_actress_depth.png -------------------------------------------------------------------------------- /load/images/bollywood_actress_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/bollywood_actress_normal.png -------------------------------------------------------------------------------- /load/images/bollywood_actress_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/bollywood_actress_rgba.png -------------------------------------------------------------------------------- /load/images/cactus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/cactus.png -------------------------------------------------------------------------------- /load/images/cactus_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/cactus_depth.png 
-------------------------------------------------------------------------------- /load/images/cactus_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/cactus_normal.png -------------------------------------------------------------------------------- /load/images/cactus_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/cactus_rgba.png -------------------------------------------------------------------------------- /load/images/catstatue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/catstatue.png -------------------------------------------------------------------------------- /load/images/catstatue_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/catstatue_depth.png -------------------------------------------------------------------------------- /load/images/catstatue_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/catstatue_normal.png -------------------------------------------------------------------------------- /load/images/catstatue_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/catstatue_rgba.png -------------------------------------------------------------------------------- /load/images/church_ruins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/church_ruins.png -------------------------------------------------------------------------------- /load/images/church_ruins_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/church_ruins_depth.png -------------------------------------------------------------------------------- /load/images/church_ruins_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/church_ruins_normal.png -------------------------------------------------------------------------------- /load/images/church_ruins_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/church_ruins_rgba.png -------------------------------------------------------------------------------- /load/images/dog1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/dog1.png 
-------------------------------------------------------------------------------- /load/images/dragon2_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/dragon2_rgba.png -------------------------------------------------------------------------------- /load/images/firekeeper.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/firekeeper.jpg -------------------------------------------------------------------------------- /load/images/firekeeper_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/firekeeper_depth.png -------------------------------------------------------------------------------- /load/images/firekeeper_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/firekeeper_normal.png -------------------------------------------------------------------------------- /load/images/firekeeper_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/firekeeper_rgba.png -------------------------------------------------------------------------------- /load/images/futuristic_car.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/futuristic_car.png -------------------------------------------------------------------------------- /load/images/futuristic_car_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/futuristic_car_depth.png -------------------------------------------------------------------------------- /load/images/futuristic_car_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/futuristic_car_normal.png -------------------------------------------------------------------------------- /load/images/futuristic_car_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/futuristic_car_rgba.png -------------------------------------------------------------------------------- /load/images/grootplant_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/grootplant_rgba.png -------------------------------------------------------------------------------- /load/images/hamburger.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/hamburger.png -------------------------------------------------------------------------------- /load/images/hamburger_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/hamburger_depth.png -------------------------------------------------------------------------------- /load/images/hamburger_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/hamburger_rgba.png -------------------------------------------------------------------------------- /load/images/mona_lisa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/mona_lisa.png -------------------------------------------------------------------------------- /load/images/mona_lisa_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/mona_lisa_depth.png -------------------------------------------------------------------------------- /load/images/mona_lisa_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/mona_lisa_normal.png -------------------------------------------------------------------------------- /load/images/mona_lisa_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/mona_lisa_rgba.png -------------------------------------------------------------------------------- /load/images/robot_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/robot_rgba.png -------------------------------------------------------------------------------- /load/images/teddy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/teddy.png -------------------------------------------------------------------------------- /load/images/teddy_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/teddy_depth.png -------------------------------------------------------------------------------- /load/images/teddy_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/teddy_normal.png -------------------------------------------------------------------------------- /load/images/teddy_rgba.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/teddy_rgba.png -------------------------------------------------------------------------------- /load/images/thorhammer_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/thorhammer_rgba.png -------------------------------------------------------------------------------- /load/lights/LICENSE.txt: -------------------------------------------------------------------------------- 1 | The mud_road_puresky.hdr HDR probe is from https://polyhaven.com/a/mud_road_puresky 2 | CC0 License. 3 | -------------------------------------------------------------------------------- /load/lights/bsdf_256_256.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/lights/bsdf_256_256.bin -------------------------------------------------------------------------------- /load/lights/mud_road_puresky_1k.hdr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/lights/mud_road_puresky_1k.hdr -------------------------------------------------------------------------------- /load/shapes/README.md: -------------------------------------------------------------------------------- 1 | # Shape Credits 2 | 3 | - `animal.obj` - Ido Richardson 4 | - `hand_prismatic.obj` - Ido Richardson 5 | - `potion.obj` - Ido Richardson 6 | - `blub.obj` - [Keenan's 3D Model Repository](https://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/) 7 | - `nascar.obj` - [Princeton ModelNet](https://modelnet.cs.princeton.edu/) 8 | - `cabin.obj` - [Princeton ModelNet](https://modelnet.cs.princeton.edu/) 9 | - `teddy.obj` - [Gal Metzer](https://galmetzer.github.io/) 10 | - `human.obj` - [TurboSquid](https://www.turbosquid.com/3d-models/3d-model-character-base/524860) 11 | -------------------------------------------------------------------------------- /load/tets/128_tets.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/tets/128_tets.npz -------------------------------------------------------------------------------- /load/tets/32_tets.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/tets/32_tets.npz -------------------------------------------------------------------------------- /load/tets/64_tets.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/tets/64_tets.npz -------------------------------------------------------------------------------- /load/tets/generate_tets.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. 
Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 9 | 10 | import os 11 | 12 | import numpy as np 13 | 14 | """ 15 | This code segment shows how to use Quartet: https://github.com/crawforddoran/quartet, 16 | to generate a tet grid 17 | 1) Download, compile and run Quartet as described in the link above. Example usage `quartet meshes/cube.obj 0.5 cube_5.tet` 18 | 2) Run the function below to generate a file `cube_32_tet.tet` 19 | """ 20 | 21 | 22 | def generate_tetrahedron_grid_file(res=32, root=".."): 23 | frac = 1.0 / res 24 | command = f"cd {root}; ./quartet meshes/cube.obj {frac} meshes/cube_{res}_tet.tet -s meshes/cube_boundary_{res}.obj" 25 | os.system(command) 26 | 27 | 28 | """ 29 | This code segment shows how to convert from a quartet .tet file to compressed npz file 30 | """ 31 | 32 | 33 | def convert_from_quartet_to_npz(quartetfile="cube_32_tet.tet", npzfile="32_tets"): 34 | file1 = open(quartetfile, "r") 35 | header = file1.readline() 36 | numvertices = int(header.split(" ")[1]) 37 | numtets = int(header.split(" ")[2]) 38 | print(numvertices, numtets) 39 | 40 | # load vertices 41 | vertices = np.loadtxt(quartetfile, skiprows=1, max_rows=numvertices) 42 | print(vertices.shape) 43 | 44 | # load indices 45 | indices = np.loadtxt( 46 | quartetfile, dtype=int, skiprows=1 + numvertices, max_rows=numtets 47 | ) 48 | print(indices.shape) 49 | 50 | np.savez_compressed(npzfile, vertices=vertices, indices=indices) 51 | 52 | 53 | root = "/home/gyc/quartet" 54 | for res in [300, 350, 400]: 55 | generate_tetrahedron_grid_file(res, root) 56 | convert_from_quartet_to_npz( 57 | os.path.join(root, f"meshes/cube_{res}_tet.tet"), npzfile=f"{res}_tets" 58 | ) 59 | -------------------------------------------------------------------------------- /load/zero123/download.sh: -------------------------------------------------------------------------------- 1 | wget https://huggingface.co/cvlab/zero123-weights/resolve/main/105000.ckpt 2 | -------------------------------------------------------------------------------- /load/zero123/sd-objaverse-finetune-c_concat-256.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: extern.ldm_zero123.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "image_target" 11 | cond_stage_key: "image_cond" 12 | image_size: 32 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: hybrid 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | 19 | scheduler_config: # 10000 warmup steps 20 | target: extern.ldm_zero123.lr_scheduler.LambdaLinearScheduler 21 | params: 22 | warm_up_steps: [ 100 ] 23 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 24 | f_start: [ 1.e-6 ] 25 | f_max: [ 1. ] 26 | f_min: [ 1. 
] 27 | 28 | unet_config: 29 | target: extern.ldm_zero123.modules.diffusionmodules.openaimodel.UNetModel 30 | params: 31 | image_size: 32 # unused 32 | in_channels: 8 33 | out_channels: 4 34 | model_channels: 320 35 | attention_resolutions: [ 4, 2, 1 ] 36 | num_res_blocks: 2 37 | channel_mult: [ 1, 2, 4, 4 ] 38 | num_heads: 8 39 | use_spatial_transformer: True 40 | transformer_depth: 1 41 | context_dim: 768 42 | use_checkpoint: True 43 | legacy: False 44 | 45 | first_stage_config: 46 | target: extern.ldm_zero123.models.autoencoder.AutoencoderKL 47 | params: 48 | embed_dim: 4 49 | monitor: val/rec_loss 50 | ddconfig: 51 | double_z: true 52 | z_channels: 4 53 | resolution: 256 54 | in_channels: 3 55 | out_ch: 3 56 | ch: 128 57 | ch_mult: 58 | - 1 59 | - 2 60 | - 4 61 | - 4 62 | num_res_blocks: 2 63 | attn_resolutions: [] 64 | dropout: 0.0 65 | lossconfig: 66 | target: torch.nn.Identity 67 | 68 | cond_stage_config: 69 | target: extern.ldm_zero123.modules.encoders.modules.FrozenCLIPImageEmbedder 70 | 71 | 72 | # data: 73 | # target: extern.ldm_zero123.data.simple.ObjaverseDataModuleFromConfig 74 | # params: 75 | # root_dir: 'views_whole_sphere' 76 | # batch_size: 192 77 | # num_workers: 16 78 | # total_view: 4 79 | # train: 80 | # validation: False 81 | # image_transforms: 82 | # size: 256 83 | 84 | # validation: 85 | # validation: True 86 | # image_transforms: 87 | # size: 256 88 | 89 | 90 | # lightning: 91 | # find_unused_parameters: false 92 | # metrics_over_trainsteps_checkpoint: True 93 | # modelcheckpoint: 94 | # params: 95 | # every_n_train_steps: 5000 96 | # callbacks: 97 | # image_logger: 98 | # target: main.ImageLogger 99 | # params: 100 | # batch_frequency: 500 101 | # max_images: 32 102 | # increase_log_steps: False 103 | # log_first_step: True 104 | # log_images_kwargs: 105 | # use_ema_scope: False 106 | # inpaint: False 107 | # plot_progressive_rows: False 108 | # plot_diffusion_rows: False 109 | # N: 32 110 | # unconditional_scale: 3.0 111 | # unconditional_label: [""] 112 | 113 | # trainer: 114 | # benchmark: True 115 | # val_check_interval: 5000000 # really sorry 116 | # num_sanity_val_steps: 0 117 | # accumulate_grad_batches: 1 118 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | mypy 3 | pylint 4 | pre-commit 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | lightning==2.0.0 2 | omegaconf==2.3.0 3 | jaxtyping 4 | typeguard 5 | git+https://github.com/KAIR-BAIR/nerfacc.git@v0.5.2 6 | git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch 7 | diffusers 8 | transformers 9 | accelerate 10 | opencv-python 11 | tensorboard 12 | matplotlib 13 | imageio>=2.28.0 14 | imageio[ffmpeg] 15 | git+https://github.com/NVlabs/nvdiffrast.git 16 | libigl 17 | xatlas 18 | trimesh[easy] 19 | networkx 20 | pysdf 21 | PyMCubes 22 | wandb 23 | gradio 24 | git+https://github.com/ashawkey/envlight.git 25 | torchmetrics 26 | 27 | # deepfloyd 28 | xformers 29 | bitsandbytes 30 | sentencepiece 31 | safetensors 32 | huggingface_hub 33 | 34 | # for zero123 35 | einops 36 | kornia 37 | taming-transformers-rom1504 38 | git+https://github.com/openai/CLIP.git 39 | 40 | #controlnet 41 | controlnet_aux 42 | 43 | # imagedream 44 | open-clip-torch==2.7.0 
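The sd-objaverse-finetune-c_concat-256.yaml above sits alongside the 105000.ckpt weights fetched by load/zero123/download.sh. A minimal sketch of loading that config/checkpoint pair with the load_model_from_config helper from extern/ldm_zero123/extras.py shown earlier; the paths are assumptions based on download.sh and the file layout, not commands taken from the repo:

from extern.ldm_zero123.extras import load_model_from_config

# Config as dumped above; checkpoint as downloaded by load/zero123/download.sh.
# load_model_from_config accepts a path for `config` and loads it via OmegaConf.
model = load_model_from_config(
    config="load/zero123/sd-objaverse-finetune-c_concat-256.yaml",
    ckpt="load/zero123/105000.ckpt",
    device="cuda",
)

Within threestudio, the zero123 guidance module presumably handles this loading internally, so the sketch is mainly useful for inspecting the diffusion model on its own.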
-------------------------------------------------------------------------------- /threestudio/__init__.py: -------------------------------------------------------------------------------- 1 | __modules__ = {} 2 | 3 | 4 | def register(name): 5 | def decorator(cls): 6 | __modules__[name] = cls 7 | return cls 8 | 9 | return decorator 10 | 11 | 12 | def find(name): 13 | return __modules__[name] 14 | 15 | 16 | ### grammar sugar for logging utilities ### 17 | import logging 18 | 19 | logger = logging.getLogger("pytorch_lightning") 20 | 21 | from pytorch_lightning.utilities.rank_zero import ( 22 | rank_zero_debug, 23 | rank_zero_info, 24 | rank_zero_only, 25 | ) 26 | 27 | debug = rank_zero_debug 28 | info = rank_zero_info 29 | 30 | 31 | @rank_zero_only 32 | def warn(*args, **kwargs): 33 | logger.warn(*args, **kwargs) 34 | 35 | 36 | from . import data, models, systems 37 | -------------------------------------------------------------------------------- /threestudio/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import co3d, image, multiview, uncond, random_multiview 2 | -------------------------------------------------------------------------------- /threestudio/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | background, 3 | exporters, 4 | geometry, 5 | guidance, 6 | materials, 7 | prompt_processors, 8 | renderers, 9 | ) 10 | -------------------------------------------------------------------------------- /threestudio/models/background/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | base, 3 | neural_environment_map_background, 4 | solid_color_background, 5 | textured_background, 6 | ) 7 | -------------------------------------------------------------------------------- /threestudio/models/background/base.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.utils.base import BaseModule 10 | from threestudio.utils.typing import * 11 | 12 | 13 | class BaseBackground(BaseModule): 14 | @dataclass 15 | class Config(BaseModule.Config): 16 | pass 17 | 18 | cfg: Config 19 | 20 | def configure(self): 21 | pass 22 | 23 | def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]: 24 | raise NotImplementedError 25 | -------------------------------------------------------------------------------- /threestudio/models/background/neural_environment_map_background.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.background.base import BaseBackground 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("neural-environment-map-background") 16 | class NeuralEnvironmentMapBackground(BaseBackground): 17 | @dataclass 18 | class Config(BaseBackground.Config): 19 | n_output_dims: int = 3 20 | color_activation: str = "sigmoid" 21 | dir_encoding_config: dict = field( 22 | default_factory=lambda: 
{"otype": "SphericalHarmonics", "degree": 3} 23 | ) 24 | mlp_network_config: dict = field( 25 | default_factory=lambda: { 26 | "otype": "VanillaMLP", 27 | "activation": "ReLU", 28 | "n_neurons": 16, 29 | "n_hidden_layers": 2, 30 | } 31 | ) 32 | random_aug: bool = False 33 | random_aug_prob: float = 0.5 34 | share_aug_bg: bool = False 35 | eval_color: Optional[Tuple[float, float, float]] = None 36 | 37 | cfg: Config 38 | 39 | def configure(self) -> None: 40 | self.encoding = get_encoding(3, self.cfg.dir_encoding_config) 41 | self.network = get_mlp( 42 | self.encoding.n_output_dims, 43 | self.cfg.n_output_dims, 44 | self.cfg.mlp_network_config, 45 | ) 46 | 47 | def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]: 48 | if not self.training and self.cfg.eval_color is not None: 49 | return torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to( 50 | dirs 51 | ) * torch.as_tensor(self.cfg.eval_color).to(dirs) 52 | # viewdirs must be normalized before passing to this function 53 | dirs = (dirs + 1.0) / 2.0 # (-1, 1) => (0, 1) 54 | dirs_embd = self.encoding(dirs.view(-1, 3)) 55 | color = self.network(dirs_embd).view(*dirs.shape[:-1], self.cfg.n_output_dims) 56 | color = get_activation(self.cfg.color_activation)(color) 57 | if ( 58 | self.training 59 | and self.cfg.random_aug 60 | and random.random() < self.cfg.random_aug_prob 61 | ): 62 | # use random background color with probability random_aug_prob 63 | n_color = 1 if self.cfg.share_aug_bg else dirs.shape[0] 64 | color = color * 0 + ( # prevent checking for unused parameters in DDP 65 | torch.rand(n_color, 1, 1, self.cfg.n_output_dims) 66 | .to(dirs) 67 | .expand(*dirs.shape[:-1], -1) 68 | ) 69 | return color 70 | -------------------------------------------------------------------------------- /threestudio/models/background/solid_color_background.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.background.base import BaseBackground 10 | from threestudio.utils.typing import * 11 | 12 | 13 | @threestudio.register("solid-color-background") 14 | class SolidColorBackground(BaseBackground): 15 | @dataclass 16 | class Config(BaseBackground.Config): 17 | n_output_dims: int = 3 18 | color: Tuple = (1.0, 1.0, 1.0) 19 | learned: bool = False 20 | random_aug: bool = False 21 | random_aug_prob: float = 0.5 22 | 23 | cfg: Config 24 | 25 | def configure(self) -> None: 26 | self.env_color: Float[Tensor, "Nc"] 27 | if self.cfg.learned: 28 | self.env_color = nn.Parameter( 29 | torch.as_tensor(self.cfg.color, dtype=torch.float32) 30 | ) 31 | else: 32 | self.register_buffer( 33 | "env_color", torch.as_tensor(self.cfg.color, dtype=torch.float32) 34 | ) 35 | 36 | def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]: 37 | color = ( 38 | torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to(dirs) 39 | * self.env_color 40 | ) 41 | if ( 42 | self.training 43 | and self.cfg.random_aug 44 | and random.random() < self.cfg.random_aug_prob 45 | ): 46 | # use random background color with probability random_aug_prob 47 | color = color * 0 + ( # prevent checking for unused parameters in DDP 48 | torch.rand(dirs.shape[0], 1, 1, self.cfg.n_output_dims) 49 | .to(dirs) 50 | .expand(*dirs.shape[:-1], -1) 51 | ) 52 | return color 53 | 
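The background modules above, like every pluggable component in threestudio, rely on the small registry defined in threestudio/__init__.py: @threestudio.register(name) records the class under a string key and threestudio.find(name) retrieves it, which is how components get looked up by the string names used elsewhere in the codebase. A minimal sketch of that pattern with a made-up background; only registration and lookup are exercised, since the config/instantiation machinery (BaseModule) is not part of this excerpt:

import torch

import threestudio
from threestudio.models.background.base import BaseBackground


@threestudio.register("constant-gray-background")  # hypothetical name, for illustration only
class ConstantGrayBackground(BaseBackground):
    def forward(self, dirs):
        # Same contract as the backgrounds above: (B, H, W, 3) view directions in,
        # (B, H, W, 3) colors out; here just a constant mid-gray.
        return torch.full(
            (*dirs.shape[:-1], 3), 0.5, device=dirs.device, dtype=dirs.dtype
        )


# find() returns whatever class was registered under the given name.
assert threestudio.find("constant-gray-background") is ConstantGrayBackground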
-------------------------------------------------------------------------------- /threestudio/models/background/textured_background.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | import threestudio 8 | from threestudio.models.background.base import BaseBackground 9 | from threestudio.utils.ops import get_activation 10 | from threestudio.utils.typing import * 11 | 12 | 13 | @threestudio.register("textured-background") 14 | class TexturedBackground(BaseBackground): 15 | @dataclass 16 | class Config(BaseBackground.Config): 17 | n_output_dims: int = 3 18 | height: int = 64 19 | width: int = 64 20 | color_activation: str = "sigmoid" 21 | 22 | cfg: Config 23 | 24 | def configure(self) -> None: 25 | self.texture = nn.Parameter( 26 | torch.randn((1, self.cfg.n_output_dims, self.cfg.height, self.cfg.width)) 27 | ) 28 | 29 | def spherical_xyz_to_uv(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B 2"]: 30 | x, y, z = dirs[..., 0], dirs[..., 1], dirs[..., 2] 31 | xy = (x**2 + y**2) ** 0.5 32 | u = torch.atan2(xy, z) / torch.pi 33 | v = torch.atan2(y, x) / (torch.pi * 2) + 0.5 34 | uv = torch.stack([u, v], -1) 35 | return uv 36 | 37 | def forward(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B Nc"]: 38 | dirs_shape = dirs.shape[:-1] 39 | uv = self.spherical_xyz_to_uv(dirs.reshape(-1, dirs.shape[-1])) 40 | uv = 2 * uv - 1 # rescale to [-1, 1] for grid_sample 41 | uv = uv.reshape(1, -1, 1, 2) 42 | color = ( 43 | F.grid_sample( 44 | self.texture, 45 | uv, 46 | mode="bilinear", 47 | padding_mode="reflection", 48 | align_corners=False, 49 | ) 50 | .reshape(self.cfg.n_output_dims, -1) 51 | .T.reshape(*dirs_shape, self.cfg.n_output_dims) 52 | ) 53 | color = get_activation(self.cfg.color_activation)(color) 54 | return color 55 | -------------------------------------------------------------------------------- /threestudio/models/exporters/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import base, mesh_exporter 2 | -------------------------------------------------------------------------------- /threestudio/models/exporters/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import threestudio 4 | from threestudio.models.background.base import BaseBackground 5 | from threestudio.models.geometry.base import BaseImplicitGeometry 6 | from threestudio.models.materials.base import BaseMaterial 7 | from threestudio.utils.base import BaseObject 8 | from threestudio.utils.typing import * 9 | 10 | 11 | @dataclass 12 | class ExporterOutput: 13 | save_name: str 14 | save_type: str 15 | params: Dict[str, Any] 16 | 17 | 18 | class Exporter(BaseObject): 19 | @dataclass 20 | class Config(BaseObject.Config): 21 | save_video: bool = False 22 | 23 | cfg: Config 24 | 25 | def configure( 26 | self, 27 | geometry: BaseImplicitGeometry, 28 | material: BaseMaterial, 29 | background: BaseBackground, 30 | ) -> None: 31 | @dataclass 32 | class SubModules: 33 | geometry: BaseImplicitGeometry 34 | material: BaseMaterial 35 | background: BaseBackground 36 | 37 | self.sub_modules = SubModules(geometry, material, background) 38 | 39 | @property 40 | def geometry(self) -> BaseImplicitGeometry: 41 | return self.sub_modules.geometry 42 | 43 | @property 44 | def material(self) -> BaseMaterial: 45 | return self.sub_modules.material 46 | 47 | @property 48 | def background(self) -> BaseBackground: 49 | return self.sub_modules.background 50 | 51 | def __call__(self, *args, **kwargs) -> List[ExporterOutput]: 52 | raise NotImplementedError 53 | 54 | 55 | @threestudio.register("dummy-exporter") 56 | class DummyExporter(Exporter): 57 | def __call__(self, *args, **kwargs) -> List[ExporterOutput]: 58 | # DummyExporter does not export anything 59 | return [] 60 | -------------------------------------------------------------------------------- /threestudio/models/geometry/__init__.py: -------------------------------------------------------------------------------- 1 | from . import base, implicit_sdf, implicit_volume, tetrahedra_sdf_grid, volume_grid 2 | -------------------------------------------------------------------------------- /threestudio/models/guidance/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | controlnet_guidance, 3 | deep_floyd_guidance, 4 | instructpix2pix_guidance, 5 | stable_diffusion_guidance, 6 | stable_diffusion_vsd_guidance, 7 | zero123_guidance, 8 | multiview_diffusion_guidance, 9 | ) 10 | -------------------------------------------------------------------------------- /threestudio/models/materials/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | base, 3 | diffuse_with_point_light_material, 4 | hybrid_rgb_latent_material, 5 | neural_radiance_material, 6 | no_material, 7 | pbr_material, 8 | sd_latent_adapter_material, 9 | ) 10 | -------------------------------------------------------------------------------- /threestudio/models/materials/base.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.utils.base import BaseModule 10 | from threestudio.utils.typing import * 11 | 12 | 13 | class BaseMaterial(BaseModule): 14 | @dataclass 15 | class Config(BaseModule.Config): 16 | pass 17 | 18 | cfg: Config 19 | requires_normal: bool = False 20 | requires_tangent: bool = False 21 | 22 | def configure(self): 23 | pass 24 | 25 | def forward(self, *args, **kwargs) -> Float[Tensor, "*B 3"]: 26 | raise NotImplementedError 27 | 28 | def export(self, *args, **kwargs) -> Dict[str, Any]: 29 | return {} 30 | -------------------------------------------------------------------------------- /threestudio/models/materials/hybrid_rgb_latent_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import dot, get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("hybrid-rgb-latent-material") 16 | class HybridRGBLatentMaterial(BaseMaterial): 17 | @dataclass 18 | class Config(BaseMaterial.Config): 19 | n_output_dims: int = 3 20 | color_activation: str = "sigmoid" 21 | requires_normal: bool = True 22 | 23 | cfg: Config 24 | 25 | def configure(self) -> None: 26 | self.requires_normal = self.cfg.requires_normal 27 | 28 | def forward( 29 | self, features: Float[Tensor, "B ... Nf"], **kwargs 30 | ) -> Float[Tensor, "B ... Nc"]: 31 | assert ( 32 | features.shape[-1] == self.cfg.n_output_dims 33 | ), f"Expected {self.cfg.n_output_dims} output dims, only got {features.shape[-1]} dims input." 
34 | color = features 35 | color[..., :3] = get_activation(self.cfg.color_activation)(color[..., :3]) 36 | return color 37 | -------------------------------------------------------------------------------- /threestudio/models/materials/neural_radiance_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import dot, get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("neural-radiance-material") 16 | class NeuralRadianceMaterial(BaseMaterial): 17 | @dataclass 18 | class Config(BaseMaterial.Config): 19 | input_feature_dims: int = 8 20 | color_activation: str = "sigmoid" 21 | dir_encoding_config: dict = field( 22 | default_factory=lambda: {"otype": "SphericalHarmonics", "degree": 3} 23 | ) 24 | mlp_network_config: dict = field( 25 | default_factory=lambda: { 26 | "otype": "FullyFusedMLP", 27 | "activation": "ReLU", 28 | "n_neurons": 16, 29 | "n_hidden_layers": 2, 30 | } 31 | ) 32 | 33 | cfg: Config 34 | 35 | def configure(self) -> None: 36 | self.encoding = get_encoding(3, self.cfg.dir_encoding_config) 37 | self.n_input_dims = self.cfg.input_feature_dims + self.encoding.n_output_dims # type: ignore 38 | self.network = get_mlp(self.n_input_dims, 3, self.cfg.mlp_network_config) 39 | 40 | def forward( 41 | self, 42 | features: Float[Tensor, "*B Nf"], 43 | viewdirs: Float[Tensor, "*B 3"], 44 | **kwargs, 45 | ) -> Float[Tensor, "*B 3"]: 46 | # viewdirs and normals must be normalized before passing to this function 47 | viewdirs = (viewdirs + 1.0) / 2.0 # (-1, 1) => (0, 1) 48 | viewdirs_embd = self.encoding(viewdirs.view(-1, 3)) 49 | network_inp = torch.cat( 50 | [features.view(-1, features.shape[-1]), viewdirs_embd], dim=-1 51 | ) 52 | color = self.network(network_inp).view(*features.shape[:-1], 3) 53 | color = get_activation(self.cfg.color_activation)(color) 54 | return color 55 | -------------------------------------------------------------------------------- /threestudio/models/materials/no_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import dot, get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("no-material") 16 | class NoMaterial(BaseMaterial): 17 | @dataclass 18 | class Config(BaseMaterial.Config): 19 | n_output_dims: int = 3 20 | color_activation: str = "sigmoid" 21 | input_feature_dims: Optional[int] = None 22 | mlp_network_config: Optional[dict] = None 23 | 24 | cfg: Config 25 | 26 | def configure(self) -> None: 27 | self.use_network = False 28 | if ( 29 | self.cfg.input_feature_dims is not None 30 | and self.cfg.mlp_network_config is not None 31 | ): 32 | self.network = get_mlp( 33 | self.cfg.input_feature_dims, 34 | self.cfg.n_output_dims, 35 | self.cfg.mlp_network_config, 36 | ) 37 | self.use_network = True 38 | 39 | def forward( 40 | self, features: 
Float[Tensor, "B ... Nf"], **kwargs 41 | ) -> Float[Tensor, "B ... Nc"]: 42 | if not self.use_network: 43 | assert ( 44 | features.shape[-1] == self.cfg.n_output_dims 45 | ), f"Expected {self.cfg.n_output_dims} output dims, only got {features.shape[-1]} dims input." 46 | color = get_activation(self.cfg.color_activation)(features) 47 | else: 48 | color = self.network(features.view(-1, features.shape[-1])).view( 49 | *features.shape[:-1], self.cfg.n_output_dims 50 | ) 51 | color = get_activation(self.cfg.color_activation)(color) 52 | return color 53 | 54 | def export(self, features: Float[Tensor, "*N Nf"], **kwargs) -> Dict[str, Any]: 55 | color = self(features, **kwargs).clamp(0, 1) 56 | assert color.shape[-1] >= 3, "Output color must have at least 3 channels" 57 | if color.shape[-1] > 3: 58 | threestudio.warn( 59 | "Output color has >3 channels, treating the first 3 as RGB" 60 | ) 61 | return {"albedo": color[..., :3]} 62 | -------------------------------------------------------------------------------- /threestudio/models/materials/sd_latent_adapter_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.utils.typing import * 11 | 12 | 13 | @threestudio.register("sd-latent-adapter-material") 14 | class StableDiffusionLatentAdapterMaterial(BaseMaterial): 15 | @dataclass 16 | class Config(BaseMaterial.Config): 17 | pass 18 | 19 | cfg: Config 20 | 21 | def configure(self) -> None: 22 | adapter = nn.Parameter( 23 | torch.as_tensor( 24 | [ 25 | # R G B 26 | [0.298, 0.207, 0.208], # L1 27 | [0.187, 0.286, 0.173], # L2 28 | [-0.158, 0.189, 0.264], # L3 29 | [-0.184, -0.271, -0.473], # L4 30 | ] 31 | ) 32 | ) 33 | self.register_parameter("adapter", adapter) 34 | 35 | def forward( 36 | self, features: Float[Tensor, "B ... 4"], **kwargs 37 | ) -> Float[Tensor, "B ... 3"]: 38 | assert features.shape[-1] == 4 39 | color = features @ self.adapter 40 | color = (color + 1) / 2 41 | color = color.clamp(0.0, 1.0) 42 | return color 43 | -------------------------------------------------------------------------------- /threestudio/models/prompt_processors/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | base, 3 | deepfloyd_prompt_processor, 4 | dummy_prompt_processor, 5 | stable_diffusion_prompt_processor, 6 | ) 7 | -------------------------------------------------------------------------------- /threestudio/models/prompt_processors/deepfloyd_prompt_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from dataclasses import dataclass 4 | 5 | import torch 6 | import torch.nn as nn 7 | from diffusers import IFPipeline 8 | from transformers import T5EncoderModel, T5Tokenizer 9 | 10 | import threestudio 11 | from threestudio.models.prompt_processors.base import PromptProcessor, hash_prompt 12 | from threestudio.utils.misc import cleanup 13 | from threestudio.utils.typing import * 14 | 15 | 16 | @threestudio.register("deep-floyd-prompt-processor") 17 | class DeepFloydPromptProcessor(PromptProcessor): 18 | @dataclass 19 | class Config(PromptProcessor.Config): 20 | pretrained_model_name_or_path: str = "DeepFloyd/IF-I-XL-v1.0" 21 | 22 | cfg: Config 23 | 24 | ### these functions are unused, kept for debugging ### 25 | def configure_text_encoder(self) -> None: 26 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 27 | self.text_encoder = T5EncoderModel.from_pretrained( 28 | self.cfg.pretrained_model_name_or_path, 29 | subfolder="text_encoder", 30 | load_in_8bit=True, 31 | variant="8bit", 32 | device_map="auto", 33 | ) # FIXME: behavior of auto device map in multi-GPU training 34 | self.pipe = IFPipeline.from_pretrained( 35 | self.cfg.pretrained_model_name_or_path, 36 | text_encoder=self.text_encoder, # pass the previously instantiated 8bit text encoder 37 | unet=None, 38 | ) 39 | 40 | def destroy_text_encoder(self) -> None: 41 | del self.text_encoder 42 | del self.pipe 43 | cleanup() 44 | 45 | def get_text_embeddings( 46 | self, prompt: Union[str, List[str]], negative_prompt: Union[str, List[str]] 47 | ) -> Tuple[Float[Tensor, "B 77 4096"], Float[Tensor, "B 77 4096"]]: 48 | text_embeddings, uncond_text_embeddings = self.pipe.encode_prompt( 49 | prompt=prompt, negative_prompt=negative_prompt, device=self.device 50 | ) 51 | return text_embeddings, uncond_text_embeddings 52 | 53 | ### 54 | 55 | @staticmethod 56 | def spawn_func(pretrained_model_name_or_path, prompts, cache_dir): 57 | max_length = 77 58 | tokenizer = T5Tokenizer.from_pretrained( 59 | pretrained_model_name_or_path, subfolder="tokenizer" 60 | ) 61 | text_encoder = T5EncoderModel.from_pretrained( 62 | pretrained_model_name_or_path, 63 | subfolder="text_encoder", 64 | torch_dtype=torch.float16, # suppress warning 65 | load_in_8bit=True, 66 | variant="8bit", 67 | device_map="auto", 68 | ) 69 | with torch.no_grad(): 70 | text_inputs = tokenizer( 71 | prompts, 72 | padding="max_length", 73 | max_length=max_length, 74 | truncation=True, 75 | add_special_tokens=True, 76 | return_tensors="pt", 77 | ) 78 | text_input_ids = text_inputs.input_ids 79 | attention_mask = text_inputs.attention_mask 80 | text_embeddings = text_encoder( 81 | text_input_ids, 82 | attention_mask=attention_mask, 83 | ) 84 | text_embeddings = text_embeddings[0] 85 | 86 | for prompt, embedding in zip(prompts, text_embeddings): 87 | torch.save( 88 | embedding, 89 | os.path.join( 90 | cache_dir, 91 | f"{hash_prompt(pretrained_model_name_or_path, prompt)}.pt", 92 | ), 93 | ) 94 | 95 | del text_encoder 96 | -------------------------------------------------------------------------------- /threestudio/models/prompt_processors/dummy_prompt_processor.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from dataclasses import dataclass 4 | 5 | import threestudio 6 | from threestudio.models.prompt_processors.base import PromptProcessor, hash_prompt 7 | from threestudio.utils.misc import cleanup 8 | from threestudio.utils.typing import * 9 | 10 | 11 | @threestudio.register("dummy-prompt-processor") 12 | class DummyPromptProcessor(PromptProcessor): 13 | @dataclass 14 | class Config(PromptProcessor.Config): 15 | pretrained_model_name_or_path: str = "" 16 | prompt: str = "" 17 | 18 | cfg: Config 19 | -------------------------------------------------------------------------------- /threestudio/models/renderers/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | base, 3 | deferred_volume_renderer, 4 | gan_volume_renderer, 5 | nerf_volume_renderer, 6 | neus_volume_renderer, 7 | nvdiff_rasterizer, 8 | patch_renderer, 9 | ) 10 | -------------------------------------------------------------------------------- /threestudio/models/renderers/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import nerfacc 4 | import torch 5 | import torch.nn.functional as F 6 | 7 | import threestudio 8 | from threestudio.models.background.base import BaseBackground 9 | from threestudio.models.geometry.base import BaseImplicitGeometry 10 | from threestudio.models.materials.base import BaseMaterial 11 | from threestudio.utils.base import BaseModule 12 | from threestudio.utils.typing import * 13 | 14 | 15 | class Renderer(BaseModule): 16 | @dataclass 17 | class Config(BaseModule.Config): 18 | radius: float = 1.0 19 | 20 | cfg: Config 21 | 22 | def configure( 23 | self, 24 | geometry: BaseImplicitGeometry, 25 | material: BaseMaterial, 26 | background: BaseBackground, 27 | ) -> None: 28 | # keep references to submodules using namedtuple, avoid being registered as modules 29 | @dataclass 30 | class SubModules: 31 | geometry: BaseImplicitGeometry 32 | material: BaseMaterial 33 | background: BaseBackground 34 | 35 | self.sub_modules = SubModules(geometry, material, background) 36 | 37 | # set up bounding box 38 | self.bbox: Float[Tensor, "2 3"] 39 | self.register_buffer( 40 | "bbox", 41 | torch.as_tensor( 42 | [ 43 | [-self.cfg.radius, -self.cfg.radius, -self.cfg.radius], 44 | [self.cfg.radius, self.cfg.radius, self.cfg.radius], 45 | ], 46 | dtype=torch.float32, 47 | ), 48 | ) 49 | 50 | def forward(self, *args, **kwargs) -> Dict[str, Any]: 51 | raise NotImplementedError 52 | 53 | @property 54 | def geometry(self) -> BaseImplicitGeometry: 55 | return self.sub_modules.geometry 56 | 57 | @property 58 | def material(self) -> BaseMaterial: 59 | return self.sub_modules.material 60 | 61 | @property 62 | def background(self) -> BaseBackground: 63 | return self.sub_modules.background 64 | 65 | def set_geometry(self, geometry: BaseImplicitGeometry) -> None: 66 | self.sub_modules.geometry = geometry 67 | 68 | def set_material(self, material: BaseMaterial) -> None: 69 | self.sub_modules.material = material 70 | 71 | def set_background(self, background: BaseBackground) -> None: 72 | self.sub_modules.background = background 73 | 74 | 75 | class VolumeRenderer(Renderer): 76 | pass 77 | 78 | 79 | class Rasterizer(Renderer): 80 | pass 81 | -------------------------------------------------------------------------------- /threestudio/models/renderers/deferred_volume_renderer.py: 
-------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | import threestudio 7 | from threestudio.models.renderers.base import VolumeRenderer 8 | 9 | 10 | class DeferredVolumeRenderer(VolumeRenderer): 11 | pass 12 | -------------------------------------------------------------------------------- /threestudio/scripts/make_training_vid.py: -------------------------------------------------------------------------------- 1 | # make_training_vid("outputs/zero123/64_teddy_rgba.png@20230627-195615", frames_per_vid=30, fps=20, max_iters=200) 2 | import argparse 3 | import glob 4 | import os 5 | 6 | import imageio 7 | import numpy as np 8 | from PIL import Image, ImageDraw 9 | from tqdm import tqdm 10 | 11 | 12 | def draw_text_in_image(img, texts): 13 | img = Image.fromarray(img) 14 | draw = ImageDraw.Draw(img) 15 | black, white = (0, 0, 0), (255, 255, 255) 16 | for i, text in enumerate(texts): 17 | draw.text((2, (img.size[1] // len(texts)) * i + 1), f"{text}", white) 18 | draw.text((0, (img.size[1] // len(texts)) * i + 1), f"{text}", white) 19 | draw.text((2, (img.size[1] // len(texts)) * i - 1), f"{text}", white) 20 | draw.text((0, (img.size[1] // len(texts)) * i - 1), f"{text}", white) 21 | draw.text((1, (img.size[1] // len(texts)) * i), f"{text}", black) 22 | return np.asarray(img) 23 | 24 | 25 | def make_training_vid(exp, frames_per_vid=1, fps=3, max_iters=None, max_vids=None): 26 | # exp = "/admin/home-vikram/git/threestudio/outputs/zero123/64_teddy_rgba.png@20230627-195615" 27 | files = glob.glob(os.path.join(exp, "save", "*.mp4")) 28 | if os.path.join(exp, "save", "training_vid.mp4") in files: 29 | files.remove(os.path.join(exp, "save", "training_vid.mp4")) 30 | its = [int(os.path.basename(file).split("-")[0].split("it")[-1]) for file in files] 31 | it_sort = np.argsort(its) 32 | files = list(np.array(files)[it_sort]) 33 | its = list(np.array(its)[it_sort]) 34 | max_vids = max_iters // its[0] if max_iters is not None else max_vids 35 | files, its = files[:max_vids], its[:max_vids] 36 | frames, i = [], 0 37 | for it, file in tqdm(zip(its, files), total=len(files)): 38 | vid = imageio.mimread(file) 39 | for _ in range(frames_per_vid): 40 | frame = vid[i % len(vid)] 41 | frame = draw_text_in_image(frame, [str(it)]) 42 | frames.append(frame) 43 | i += 1 44 | # Save 45 | imageio.mimwrite(os.path.join(exp, "save", "training_vid.mp4"), frames, fps=fps) 46 | 47 | 48 | def join(file1, file2, name): 49 | # file1 = "/admin/home-vikram/git/threestudio/outputs/zero123/OLD_64_dragon2_rgba.png@20230629-023028/save/it200-val.mp4" 50 | # file2 = "/admin/home-vikram/git/threestudio/outputs/zero123/64_dragon2_rgba.png@20230628-152734/save/it200-val.mp4" 51 | vid1 = imageio.mimread(file1) 52 | vid2 = imageio.mimread(file2) 53 | frames = [] 54 | for f1, f2 in zip(vid1, vid2): 55 | frames.append( 56 | np.concatenate([f1[:, : f1.shape[0]], f2[:, : f2.shape[0]]], axis=1) 57 | ) 58 | imageio.mimwrite(name, frames) 59 | 60 | 61 | if __name__ == "__main__": 62 | parser = argparse.ArgumentParser() 63 | parser.add_argument("--exp", help="directory of experiment") 64 | parser.add_argument( 65 | "--frames_per_vid", type=int, default=1, help="# of frames from each val vid" 66 | ) 67 | parser.add_argument("--fps", type=int, help="fps of the output video") 68 | parser.add_argument("--max_iters", type=int, help="max # of iters to save") 69 | parser.add_argument( 70 | "--max_vids", 71 | type=int, 72 | 
help="max # of val videos to save. Will be overridden by max_iters", 73 | ) 74 | args = parser.parse_args() 75 | make_training_vid( 76 | args.exp, args.frames_per_vid, args.fps, args.max_iters, args.max_vids 77 | ) 78 | -------------------------------------------------------------------------------- /threestudio/scripts/run_imagedream.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH=$PYTHONPATH:./extern/ImageDream 2 | 3 | gpu=0 4 | method=imagedream-sd21-shading 5 | name="astronaut" 6 | prompt="an astronaut riding a horse" 7 | image_path="./extern/ImageDream/assets/astronaut.png" 8 | 9 | # for pixel [ImageDream-P] 10 | ckpt_path="./extern/ImageDream/release_models/ImageDream/sd-v2.1-base-4view-ipmv.pt" 11 | config_path="./extern/ImageDream/imagedream/configs/sd_v2_base_ipmv.yaml" 12 | python3 launch.py \ 13 | --config configs/$method.yaml \ 14 | --train \ 15 | --gpu $gpu \ 16 | name="${method}" \ 17 | tag=${name} \ 18 | system.prompt_processor.prompt="$prompt" \ 19 | system.prompt_processor.image_path="$image_path" \ 20 | system.guidance.ckpt_path="$ckpt_path" \ 21 | system.guidance.config_path="$config_path" 22 | 23 | # for local [ImageDream-G] 24 | ckpt_path="./extern/ImageDream/release_models/ImageDream/sd-v2.1-base-4view-ipmv-local.pt" 25 | config_path="./extern/ImageDream/imagedream/configs/sd_v2_base_ipmv_local.yaml" 26 | python3 launch.py \ 27 | --config configs/$method.yaml \ 28 | --train \ 29 | --gpu $gpu \ 30 | name="${method}" \ 31 | tag=${name} \ 32 | system.prompt_processor.prompt="$prompt" \ 33 | system.prompt_processor.image_path="$image_path" \ 34 | system.guidance.ckpt_path="$ckpt_path" \ 35 | system.guidance.config_path="$config_path" \ 36 | system.guidance.ip_mode="local" -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123.sh: -------------------------------------------------------------------------------- 1 | NAME="dragon2" 2 | 3 | # Phase 1 - 64x64 4 | python launch.py --config configs/zero123.yaml --train --gpu 7 data.image_path=./load/images/${NAME}_rgba.png use_timestamp=False name=${NAME} tag=Phase1 # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase1 5 | 6 | # Phase 1.5 - 512 refine 7 | python launch.py --config configs/zero123-geometry.yaml --train --gpu 4 data.image_path=./load/images/${NAME}_rgba.png system.geometry_convert_from=./outputs/${NAME}/Phase1/ckpts/last.ckpt use_timestamp=False name=${NAME} tag=Phase1p5 # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase1p5 8 | 9 | # Phase 2 - dreamfusion 10 | python launch.py --config configs/experimental/imagecondition_zero123nerf.yaml --train --gpu 5 data.image_path=./load/images/${NAME}_rgba.png system.prompt_processor.prompt="A 3D model of a friendly dragon" system.weights="/admin/home-vikram/git/threestudio/outputs/${NAME}/Phase1/ckpts/last.ckpt" name=${NAME} tag=Phase2 # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase2 11 | 12 | # Phase 2 - SDF + dreamfusion 13 | python launch.py --config configs/experimental/imagecondition_zero123nerf_refine.yaml --train --gpu 5 data.image_path=./load/images/${NAME}_rgba.png system.prompt_processor.prompt="A 3D model of a friendly dragon" 
system.geometry_convert_from="/admin/home-vikram/git/threestudio/outputs/${NAME}/Phase1/ckpts/last.ckpt" name=${NAME} tag=Phase2_refine # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase2_refine 14 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_comparison.sh: -------------------------------------------------------------------------------- 1 | # with standard zero123 2 | threestudio/scripts/run_zero123_phase.sh 6 anya_front 105000 0 3 | 4 | # with zero123XL (not released yet!) 5 | threestudio/scripts/run_zero123_phase.sh 1 anya_front XL_20230604 0 6 | threestudio/scripts/run_zero123_phase.sh 2 baby_phoenix_on_ice XL_20230604 20 7 | threestudio/scripts/run_zero123_phase.sh 3 beach_house_1 XL_20230604 50 8 | threestudio/scripts/run_zero123_phase.sh 4 bollywood_actress XL_20230604 0 9 | threestudio/scripts/run_zero123_phase.sh 5 beach_house_2 XL_20230604 30 10 | threestudio/scripts/run_zero123_phase.sh 6 hamburger XL_20230604 10 11 | threestudio/scripts/run_zero123_phase.sh 7 cactus XL_20230604 8 12 | threestudio/scripts/run_zero123_phase.sh 0 catstatue XL_20230604 50 13 | threestudio/scripts/run_zero123_phase.sh 1 church_ruins XL_20230604 0 14 | threestudio/scripts/run_zero123_phase.sh 2 firekeeper XL_20230604 10 15 | threestudio/scripts/run_zero123_phase.sh 3 futuristic_car XL_20230604 20 16 | threestudio/scripts/run_zero123_phase.sh 4 mona_lisa XL_20230604 10 17 | threestudio/scripts/run_zero123_phase.sh 5 teddy XL_20230604 20 18 | 19 | # set guidance_eval to 0, to greatly speed up training 20 | threestudio/scripts/run_zero123_phase.sh 7 anya_front XL_20230604 0 system.freq.guidance_eval=0 21 | 22 | # disable wandb for faster training (or if you don't want to use it) 23 | threestudio/scripts/run_zero123_phase.sh 7 anya_front XL_20230604 0 system.loggers.wandb.enable=false system.freq.guidance_eval=0 24 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_phase.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=$1 # e.g. 0 3 | IMAGE_PREFIX=$2 # e.g. "anya_front" 4 | ZERO123_PREFIX=$3 # e.g. "zero123-xl" 5 | ELEVATION=$4 # e.g. 0 6 | REST=${@:5:99} # e.g. "system.guidance.min_step_percent=0.1 system.guidance.max_step_percent=0.9" 7 | 8 | # change this config if you don't use wandb or want to speed up training 9 | python launch.py --config configs/zero123.yaml --train --gpu $GPU_ID system.loggers.wandb.enable=true system.loggers.wandb.project="claforte-noise_atten" \ 10 | system.loggers.wandb.name="${IMAGE_PREFIX}_zero123_${ZERO123_PREFIX}...fov20_${REST}" \ 11 | data.image_path=./load/images/${IMAGE_PREFIX}_rgba.png system.freq.guidance_eval=37 \ 12 | system.guidance.pretrained_model_name_or_path="./load/zero123/${ZERO123_PREFIX}.ckpt" \ 13 | system.guidance.cond_elevation_deg=$ELEVATION \ 14 | ${REST} 15 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_phase2.sh: -------------------------------------------------------------------------------- 1 | # Reconstruct Anya using latest Zero123XL, in <2000 steps. 
2 | python launch.py --config configs/zero123.yaml --train --gpu 0 system.loggers.wandb.enable=true system.loggers.wandb.project="voletiv-anya-new" system.loggers.wandb.name="claforte_params" data.image_path=./load/images/anya_front_rgba.png system.freq.ref_or_zero123="accumulate" system.freq.guidance_eval=13 system.guidance.pretrained_model_name_or_path="./load/zero123/zero123-xl.ckpt" 3 | 4 | # PHASE 2 5 | python launch.py --config configs/experimental/imagecondition_zero123nerf.yaml --train --gpu 0 system.prompt_processor.prompt="A DSLR 3D photo of a cute anime schoolgirl stands proudly with her arms in the air, pink hair ( unreal engine 5 trending on Artstation Ghibli 4k )" system.weights=outputs/zero123/128_anya_front_rgba.png@20230623-145711/ckpts/last.ckpt system.freq.guidance_eval=13 system.loggers.wandb.enable=true system.loggers.wandb.project="voletiv-anya-new" data.image_path=./load/images/anya_front_rgba.png system.loggers.wandb.name="anya" data.random_camera.progressive_until=500 6 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_sbatch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | files = [ 5 | "~/git/threestudio/load/images/dog1_rgba.png", 6 | "~/git/threestudio/load/images/dragon2_rgba.png", 7 | ] 8 | 9 | for file in files: 10 | name = os.path.basename(file).split("_rgba.png")[0] 11 | with open( 12 | os.path.expanduser("~/git/threestudio/threestudio/scripts/zero123_sbatch.sh"), 13 | "w", 14 | ) as f: 15 | f.write("#!/bin/bash\n") 16 | f.write(f"#SBATCH --job-name=vikky_{name}\n") 17 | f.write("#SBATCH --account=mod3d\n") 18 | f.write("#SBATCH --partition=g40\n") 19 | f.write("#SBATCH --gpus=1\n") 20 | f.write("#SBATCH --time=0-00:07:00\n") 21 | f.write("conda activate three\n") 22 | f.write("cd ~/git/threestudio/\n") 23 | f.write(f"NAME={name}\n") 24 | # Phase 1 25 | f.write( 26 | "python launch.py --config configs/zero123.yaml --train data.image_path=./load/images/${NAME}_rgba.png use_timestamp=true name=${NAME} tag=Phase1 system.loggers.wandb.enable=false system.loggers.wandb.project='zero123' system.loggers.wandb.name=${NAME}_Phase1\n" 27 | ) 28 | # # Phase 1.5 29 | # f.write( 30 | # "python launch.py --config configs/zero123-geometry.yaml --train data.image_path=./load/images/${NAME}_rgba.png system.geometry_convert_from=./outputs/${NAME}/Phase1/ckpts/last.ckpt use_timestamp=False name=${NAME} tag=Phase1p5 system.loggers.wandb.enable=true system.loggers.wandb.project='zero123' system.loggers.wandb.name=${NAME}_Phase1p5\n" 31 | # ) 32 | os.system("sbatch ~/git/threestudio/threestudio/scripts/zero123_sbatch.sh") 33 | time.sleep(1) 34 | -------------------------------------------------------------------------------- /threestudio/scripts/zero123_demo.py: -------------------------------------------------------------------------------- 1 | # 1. Generate using StableDiffusionXL https://clipdrop.co/stable-diffusion 2 | 3 | # 2. Remove background https://clipdrop.co/remove-background 4 | 5 | # 3. Resize to 512x512 https://www.iloveimg.com/resize-image 6 | 7 | # (OPTIONAL) 8 | # 4. Estimate depth and normal https://omnidata.vision/demo/ (I used Omnidata Normal (with X-TC & 3DCC), and MiDaS Depth) 9 | 10 | 11 | # (OPTIONAL) 12 | # 5. 
Convert depth image from RGB to greyscale 13 | def depth_rgb_to_grey(depth_filename): 14 | # depth_filename = "image_depth.png" 15 | import cv2 16 | import numpy as np 17 | 18 | # import shutil 19 | # shutil.copyfile(depth_filename, depth_filename.replace("_depth", "_depth_orig")) 20 | depth = cv2.imread(depth_filename) 21 | depth = cv2.cvtColor(depth, cv2.COLOR_BGR2GRAY) 22 | mask = ( 23 | cv2.resize( 24 | cv2.imread(depth_filename.replace("_depth", "_rgba"), cv2.IMREAD_UNCHANGED)[ 25 | :, :, -1 26 | ], 27 | depth.shape, 28 | ) 29 | > 0 30 | ) 31 | # depth[mask] = (depth[mask] - depth.min()) / (depth.max() - depth.min() + 1e-9) 32 | depth = (depth - depth.min()) / (depth.max() - depth.min() + 1e-9) 33 | depth[~mask] = 0 34 | depth = (depth * 255).astype(np.uint8) 35 | cv2.imwrite(depth_filename, depth) 36 | 37 | 38 | # (OPTIONAL) 39 | # 6. Mask normal 40 | def normal_mask(normal_filename): 41 | # filename = "image_normal.png" 42 | import cv2 43 | 44 | # import shutil 45 | # shutil.copyfile(normal_filename, normal_filename.replace("_normal", "_normal_orig")) 46 | normal = cv2.imread(normal_filename) 47 | mask = ( 48 | cv2.resize( 49 | cv2.imread( 50 | normal_filename.replace("_normal", "_rgba"), cv2.IMREAD_UNCHANGED 51 | )[:, :, -1], 52 | normal.shape[:2], 53 | ) 54 | > 0 55 | ) 56 | normal[~mask] = 0 57 | cv2.imwrite(normal_filename, normal) 58 | 59 | 60 | # 5. Run Zero123 61 | # python launch.py --config configs/zero123.yaml --train data.image_path=./load/images/grootplant_rgba.png 62 | -------------------------------------------------------------------------------- /threestudio/scripts/zero123_sbatch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=vikky 3 | #SBATCH --account=mod3d 4 | #SBATCH --partition=g40 5 | #SBATCH --gpus=1 6 | #SBATCH --time=0-00:07:00 7 | conda activate three 8 | cd ~/git/threestudio/ 9 | NAME="dog1" 10 | python launch.py --config configs/zero123.yaml --train data.image_path=./load/images/${NAME}_rgba.png use_timestamp=False name=${NAME} tag=Phase1 system.loggers.wandb.enable=true system.loggers.wandb.project='zero123' system.loggers.wandb.name=${NAME}_Phase1 11 | -------------------------------------------------------------------------------- /threestudio/systems/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | control4d_multiview, 3 | dreamfusion, 4 | fantasia3d, 5 | imagedreamfusion, 6 | instructnerf2nerf, 7 | latentnerf, 8 | magic3d, 9 | prolificdreamer, 10 | sjc, 11 | textmesh, 12 | zero123, 13 | imagedream, 14 | ) 15 | -------------------------------------------------------------------------------- /threestudio/systems/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import warnings 3 | from bisect import bisect_right 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch.optim import lr_scheduler 8 | 9 | import threestudio 10 | 11 | 12 | def get_scheduler(name): 13 | if hasattr(lr_scheduler, name): 14 | return getattr(lr_scheduler, name) 15 | else: 16 | raise NotImplementedError 17 | 18 | 19 | def getattr_recursive(m, attr): 20 | for name in attr.split("."): 21 | m = getattr(m, name) 22 | return m 23 | 24 | 25 | def get_parameters(model, name): 26 | module = getattr_recursive(model, name) 27 | if isinstance(module, nn.Module): 28 | return module.parameters() 29 | elif isinstance(module, nn.Parameter): 30 | return module 31 | return [] 32 | 33 | 34 | def parse_optimizer(config, model): 35 | if hasattr(config, "params"): 36 | params = [ 37 | {"params": get_parameters(model, name), "name": name, **args} 38 | for name, args in config.params.items() 39 | ] 40 | threestudio.debug(f"Specify optimizer params: {config.params}") 41 | else: 42 | params = model.parameters() 43 | if config.name in ["FusedAdam"]: 44 | import apex 45 | 46 | optim = getattr(apex.optimizers, config.name)(params, **config.args) 47 | elif config.name in ["Adan"]: 48 | from threestudio.systems import optimizers 49 | 50 | optim = getattr(optimizers, config.name)(params, **config.args) 51 | else: 52 | optim = getattr(torch.optim, config.name)(params, **config.args) 53 | return optim 54 | 55 | 56 | def parse_scheduler(config, optimizer): 57 | interval = config.get("interval", "epoch") 58 | assert interval in ["epoch", "step"] 59 | if config.name == "SequentialLR": 60 | scheduler = { 61 | "scheduler": lr_scheduler.SequentialLR( 62 | optimizer, 63 | [ 64 | parse_scheduler(conf, optimizer)["scheduler"] 65 | for conf in config.schedulers 66 | ], 67 | milestones=config.milestones, 68 | ), 69 | "interval": interval, 70 | } 71 | elif config.name == "ChainedScheduler": 72 | scheduler = { 73 | "scheduler": lr_scheduler.ChainedScheduler( 74 | [ 75 | parse_scheduler(conf, optimizer)["scheduler"] 76 | for conf in config.schedulers 77 | ] 78 | ), 79 | "interval": interval, 80 | } 81 | else: 82 | scheduler = { 83 | "scheduler": get_scheduler(config.name)(optimizer, **config.args), 84 | "interval": interval, 85 | } 86 | return scheduler 87 | -------------------------------------------------------------------------------- /threestudio/utils/GAN/distribution.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | 
self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to( 34 | device=self.parameters.device 35 | ) 36 | 37 | def sample(self): 38 | x = self.mean + self.std * torch.randn(self.mean.shape).to( 39 | device=self.parameters.device 40 | ) 41 | return x 42 | 43 | def kl(self, other=None): 44 | if self.deterministic: 45 | return torch.Tensor([0.0]) 46 | else: 47 | if other is None: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, 50 | dim=[1, 2, 3], 51 | ) 52 | else: 53 | return 0.5 * torch.sum( 54 | torch.pow(self.mean - other.mean, 2) / other.var 55 | + self.var / other.var 56 | - 1.0 57 | - self.logvar 58 | + other.logvar, 59 | dim=[1, 2, 3], 60 | ) 61 | 62 | def nll(self, sample, dims=[1, 2, 3]): 63 | if self.deterministic: 64 | return torch.Tensor([0.0]) 65 | logtwopi = np.log(2.0 * np.pi) 66 | return 0.5 * torch.sum( 67 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 68 | dim=dims, 69 | ) 70 | 71 | def mode(self): 72 | return self.mean 73 | 74 | 75 | def normal_kl(mean1, logvar1, mean2, logvar2): 76 | """ 77 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 78 | Compute the KL divergence between two gaussians. 79 | Shapes are automatically broadcasted, so batches can be compared to 80 | scalars, among other use cases. 81 | """ 82 | tensor = None 83 | for obj in (mean1, logvar1, mean2, logvar2): 84 | if isinstance(obj, torch.Tensor): 85 | tensor = obj 86 | break 87 | assert tensor is not None, "at least one argument must be a Tensor" 88 | 89 | # Force variances to be Tensors. Broadcasting helps convert scalars to 90 | # Tensors, but it does not work for torch.exp(). 
91 | logvar1, logvar2 = [ 92 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 93 | for x in (logvar1, logvar2) 94 | ] 95 | 96 | return 0.5 * ( 97 | -1.0 98 | + logvar2 99 | - logvar1 100 | + torch.exp(logvar1 - logvar2) 101 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 102 | ) 103 | -------------------------------------------------------------------------------- /threestudio/utils/GAN/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def generator_loss(discriminator, inputs, reconstructions, cond=None): 6 | if cond is None: 7 | logits_fake = discriminator(reconstructions.contiguous()) 8 | else: 9 | logits_fake = discriminator( 10 | torch.cat((reconstructions.contiguous(), cond), dim=1) 11 | ) 12 | g_loss = -torch.mean(logits_fake) 13 | return g_loss 14 | 15 | 16 | def hinge_d_loss(logits_real, logits_fake): 17 | loss_real = torch.mean(F.relu(1.0 - logits_real)) 18 | loss_fake = torch.mean(F.relu(1.0 + logits_fake)) 19 | d_loss = 0.5 * (loss_real + loss_fake) 20 | return d_loss 21 | 22 | 23 | def discriminator_loss(discriminator, inputs, reconstructions, cond=None): 24 | if cond is None: 25 | logits_real = discriminator(inputs.contiguous().detach()) 26 | logits_fake = discriminator(reconstructions.contiguous().detach()) 27 | else: 28 | logits_real = discriminator( 29 | torch.cat((inputs.contiguous().detach(), cond), dim=1) 30 | ) 31 | logits_fake = discriminator( 32 | torch.cat((reconstructions.contiguous().detach(), cond), dim=1) 33 | ) 34 | d_loss = hinge_d_loss(logits_real, logits_fake).mean() 35 | return d_loss 36 | -------------------------------------------------------------------------------- /threestudio/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import base 2 | -------------------------------------------------------------------------------- /threestudio/utils/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from threestudio.utils.config import parse_structured 7 | from threestudio.utils.misc import get_device, load_module_weights 8 | from threestudio.utils.typing import * 9 | 10 | 11 | class Configurable: 12 | @dataclass 13 | class Config: 14 | pass 15 | 16 | def __init__(self, cfg: Optional[dict] = None) -> None: 17 | super().__init__() 18 | self.cfg = parse_structured(self.Config, cfg) 19 | 20 | 21 | class Updateable: 22 | def do_update_step( 23 | self, epoch: int, global_step: int, on_load_weights: bool = False 24 | ): 25 | for attr in self.__dir__(): 26 | if attr.startswith("_"): 27 | continue 28 | try: 29 | module = getattr(self, attr) 30 | except: 31 | continue # ignore attributes like property, which can't be retrieved using getattr? 
32 | if isinstance(module, Updateable): 33 | module.do_update_step( 34 | epoch, global_step, on_load_weights=on_load_weights 35 | ) 36 | self.update_step(epoch, global_step, on_load_weights=on_load_weights) 37 | 38 | def update_step(self, epoch: int, global_step: int, on_load_weights: bool = False): 39 | # override this method to implement custom update logic 40 | # if on_load_weights is True, you should be careful doing things related to model evaluations, 41 | # as the models and tensors are not guaranteed to be on the same device 42 | pass 43 | 44 | 45 | def update_if_possible(module: Any, epoch: int, global_step: int) -> None: 46 | if isinstance(module, Updateable): 47 | module.do_update_step(epoch, global_step) 48 | 49 | 50 | class BaseObject(Updateable): 51 | @dataclass 52 | class Config: 53 | pass 54 | 55 | cfg: Config # add this to every subclass of BaseObject to enable static type checking 56 | 57 | def __init__( 58 | self, cfg: Optional[Union[dict, DictConfig]] = None, *args, **kwargs 59 | ) -> None: 60 | super().__init__() 61 | self.cfg = parse_structured(self.Config, cfg) 62 | self.device = get_device() 63 | self.configure(*args, **kwargs) 64 | 65 | def configure(self, *args, **kwargs) -> None: 66 | pass 67 | 68 | 69 | class BaseModule(nn.Module, Updateable): 70 | @dataclass 71 | class Config: 72 | weights: Optional[str] = None 73 | 74 | cfg: Config # add this to every subclass of BaseModule to enable static type checking 75 | 76 | def __init__( 77 | self, cfg: Optional[Union[dict, DictConfig]] = None, *args, **kwargs 78 | ) -> None: 79 | super().__init__() 80 | self.cfg = parse_structured(self.Config, cfg) 81 | self.device = get_device() 82 | self.configure(*args, **kwargs) 83 | if self.cfg.weights is not None: 84 | # format: path/to/weights:module_name 85 | weights_path, module_name = self.cfg.weights.split(":") 86 | state_dict, epoch, global_step = load_module_weights( 87 | weights_path, module_name=module_name, map_location="cpu" 88 | ) 89 | self.load_state_dict(state_dict) 90 | self.do_update_step( 91 | epoch, global_step, on_load_weights=True 92 | ) # restore states 93 | # dummy tensor to indicate model state 94 | self._dummy: Float[Tensor, "..."] 95 | self.register_buffer("_dummy", torch.zeros(0).float(), persistent=False) 96 | 97 | def configure(self, *args, **kwargs) -> None: 98 | pass 99 | -------------------------------------------------------------------------------- /threestudio/utils/perceptual/__init__.py: -------------------------------------------------------------------------------- 1 | from .perceptual import PerceptualLoss 2 | -------------------------------------------------------------------------------- /threestudio/utils/rasterize.py: -------------------------------------------------------------------------------- 1 | import nvdiffrast.torch as dr 2 | import torch 3 | 4 | from threestudio.utils.typing import * 5 | 6 | 7 | class NVDiffRasterizerContext: 8 | def __init__(self, context_type: str, device: torch.device) -> None: 9 | self.device = device 10 | self.ctx = self.initialize_context(context_type, device) 11 | 12 | def initialize_context( 13 | self, context_type: str, device: torch.device 14 | ) -> Union[dr.RasterizeGLContext, dr.RasterizeCudaContext]: 15 | if context_type == "gl": 16 | return dr.RasterizeGLContext(device=device) 17 | elif context_type == "cuda": 18 | return dr.RasterizeCudaContext(device=device) 19 | else: 20 | raise ValueError(f"Unknown rasterizer context type: {context_type}") 21 | 22 | def vertex_transform( 23 | self, verts: 
Float[Tensor, "Nv 3"], mvp_mtx: Float[Tensor, "B 4 4"] 24 | ) -> Float[Tensor, "B Nv 4"]: 25 | verts_homo = torch.cat( 26 | [verts, torch.ones([verts.shape[0], 1]).to(verts)], dim=-1 27 | ) 28 | return torch.matmul(verts_homo, mvp_mtx.permute(0, 2, 1)) 29 | 30 | def rasterize( 31 | self, 32 | pos: Float[Tensor, "B Nv 4"], 33 | tri: Integer[Tensor, "Nf 3"], 34 | resolution: Union[int, Tuple[int, int]], 35 | ): 36 | # rasterize in instance mode (single topology) 37 | return dr.rasterize(self.ctx, pos.float(), tri.int(), resolution, grad_db=True) 38 | 39 | def rasterize_one( 40 | self, 41 | pos: Float[Tensor, "Nv 4"], 42 | tri: Integer[Tensor, "Nf 3"], 43 | resolution: Union[int, Tuple[int, int]], 44 | ): 45 | # rasterize one single mesh under a single viewpoint 46 | rast, rast_db = self.rasterize(pos[None, ...], tri, resolution) 47 | return rast[0], rast_db[0] 48 | 49 | def antialias( 50 | self, 51 | color: Float[Tensor, "B H W C"], 52 | rast: Float[Tensor, "B H W 4"], 53 | pos: Float[Tensor, "B Nv 4"], 54 | tri: Integer[Tensor, "Nf 3"], 55 | ) -> Float[Tensor, "B H W C"]: 56 | return dr.antialias(color.float(), rast, pos.float(), tri.int()) 57 | 58 | def interpolate( 59 | self, 60 | attr: Float[Tensor, "B Nv C"], 61 | rast: Float[Tensor, "B H W 4"], 62 | tri: Integer[Tensor, "Nf 3"], 63 | rast_db=None, 64 | diff_attrs=None, 65 | ) -> Float[Tensor, "B H W C"]: 66 | return dr.interpolate( 67 | attr.float(), rast, tri.int(), rast_db=rast_db, diff_attrs=diff_attrs 68 | ) 69 | 70 | def interpolate_one( 71 | self, 72 | attr: Float[Tensor, "Nv C"], 73 | rast: Float[Tensor, "B H W 4"], 74 | tri: Integer[Tensor, "Nf 3"], 75 | rast_db=None, 76 | diff_attrs=None, 77 | ) -> Float[Tensor, "B H W C"]: 78 | return self.interpolate(attr[None, ...], rast, tri, rast_db, diff_attrs) 79 | -------------------------------------------------------------------------------- /threestudio/utils/typing.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains type annotations for the project, using 3 | 1. Python type hints (https://docs.python.org/3/library/typing.html) for Python objects 4 | 2. jaxtyping (https://github.com/google/jaxtyping/blob/main/API.md) for PyTorch tensors 5 | 6 | Two types of typing checking can be used: 7 | 1. Static type checking with mypy (install with pip and enabled as the default linter in VSCode) 8 | 2. Runtime type checking with typeguard (install with pip and triggered at runtime, mainly for tensor dtype and shape checking) 9 | """ 10 | 11 | # Basic types 12 | from typing import ( 13 | Any, 14 | Callable, 15 | Dict, 16 | Iterable, 17 | List, 18 | Literal, 19 | NamedTuple, 20 | NewType, 21 | Optional, 22 | Sized, 23 | Tuple, 24 | Type, 25 | TypeVar, 26 | Union, 27 | ) 28 | 29 | # Tensor dtype 30 | # for jaxtyping usage, see https://github.com/google/jaxtyping/blob/main/API.md 31 | from jaxtyping import Bool, Complex, Float, Inexact, Int, Integer, Num, Shaped, UInt 32 | 33 | # Config type 34 | from omegaconf import DictConfig 35 | 36 | # PyTorch Tensor type 37 | from torch import Tensor 38 | 39 | # Runtime type checking decorator 40 | from typeguard import typechecked as typechecker 41 | --------------------------------------------------------------------------------
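To close, a small illustrative example (not from the repository) of the runtime shape/dtype checking described in the typing.py docstring above, combining the jaxtyping annotations with the typeguard decorator imported there; the function name and shapes are hypothetical, and behavior assumes a jaxtyping-compatible typeguard version:

import torch
from jaxtyping import Float
from torch import Tensor
from typeguard import typechecked as typechecker


@typechecker
def composite(
    color: Float[Tensor, "B H W 3"], alpha: Float[Tensor, "B H W 1"]
) -> Float[Tensor, "B H W 3"]:
    # typeguard checks at call time that both arguments are float tensors with
    # four dims and the annotated trailing channel sizes (3 and 1 respectively).
    return color * alpha


composite(torch.rand(2, 64, 64, 3), torch.rand(2, 64, 64, 1))  # passes
# composite(torch.rand(2, 64, 64, 4), torch.rand(2, 64, 64, 1))  # raises a shape error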