├── .editorconfig ├── .gitattributes ├── .github └── workflows │ └── pre-commit.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── 2dplayground.ipynb ├── DOCUMENTATION.md ├── LICENSE ├── README.md ├── configs ├── control4d-static.yaml ├── debugging │ ├── controlnet-canny.yaml │ ├── controlnet-normal.yaml │ ├── instructpix2pix.yaml │ └── stablediffusion.yaml ├── dreamfusion-if.yaml ├── dreamfusion-sd.yaml ├── experimental │ ├── co3d-imagecondition.yaml │ ├── imagecondition.yaml │ ├── imagecondition_zero123nerf.yaml │ └── imagecondition_zero123nerf_refine.yaml ├── fantasia3d-texture.yaml ├── fantasia3d.yaml ├── gradio │ ├── dreamfusion-if.yaml │ ├── dreamfusion-sd.yaml │ ├── fantasia3d.yaml │ ├── latentnerf.yaml │ ├── sjc.yaml │ └── textmesh-if.yaml ├── imagedream-sd21-shading.yaml ├── instructnerf2nerf.yaml ├── latentnerf-refine.yaml ├── latentnerf.yaml ├── magic3d-coarse-if.yaml ├── magic3d-coarse-sd.yaml ├── magic3d-refine-sd.yaml ├── mvdream-sd21-shading.yaml ├── mvdream-sd21.yaml ├── prolificdreamer-geometry.yaml ├── prolificdreamer-patch.yaml ├── prolificdreamer-scene.yaml ├── prolificdreamer-texture.yaml ├── prolificdreamer.yaml ├── sjc.yaml ├── sketchshape-refine.yaml ├── sketchshape.yaml ├── textmesh-if.yaml ├── zero123-geometry.yaml ├── zero123.yaml └── zero123_64.yaml ├── docker ├── Dockerfile └── compose.yaml ├── docs └── installation.md ├── extern ├── ImageDream │ ├── .gitignore │ ├── LICENSE-CODE │ ├── README.md │ ├── __init__.py │ ├── assets │ │ └── astronaut.png │ ├── imagedream │ │ ├── __init__.py │ │ ├── camera_utils.py │ │ ├── configs │ │ │ ├── sd_v2_base_ipmv.yaml │ │ │ └── sd_v2_base_ipmv_local.yaml │ │ ├── ldm │ │ │ ├── __init__.py │ │ │ ├── interface.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── autoencoder.py │ │ │ │ └── diffusion │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── ddim.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── attention.py │ │ │ │ ├── diffusionmodules │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── adaptors.py │ │ │ │ │ ├── model.py │ │ │ │ │ ├── openaimodel.py │ │ │ │ │ └── util.py │ │ │ │ ├── distributions │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── distributions.py │ │ │ │ ├── ema.py │ │ │ │ └── encoders │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── modules.py │ │ │ └── util.py │ │ └── model_zoo.py │ ├── requirements.txt │ ├── scripts │ │ ├── demo.py │ │ ├── demo.sh │ │ └── gradio_app.py │ └── setup.py └── ldm_zero123 │ ├── extras.py │ ├── guidance.py │ ├── lr_scheduler.py │ ├── models │ ├── autoencoder.py │ └── diffusion │ │ ├── __init__.py │ │ ├── classifier.py │ │ ├── ddim.py │ │ ├── ddpm.py │ │ ├── plms.py │ │ └── sampling_util.py │ ├── modules │ ├── attention.py │ ├── diffusionmodules │ │ ├── __init__.py │ │ ├── model.py │ │ ├── openaimodel.py │ │ └── util.py │ ├── distributions │ │ ├── __init__.py │ │ └── distributions.py │ ├── ema.py │ ├── encoders │ │ ├── __init__.py │ │ └── modules.py │ ├── evaluate │ │ ├── adm_evaluator.py │ │ ├── evaluate_perceptualsim.py │ │ ├── frechet_video_distance.py │ │ ├── ssim.py │ │ └── torch_frechet_video_distance.py │ ├── image_degradation │ │ ├── __init__.py │ │ ├── bsrgan.py │ │ ├── bsrgan_light.py │ │ ├── utils │ │ │ └── test.png │ │ └── utils_image.py │ ├── losses │ │ ├── __init__.py │ │ ├── contperceptual.py │ │ └── vqperceptual.py │ └── x_transformer.py │ ├── thirdp │ └── psp │ │ ├── helpers.py │ │ ├── id_loss.py │ │ └── model_irse.py │ └── util.py ├── gradio_app.py ├── launch.py ├── load ├── images │ ├── anya_front.png │ ├── anya_front_depth.png │ ├── anya_front_normal.png │ ├── 
anya_front_rgba.png │ ├── baby_phoenix_on_ice.png │ ├── baby_phoenix_on_ice_depth.png │ ├── baby_phoenix_on_ice_normal.png │ ├── baby_phoenix_on_ice_rgba.png │ ├── beach_house_1.png │ ├── beach_house_1_depth.png │ ├── beach_house_1_normal.png │ ├── beach_house_1_rgba.png │ ├── beach_house_2.png │ ├── beach_house_2_depth.png │ ├── beach_house_2_normal.png │ ├── beach_house_2_rgba.png │ ├── bollywood_actress.png │ ├── bollywood_actress_depth.png │ ├── bollywood_actress_normal.png │ ├── bollywood_actress_rgba.png │ ├── cactus.png │ ├── cactus_depth.png │ ├── cactus_normal.png │ ├── cactus_rgba.png │ ├── catstatue.png │ ├── catstatue_depth.png │ ├── catstatue_normal.png │ ├── catstatue_rgba.png │ ├── church_ruins.png │ ├── church_ruins_depth.png │ ├── church_ruins_normal.png │ ├── church_ruins_rgba.png │ ├── dog1.png │ ├── dragon2_rgba.png │ ├── firekeeper.jpg │ ├── firekeeper_depth.png │ ├── firekeeper_normal.png │ ├── firekeeper_rgba.png │ ├── futuristic_car.png │ ├── futuristic_car_depth.png │ ├── futuristic_car_normal.png │ ├── futuristic_car_rgba.png │ ├── grootplant_rgba.png │ ├── hamburger.png │ ├── hamburger_depth.png │ ├── hamburger_rgba.png │ ├── mona_lisa.png │ ├── mona_lisa_depth.png │ ├── mona_lisa_normal.png │ ├── mona_lisa_rgba.png │ ├── robot_rgba.png │ ├── teddy.png │ ├── teddy_depth.png │ ├── teddy_normal.png │ ├── teddy_rgba.png │ └── thorhammer_rgba.png ├── lights │ ├── LICENSE.txt │ ├── bsdf_256_256.bin │ └── mud_road_puresky_1k.hdr ├── make_prompt_library.py ├── prompt_library.json ├── shapes │ ├── README.md │ ├── animal.obj │ ├── blub.obj │ ├── cabin.obj │ ├── env_sphere.obj │ ├── hand_prismatic.obj │ ├── human.obj │ ├── nascar.obj │ ├── potion.obj │ └── teddy.obj ├── tets │ ├── 128_tets.npz │ ├── 32_tets.npz │ ├── 64_tets.npz │ └── generate_tets.py └── zero123 │ ├── download.sh │ └── sd-objaverse-finetune-c_concat-256.yaml ├── requirements-dev.txt ├── requirements.txt ├── threestudio.ipynb └── threestudio ├── __init__.py ├── data ├── __init__.py ├── co3d.py ├── image.py ├── multiview.py ├── random_multiview.py └── uncond.py ├── models ├── __init__.py ├── background │ ├── __init__.py │ ├── base.py │ ├── neural_environment_map_background.py │ ├── solid_color_background.py │ └── textured_background.py ├── exporters │ ├── __init__.py │ ├── base.py │ └── mesh_exporter.py ├── geometry │ ├── __init__.py │ ├── base.py │ ├── implicit_sdf.py │ ├── implicit_volume.py │ ├── tetrahedra_sdf_grid.py │ └── volume_grid.py ├── guidance │ ├── __init__.py │ ├── controlnet_guidance.py │ ├── deep_floyd_guidance.py │ ├── instructpix2pix_guidance.py │ ├── multiview_diffusion_guidance.py │ ├── stable_diffusion_guidance.py │ ├── stable_diffusion_vsd_guidance.py │ └── zero123_guidance.py ├── isosurface.py ├── materials │ ├── __init__.py │ ├── base.py │ ├── diffuse_with_point_light_material.py │ ├── hybrid_rgb_latent_material.py │ ├── neural_radiance_material.py │ ├── no_material.py │ ├── pbr_material.py │ └── sd_latent_adapter_material.py ├── mesh.py ├── networks.py ├── prompt_processors │ ├── __init__.py │ ├── base.py │ ├── deepfloyd_prompt_processor.py │ ├── dummy_prompt_processor.py │ └── stable_diffusion_prompt_processor.py └── renderers │ ├── __init__.py │ ├── base.py │ ├── deferred_volume_renderer.py │ ├── gan_volume_renderer.py │ ├── nerf_volume_renderer.py │ ├── neus_volume_renderer.py │ ├── nvdiff_rasterizer.py │ └── patch_renderer.py ├── scripts ├── make_training_vid.py ├── run_imagedream.sh ├── run_zero123.sh ├── run_zero123_comparison.sh ├── run_zero123_phase.sh ├── 
run_zero123_phase2.sh ├── run_zero123_sbatch.py ├── zero123_demo.py └── zero123_sbatch.sh ├── systems ├── __init__.py ├── base.py ├── control4d_multiview.py ├── dreamfusion.py ├── fantasia3d.py ├── imagedream.py ├── imagedreamfusion.py ├── instructnerf2nerf.py ├── latentnerf.py ├── magic3d.py ├── mvdream.py ├── optimizers.py ├── prolificdreamer.py ├── sjc.py ├── textmesh.py ├── utils.py └── zero123.py └── utils ├── GAN ├── attention.py ├── discriminator.py ├── distribution.py ├── loss.py ├── mobilenet.py ├── network_util.py ├── util.py └── vae.py ├── __init__.py ├── base.py ├── callbacks.py ├── config.py ├── misc.py ├── ops.py ├── perceptual ├── __init__.py ├── perceptual.py └── utils.py ├── rasterize.py ├── saving.py └── typing.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*.py] 4 | charset = utf-8 5 | trim_trailing_whitespace = true 6 | end_of_line = lf 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 4 10 | 11 | [*.md] 12 | trim_trailing_whitespace = false 13 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.extension filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | on: [push, pull_request] 3 | 4 | concurrency: 5 | group: ${{ github.workflow }}-${{ github.ref }} 6 | cancel-in-progress: true 7 | 8 | jobs: 9 | pre-commit: 10 | runs-on: ubuntu-22.04 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Set up Python 3.8 14 | uses: actions/setup-python@v4 15 | with: 16 | python-version: '3.8' 17 | - name: Install pre-commit 18 | run: | 19 | pip install pre-commit 20 | pre-commit install 21 | - name: Run pre-commit 22 | run: pre-commit run --all-files 23 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.4.0 7 | hooks: 8 | - id: trailing-whitespace 9 | - id: check-ast 10 | - id: check-merge-conflict 11 | - id: check-yaml 12 | - id: end-of-file-fixer 13 | - id: trailing-whitespace 14 | args: [--markdown-linebreak-ext=md] 15 | 16 | - repo: https://github.com/psf/black 17 | rev: 23.3.0 18 | hooks: 19 | - id: black 20 | language_version: python3.8 21 | 22 | - repo: https://github.com/pycqa/isort 23 | rev: 5.12.0 24 | hooks: 25 | - id: isort 26 | exclude: README.md 27 | args: ["--profile", "black"] 28 | 29 | # temporarily disable static type checking 30 | # - repo: https://github.com/pre-commit/mirrors-mypy 31 | # rev: v1.2.0 32 | # hooks: 33 | # - id: mypy 34 | # args: ["--ignore-missing-imports", "--scripts-are-modules", "--pretty"] 35 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | disable=R,C 2 | 3 | [TYPECHECK] 4 | # List of members which are set dynamically and missed by pylint inference 5 | # system, and so shouldn't trigger E1101 when accessed. Python regular 6 | # expressions are accepted. 
7 | generated-members=numpy.*,torch.*,cv2.* 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ImageDream Reconstruction 2 | Peng Wang, Yichun Shi 3 | 4 | [Project Page](https://image-dream.github.io/) | [Paper](https://arxiv.org/abs/2312.02201) | [Demo]() 5 | 6 | [imagedream-threestudio-teaser](https://github.com/bytedance/ImageDream/assets/146033206/bcf67b1a-38f9-42cf-81df-b8b2f4fa007f) 7 | 8 | ## Installation 9 | 10 | This part is the same as the original [MVDream-threestudio](https://github.com/bytedance/MVDream-threestudio). Skip it if you have already installed the environment. 11 | 12 | 13 | ## Quickstart 14 | Clone the model card from the [Hugging Face ImageDream Model Page](https://huggingface.co/Peng-Wang/ImageDream/) into ```./extern/ImageDream/release_models/``` 15 | 16 | In the paper, we use the configuration with soft shading, which in most cases requires an A100 GPU to compute normals: 17 | ```sh 18 | export PYTHONPATH=$PYTHONPATH:./extern/ImageDream 19 | image_file="./extern/ImageDream/assets/astronaut.png" 20 | ckpt_file="./extern/ImageDream/release_models/ImageDream/sd-v2.1-base-4view-ipmv.pt" 21 | cfg_file="./extern/ImageDream/imagedream/configs/sd_v2_base_ipmv.yaml" 22 | method="imagedream-sd21-shading" 23 | python3 launch.py \ 24 | --config configs/$method.yaml --train --gpu 0 \ 25 | name="imagedream-sd21-shading" tag="astronaut" \ 26 | system.prompt_processor.prompt="an astronaut riding a horse" \ 27 | system.prompt_processor.image_path="${image_file}" \ 28 | system.guidance.ckpt_path="${ckpt_file}" \ 29 | system.guidance.config_path="${cfg_file}" 30 | ``` 31 | 32 | ***For the diffusion-only model, refer to the subdirectory*** ```./extern/ImageDream/``` 33 | ***Check*** ```./threestudio/scripts/run_imagedream.sh``` ***for a bash example.*** 34 | 35 | 36 | ## Credits 37 | - This code is forked from [threestudio](https://github.com/threestudio-project/threestudio) and [MVDream](https://github.com/bytedance/MVDream-threestudio) for SDS and 3D generation. 38 | 39 | ## Tips 40 | 1. Place the object in the center of the image and do not make it too large or too small. 41 | 2. If the object is cut off by the image edge, enlarge the elevation and fov ranges in the config, e.g. ```[0, 30]```; otherwise, do image outpainting and follow tip 1. 42 | 3. Check the results with the ImageDream diffusion model before running 3D generation, to save time. 43 | 44 | ## Pre-computed Results 45 | - There is some randomness in the diffusion model, and baseline results are time-consuming to reproduce.
We put our pre-computed results for reproducing Tab.1 in the paper in a [hugging face dataset card](https://huggingface.co/datasets/Peng-Wang/ImageDream) 46 | 47 | 48 | ## Citing 49 | If you find ImageDream helpful, please consider citing: 50 | 51 | ``` bibtex 52 | @article{wang2023imagedream, 53 | title={ImageDream: Image-Prompt Multi-view Diffusion for 3D Generation}, 54 | author={Wang, Peng and Shi, Yichun}, 55 | journal={arXiv preprint arXiv:2312.02201}, 56 | year={2023} 57 | } 58 | ``` 59 | -------------------------------------------------------------------------------- /configs/control4d-static.yaml: -------------------------------------------------------------------------------- 1 | name: "control4d-static" 2 | tag: "${basename:${data.dataroot}}_${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "multiview-camera-datamodule" 7 | data: 8 | train_downsample_resolution: 2 9 | eval_downsample_resolution: 2 10 | dataroot: ??? 11 | 12 | system_type: "control4d-multiview-system" 13 | system: 14 | start_editing_step: 2000 15 | 16 | geometry_type: "implicit-volume" 17 | geometry: 18 | radius: 2. 19 | n_feature_dims: 11 20 | normal_type: analytic 21 | pos_encoding_config: 22 | otype: HashGrid 23 | n_levels: 16 24 | n_features_per_level: 2 25 | log2_hashmap_size: 19 26 | base_resolution: 16 27 | per_level_scale: 1.4472692374403782 # max resolution 4096 28 | density_bias: "blob_magic3d" 29 | density_activation: softplus 30 | density_blob_scale: 10. 31 | density_blob_std: 0.5 32 | isosurface_resolution: 128 33 | isosurface_threshold: auto 34 | isosurface_coarse_to_fine: true 35 | 36 | material_type: "hybrid-rgb-latent-material" 37 | material: 38 | n_output_dims: 11 39 | requires_normal: true 40 | 41 | background_type: "solid-color-background" 42 | background: 43 | n_output_dims: 11 44 | color: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 45 | 46 | renderer_type: "gan-volume-renderer" 47 | renderer: 48 | base_renderer_type: "nerf-volume-renderer" 49 | base_renderer: 50 | radius: ${system.geometry.radius} 51 | num_samples_per_ray: 512 52 | 53 | guidance_type: "stable-diffusion-controlnet-guidance" 54 | guidance: 55 | control_type: "normal" 56 | min_step_percent: 0.05 57 | max_step_percent: 0.8 58 | condition_scale: 1.0 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 63 | 64 | loggers: 65 | wandb: 66 | enable: false 67 | project: 'threestudio' 68 | 69 | loss: 70 | lambda_sds: 0. 71 | lambda_orient: [0, 10.0, 1000., 5000.0] 72 | lambda_sparsity: 1.0 73 | lambda_opaque: 1.0 74 | lambda_l1: 10. 75 | lambda_p: 10. 76 | lambda_kl: 0.000001 77 | lambda_G: 0.01 78 | lambda_D: 1. 
79 | optimizer: 80 | name: Adam 81 | args: 82 | lr: 0.01 83 | betas: [0.9, 0.99] 84 | eps: 1.e-15 85 | params: 86 | geometry: 87 | lr: 0.01 88 | background: 89 | lr: 0.001 90 | renderer.generator: 91 | lr: 0.0001 92 | renderer.local_encoder: 93 | lr: 0.0001 94 | renderer.global_encoder: 95 | lr: 0.0001 96 | optimizer_dis: 97 | name: Adam 98 | args: 99 | lr: 0.01 100 | betas: [0.9, 0.99] 101 | eps: 1.e-15 102 | params: 103 | renderer.discriminator: 104 | lr: 0.00001 105 | 106 | trainer: 107 | max_steps: 50000 108 | log_every_n_steps: 1 109 | num_sanity_val_steps: 0 110 | val_check_interval: 200 111 | enable_progress_bar: true 112 | precision: 16-mixed 113 | 114 | checkpoint: 115 | save_last: true 116 | save_top_k: -1 117 | every_n_train_steps: ${trainer.max_steps} 118 | -------------------------------------------------------------------------------- /configs/debugging/controlnet-canny.yaml: -------------------------------------------------------------------------------- 1 | system: 2 | guidance_type: "controlnet-guidance" 3 | guidance: 4 | control_type: "canny" 5 | min_step_percent: 0.8 6 | max_step_percent: 0.98 7 | 8 | prompt_processor_type: "stable-diffusion-prompt-processor" 9 | prompt_processor: 10 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 11 | prompt: "Elon Musk, RAW photo, (high detailed skin:1.2), 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3" 12 | # negative_prompt: "(overexposed, underexposed, out of focus, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck" 13 | negative_prompt: "(overexposed, underexposed, out of focus, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4)" 14 | -------------------------------------------------------------------------------- /configs/debugging/controlnet-normal.yaml: -------------------------------------------------------------------------------- 1 | system: 2 | guidance_type: "controlnet-guidance" 3 | guidance: 4 | control_type: "normal" 5 | min_step_percent: 0.05 6 | max_step_percent: 0.8 7 | 8 | prompt_processor_type: "stable-diffusion-prompt-processor" 9 | prompt_processor: 10 | pretrained_model_name_or_path: "SG161222/Realistic_Vision_V2.0" 11 | prompt: "Elon Musk, RAW photo, (high detailed skin:1.2), 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3" 12 | negative_prompt: "(overexposed, underexposed, out of focus, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4)" 13 | -------------------------------------------------------------------------------- /configs/debugging/instructpix2pix.yaml: -------------------------------------------------------------------------------- 1 | system: 2 | guidance_type: "instructpix2pix-guidance" 3 | guidance: 4 | min_step_percent: 0.8 5 | max_step_percent: 0.98 6 | 7 | prompt_processor_type: "stable-diffusion-prompt-processor" 8 | prompt_processor: 9 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 10 | prompt: "Turn him into Elon Musk" 11 | 
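Note on the method configs that follow (e.g. `configs/dreamfusion-if.yaml` below): required fields such as `system.prompt_processor.prompt` are marked with `???` and must be supplied at launch time. As a minimal usage sketch, assuming the same `launch.py` interface shown in the README Quickstart above (the prompt string here is only a placeholder):

```sh
# Minimal sketch: fill in a required (???) field of a method config from the command line.
# Assumes the launch.py flags used in the README Quickstart; the prompt below is a placeholder.
python3 launch.py \
  --config configs/dreamfusion-if.yaml --train --gpu 0 \
  system.prompt_processor.prompt="a DSLR photo of a hamburger"
```

Other `???` fields (e.g. `dataroot` in the multiview configs, or `geometry_convert_from` in the refine-stage configs) would be overridden the same way, using their dotted config paths.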
-------------------------------------------------------------------------------- /configs/debugging/stablediffusion.yaml: -------------------------------------------------------------------------------- 1 | system: 2 | prompt_processor_type: "stable-diffusion-prompt-processor" 3 | prompt_processor: 4 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 5 | prompt: "A cute panda" 6 | front_threshold: 30. 7 | back_threshold: 30. 8 | 9 | guidance_type: "stable-diffusion-vsd-guidance" 10 | guidance: 11 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 12 | pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1" 13 | guidance_scale: 7.5 14 | min_step_percent: 0.02 15 | max_step_percent: 0.98 16 | max_step_percent_annealed: 0.5 17 | anneal_start_step: 5000 18 | -------------------------------------------------------------------------------- /configs/dreamfusion-if.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.447269237440378 # max resolution 4096 47 | start_level: 8 # resolution ~200 48 | start_step: 2000 49 | update_steps: 500 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: scale_-11_01 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: scale_-11_01 59 | 60 | renderer_type: "nerf-volume-renderer" 61 | renderer: 62 | radius: ${system.geometry.radius} 63 | num_samples_per_ray: 512 64 | 65 | prompt_processor_type: "deep-floyd-prompt-processor" 66 | prompt_processor: 67 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 68 | prompt: ??? 69 | 70 | guidance_type: "deep-floyd-guidance" 71 | guidance: 72 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 73 | guidance_scale: 20. 74 | weighting_strategy: sds 75 | min_step_percent: 0.02 76 | max_step_percent: 0.98 77 | 78 | loggers: 79 | wandb: 80 | enable: false 81 | project: 'threestudio' 82 | name: None 83 | 84 | loss: 85 | lambda_sds: 1. 86 | lambda_orient: [0, 10., 1000., 5000] 87 | lambda_sparsity: 1. 
88 | lambda_opaque: 0.0 89 | optimizer: 90 | name: Adam 91 | args: 92 | lr: 0.01 93 | betas: [0.9, 0.99] 94 | eps: 1.e-15 95 | params: 96 | geometry: 97 | lr: 0.01 98 | background: 99 | lr: 0.001 100 | 101 | trainer: 102 | max_steps: 10000 103 | log_every_n_steps: 1 104 | num_sanity_val_steps: 0 105 | val_check_interval: 200 106 | enable_progress_bar: true 107 | precision: 16-mixed 108 | 109 | checkpoint: 110 | save_last: true # save at each validation time 111 | save_top_k: -1 112 | every_n_train_steps: ${trainer.max_steps} 113 | -------------------------------------------------------------------------------- /configs/dreamfusion-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.447269237440378 # max resolution 4096 47 | start_level: 8 # resolution ~200 48 | start_step: 2000 49 | update_steps: 500 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: sigmoid 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: sigmoid 59 | 60 | renderer_type: "nerf-volume-renderer" 61 | renderer: 62 | radius: ${system.geometry.radius} 63 | num_samples_per_ray: 512 64 | 65 | prompt_processor_type: "stable-diffusion-prompt-processor" 66 | prompt_processor: 67 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 68 | prompt: ??? 69 | 70 | guidance_type: "stable-diffusion-guidance" 71 | guidance: 72 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 73 | guidance_scale: 100. 74 | weighting_strategy: sds 75 | min_step_percent: 0.02 76 | max_step_percent: 0.98 77 | 78 | loggers: 79 | wandb: 80 | enable: false 81 | project: "threestudio" 82 | name: None 83 | 84 | loss: 85 | lambda_sds: 1. 86 | lambda_orient: [0, 10., 1000., 5000] 87 | lambda_sparsity: 1. 88 | lambda_opaque: 0. 
89 | optimizer: 90 | name: Adam 91 | args: 92 | lr: 0.01 93 | betas: [0.9, 0.99] 94 | eps: 1.e-15 95 | params: 96 | geometry: 97 | lr: 0.01 98 | background: 99 | lr: 0.001 100 | 101 | trainer: 102 | max_steps: 10000 103 | log_every_n_steps: 1 104 | num_sanity_val_steps: 0 105 | val_check_interval: 200 106 | enable_progress_bar: true 107 | precision: 16-mixed 108 | 109 | checkpoint: 110 | save_last: true # save at each validation time 111 | save_top_k: -1 112 | every_n_train_steps: ${trainer.max_steps} 113 | -------------------------------------------------------------------------------- /configs/fantasia3d-texture.yaml: -------------------------------------------------------------------------------- 1 | name: "fantasia3d-texture" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [3, 3] 12 | fovy_range: [25, 45] 13 | camera_perturb: 0. 14 | center_perturb: 0. 15 | up_perturb: 0. 16 | elevation_range: [-10, 45] 17 | azimuth_range: [-180, 180] 18 | batch_uniform_azimuth: true 19 | eval_camera_distance: 3. 20 | eval_fovy_deg: 45. 21 | 22 | system_type: "fantasia3d-system" 23 | system: 24 | # do texture training 25 | texture: true 26 | geometry_convert_from: ??? 27 | geometry_convert_inherit_texture: false 28 | geometry_type: "tetrahedra-sdf-grid" 29 | geometry: 30 | radius: 1.0 # consistent with coarse 31 | isosurface_resolution: 128 32 | isosurface_deformable_grid: true 33 | pos_encoding_config: 34 | otype: HashGrid 35 | n_levels: 16 36 | n_features_per_level: 2 37 | log2_hashmap_size: 19 38 | base_resolution: 16 39 | per_level_scale: 1.4472692374403782 # max resolution 4096 40 | n_feature_dims: 8 # albedo3 + roughness1 + metallic1 + bump3 41 | fix_geometry: true 42 | 43 | material_type: "pbr-material" 44 | material: 45 | material_activation: sigmoid 46 | environment_texture: "load/lights/mud_road_puresky_1k.hdr" 47 | environment_scale: 2.0 48 | min_metallic: 0.0 49 | max_metallic: 0.9 50 | min_roughness: 0.08 51 | max_roughness: 0.9 52 | use_bump: true 53 | 54 | background_type: "solid-color-background" 55 | 56 | renderer_type: "nvdiff-rasterizer" 57 | 58 | prompt_processor_type: "stable-diffusion-prompt-processor" 59 | prompt_processor: 60 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 61 | prompt: ??? 62 | 63 | guidance_type: "stable-diffusion-guidance" 64 | guidance: 65 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 66 | guidance_scale: 100 67 | weighting_strategy: sds 68 | min_step_percent: 0.02 69 | max_step_percent: 0.50 70 | 71 | loggers: 72 | wandb: 73 | enable: false 74 | project: "threestudio" 75 | 76 | loss: 77 | lambda_sds: 1. 78 | lambda_normal_consistency: 0. 
79 | 80 | optimizer: 81 | name: AdamW 82 | args: 83 | lr: 0.01 84 | betas: [0.9, 0.99] 85 | eps: 1.e-15 86 | 87 | trainer: 88 | max_steps: 5000 89 | log_every_n_steps: 1 90 | num_sanity_val_steps: 1 91 | val_check_interval: 500 92 | enable_progress_bar: true 93 | precision: 16-mixed 94 | 95 | checkpoint: 96 | save_last: true # save at each validation time 97 | save_top_k: -1 98 | every_n_train_steps: ${trainer.max_steps} 99 | -------------------------------------------------------------------------------- /configs/fantasia3d.yaml: -------------------------------------------------------------------------------- 1 | name: "fantasia3d" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [3, 3] 12 | fovy_range: [25, 45] 13 | camera_perturb: 0. 14 | center_perturb: 0. 15 | up_perturb: 0. 16 | elevation_range: [-10, 45] 17 | azimuth_range: [-180, 180] 18 | batch_uniform_azimuth: true 19 | eval_camera_distance: 3. 20 | eval_fovy_deg: 45. 21 | 22 | system_type: "fantasia3d-system" 23 | system: 24 | latent_steps: 1000 25 | geometry_type: "implicit-sdf" 26 | geometry: 27 | radius: 1.0 28 | n_feature_dims: 0 29 | isosurface_resolution: 128 30 | isosurface_deformable_grid: true 31 | isosurface_coarse_to_fine: false 32 | 33 | # initialize SDF by optimization 34 | shape_init: sphere 35 | shape_init_params: 0.5 36 | 37 | # or you can initialize SDF using a guide mesh 38 | # shape_init: mesh:load/shapes/human.obj 39 | # shape_init_params: 0.9 40 | # shape_init_mesh_up: +y 41 | # shape_init_mesh_front: +z 42 | 43 | # an alternative initialization implementation: 44 | # you can initialize SDF to sphere/ellipsoid by adding a bias value 45 | # which leads to more smooth initialized shape 46 | # sdf_bias: sphere 47 | # sdf_bias_params: 0.5 48 | # DO NOT use the two initialization methods together 49 | 50 | material_type: "no-material" # unused 51 | material: 52 | n_output_dims: 0 53 | 54 | background_type: "solid-color-background" # unused 55 | 56 | renderer_type: "nvdiff-rasterizer" 57 | 58 | prompt_processor_type: "stable-diffusion-prompt-processor" 59 | prompt_processor: 60 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 61 | prompt: ??? 62 | 63 | guidance_type: "stable-diffusion-guidance" 64 | guidance: 65 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 66 | guidance_scale: 100. 67 | max_step_percent: 0.5 68 | weighting_strategy: fantasia3d 69 | 70 | loggers: 71 | wandb: 72 | enable: false 73 | project: 'threestudio' 74 | name: None 75 | 76 | loss: 77 | lambda_sds: 1. 78 | lambda_normal_consistency: 0. 
79 | 80 | optimizer: 81 | name: AdamW 82 | args: 83 | lr: 0.001 84 | betas: [0.9, 0.99] 85 | eps: 1.e-15 86 | 87 | trainer: 88 | max_steps: 10000 89 | log_every_n_steps: 1 90 | num_sanity_val_steps: 1 91 | val_check_interval: 500 92 | enable_progress_bar: true 93 | precision: 16-mixed 94 | 95 | checkpoint: 96 | save_last: true # save at each validation time 97 | save_top_k: -1 98 | every_n_train_steps: ${trainer.max_steps} 99 | -------------------------------------------------------------------------------- /configs/gradio/dreamfusion-if.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.381912879967776 # max resolution 2048 47 | start_level: 10 # resolution ~300 48 | start_step: 2000 49 | update_steps: 400 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: scale_-11_01 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: scale_-11_01 59 | random_aug: true 60 | 61 | renderer_type: "nerf-volume-renderer" 62 | renderer: 63 | radius: ${system.geometry.radius} 64 | num_samples_per_ray: 512 65 | 66 | prompt_processor_type: "deep-floyd-prompt-processor" 67 | prompt_processor: 68 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 69 | prompt: ??? 70 | 71 | guidance_type: "deep-floyd-guidance" 72 | guidance: 73 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 74 | guidance_scale: 20. 75 | weighting_strategy: sds 76 | min_step_percent: 0.02 77 | max_step_percent: 0.98 78 | 79 | exporter_type: "mesh-exporter" 80 | exporter: 81 | fmt: obj 82 | save_uv: false 83 | context_type: cuda 84 | 85 | loggers: 86 | wandb: 87 | enable: false 88 | project: "threestudio" 89 | name: None 90 | 91 | loss: 92 | lambda_sds: 1. 93 | lambda_orient: [0, 10., 1000., 5000] 94 | lambda_sparsity: 1. 
95 | lambda_opaque: 0.0 96 | optimizer: 97 | name: Adam 98 | args: 99 | lr: 0.01 100 | betas: [0.9, 0.99] 101 | eps: 1.e-15 102 | params: 103 | geometry: 104 | lr: 0.01 105 | background: 106 | lr: 0.001 107 | 108 | trainer: 109 | max_steps: 5000 110 | log_every_n_steps: 1 111 | num_sanity_val_steps: 0 112 | val_check_interval: 100 113 | enable_progress_bar: true 114 | precision: 16-mixed 115 | 116 | checkpoint: 117 | save_last: false 118 | save_top_k: -1 119 | every_n_train_steps: 0 # do not save checkpoints during training 120 | -------------------------------------------------------------------------------- /configs/gradio/dreamfusion-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "dreamfusion-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "dreamfusion-system" 19 | system: 20 | geometry_type: "implicit-volume" 21 | geometry: 22 | radius: 2.0 23 | normal_type: "analytic" 24 | 25 | # the density initialization proposed in the DreamFusion paper 26 | # does not work very well 27 | # density_bias: "blob_dreamfusion" 28 | # density_activation: exp 29 | # density_blob_scale: 5. 30 | # density_blob_std: 0.2 31 | 32 | # use Magic3D density initialization instead 33 | density_bias: "blob_magic3d" 34 | density_activation: softplus 35 | density_blob_scale: 10. 36 | density_blob_std: 0.5 37 | 38 | # coarse to fine hash grid encoding 39 | # to ensure smooth analytic normals 40 | pos_encoding_config: 41 | otype: ProgressiveBandHashGrid 42 | n_levels: 16 43 | n_features_per_level: 2 44 | log2_hashmap_size: 19 45 | base_resolution: 16 46 | per_level_scale: 1.381912879967776 # max resolution 2048 47 | start_level: 10 # resolution ~300 48 | start_step: 2000 49 | update_steps: 400 50 | 51 | material_type: "diffuse-with-point-light-material" 52 | material: 53 | ambient_only_steps: 2001 54 | albedo_activation: sigmoid 55 | 56 | background_type: "neural-environment-map-background" 57 | background: 58 | color_activation: sigmoid 59 | random_aug: true 60 | 61 | renderer_type: "nerf-volume-renderer" 62 | renderer: 63 | radius: ${system.geometry.radius} 64 | num_samples_per_ray: 512 65 | 66 | prompt_processor_type: "stable-diffusion-prompt-processor" 67 | prompt_processor: 68 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 69 | prompt: ??? 70 | 71 | guidance_type: "stable-diffusion-guidance" 72 | guidance: 73 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 74 | guidance_scale: 100. 75 | weighting_strategy: sds 76 | min_step_percent: 0.02 77 | max_step_percent: 0.98 78 | grad_clip: [0, 0.5, 2.0, 5000] 79 | 80 | exporter_type: "mesh-exporter" 81 | exporter: 82 | fmt: obj 83 | save_uv: false 84 | context_type: cuda 85 | 86 | loggers: 87 | wandb: 88 | enable: false 89 | project: "threestudio" 90 | name: None 91 | 92 | loss: 93 | lambda_sds: 1. 94 | lambda_orient: [0, 10., 1000., 5000] 95 | lambda_sparsity: 1. 96 | lambda_opaque: 0. 
97 | optimizer: 98 | name: Adam 99 | args: 100 | lr: 0.01 101 | betas: [0.9, 0.99] 102 | eps: 1.e-15 103 | params: 104 | geometry: 105 | lr: 0.01 106 | background: 107 | lr: 0.001 108 | 109 | trainer: 110 | max_steps: 5000 111 | log_every_n_steps: 1 112 | num_sanity_val_steps: 0 113 | val_check_interval: 100 114 | enable_progress_bar: true 115 | precision: 16-mixed 116 | 117 | checkpoint: 118 | save_last: false 119 | save_top_k: -1 120 | every_n_train_steps: 0 # do not save checkpoints during training 121 | -------------------------------------------------------------------------------- /configs/gradio/fantasia3d.yaml: -------------------------------------------------------------------------------- 1 | name: "fantasia3d" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [3, 3] 12 | fovy_range: [25, 45] 13 | camera_perturb: 0. 14 | center_perturb: 0. 15 | up_perturb: 0. 16 | elevation_range: [-10, 45] 17 | azimuth_range: [-180, 180] 18 | batch_uniform_azimuth: true 19 | eval_camera_distance: 3. 20 | eval_fovy_deg: 45. 21 | 22 | system_type: "fantasia3d-system" 23 | system: 24 | latent_steps: 1000 25 | geometry_type: "implicit-sdf" 26 | geometry: 27 | radius: 1.0 28 | n_feature_dims: 0 29 | isosurface_resolution: 128 30 | isosurface_deformable_grid: true 31 | isosurface_coarse_to_fine: false 32 | 33 | # initialize SDF by optimization 34 | shape_init: sphere 35 | shape_init_params: 0.5 36 | 37 | # or you can initialize SDF using a guide mesh 38 | # shape_init: mesh:load/shapes/human.obj 39 | # shape_init_params: 0.9 40 | # shape_init_mesh_up: +y 41 | # shape_init_mesh_front: +z 42 | 43 | # an alternative initialization implementation: 44 | # you can initialize SDF to sphere/ellipsoid by adding a bias value 45 | # which leads to more smooth initialized shape 46 | # sdf_bias: sphere 47 | # sdf_bias_params: 0.5 48 | # DO NOT use the two initialization methods together 49 | 50 | material_type: "no-material" # unused 51 | material: 52 | n_output_dims: 0 53 | 54 | background_type: "solid-color-background" # unused 55 | 56 | renderer_type: "nvdiff-rasterizer" 57 | renderer: 58 | context_type: cuda 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 63 | prompt: ??? 64 | 65 | guidance_type: "stable-diffusion-guidance" 66 | guidance: 67 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 68 | guidance_scale: 100. 69 | max_step_percent: 0.5 70 | weighting_strategy: fantasia3d 71 | 72 | exporter_type: "mesh-exporter" 73 | exporter: 74 | fmt: obj 75 | save_uv: false 76 | save_texture: false 77 | context_type: cuda 78 | 79 | loggers: 80 | wandb: 81 | enable: false 82 | project: "threestudio" 83 | name: None 84 | 85 | loss: 86 | lambda_sds: 1. 87 | lambda_normal_consistency: 0. 
88 | 89 | optimizer: 90 | name: AdamW 91 | args: 92 | lr: 0.001 93 | betas: [0.9, 0.99] 94 | eps: 1.e-15 95 | 96 | trainer: 97 | max_steps: 5000 98 | log_every_n_steps: 1 99 | num_sanity_val_steps: 1 100 | val_check_interval: 200 101 | enable_progress_bar: true 102 | precision: 16-mixed 103 | 104 | checkpoint: 105 | save_last: false 106 | save_top_k: -1 107 | every_n_train_steps: 0 # do not save checkpoints during training 108 | -------------------------------------------------------------------------------- /configs/gradio/latentnerf.yaml: -------------------------------------------------------------------------------- 1 | name: "latentnerf" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | geometry_type: "implicit-volume" 13 | geometry: 14 | n_feature_dims: 4 15 | normal_type: null 16 | 17 | density_bias: "blob_dreamfusion" 18 | density_activation: trunc_exp 19 | density_blob_scale: 5. 20 | density_blob_std: 0.2 21 | 22 | pos_encoding_config: 23 | otype: HashGrid 24 | n_levels: 16 25 | n_features_per_level: 2 26 | log2_hashmap_size: 19 27 | base_resolution: 16 28 | per_level_scale: 1.381912879967776 # max resolution 2048 29 | 30 | material_type: "no-material" 31 | material: 32 | n_output_dims: 4 33 | color_activation: none 34 | 35 | background_type: "neural-environment-map-background" 36 | background: 37 | n_output_dims: 4 38 | color_activation: none 39 | 40 | renderer_type: "nerf-volume-renderer" 41 | renderer: 42 | num_samples_per_ray: 512 43 | 44 | prompt_processor_type: "stable-diffusion-prompt-processor" 45 | prompt_processor: 46 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 47 | prompt: ??? 48 | 49 | guidance_type: "stable-diffusion-guidance" 50 | guidance: 51 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 52 | guidance_scale: 100. 53 | weighting_strategy: sds 54 | grad_clip: [0, 2.0, 8.0, 5000] 55 | 56 | exporter_type: "dummy-exporter" 57 | 58 | loggers: 59 | wandb: 60 | enable: false 61 | project: "threestudio" 62 | name: None 63 | 64 | loss: 65 | lambda_sds: 1. 
66 | lambda_sparsity: 5.e-4 67 | lambda_opaque: 0.0 68 | lambda_orient: 0.0 69 | optimizer: 70 | name: Adam 71 | args: 72 | lr: 0.01 73 | betas: [0.9, 0.99] 74 | eps: 1.e-15 75 | scheduler: 76 | name: SequentialLR 77 | interval: step 78 | warmup_steps: 100 79 | milestones: 80 | - ${system.scheduler.warmup_steps} 81 | schedulers: 82 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 83 | args: 84 | start_factor: 0.1 85 | end_factor: 1.0 86 | total_iters: ${system.scheduler.warmup_steps} 87 | - name: ExponentialLR 88 | args: 89 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 90 | 91 | trainer: 92 | max_steps: 5000 93 | log_every_n_steps: 1 94 | num_sanity_val_steps: 0 95 | val_check_interval: 200 96 | enable_progress_bar: true 97 | precision: 16-mixed 98 | 99 | checkpoint: 100 | save_last: false 101 | save_top_k: -1 102 | every_n_train_steps: 0 # do not save checkpoints during training 103 | -------------------------------------------------------------------------------- /configs/gradio/sjc.yaml: -------------------------------------------------------------------------------- 1 | name: sjc 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: random-camera-datamodule 7 | data: 8 | camera_distance_range: [1.50, 1.50] 9 | elevation_range: [-10, 45] 10 | camera_perturb: 0.0 11 | center_perturb: 0.0 12 | up_perturb: 0.0 13 | light_position_perturb: 0.0 14 | eval_elevation_deg: 20.0 15 | 16 | system_type: sjc-system 17 | system: 18 | subpixel_rendering: false 19 | 20 | geometry_type: volume-grid 21 | geometry: 22 | normal_type: null 23 | grid_size: [100, 100, 100] 24 | density_bias: -1.0 25 | n_feature_dims: 4 26 | 27 | material_type: no-material 28 | material: 29 | n_output_dims: 4 30 | color_activation: none 31 | 32 | background_type: textured-background 33 | background: 34 | n_output_dims: 4 35 | color_activation: none 36 | height: 4 37 | width: 4 38 | 39 | renderer_type: nerf-volume-renderer 40 | renderer: 41 | num_samples_per_ray: 512 42 | grid_prune: false 43 | 44 | prompt_processor_type: stable-diffusion-prompt-processor 45 | prompt_processor: 46 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 47 | prompt: ??? 48 | view_dependent_prompt_front: true 49 | 50 | guidance_type: stable-diffusion-guidance 51 | guidance: 52 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 53 | guidance_scale: 100. 54 | use_sjc: true 55 | var_red: true 56 | min_step_percent: 0.01 57 | max_step_percent: 0.97 58 | grad_clip: [0, 2.0, 8.0, 5000] 59 | 60 | exporter_type: "dummy-exporter" 61 | 62 | loggers: 63 | wandb: 64 | enable: false 65 | project: "threestudio" 66 | name: None 67 | 68 | loss: 69 | lambda_sds: 1. 
70 | center_ratio: 0.78125 # = 50 / 64 71 | lambda_depth: 0 # or try 10 72 | lambda_emptiness: [5000, 1.e+4, 2.e+5, 5001] 73 | emptiness_scale: 10 74 | 75 | optimizer: 76 | name: Adamax 77 | args: 78 | lr: 0.05 79 | params: 80 | geometry: 81 | lr: 0.05 82 | background: 83 | lr: 0.0001 # maybe 0.001/0.01 is better 84 | 85 | trainer: 86 | max_steps: 5000 87 | log_every_n_steps: 1 88 | num_sanity_val_steps: 0 89 | val_check_interval: 200 90 | enable_progress_bar: true 91 | precision: 16-mixed 92 | 93 | checkpoint: 94 | save_last: false 95 | save_top_k: -1 96 | every_n_train_steps: 0 # do not save checkpoints during training 97 | -------------------------------------------------------------------------------- /configs/gradio/textmesh-if.yaml: -------------------------------------------------------------------------------- 1 | name: "textmesh-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs-gradio" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "textmesh-system" 19 | system: 20 | geometry_type: "implicit-sdf" 21 | geometry: 22 | radius: 2.0 23 | normal_type: finite_difference 24 | # progressive eps from Neuralangelo 25 | finite_difference_normal_eps: progressive 26 | 27 | sdf_bias: sphere 28 | sdf_bias_params: 0.5 29 | 30 | # coarse to fine hash grid encoding 31 | pos_encoding_config: 32 | otype: ProgressiveBandHashGrid 33 | n_levels: 16 34 | n_features_per_level: 2 35 | log2_hashmap_size: 19 36 | base_resolution: 16 37 | per_level_scale: 1.381912879967776 # max resolution 2048 38 | start_level: 10 # resolution ~300 39 | start_step: 2000 40 | update_steps: 400 41 | 42 | material_type: "diffuse-with-point-light-material" 43 | material: 44 | ambient_only_steps: 2001 45 | albedo_activation: sigmoid 46 | 47 | background_type: "neural-environment-map-background" 48 | background: 49 | color_activation: sigmoid 50 | random_aug: true 51 | 52 | renderer_type: "neus-volume-renderer" 53 | renderer: 54 | radius: ${system.geometry.radius} 55 | num_samples_per_ray: 512 56 | cos_anneal_end_steps: ${trainer.max_steps} 57 | eval_chunk_size: 8192 58 | 59 | prompt_processor_type: "deep-floyd-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 62 | prompt: ??? 63 | 64 | guidance_type: "deep-floyd-guidance" 65 | guidance: 66 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 67 | guidance_scale: 20. 68 | weighting_strategy: sds 69 | min_step_percent: 0.02 70 | max_step_percent: 0.98 71 | 72 | exporter_type: "mesh-exporter" 73 | exporter: 74 | fmt: obj 75 | save_uv: false 76 | context_type: cuda 77 | 78 | loss: 79 | lambda_sds: 1. 80 | lambda_orient: 0.0 81 | lambda_sparsity: 0.0 82 | lambda_opaque: 0.0 83 | lambda_eikonal: 1000. 
84 | optimizer: 85 | name: Adam 86 | args: 87 | betas: [0.9, 0.99] 88 | eps: 1.e-15 89 | params: 90 | geometry.encoding: 91 | lr: 0.01 92 | geometry.sdf_network: 93 | lr: 0.001 94 | geometry.feature_network: 95 | lr: 0.001 96 | background: 97 | lr: 0.001 98 | renderer: 99 | lr: 0.001 100 | 101 | trainer: 102 | max_steps: 5000 103 | log_every_n_steps: 1 104 | num_sanity_val_steps: 0 105 | val_check_interval: 100 106 | enable_progress_bar: true 107 | precision: 16-mixed 108 | 109 | checkpoint: 110 | save_last: false 111 | save_top_k: -1 112 | every_n_train_steps: 0 # do not save checkpoints during training 113 | -------------------------------------------------------------------------------- /configs/instructnerf2nerf.yaml: -------------------------------------------------------------------------------- 1 | name: "instructnerf2nerf" 2 | tag: "${basename:${data.dataroot}}_${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "multiview-camera-datamodule" 7 | data: 8 | train_downsample_resolution: 2 9 | eval_downsample_resolution: 2 10 | dataroot: ??? 11 | 12 | system_type: "instructnerf2nerf-system" 13 | system: 14 | start_editing_step: 600 15 | per_editing_step: 10 16 | 17 | geometry_type: "implicit-volume" 18 | geometry: 19 | radius: 1. 20 | normal_type: analytic 21 | 22 | pos_encoding_config: 23 | otype: HashGrid 24 | n_levels: 16 25 | n_features_per_level: 2 26 | log2_hashmap_size: 19 27 | base_resolution: 16 28 | per_level_scale: 1.4472692374403782 # max resolution 4096 29 | 30 | density_bias: "blob_magic3d" 31 | density_activation: softplus 32 | density_blob_scale: 10. 33 | density_blob_std: 0.5 34 | 35 | 36 | material_type: "diffuse-with-point-light-material" 37 | material: 38 | ambient_only_steps: 9999999 39 | albedo_activation: sigmoid 40 | 41 | background_type: "neural-environment-map-background" 42 | background: 43 | color_activation: sigmoid 44 | random_aug: false 45 | 46 | renderer_type: "patch-renderer" 47 | renderer: 48 | base_renderer_type: "nerf-volume-renderer" 49 | base_renderer: 50 | radius: ${system.geometry.radius} 51 | num_samples_per_ray: 384 52 | patch_size: 128 53 | 54 | guidance_type: "stable-diffusion-instructpix2pix-guidance" 55 | guidance: 56 | min_step_percent: 0.02 57 | max_step_percent: 0.98 58 | 59 | prompt_processor_type: "stable-diffusion-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" 62 | prompt: "Turn him into Elon Musk" 63 | 64 | loggers: 65 | wandb: 66 | enable: false 67 | project: 'threestudio' 68 | 69 | loss: 70 | lambda_sds: 0. 71 | lambda_orient: [0, 10.0, 1000., 5000.0] 72 | lambda_sparsity: 1.0 73 | lambda_opaque: 1.0 74 | lambda_l1: 10. 75 | lambda_p: 10. 
76 | optimizer: 77 | name: Adam 78 | args: 79 | lr: 0.01 80 | betas: [0.9, 0.99] 81 | eps: 1.e-15 82 | params: 83 | geometry: 84 | lr: 0.01 85 | background: 86 | lr: 0.001 87 | 88 | trainer: 89 | max_steps: 20000 90 | log_every_n_steps: 1 91 | num_sanity_val_steps: 0 92 | val_check_interval: 600 93 | enable_progress_bar: true 94 | precision: 16-mixed 95 | 96 | checkpoint: 97 | save_last: true 98 | save_top_k: -1 99 | every_n_train_steps: ${trainer.max_steps} 100 | -------------------------------------------------------------------------------- /configs/latentnerf-refine.yaml: -------------------------------------------------------------------------------- 1 | name: "latentnerf-refine" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | refinement: true 13 | weights: ??? 14 | weights_ignore_modules: ["material", "background"] 15 | 16 | geometry_type: "implicit-volume" 17 | geometry: 18 | n_feature_dims: 4 19 | normal_type: null 20 | 21 | density_bias: "blob_dreamfusion" 22 | density_activation: trunc_exp 23 | density_blob_scale: 5. 24 | density_blob_std: 0.2 25 | 26 | material_type: "sd-latent-adapter-material" 27 | 28 | background_type: "neural-environment-map-background" 29 | 30 | renderer_type: "nerf-volume-renderer" 31 | renderer: 32 | num_samples_per_ray: 512 33 | 34 | prompt_processor_type: "stable-diffusion-prompt-processor" 35 | prompt_processor: 36 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 37 | prompt: ??? 38 | 39 | guidance_type: "stable-diffusion-guidance" 40 | guidance: 41 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 42 | guidance_scale: 100. 43 | weighting_strategy: sds 44 | 45 | loggers: 46 | wandb: 47 | enable: false 48 | project: "threestudio" 49 | name: None 50 | 51 | loss: 52 | lambda_sds: 1. 
53 | lambda_sparsity: 5.e-4 54 | lambda_opaque: 0.0 55 | lambda_orient: 0.0 56 | optimizer: 57 | name: Adam 58 | args: 59 | lr: 0.01 60 | betas: [0.9, 0.99] 61 | eps: 1.e-15 62 | scheduler: 63 | name: SequentialLR 64 | interval: step 65 | warmup_steps: 100 66 | milestones: 67 | - ${system.scheduler.warmup_steps} 68 | schedulers: 69 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 70 | args: 71 | start_factor: 0.1 72 | end_factor: 1.0 73 | total_iters: ${system.scheduler.warmup_steps} 74 | - name: ExponentialLR 75 | args: 76 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 77 | 78 | trainer: 79 | max_steps: 10000 80 | log_every_n_steps: 1 81 | num_sanity_val_steps: 1 82 | val_check_interval: 200 83 | enable_progress_bar: true 84 | precision: 16-mixed 85 | 86 | checkpoint: 87 | save_last: true # save at each validation time 88 | save_top_k: -1 89 | every_n_train_steps: ${trainer.max_steps} 90 | -------------------------------------------------------------------------------- /configs/latentnerf.yaml: -------------------------------------------------------------------------------- 1 | name: "latentnerf" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | geometry_type: "implicit-volume" 13 | geometry: 14 | n_feature_dims: 4 15 | normal_type: null 16 | 17 | density_bias: "blob_dreamfusion" 18 | density_activation: trunc_exp 19 | density_blob_scale: 5. 20 | density_blob_std: 0.2 21 | 22 | material_type: "no-material" 23 | material: 24 | n_output_dims: 4 25 | color_activation: none 26 | 27 | background_type: "neural-environment-map-background" 28 | background: 29 | n_output_dims: 4 30 | color_activation: none 31 | 32 | renderer_type: "nerf-volume-renderer" 33 | renderer: 34 | num_samples_per_ray: 512 35 | 36 | prompt_processor_type: "stable-diffusion-prompt-processor" 37 | prompt_processor: 38 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 39 | prompt: ??? 40 | 41 | guidance_type: "stable-diffusion-guidance" 42 | guidance: 43 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 44 | guidance_scale: 100. 45 | weighting_strategy: sds 46 | 47 | loggers: 48 | wandb: 49 | enable: false 50 | project: "threestudio" 51 | name: None 52 | 53 | loss: 54 | lambda_sds: 1. 
55 | lambda_sparsity: 5.e-4 56 | lambda_opaque: 0.0 57 | lambda_orient: 0.0 58 | optimizer: 59 | name: Adam 60 | args: 61 | lr: 0.01 62 | betas: [0.9, 0.99] 63 | eps: 1.e-15 64 | scheduler: 65 | name: SequentialLR 66 | interval: step 67 | warmup_steps: 100 68 | milestones: 69 | - ${system.scheduler.warmup_steps} 70 | schedulers: 71 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 72 | args: 73 | start_factor: 0.1 74 | end_factor: 1.0 75 | total_iters: ${system.scheduler.warmup_steps} 76 | - name: ExponentialLR 77 | args: 78 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 79 | 80 | trainer: 81 | max_steps: 10000 82 | log_every_n_steps: 1 83 | num_sanity_val_steps: 0 84 | val_check_interval: 200 85 | enable_progress_bar: true 86 | precision: 16-mixed 87 | 88 | checkpoint: 89 | save_last: true # save at each validation time 90 | save_top_k: -1 91 | every_n_train_steps: ${trainer.max_steps} 92 | -------------------------------------------------------------------------------- /configs/magic3d-coarse-if.yaml: -------------------------------------------------------------------------------- 1 | name: "magic3d-coarse-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | width: 64 9 | height: 64 10 | camera_distance_range: [1.5, 2.0] 11 | light_sample_strategy: "magic3d" 12 | eval_camera_distance: 2.0 13 | eval_fovy_deg: 70. 14 | 15 | system_type: "magic3d-system" 16 | system: 17 | geometry_type: "implicit-volume" 18 | geometry: 19 | radius: 2. 20 | normal_type: analytic 21 | pos_encoding_config: 22 | otype: HashGrid 23 | n_levels: 16 24 | n_features_per_level: 2 25 | log2_hashmap_size: 19 26 | base_resolution: 16 27 | per_level_scale: 1.4472692374403782 # max resolution 4096 28 | density_bias: "blob_magic3d" 29 | density_activation: softplus 30 | density_blob_scale: 10. 31 | density_blob_std: 0.5 32 | isosurface_resolution: 128 33 | isosurface_threshold: auto 34 | isosurface_coarse_to_fine: true 35 | 36 | material_type: "diffuse-with-point-light-material" 37 | material: 38 | ambient_only_steps: 2001 39 | soft_shading: true 40 | 41 | background_type: "neural-environment-map-background" 42 | 43 | renderer_type: "nerf-volume-renderer" 44 | renderer: 45 | radius: ${system.geometry.radius} 46 | num_samples_per_ray: 512 47 | 48 | prompt_processor_type: "deep-floyd-prompt-processor" 49 | prompt_processor: 50 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 51 | prompt: ??? 52 | 53 | guidance_type: "deep-floyd-guidance" 54 | guidance: 55 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 56 | weighting_strategy: uniform 57 | guidance_scale: 20. 58 | min_step_percent: 0.02 59 | max_step_percent: 0.98 60 | 61 | loggers: 62 | wandb: 63 | enable: false 64 | project: 'threestudio' 65 | name: None 66 | 67 | loss: 68 | lambda_sds: 1. 69 | lambda_orient: [0, 10., 1000., 5000] 70 | lambda_sparsity: 1. 71 | lambda_opaque: 0. 
72 | optimizer: 73 | name: Adam 74 | args: 75 | lr: 0.01 76 | betas: [0.9, 0.99] 77 | eps: 1.e-15 78 | params: 79 | geometry: 80 | lr: 0.01 81 | background: 82 | lr: 0.001 83 | 84 | trainer: 85 | max_steps: 10000 86 | log_every_n_steps: 1 87 | num_sanity_val_steps: 0 88 | val_check_interval: 200 89 | enable_progress_bar: true 90 | precision: 16-mixed 91 | 92 | checkpoint: 93 | save_last: true 94 | save_top_k: -1 95 | every_n_train_steps: ${trainer.max_steps} 96 | -------------------------------------------------------------------------------- /configs/magic3d-coarse-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "magic3d-coarse-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | width: 64 9 | height: 64 10 | camera_distance_range: [1.5, 2.0] 11 | elevation_range: [-10, 45] 12 | light_sample_strategy: "magic3d" 13 | eval_camera_distance: 2.0 14 | eval_fovy_deg: 70. 15 | 16 | system_type: "magic3d-system" 17 | system: 18 | geometry_type: "implicit-volume" 19 | geometry: 20 | radius: 2. 21 | normal_type: analytic 22 | pos_encoding_config: 23 | otype: HashGrid 24 | n_levels: 16 25 | n_features_per_level: 2 26 | log2_hashmap_size: 19 27 | base_resolution: 16 28 | per_level_scale: 1.4472692374403782 # max resolution 4096 29 | density_bias: "blob_magic3d" 30 | density_activation: softplus 31 | density_blob_scale: 10. 32 | density_blob_std: 0.5 33 | isosurface_resolution: 128 34 | isosurface_threshold: auto 35 | isosurface_coarse_to_fine: true 36 | 37 | material_type: "diffuse-with-point-light-material" 38 | material: 39 | ambient_only_steps: 2001 40 | soft_shading: true 41 | 42 | background_type: "neural-environment-map-background" 43 | 44 | renderer_type: "nerf-volume-renderer" 45 | renderer: 46 | radius: ${system.geometry.radius} 47 | num_samples_per_ray: 512 48 | 49 | prompt_processor_type: "stable-diffusion-prompt-processor" 50 | prompt_processor: 51 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 52 | prompt: ??? 53 | 54 | guidance_type: "stable-diffusion-guidance" 55 | guidance: 56 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 57 | weighting_strategy: uniform 58 | guidance_scale: 100. 59 | min_step_percent: 0.02 60 | max_step_percent: 0.98 61 | 62 | loggers: 63 | wandb: 64 | enable: false 65 | project: "threestudio" 66 | name: None 67 | 68 | loss: 69 | lambda_sds: 1. 70 | lambda_orient: [0, 10., 1000., 5000] 71 | lambda_sparsity: 1. 72 | lambda_opaque: 0. 
73 | optimizer: 74 | name: Adam 75 | args: 76 | lr: 0.01 77 | betas: [0.9, 0.99] 78 | eps: 1.e-15 79 | params: 80 | geometry: 81 | lr: 0.01 82 | background: 83 | lr: 0.001 84 | 85 | trainer: 86 | max_steps: 10000 87 | log_every_n_steps: 1 88 | num_sanity_val_steps: 0 89 | val_check_interval: 200 90 | enable_progress_bar: true 91 | precision: 16-mixed 92 | 93 | checkpoint: 94 | save_last: true 95 | save_top_k: -1 96 | every_n_train_steps: ${trainer.max_steps} 97 | -------------------------------------------------------------------------------- /configs/magic3d-refine-sd.yaml: -------------------------------------------------------------------------------- 1 | name: "magic3d-refine-sd" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | width: 512 9 | height: 512 10 | camera_distance_range: [1.5, 2.0] 11 | elevation_range: [-10, 45] 12 | light_sample_strategy: "magic3d" 13 | fovy_range: [30, 45] 14 | eval_camera_distance: 2.0 15 | eval_fovy_deg: 70. 16 | 17 | system_type: "magic3d-system" 18 | system: 19 | refinement: true 20 | geometry_convert_from: ??? 21 | geometry_convert_inherit_texture: true 22 | geometry_type: "tetrahedra-sdf-grid" 23 | geometry: 24 | radius: 2.0 # consistent with coarse 25 | isosurface_resolution: 128 26 | isosurface_deformable_grid: true 27 | pos_encoding_config: # consistent with coarse, no progressive band 28 | otype: HashGrid 29 | n_levels: 16 30 | n_features_per_level: 2 31 | log2_hashmap_size: 19 32 | base_resolution: 16 33 | per_level_scale: 1.4472692374403782 # max resolution 4096 34 | fix_geometry: false # optimize grid sdf and deformation 35 | 36 | material_type: "diffuse-with-point-light-material" 37 | material: 38 | ambient_only_steps: 0 39 | soft_shading: true 40 | 41 | background_type: "neural-environment-map-background" 42 | 43 | renderer_type: "nvdiff-rasterizer" 44 | renderer: 45 | context_type: gl 46 | 47 | prompt_processor_type: "stable-diffusion-prompt-processor" 48 | prompt_processor: 49 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 50 | prompt: ??? 51 | 52 | guidance_type: "stable-diffusion-guidance" 53 | guidance: 54 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 55 | weighting_strategy: sds 56 | guidance_scale: 100. 57 | min_step_percent: 0.02 58 | max_step_percent: 0.5 59 | 60 | loggers: 61 | wandb: 62 | enable: false 63 | project: "threestudio" 64 | name: None 65 | 66 | loss: 67 | lambda_sds: 1. 68 | lambda_normal_consistency: 10000. 
69 | 70 | optimizer: 71 | name: Adam 72 | args: 73 | lr: 0.01 74 | betas: [0.9, 0.99] 75 | eps: 1.e-15 76 | 77 | trainer: 78 | max_steps: 5000 79 | log_every_n_steps: 1 80 | num_sanity_val_steps: 1 81 | val_check_interval: 100 82 | enable_progress_bar: true 83 | precision: 16-mixed 84 | 85 | checkpoint: 86 | save_last: true 87 | save_top_k: -1 88 | every_n_train_steps: ${trainer.max_steps} 89 | -------------------------------------------------------------------------------- /configs/mvdream-sd21.yaml: -------------------------------------------------------------------------------- 1 | name: "mvdream-sd21-rescale0.5" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-multiview-camera-datamodule" 7 | data: 8 | batch_size: [8,4] # must be dividable by n_view 9 | n_view: 4 10 | # 0-4999: 64x64, >=5000: 256x256 11 | width: [64, 256] 12 | height: [64, 256] 13 | resolution_milestones: [5000] 14 | camera_distance_range: [0.8, 1.0] # relative 15 | fovy_range: [15, 60] 16 | elevation_range: [0, 30] 17 | camera_perturb: 0. 18 | center_perturb: 0. 19 | up_perturb: 0. 20 | n_val_views: 4 21 | eval_camera_distance: 3.0 22 | eval_fovy_deg: 40. 23 | 24 | system_type: "mvdream-system" 25 | system: 26 | geometry_type: "implicit-volume" 27 | geometry: 28 | radius: 1.0 29 | normal_type: null 30 | 31 | density_bias: "blob_magic3d" 32 | density_activation: softplus 33 | density_blob_scale: 10. 34 | density_blob_std: 0.5 35 | 36 | pos_encoding_config: 37 | otype: HashGrid 38 | n_levels: 16 39 | n_features_per_level: 2 40 | log2_hashmap_size: 19 41 | base_resolution: 16 42 | per_level_scale: 1.447269237440378 # max resolution 4096 43 | 44 | material_type: "no-material" 45 | material: 46 | n_output_dims: 3 47 | color_activation: sigmoid 48 | 49 | background_type: "neural-environment-map-background" 50 | background: 51 | color_activation: sigmoid 52 | random_aug: true 53 | share_aug_bg: true 54 | 55 | renderer_type: "nerf-volume-renderer" 56 | renderer: 57 | radius: ${system.geometry.radius} 58 | num_samples_per_ray: 512 59 | 60 | prompt_processor_type: "stable-diffusion-prompt-processor" 61 | prompt_processor: 62 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 63 | prompt: ??? 64 | negative_prompt: "ugly, bad anatomy, blurry, pixelated obscure, unnatural colors, poor lighting, dull, and unclear, cropped, lowres, low quality, artifacts, duplicate, morbid, mutilated, poorly drawn face, deformed, dehydrated, bad proportions" 65 | front_threshold: 30. 66 | back_threshold: 30. 67 | 68 | guidance_type: "multiview-diffusion-guidance" 69 | guidance: 70 | model_name: "sd-v2.1-base-4view" 71 | ckpt_path: null # path to a pre-downloaded checkpoint file (null for loading from URL) 72 | guidance_scale: 50.0 73 | min_step_percent: [0, 0.98, 0.02, 8000] # (start_iter, start_val, end_val, end_iter) 74 | max_step_percent: [0, 0.98, 0.50, 8000] 75 | recon_loss: true 76 | recon_std_rescale: 0.5 77 | 78 | loggers: 79 | wandb: 80 | enable: false 81 | project: "threestudio" 82 | 83 | loss: 84 | lambda_sds: 1. 85 | lambda_orient: 0. 86 | lambda_sparsity: 0. 87 | lambda_opaque: 0. 88 | lambda_z_variance: 0. 
89 | optimizer: 90 | name: AdamW 91 | args: 92 | betas: [0.9, 0.99] 93 | eps: 1.e-15 94 | params: 95 | geometry.encoding: 96 | lr: 0.01 97 | geometry.density_network: 98 | lr: 0.001 99 | geometry.feature_network: 100 | lr: 0.001 101 | background: 102 | lr: 0.001 103 | 104 | trainer: 105 | max_steps: 10000 106 | log_every_n_steps: 1 107 | num_sanity_val_steps: 0 108 | val_check_interval: 200 109 | enable_progress_bar: true 110 | precision: 16-mixed 111 | 112 | checkpoint: 113 | save_last: true 114 | save_top_k: -1 115 | every_n_train_steps: ${trainer.max_steps} 116 | -------------------------------------------------------------------------------- /configs/prolificdreamer-geometry.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer-geometry" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [1.0, 1.5] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | camera_perturb: 0. 15 | center_perturb: 0. 16 | up_perturb: 0. 17 | eval_camera_distance: 1.5 18 | eval_fovy_deg: 70. 19 | 20 | system_type: "prolificdreamer-system" 21 | system: 22 | stage: geometry 23 | geometry_convert_from: ??? 24 | geometry_type: "tetrahedra-sdf-grid" 25 | geometry: 26 | radius: 1.0 # consistent with coarse 27 | isosurface_resolution: 128 28 | isosurface_deformable_grid: true 29 | geometry_only: true 30 | 31 | material_type: "no-material" # unused 32 | material: 33 | n_output_dims: 0 34 | 35 | background_type: "solid-color-background" # unused 36 | 37 | renderer_type: "nvdiff-rasterizer" 38 | renderer: 39 | context_type: gl 40 | 41 | prompt_processor_type: "stable-diffusion-prompt-processor" 42 | prompt_processor: 43 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 44 | prompt: lib:michelangelo_dog 45 | 46 | guidance_type: "stable-diffusion-guidance" 47 | guidance: 48 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 49 | guidance_scale: 100. 50 | min_step_percent: 0.02 51 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 52 | weighting_strategy: sds 53 | 54 | loggers: 55 | wandb: 56 | enable: false 57 | project: "threestudio" 58 | name: None 59 | 60 | loss: 61 | lambda_sds: 1. 62 | lambda_normal_consistency: 10000. 63 | lambda_laplacian_smoothness: 10000. 64 | 65 | optimizer: 66 | name: Adam 67 | args: 68 | lr: 0.005 69 | betas: [0.9, 0.99] 70 | eps: 1.e-15 71 | 72 | trainer: 73 | max_steps: 15000 74 | log_every_n_steps: 1 75 | num_sanity_val_steps: 1 76 | val_check_interval: 200 77 | enable_progress_bar: true 78 | precision: 32 79 | 80 | checkpoint: 81 | save_last: true 82 | save_top_k: -1 83 | every_n_train_steps: ${trainer.max_steps} 84 | -------------------------------------------------------------------------------- /configs/prolificdreamer-patch.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer-patch" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [1.0, 1.5] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | camera_perturb: 0. 15 | center_perturb: 0. 16 | up_perturb: 0. 17 | eval_camera_distance: 1.5 18 | eval_fovy_deg: 70. 
19 | 20 | system_type: "prolificdreamer-system" 21 | system: 22 | stage: coarse 23 | geometry_type: "implicit-volume" 24 | geometry: 25 | radius: 1.0 26 | normal_type: null 27 | 28 | density_bias: "blob_magic3d" 29 | density_activation: softplus 30 | density_blob_scale: 10. 31 | density_blob_std: 0.5 32 | 33 | pos_encoding_config: 34 | otype: HashGrid 35 | n_levels: 16 36 | n_features_per_level: 2 37 | log2_hashmap_size: 19 38 | base_resolution: 16 39 | per_level_scale: 1.447269237440378 # max resolution 4096 40 | 41 | material_type: "no-material" 42 | material: 43 | n_output_dims: 3 44 | color_activation: sigmoid 45 | 46 | background_type: "neural-environment-map-background" 47 | background: 48 | color_activation: sigmoid 49 | random_aug: true 50 | 51 | renderer_type: "patch-renderer" 52 | renderer: 53 | base_renderer_type: "nerf-volume-renderer" 54 | base_renderer: 55 | radius: ${system.geometry.radius} 56 | num_samples_per_ray: 512 57 | patch_size: 128 58 | 59 | prompt_processor_type: "stable-diffusion-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 62 | prompt: ??? 63 | front_threshold: 30. 64 | back_threshold: 30. 65 | 66 | guidance_type: "stable-diffusion-vsd-guidance" 67 | guidance: 68 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 69 | pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1" 70 | guidance_scale: 7.5 71 | min_step_percent: 0.02 72 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 73 | 74 | loggers: 75 | wandb: 76 | enable: false 77 | project: "threestudio" 78 | 79 | loss: 80 | lambda_vsd: 1. 81 | lambda_lora: 1. 82 | lambda_orient: 0. 83 | lambda_sparsity: 10. 84 | lambda_opaque: [10000, 0.0, 1000.0, 10001] 85 | lambda_z_variance: 0. 86 | optimizer: 87 | name: AdamW 88 | args: 89 | betas: [0.9, 0.99] 90 | eps: 1.e-15 91 | params: 92 | geometry.encoding: 93 | lr: 0.01 94 | geometry.density_network: 95 | lr: 0.001 96 | geometry.feature_network: 97 | lr: 0.001 98 | background: 99 | lr: 0.001 100 | guidance: 101 | lr: 0.0001 102 | 103 | trainer: 104 | max_steps: 25000 105 | log_every_n_steps: 1 106 | num_sanity_val_steps: 0 107 | val_check_interval: 200 108 | enable_progress_bar: true 109 | precision: 32 110 | 111 | checkpoint: 112 | save_last: true 113 | save_top_k: -1 114 | every_n_train_steps: ${trainer.max_steps} 115 | -------------------------------------------------------------------------------- /configs/prolificdreamer-scene.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: [1, 1] 9 | # 0-4999: 64x64, >=5000: 512x512 10 | # this drastically reduces VRAM usage as empty space is pruned in early training 11 | width: [64, 512] 12 | height: [64, 512] 13 | resolution_milestones: [5000] 14 | camera_distance_range: [0.1, 2.3] 15 | fovy_range: [40, 70] 16 | elevation_range: [-10, 45] 17 | camera_perturb: 0. 18 | center_perturb: 0. 19 | up_perturb: 0. 20 | eval_camera_distance: 2.0 21 | eval_fovy_deg: 70. 22 | 23 | system_type: "prolificdreamer-system" 24 | system: 25 | stage: coarse 26 | geometry_type: "implicit-volume" 27 | geometry: 28 | radius: 5.0 29 | normal_type: null 30 | 31 | density_bias: "blob_magic3d" 32 | density_activation: softplus 33 | density_blob_scale: -10. 
34 | density_blob_std: 2.5 35 | 36 | pos_encoding_config: 37 | otype: HashGrid 38 | n_levels: 16 39 | n_features_per_level: 2 40 | log2_hashmap_size: 19 41 | base_resolution: 16 42 | per_level_scale: 1.447269237440378 # max resolution 4096 43 | 44 | material_type: "no-material" 45 | material: 46 | n_output_dims: 3 47 | color_activation: sigmoid 48 | 49 | background_type: "neural-environment-map-background" 50 | background: 51 | color_activation: sigmoid 52 | 53 | renderer_type: "nerf-volume-renderer" 54 | renderer: 55 | radius: ${system.geometry.radius} 56 | num_samples_per_ray: 512 57 | 58 | prompt_processor_type: "stable-diffusion-prompt-processor" 59 | prompt_processor: 60 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 61 | prompt: ??? 62 | 63 | guidance_type: "stable-diffusion-vsd-guidance" 64 | guidance: 65 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 66 | pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1" 67 | guidance_scale: 7.5 68 | min_step_percent: 0.02 69 | max_step_percent: [10000, 0.98, 0.5, 10001] # annealed to 0.5 after 10000 steps 70 | view_dependent_prompting: false 71 | 72 | loggers: 73 | wandb: 74 | enable: false 75 | project: "threestudio" 76 | name: None 77 | 78 | loss: 79 | lambda_vsd: 1. 80 | lambda_lora: 1. 81 | lambda_orient: 0. 82 | lambda_sparsity: 0. 83 | lambda_opaque: 0. 84 | lambda_z_variance: 1. 85 | optimizer: 86 | name: AdamW 87 | args: 88 | betas: [0.9, 0.99] 89 | eps: 1.e-15 90 | params: 91 | geometry.encoding: 92 | lr: 0.01 93 | geometry.density_network: 94 | lr: 0.001 95 | geometry.feature_network: 96 | lr: 0.001 97 | background: 98 | lr: 0.001 99 | guidance: 100 | lr: 0.0001 101 | 102 | trainer: 103 | max_steps: 25000 104 | log_every_n_steps: 1 105 | num_sanity_val_steps: 0 106 | val_check_interval: 200 107 | enable_progress_bar: true 108 | precision: 32 109 | 110 | checkpoint: 111 | save_last: true 112 | save_top_k: -1 113 | every_n_train_steps: ${trainer.max_steps} 114 | -------------------------------------------------------------------------------- /configs/prolificdreamer-texture.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer-texture" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 512 10 | height: 512 11 | camera_distance_range: [1.0, 1.5] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 45] 14 | camera_perturb: 0. 15 | center_perturb: 0. 16 | up_perturb: 0. 17 | eval_camera_distance: 1.5 18 | eval_fovy_deg: 70. 19 | 20 | system_type: "prolificdreamer-system" 21 | system: 22 | stage: texture 23 | geometry_convert_from: ??? 
24 | geometry_type: "tetrahedra-sdf-grid" 25 | geometry: 26 | radius: 1.0 # consistent with last stage 27 | isosurface_resolution: 128 # consistent with last stage 28 | isosurface_deformable_grid: true 29 | isosurface_remove_outliers: true 30 | pos_encoding_config: 31 | otype: HashGrid 32 | n_levels: 16 33 | n_features_per_level: 2 34 | log2_hashmap_size: 19 35 | base_resolution: 16 36 | per_level_scale: 1.447269237440378 # max resolution 4096 37 | fix_geometry: true 38 | 39 | material_type: "no-material" 40 | material: 41 | n_output_dims: 3 42 | color_activation: sigmoid 43 | 44 | background_type: "neural-environment-map-background" 45 | background: 46 | color_activation: sigmoid 47 | 48 | renderer_type: "nvdiff-rasterizer" 49 | renderer: 50 | context_type: gl 51 | 52 | prompt_processor_type: "stable-diffusion-prompt-processor" 53 | prompt_processor: 54 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 55 | prompt: ??? 56 | front_threshold: 30. 57 | back_threshold: 30. 58 | 59 | guidance_type: "stable-diffusion-vsd-guidance" 60 | guidance: 61 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 62 | pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1" 63 | guidance_scale: 7.5 64 | min_step_percent: 0.02 65 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 66 | 67 | loggers: 68 | wandb: 69 | enable: false 70 | project: "threestudio" 71 | name: None 72 | 73 | loss: 74 | lambda_vsd: 1. 75 | lambda_lora: 1. 76 | optimizer: 77 | name: AdamW 78 | args: 79 | betas: [0.9, 0.99] 80 | eps: 1.e-15 81 | params: 82 | geometry.encoding: 83 | lr: 0.01 84 | geometry.feature_network: 85 | lr: 0.001 86 | background: 87 | lr: 0.001 88 | guidance: 89 | lr: 0.0001 90 | 91 | trainer: 92 | max_steps: 30000 93 | log_every_n_steps: 1 94 | num_sanity_val_steps: 1 95 | val_check_interval: 200 96 | enable_progress_bar: true 97 | precision: 32 98 | 99 | checkpoint: 100 | save_last: true 101 | save_top_k: -1 102 | every_n_train_steps: ${trainer.max_steps} 103 | -------------------------------------------------------------------------------- /configs/prolificdreamer.yaml: -------------------------------------------------------------------------------- 1 | name: "prolificdreamer" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: [1, 1] 9 | # 0-4999: 64x64, >=5000: 512x512 10 | # this drastically reduces VRAM usage as empty space is pruned in early training 11 | width: [64, 512] 12 | height: [64, 512] 13 | resolution_milestones: [5000] 14 | camera_distance_range: [1.0, 1.5] 15 | fovy_range: [40, 70] 16 | elevation_range: [-10, 45] 17 | camera_perturb: 0. 18 | center_perturb: 0. 19 | up_perturb: 0. 20 | eval_camera_distance: 1.5 21 | eval_fovy_deg: 70. 22 | 23 | system_type: "prolificdreamer-system" 24 | system: 25 | stage: coarse 26 | geometry_type: "implicit-volume" 27 | geometry: 28 | radius: 1.0 29 | normal_type: null 30 | 31 | density_bias: "blob_magic3d" 32 | density_activation: softplus 33 | density_blob_scale: 10. 
34 | density_blob_std: 0.5 35 | 36 | pos_encoding_config: 37 | otype: HashGrid 38 | n_levels: 16 39 | n_features_per_level: 2 40 | log2_hashmap_size: 19 41 | base_resolution: 16 42 | per_level_scale: 1.447269237440378 # max resolution 4096 43 | 44 | material_type: "no-material" 45 | material: 46 | n_output_dims: 3 47 | color_activation: sigmoid 48 | 49 | background_type: "neural-environment-map-background" 50 | background: 51 | color_activation: sigmoid 52 | random_aug: true 53 | 54 | renderer_type: "nerf-volume-renderer" 55 | renderer: 56 | radius: ${system.geometry.radius} 57 | num_samples_per_ray: 512 58 | 59 | prompt_processor_type: "stable-diffusion-prompt-processor" 60 | prompt_processor: 61 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 62 | prompt: ??? 63 | front_threshold: 30. 64 | back_threshold: 30. 65 | 66 | guidance_type: "stable-diffusion-vsd-guidance" 67 | guidance: 68 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 69 | pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1" 70 | guidance_scale: 7.5 71 | min_step_percent: 0.02 72 | max_step_percent: [5000, 0.98, 0.5, 5001] # annealed to 0.5 after 5000 steps 73 | 74 | loggers: 75 | wandb: 76 | enable: false 77 | project: "threestudio" 78 | name: None 79 | 80 | loss: 81 | lambda_vsd: 1. 82 | lambda_lora: 1. 83 | lambda_orient: 0. 84 | lambda_sparsity: 10. 85 | lambda_opaque: [10000, 0.0, 1000.0, 10001] 86 | lambda_z_variance: 0. 87 | optimizer: 88 | name: AdamW 89 | args: 90 | betas: [0.9, 0.99] 91 | eps: 1.e-15 92 | params: 93 | geometry.encoding: 94 | lr: 0.01 95 | geometry.density_network: 96 | lr: 0.001 97 | geometry.feature_network: 98 | lr: 0.001 99 | background: 100 | lr: 0.001 101 | guidance: 102 | lr: 0.0001 103 | 104 | trainer: 105 | max_steps: 25000 106 | log_every_n_steps: 1 107 | num_sanity_val_steps: 0 108 | val_check_interval: 200 109 | enable_progress_bar: true 110 | precision: 32 111 | 112 | checkpoint: 113 | save_last: true 114 | save_top_k: -1 115 | every_n_train_steps: ${trainer.max_steps} 116 | -------------------------------------------------------------------------------- /configs/sjc.yaml: -------------------------------------------------------------------------------- 1 | name: sjc 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: outputs 4 | seed: 0 5 | 6 | data_type: random-camera-datamodule 7 | data: 8 | camera_distance_range: [1.50, 1.50] 9 | elevation_range: [-10, 45] 10 | camera_perturb: 0.0 11 | center_perturb: 0.0 12 | up_perturb: 0.0 13 | light_position_perturb: 0.0 14 | eval_elevation_deg: 20.0 15 | 16 | system_type: sjc-system 17 | system: 18 | geometry_type: volume-grid 19 | geometry: 20 | normal_type: null 21 | grid_size: [100, 100, 100] 22 | density_bias: -1.0 23 | n_feature_dims: 4 24 | 25 | material_type: no-material 26 | material: 27 | n_output_dims: 4 28 | color_activation: none 29 | 30 | background_type: textured-background 31 | background: 32 | n_output_dims: 4 33 | color_activation: none 34 | height: 4 35 | width: 4 36 | 37 | renderer_type: nerf-volume-renderer 38 | renderer: 39 | num_samples_per_ray: 512 40 | grid_prune: false 41 | 42 | prompt_processor_type: stable-diffusion-prompt-processor 43 | prompt_processor: 44 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 45 | prompt: ??? 
46 |   view_dependent_prompt_front: true 47 | 48 | guidance_type: stable-diffusion-guidance 49 | guidance: 50 |   pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 51 |   guidance_scale: 100. 52 |   use_sjc: true 53 |   var_red: true 54 |   min_step_percent: 0.01 55 |   max_step_percent: 0.97 56 | 57 | loggers: 58 |   wandb: 59 |     enable: false 60 |     project: "threestudio" 61 |     name: None 62 | 63 | loss: 64 |   lambda_sds: 1. 65 |   center_ratio: 0.78125 # = 50 / 64 66 |   lambda_depth: 0 # or try 10 67 |   lambda_emptiness: [5000, 1.e+4, 2.e+5, 5001] 68 |   emptiness_scale: 10 69 | 70 | optimizer: 71 |   name: Adamax 72 |   args: 73 |     lr: 0.05 74 |   params: 75 |     geometry: 76 |       lr: 0.05 77 |     background: 78 |       lr: 0.0001 # maybe 0.001/0.01 is better 79 | 80 | trainer: 81 |   max_steps: 10000 82 |   log_every_n_steps: 1 83 |   num_sanity_val_steps: 0 84 |   val_check_interval: 200 85 |   enable_progress_bar: true 86 |   precision: 16-mixed 87 | 88 | checkpoint: 89 |   save_last: true # save at each validation time 90 |   save_top_k: -1 91 |   every_n_train_steps: ${trainer.max_steps} 92 | -------------------------------------------------------------------------------- /configs/sketchshape-refine.yaml: -------------------------------------------------------------------------------- 1 | name: "sketchshape-refine" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 |   elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 |   refinement: true 13 |   weights: ??? 14 |   weights_ignore_modules: ["material", "background"] 15 |   guide_shape: ??? 16 | 17 |   geometry_type: "implicit-volume" 18 |   geometry: 19 |     n_feature_dims: 4 20 |     normal_type: null 21 | 22 |   material_type: "sd-latent-adapter-material" 23 | 24 |   background_type: "neural-environment-map-background" 25 | 26 |   renderer_type: "nerf-volume-renderer" 27 |   renderer: 28 |     num_samples_per_ray: 512 29 | 30 |   prompt_processor_type: "stable-diffusion-prompt-processor" 31 |   prompt_processor: 32 |     pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 33 |     prompt: ??? 34 | 35 |   guidance_type: "stable-diffusion-guidance" 36 |   guidance: 37 |     pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 38 |     guidance_scale: 100. 39 |     weighting_strategy: sds 40 | 41 |   loggers: 42 |     wandb: 43 |       enable: false 44 |       project: "threestudio" 45 |       name: None 46 | 47 |   loss: 48 |     lambda_sds: 1. 49 |     lambda_sparsity: 0.0 50 |     lambda_shape: 1.
51 | lambda_opaque: 0.0 52 | lambda_orient: 0.0 53 | optimizer: 54 | name: Adam 55 | args: 56 | lr: 0.01 57 | betas: [0.9, 0.99] 58 | eps: 1.e-15 59 | scheduler: 60 | name: SequentialLR 61 | interval: step 62 | warmup_steps: 100 63 | milestones: 64 | - ${system.scheduler.warmup_steps} 65 | schedulers: 66 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 67 | args: 68 | start_factor: 0.1 69 | end_factor: 1.0 70 | total_iters: ${system.scheduler.warmup_steps} 71 | - name: ExponentialLR 72 | args: 73 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 74 | 75 | trainer: 76 | max_steps: 10000 77 | log_every_n_steps: 1 78 | num_sanity_val_steps: 1 79 | val_check_interval: 200 80 | enable_progress_bar: true 81 | precision: 16-mixed 82 | 83 | checkpoint: 84 | save_last: true # save at each validation time 85 | save_top_k: -1 86 | every_n_train_steps: ${trainer.max_steps} 87 | -------------------------------------------------------------------------------- /configs/sketchshape.yaml: -------------------------------------------------------------------------------- 1 | name: "sketchshape" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | elevation_range: [-10, 45] 9 | 10 | system_type: "latentnerf-system" 11 | system: 12 | guide_shape: ??? 13 | 14 | geometry_type: "implicit-volume" 15 | geometry: 16 | n_feature_dims: 4 17 | normal_type: null 18 | 19 | material_type: "no-material" 20 | material: 21 | n_output_dims: 4 22 | color_activation: none 23 | 24 | background_type: "neural-environment-map-background" 25 | background: 26 | n_output_dims: 4 27 | color_activation: none 28 | 29 | renderer_type: "nerf-volume-renderer" 30 | renderer: 31 | num_samples_per_ray: 512 32 | 33 | prompt_processor_type: "stable-diffusion-prompt-processor" 34 | prompt_processor: 35 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 36 | prompt: ??? 37 | 38 | guidance_type: "stable-diffusion-guidance" 39 | guidance: 40 | pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base" 41 | guidance_scale: 100. 
42 | weighting_strategy: sds 43 | 44 | loggers: 45 | wandb: 46 | enable: false 47 | project: "threestudio" 48 | name: None 49 | 50 | loss: 51 | lambda_sds: 1.0 52 | lambda_sparsity: 0.0 53 | lambda_shape: 1.0 54 | lambda_opaque: 0.0 55 | lambda_orient: 0.0 56 | optimizer: 57 | name: Adam 58 | args: 59 | lr: 0.01 60 | betas: [0.9, 0.99] 61 | eps: 1.e-15 62 | scheduler: 63 | name: SequentialLR 64 | interval: step 65 | warmup_steps: 100 66 | milestones: 67 | - ${system.scheduler.warmup_steps} 68 | schedulers: 69 | - name: LinearLR # linear warm-up in the first system.warmup_steps steps 70 | args: 71 | start_factor: 0.1 72 | end_factor: 1.0 73 | total_iters: ${system.scheduler.warmup_steps} 74 | - name: ExponentialLR 75 | args: 76 | gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.scheduler.warmup_steps}}} 77 | 78 | trainer: 79 | max_steps: 10000 80 | log_every_n_steps: 1 81 | num_sanity_val_steps: 0 82 | val_check_interval: 200 83 | enable_progress_bar: true 84 | precision: 16-mixed 85 | 86 | checkpoint: 87 | save_last: true # save at each validation time 88 | save_top_k: -1 89 | every_n_train_steps: ${trainer.max_steps} 90 | -------------------------------------------------------------------------------- /configs/textmesh-if.yaml: -------------------------------------------------------------------------------- 1 | name: "textmesh-if" 2 | tag: "${rmspace:${system.prompt_processor.prompt},_}" 3 | exp_root_dir: "outputs" 4 | seed: 0 5 | 6 | data_type: "random-camera-datamodule" 7 | data: 8 | batch_size: 1 9 | width: 64 10 | height: 64 11 | camera_distance_range: [1.5, 2.0] 12 | fovy_range: [40, 70] 13 | elevation_range: [-10, 90] 14 | light_sample_strategy: "dreamfusion" 15 | eval_camera_distance: 2.0 16 | eval_fovy_deg: 70. 17 | 18 | system_type: "textmesh-system" 19 | system: 20 | geometry_type: "implicit-sdf" 21 | geometry: 22 | radius: 2.0 23 | normal_type: finite_difference 24 | # progressive eps from Neuralangelo 25 | finite_difference_normal_eps: progressive 26 | 27 | sdf_bias: sphere 28 | sdf_bias_params: 0.5 29 | 30 | # coarse to fine hash grid encoding 31 | pos_encoding_config: 32 | otype: ProgressiveBandHashGrid 33 | n_levels: 16 34 | n_features_per_level: 2 35 | log2_hashmap_size: 19 36 | base_resolution: 16 37 | per_level_scale: 1.381912879967776 # max resolution 2048 38 | start_level: 8 # resolution ~200 39 | start_step: 2000 40 | update_steps: 500 41 | 42 | material_type: "diffuse-with-point-light-material" 43 | material: 44 | ambient_only_steps: 2001 45 | albedo_activation: sigmoid 46 | 47 | background_type: "neural-environment-map-background" 48 | background: 49 | color_activation: sigmoid 50 | 51 | renderer_type: "neus-volume-renderer" 52 | renderer: 53 | radius: ${system.geometry.radius} 54 | num_samples_per_ray: 512 55 | cos_anneal_end_steps: ${trainer.max_steps} 56 | eval_chunk_size: 8192 57 | 58 | prompt_processor_type: "deep-floyd-prompt-processor" 59 | prompt_processor: 60 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 61 | prompt: ??? 62 | 63 | guidance_type: "deep-floyd-guidance" 64 | guidance: 65 | pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" 66 | guidance_scale: 20. 67 | weighting_strategy: sds 68 | min_step_percent: 0.02 69 | max_step_percent: 0.98 70 | 71 | loss: 72 | lambda_sds: 1. 73 | lambda_orient: 0.0 74 | lambda_sparsity: 0.0 75 | lambda_opaque: 0.0 76 | lambda_eikonal: 1000. 
77 | optimizer: 78 | name: Adam 79 | args: 80 | betas: [0.9, 0.99] 81 | eps: 1.e-15 82 | params: 83 | geometry.encoding: 84 | lr: 0.01 85 | geometry.sdf_network: 86 | lr: 0.001 87 | geometry.feature_network: 88 | lr: 0.001 89 | background: 90 | lr: 0.001 91 | renderer: 92 | lr: 0.001 93 | 94 | trainer: 95 | max_steps: 10000 96 | log_every_n_steps: 1 97 | num_sanity_val_steps: 0 98 | val_check_interval: 200 99 | enable_progress_bar: true 100 | precision: 16-mixed 101 | 102 | checkpoint: 103 | save_last: true # save at each validation time 104 | save_top_k: -1 105 | every_n_train_steps: ${trainer.max_steps} 106 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Reference: 2 | # https://github.com/cvpaperchallenge/Ascender 3 | # https://github.com/nerfstudio-project/nerfstudio 4 | 5 | FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 6 | 7 | ARG USER_NAME=dreamer 8 | ARG GROUP_NAME=dreamers 9 | ARG UID=1000 10 | ARG GID=1000 11 | 12 | # Set compute capability for nerfacc and tiny-cuda-nn 13 | # See https://developer.nvidia.com/cuda-gpus and limit number to speed-up build 14 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX" 15 | ENV TCNN_CUDA_ARCHITECTURES=90;89;86;80;75;70;61;60 16 | # Speed-up build for RTX 30xx 17 | # ENV TORCH_CUDA_ARCH_LIST="8.6" 18 | # ENV TCNN_CUDA_ARCHITECTURES=86 19 | # Speed-up build for RTX 40xx 20 | # ENV TORCH_CUDA_ARCH_LIST="8.9" 21 | # ENV TCNN_CUDA_ARCHITECTURES=89 22 | 23 | ENV CUDA_HOME=/usr/local/cuda 24 | ENV PATH=${CUDA_HOME}/bin:/home/${USER_NAME}/.local/bin:${PATH} 25 | ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 26 | ENV LIBRARY_PATH=${CUDA_HOME}/lib64/stubs:${LIBRARY_PATH} 27 | 28 | # apt install by root user 29 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 30 | build-essential \ 31 | curl \ 32 | git \ 33 | libegl1-mesa-dev \ 34 | libgl1-mesa-dev \ 35 | libgles2-mesa-dev \ 36 | libglib2.0-0 \ 37 | libsm6 \ 38 | libxext6 \ 39 | libxrender1 \ 40 | python-is-python3 \ 41 | python3.10-dev \ 42 | python3-pip \ 43 | wget \ 44 | && rm -rf /var/lib/apt/lists/* 45 | 46 | # Change user to non-root user 47 | RUN groupadd -g ${GID} ${GROUP_NAME} \ 48 | && useradd -ms /bin/sh -u ${UID} -g ${GID} ${USER_NAME} 49 | USER ${USER_NAME} 50 | 51 | RUN pip install --upgrade pip setuptools ninja 52 | RUN pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 --index-url https://download.pytorch.org/whl/cu118 53 | # Install nerfacc and tiny-cuda-nn before installing requirements.txt 54 | # because these two installations are time consuming and error prone 55 | RUN pip install git+https://github.com/KAIR-BAIR/nerfacc.git@v0.5.2 56 | RUN pip install git+https://github.com/NVlabs/tiny-cuda-nn.git#subdirectory=bindings/torch 57 | 58 | COPY requirements.txt /tmp 59 | RUN cd /tmp && pip install -r requirements.txt 60 | WORKDIR /home/${USER_NAME}/threestudio 61 | -------------------------------------------------------------------------------- /docker/compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | threestudio: 3 | build: 4 | context: ../ 5 | dockerfile: docker/Dockerfile 6 | args: 7 | # you can set environment variables, otherwise default values will be used 8 | USER_NAME: ${HOST_USER_NAME:-dreamer} # export HOST_USER_NAME=$USER 9 | GROUP_NAME: ${HOST_GROUP_NAME:-dreamers} 10 | UID: ${HOST_UID:-1000} # 
export HOST_UID=$(id -u) 11 |         GID: ${HOST_GID:-1000} # export HOST_GID=$(id -g) 12 |     shm_size: '4gb' 13 |     environment: 14 |       NVIDIA_DISABLE_REQUIRE: 1 # avoid wrong `nvidia-container-cli: requirement error` 15 |     tty: true 16 |     volumes: 17 |       - ../:/home/${HOST_USER_NAME:-dreamer}/threestudio 18 |     deploy: 19 |       resources: 20 |         reservations: 21 |           devices: 22 |             - driver: nvidia 23 |               capabilities: [gpu] 24 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Prerequisite 4 | 5 | - NVIDIA GPU with at least 6GB VRAM. The more memory you have, the more methods you can try and the higher the resolutions you can use. 6 | - [NVIDIA Driver](https://www.nvidia.com/Download/index.aspx) whose version is higher than the [Minimum Required Driver Version](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html) of the CUDA Toolkit you want to use. 7 | 8 | ## Install CUDA Toolkit 9 | 10 | You can skip this step if you have already installed a sufficiently new version or if you use Docker. 11 | 12 | Install [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive). 13 | 14 | - Example for Ubuntu 22.04: 15 |   - Run [command for CUDA 11.8 Ubuntu 22.04](https://developer.nvidia.com/cuda-11-8-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=deb_local) 16 | - Example for Ubuntu on WSL2: 17 |   - `sudo apt-key del 7fa2af80` 18 |   - Run [command for CUDA 11.8 WSL-Ubuntu](https://developer.nvidia.com/cuda-11-8-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=WSL-Ubuntu&target_version=2.0&target_type=deb_local) 19 | 20 | ## Install threestudio via Docker 21 | 22 | 1. [Install Docker Engine](https://docs.docker.com/engine/install/). 23 |    This document assumes you [install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/). 24 | 2. [Create `docker` group](https://docs.docker.com/engine/install/linux-postinstall/). 25 |    Otherwise, you need to type `sudo docker` instead of `docker`. 26 | 3. [Install NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#setting-up-nvidia-container-toolkit). 27 | 4. If you use WSL2, [enable systemd](https://learn.microsoft.com/en-us/windows/wsl/wsl-config#systemd-support). 28 | 5. Edit the [Dockerfile](../docker/Dockerfile) for your GPU to speed up the build. 29 |    The default Dockerfile takes into account many types of GPUs. 30 | 6. Run Docker via `docker compose`. 31 | 32 | ```bash 33 | cd docker/ 34 | docker compose build # build Docker image 35 | docker compose up -d # create and start a container in background 36 | docker compose exec threestudio bash # run bash in the container 37 | 38 | # Enjoy threestudio! 39 | 40 | exit # or Ctrl+D 41 | docker compose stop # stop the container 42 | docker compose start # start the container 43 | docker compose down # stop and remove the container 44 | ``` 45 | 46 | Note: The current Dockerfile will cause errors when using the OpenGL-based rasterizer of nvdiffrast. 47 | You can use the CUDA-based rasterizer instead by adding command-line overrides (see the example below) or editing the configs. 48 | 49 | - `system.renderer.context_type=cuda` for training 50 | - `system.exporter.context_type=cuda` for exporting meshes 51 | 52 | [This comment by the nvdiffrast author](https://github.com/NVlabs/nvdiffrast/issues/94#issuecomment-1288566038) could be a guide to resolving this limitation.
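For reference, a complete training command with this override might look like the following. This is a hedged sketch rather than a verbatim recipe: the `launch.py` flags (`--config`, `--train`, `--gpu`) and the dotted-key override syntax follow threestudio's usual command-line interface, which is not reproduced in this document, and the config path, checkpoint path, and prompt are placeholders.

```bash
# Hedged example: train a config that uses nvdiff-rasterizer (e.g.
# magic3d-refine-sd.yaml) while forcing the CUDA context instead of OpenGL.
# Flags and values are illustrative, not taken verbatim from this document.
python launch.py --config configs/magic3d-refine-sd.yaml --train --gpu 0 \
    system.geometry_convert_from=path/to/coarse/stage/checkpoint.ckpt \
    system.prompt_processor.prompt="a delicious hamburger" \
    system.renderer.context_type=cuda
```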
53 | -------------------------------------------------------------------------------- /extern/ImageDream/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | */__pycache__/ 6 | 7 | # dataset-related, pre-trained models, 8 | vae_models/vqgan 9 | vae_models/*.gz 10 | vae_models/*.pt 11 | vae_models/*vqgan 12 | *.pt 13 | *.pth 14 | 15 | # log files 16 | log/*.log 17 | out* 18 | test_results 19 | err* 20 | 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | pip-wheel-metadata/ 40 | share/python-wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .nox/ 60 | .coverage 61 | .coverage.* 62 | .cache 63 | nosetests.xml 64 | coverage.xml 65 | *.cover 66 | *.py,cover 67 | .hypothesis/ 68 | .pytest_cache/ 69 | 70 | # Translations 71 | *.mo 72 | *.pot 73 | 74 | # Django stuff: 75 | *.log 76 | local_settings.py 77 | db.sqlite3 78 | db.sqlite3-journal 79 | 80 | # Flask stuff: 81 | instance/ 82 | .webassets-cache 83 | 84 | # Scrapy stuff: 85 | .scrapy 86 | 87 | # Sphinx documentation 88 | docs/_build/ 89 | 90 | # PyBuilder 91 | target/ 92 | 93 | # Jupyter Notebook 94 | .ipynb_checkpoints 95 | 96 | # IPython 97 | profile_default/ 98 | ipython_config.py 99 | 100 | # pyenv 101 | .python-version 102 | 103 | # pipenv 104 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 105 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 106 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 107 | # install all needed dependencies. 108 | #Pipfile.lock 109 | 110 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 111 | __pypackages__/ 112 | 113 | # Celery stuff 114 | celerybeat-schedule 115 | celerybeat.pid 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # Environments 121 | .env 122 | .venv 123 | env/ 124 | venv/ 125 | ENV/ 126 | env.bak/ 127 | venv.bak/ 128 | 129 | # Spyder project settings 130 | .spyderproject 131 | .spyproject 132 | 133 | # Rope project settings 134 | .ropeproject 135 | 136 | # mkdocs documentation 137 | /site 138 | 139 | # mypy 140 | .mypy_cache/ 141 | .dmypy.json 142 | dmypy.json 143 | 144 | # Pyre type checker 145 | .pyre/ 146 | 147 | *.zip 148 | *.pkl 149 | *.csv 150 | *.ckpt 151 | *.parquet 152 | 153 | *.whl 154 | *.th 155 | *.onnx -------------------------------------------------------------------------------- /extern/ImageDream/LICENSE-CODE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 ByteDance 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /extern/ImageDream/README.md: -------------------------------------------------------------------------------- 1 | # ImageDream Diffusion 2 | Peng Wang, Yichun Shi 3 | 4 | | [Project Page](https://image-dream.github.io/) | [Paper](https://arxiv.org/abs/2312.02201) | [HuggingFace Demo]() | 5 | 6 | ## 7 | - **This repo inherits content from the [LDM]() and [MVDream]() repos, plus some adaptor modules from [IP-Adaptor]()** 8 | - **It only includes the diffusion model and 2D image generation. For 3D generation, please check [here](https://github.com/bytedance/ImageDream).** 9 | 10 | 11 | ## Installation 12 | Set up the environment for this repo as in [Stable-Diffusion](https://github.com/Stability-AI/stablediffusion). You can set up the environment by installing the given requirements: 13 | ``` bash 14 | pip install -r requirements.txt 15 | ``` 16 | 17 | To use ImageDream as a python module, you can install it by `pip install -e .` or: 18 | ```bash 19 | pip install git+https://github.com/bytedance/ImageDream/#subdirectory=extern/ImageDream 20 | ``` 21 | 22 | ## Image-to-Multi-View 23 | Clone the model card from the [Huggingface ImageDream Model Page](https://huggingface.co/Peng-Wang/ImageDream/) into ```./release_models/```. 24 | 25 | Place the object in the center of the RGBA image; a short description of the image is also necessary to obtain good results.
For the image-only case, one may run a simple caption model such as [Llava](https://llava.hliu.cc/) or [BLIP2](https://huggingface.co/spaces/Salesforce/BLIP2) to generate the description, which may give similar results. This also applies to 3D SDS. 26 | 27 | 28 | ``` bash 29 | export PYTHONPATH=$PYTHONPATH:./ 30 | python3 scripts/demo.py \ 31 |   --image "./assets/astronaut.png" \ 32 |   --text "an astronaut riding a horse" \ 33 |   --config_path "./imagedream/configs/sd_v2_base_ipmv.yaml" \ 34 |   --ckpt_path "./release_models/ImageDream/sd-v2.1-base-4view-ipmv.pt" \ 35 |   --mode "pixel" \ 36 |   --num_frames 5 37 | ``` 38 | 39 | Tips 40 | - The model is trained with the same elevation for the input image prompt and the synthesized views. Therefore, you may adjust the camera elevation in ```get_camera()``` for better results. In the paper, we adopt a unified elevation of 5 degrees. This also applies to threestudio fusion for better results. 41 | 42 | 43 | ## Acknowledgement 44 | This repository is heavily based on [Stable Diffusion](https://huggingface.co/stabilityai/stable-diffusion-2-1-base). We would like to thank the authors of these works for publicly releasing their code. 45 | 46 | ## Citation 47 | If you find ImageDream helpful, please consider citing: 48 | 49 | ``` bibtex 50 | @article{wang2023imagedream, 51 |   title={ImageDream: Image-Prompt Multi-view Diffusion for 3D Generation}, 52 |   author={Wang, Peng and Shi, Yichun}, 53 |   journal={arXiv preprint arXiv:2312.02201}, 54 |   year={2023} 55 | } 56 | ``` 57 | -------------------------------------------------------------------------------- /extern/ImageDream/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/__init__.py -------------------------------------------------------------------------------- /extern/ImageDream/assets/astronaut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/assets/astronaut.png -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/__init__.py: -------------------------------------------------------------------------------- 1 | from .model_zoo import build_model 2 | -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/camera_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def create_camera_to_world_matrix(elevation, azimuth): 6 |     elevation = np.radians(elevation) 7 |     azimuth = np.radians(azimuth) 8 |     # Convert elevation and azimuth angles to Cartesian coordinates on a unit sphere 9 |     x = np.cos(elevation) * np.sin(azimuth) 10 |     y = np.sin(elevation) 11 |     z = np.cos(elevation) * np.cos(azimuth) 12 | 13 |     # Calculate camera position, target, and up vectors 14 |     camera_pos = np.array([x, y, z]) 15 |     target = np.array([0, 0, 0]) 16 |     up = np.array([0, 1, 0]) 17 | 18 |     # Construct view matrix 19 |     forward = target - camera_pos 20 |     forward /= np.linalg.norm(forward) 21 |     right = np.cross(forward, up) 22 |     right /= np.linalg.norm(right) 23 |     new_up = np.cross(right, forward) 24 |     new_up /= np.linalg.norm(new_up) 25 |     cam2world = np.eye(4) 26 |     cam2world[:3, :3] = np.array([right, new_up, -forward]).T 27 |     cam2world[:3, 3] = camera_pos 28 |     return cam2world
29 | 30 | 31 | def convert_opengl_to_blender(camera_matrix): 32 | if isinstance(camera_matrix, np.ndarray): 33 | # Construct transformation matrix to convert from OpenGL space to Blender space 34 | flip_yz = np.array([[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) 35 | camera_matrix_blender = np.dot(flip_yz, camera_matrix) 36 | else: 37 | # Construct transformation matrix to convert from OpenGL space to Blender space 38 | flip_yz = torch.tensor( 39 | [[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]] 40 | ) 41 | if camera_matrix.ndim == 3: 42 | flip_yz = flip_yz.unsqueeze(0) 43 | camera_matrix_blender = torch.matmul(flip_yz.to(camera_matrix), camera_matrix) 44 | return camera_matrix_blender 45 | 46 | 47 | def normalize_camera(camera_matrix): 48 | """normalize the camera location onto a unit-sphere""" 49 | if isinstance(camera_matrix, np.ndarray): 50 | camera_matrix = camera_matrix.reshape(-1, 4, 4) 51 | translation = camera_matrix[:, :3, 3] 52 | translation = translation / ( 53 | np.linalg.norm(translation, axis=1, keepdims=True) + 1e-8 54 | ) 55 | camera_matrix[:, :3, 3] = translation 56 | else: 57 | camera_matrix = camera_matrix.reshape(-1, 4, 4) 58 | translation = camera_matrix[:, :3, 3] 59 | translation = translation / ( 60 | torch.norm(translation, dim=1, keepdim=True) + 1e-8 61 | ) 62 | camera_matrix[:, :3, 3] = translation 63 | return camera_matrix.reshape(-1, 16) 64 | 65 | 66 | def get_camera( 67 | num_frames, 68 | elevation=15, 69 | azimuth_start=0, 70 | azimuth_span=360, 71 | blender_coord=True, 72 | extra_view=False, 73 | ): 74 | angle_gap = azimuth_span / num_frames 75 | cameras = [] 76 | for azimuth in np.arange(azimuth_start, azimuth_span + azimuth_start, angle_gap): 77 | camera_matrix = create_camera_to_world_matrix(elevation, azimuth) 78 | if blender_coord: 79 | camera_matrix = convert_opengl_to_blender(camera_matrix) 80 | cameras.append(camera_matrix.flatten()) 81 | 82 | if extra_view: 83 | dim = len(cameras[0]) 84 | cameras.append(np.zeros(dim)) 85 | return torch.tensor(np.stack(cameras, 0)).float() 86 | -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/configs/sd_v2_base_ipmv.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | target: imagedream.ldm.interface.LatentDiffusionInterface 3 | params: 4 | linear_start: 0.00085 5 | linear_end: 0.0120 6 | timesteps: 1000 7 | scale_factor: 0.18215 8 | parameterization: "eps" 9 | 10 | unet_config: 11 | target: imagedream.ldm.modules.diffusionmodules.openaimodel.MultiViewUNetModel 12 | params: 13 | image_size: 32 # unused 14 | in_channels: 4 15 | out_channels: 4 16 | model_channels: 320 17 | attention_resolutions: [ 4, 2, 1 ] 18 | num_res_blocks: 2 19 | channel_mult: [ 1, 2, 4, 4 ] 20 | num_head_channels: 64 # need to fix for flash-attn 21 | use_spatial_transformer: True 22 | use_linear_in_transformer: True 23 | transformer_depth: 1 24 | context_dim: 1024 25 | use_checkpoint: False 26 | legacy: False 27 | camera_dim: 16 28 | with_ip: True 29 | ip_dim: 16 # ip token length 30 | ip_mode: "local_resample" 31 | 32 | vae_config: 33 | target: imagedream.ldm.models.autoencoder.AutoencoderKL 34 | params: 35 | embed_dim: 4 36 | monitor: val/rec_loss 37 | ddconfig: 38 | #attn_type: "vanilla-xformers" 39 | double_z: true 40 | z_channels: 4 41 | resolution: 256 42 | in_channels: 3 43 | out_ch: 3 44 | ch: 128 45 | ch_mult: 46 | - 1 47 | - 2 48 | - 4 49 | - 4 50 | num_res_blocks: 2 51 | attn_resolutions: [] 52 | 
dropout: 0.0 53 | lossconfig: 54 | target: torch.nn.Identity 55 | 56 | clip_config: 57 | target: imagedream.ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 58 | params: 59 | freeze: True 60 | layer: "penultimate" 61 | ip_mode: "local_resample" 62 | -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/configs/sd_v2_base_ipmv_local.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | target: imagedream.ldm.interface.LatentDiffusionInterface 3 | params: 4 | linear_start: 0.00085 5 | linear_end: 0.0120 6 | timesteps: 1000 7 | scale_factor: 0.18215 8 | parameterization: "eps" 9 | 10 | unet_config: 11 | target: imagedream.ldm.modules.diffusionmodules.openaimodel.MultiViewUNetModel 12 | params: 13 | image_size: 32 # unused 14 | in_channels: 4 15 | out_channels: 4 16 | model_channels: 320 17 | attention_resolutions: [ 4, 2, 1 ] 18 | num_res_blocks: 2 19 | channel_mult: [ 1, 2, 4, 4 ] 20 | num_head_channels: 64 # need to fix for flash-attn 21 | use_spatial_transformer: True 22 | use_linear_in_transformer: True 23 | transformer_depth: 1 24 | context_dim: 1024 25 | use_checkpoint: False 26 | legacy: False 27 | camera_dim: 16 28 | with_ip: True 29 | ip_dim: 16 # ip token length 30 | ip_mode: "local_resample" 31 | ip_weight: 1.0 # adjust for similarity to image 32 | 33 | vae_config: 34 | target: imagedream.ldm.models.autoencoder.AutoencoderKL 35 | params: 36 | embed_dim: 4 37 | monitor: val/rec_loss 38 | ddconfig: 39 | #attn_type: "vanilla-xformers" 40 | double_z: true 41 | z_channels: 4 42 | resolution: 256 43 | in_channels: 3 44 | out_ch: 3 45 | ch: 128 46 | ch_mult: 47 | - 1 48 | - 2 49 | - 4 50 | - 4 51 | num_res_blocks: 2 52 | attn_resolutions: [] 53 | dropout: 0.0 54 | lossconfig: 55 | target: torch.nn.Identity 56 | 57 | clip_config: 58 | target: imagedream.ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 59 | params: 60 | freeze: True 61 | layer: "penultimate" 62 | ip_mode: "local_resample" 63 | -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/imagedream/ldm/__init__.py -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/imagedream/ldm/models/__init__.py -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/imagedream/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/imagedream/ldm/modules/__init__.py 
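The `sd_v2_base_ipmv*.yaml` configs above are what `build_model` in `imagedream/model_zoo.py` (reproduced near the end of this listing) turns into a runnable diffusion model. Below is a minimal, hedged sketch of that path, assuming the checkpoint has already been downloaded to the location suggested in the ImageDream README.

```python
# Hedged sketch: build the ImageDream diffusion model from an explicit config
# and checkpoint, mirroring the (config_path, ckpt_path) branch of
# imagedream.model_zoo.build_model. The paths below are assumptions taken from
# the README's demo command, not values defined in this repository listing.
from imagedream.model_zoo import build_model

model = build_model(
    "sd-v2.1-base-4view-ipmv",  # entry name listed in PRETRAINED_MODELS
    config_path="./imagedream/configs/sd_v2_base_ipmv.yaml",
    ckpt_path="./release_models/ImageDream/sd-v2.1-base-4view-ipmv.pt",
)
model.eval()  # assumed to be a torch nn.Module (LatentDiffusionInterface); eval() for inference
```

With `config_path` and `ckpt_path` omitted, `build_model` presumably falls back to the `PRETRAINED_MODELS` table and `hf_hub_download`, as suggested by the imports in `model_zoo.py` below.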
-------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/imagedream/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/imagedream/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to( 34 | device=self.parameters.device 35 | ) 36 | 37 | def sample(self): 38 | x = self.mean + self.std * torch.randn(self.mean.shape).to( 39 | device=self.parameters.device 40 | ) 41 | return x 42 | 43 | def kl(self, other=None): 44 | if self.deterministic: 45 | return torch.Tensor([0.0]) 46 | else: 47 | if other is None: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, 50 | dim=[1, 2, 3], 51 | ) 52 | else: 53 | return 0.5 * torch.sum( 54 | torch.pow(self.mean - other.mean, 2) / other.var 55 | + self.var / other.var 56 | - 1.0 57 | - self.logvar 58 | + other.logvar, 59 | dim=[1, 2, 3], 60 | ) 61 | 62 | def nll(self, sample, dims=[1, 2, 3]): 63 | if self.deterministic: 64 | return torch.Tensor([0.0]) 65 | logtwopi = np.log(2.0 * np.pi) 66 | return 0.5 * torch.sum( 67 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 68 | dim=dims, 69 | ) 70 | 71 | def mode(self): 72 | return self.mean 73 | 74 | 75 | def normal_kl(mean1, logvar1, mean2, logvar2): 76 | """ 77 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 78 | Compute the KL divergence between two gaussians. 79 | Shapes are automatically broadcasted, so batches can be compared to 80 | scalars, among other use cases. 81 | """ 82 | tensor = None 83 | for obj in (mean1, logvar1, mean2, logvar2): 84 | if isinstance(obj, torch.Tensor): 85 | tensor = obj 86 | break 87 | assert tensor is not None, "at least one argument must be a Tensor" 88 | 89 | # Force variances to be Tensors. 
Broadcasting helps convert scalars to 90 | # Tensors, but it does not work for torch.exp(). 91 | logvar1, logvar2 = [ 92 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 93 | for x in (logvar1, logvar2) 94 | ] 95 | 96 | return 0.5 * ( 97 | -1.0 98 | + logvar2 99 | - logvar1 100 | + torch.exp(logvar1 - logvar2) 101 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 102 | ) 103 | -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError("Decay must be between 0 and 1") 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer("decay", torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer( 14 | "num_updates", 15 | torch.tensor(0, dtype=torch.int) 16 | if use_num_upates 17 | else torch.tensor(-1, dtype=torch.int), 18 | ) 19 | 20 | for name, p in model.named_parameters(): 21 | if p.requires_grad: 22 | # remove as '.'-character is not allowed in buffers 23 | s_name = name.replace(".", "") 24 | self.m_name2s_name.update({name: s_name}) 25 | self.register_buffer(s_name, p.clone().detach().data) 26 | 27 | self.collected_params = [] 28 | 29 | def reset_num_updates(self): 30 | del self.num_updates 31 | self.register_buffer("num_updates", torch.tensor(0, dtype=torch.int)) 32 | 33 | def forward(self, model): 34 | decay = self.decay 35 | 36 | if self.num_updates >= 0: 37 | self.num_updates += 1 38 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 39 | 40 | one_minus_decay = 1.0 - decay 41 | 42 | with torch.no_grad(): 43 | m_param = dict(model.named_parameters()) 44 | shadow_params = dict(self.named_buffers()) 45 | 46 | for key in m_param: 47 | if m_param[key].requires_grad: 48 | sname = self.m_name2s_name[key] 49 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 50 | shadow_params[sname].sub_( 51 | one_minus_decay * (shadow_params[sname] - m_param[key]) 52 | ) 53 | else: 54 | assert not key in self.m_name2s_name 55 | 56 | def copy_to(self, model): 57 | m_param = dict(model.named_parameters()) 58 | shadow_params = dict(self.named_buffers()) 59 | for key in m_param: 60 | if m_param[key].requires_grad: 61 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 62 | else: 63 | assert not key in self.m_name2s_name 64 | 65 | def store(self, parameters): 66 | """ 67 | Save the current parameters for restoring later. 68 | Args: 69 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 70 | temporarily stored. 71 | """ 72 | self.collected_params = [param.clone() for param in parameters] 73 | 74 | def restore(self, parameters): 75 | """ 76 | Restore the parameters stored with the `store` method. 77 | Useful to validate the model with EMA parameters without affecting the 78 | original optimization process. Store the parameters before the 79 | `copy_to` method. After validation (or model saving), use this to 80 | restore the former parameters. 81 | Args: 82 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 83 | updated with the stored parameters. 
84 | """ 85 | for c_param, param in zip(self.collected_params, parameters): 86 | param.data.copy_(c_param.data) 87 | -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ImageDream/imagedream/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /extern/ImageDream/imagedream/model_zoo.py: -------------------------------------------------------------------------------- 1 | """ Utiliy functions to load pre-trained models more easily """ 2 | import os 3 | import pkg_resources 4 | from omegaconf import OmegaConf 5 | 6 | import torch 7 | from huggingface_hub import hf_hub_download 8 | 9 | from imagedream.ldm.util import instantiate_from_config 10 | 11 | 12 | PRETRAINED_MODELS = { 13 | "sd-v2.1-base-4view-ipmv": { 14 | "config": "sd_v2_base_ipmv.yaml", 15 | "repo_id": "Peng-Wang/ImageDream", 16 | "filename": "sd-v2.1-base-4view-ipmv.pt", 17 | }, 18 | "sd-v2.1-base-4view-ipmv-local": { 19 | "config": "sd_v2_base_ipmv_local.yaml", 20 | "repo_id": "Peng-Wang/ImageDream", 21 | "filename": "sd-v2.1-base-4view-ipmv-local.pt", 22 | }, 23 | } 24 | 25 | 26 | def get_config_file(config_path): 27 | cfg_file = pkg_resources.resource_filename( 28 | "imagedream", os.path.join("configs", config_path) 29 | ) 30 | if not os.path.exists(cfg_file): 31 | raise RuntimeError(f"Config {config_path} not available!") 32 | return cfg_file 33 | 34 | 35 | def build_model(model_name, config_path=None, ckpt_path=None, cache_dir=None): 36 | if (config_path is not None) and (ckpt_path is not None): 37 | config = OmegaConf.load(config_path) 38 | model = instantiate_from_config(config.model) 39 | model.load_state_dict(torch.load(ckpt_path, map_location="cpu"), strict=False) 40 | return model 41 | 42 | if not model_name in PRETRAINED_MODELS: 43 | raise RuntimeError( 44 | f"Model name {model_name} is not a pre-trained model. 
Available models are:\n- " 45 | + "\n- ".join(PRETRAINED_MODELS.keys()) 46 | ) 47 | model_info = PRETRAINED_MODELS[model_name] 48 | 49 | # Instiantiate the model 50 | print(f"Loading model from config: {model_info['config']}") 51 | config_file = get_config_file(model_info["config"]) 52 | config = OmegaConf.load(config_file) 53 | model = instantiate_from_config(config.model) 54 | 55 | # Load pre-trained checkpoint from huggingface 56 | if not ckpt_path: 57 | ckpt_path = hf_hub_download( 58 | repo_id=model_info["repo_id"], 59 | filename=model_info["filename"], 60 | cache_dir=cache_dir, 61 | ) 62 | print(f"Loading model from cache file: {ckpt_path}") 63 | model.load_state_dict(torch.load(ckpt_path, map_location="cpu"), strict=False) 64 | return model 65 | -------------------------------------------------------------------------------- /extern/ImageDream/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | imageio 3 | imageio-ffmpeg 4 | omegaconf 5 | einops 6 | transformers==4.27.1 7 | open-clip-torch==2.7.0 8 | gradio>=3.13.2 9 | xformers==0.0.16 10 | -------------------------------------------------------------------------------- /extern/ImageDream/scripts/demo.sh: -------------------------------------------------------------------------------- 1 | # Run this script under ImageDream/ 2 | export PYTHONPATH=$PYTHONPATH:./ 3 | 4 | # test pixel version 5 | python3 scripts/demo.py \ 6 | --image "./assets/astronaut.png" \ 7 | --text "an astronaut riding a horse" \ 8 | --config_path "./imagedream/configs/sd_v2_base_ipmv.yaml" \ 9 | --ckpt_path "./release_models/ImageDream/sd-v2.1-base-4view-ipmv.pt" \ 10 | --mode "pixel" \ 11 | --num_frames 5 12 | 13 | # test local version 14 | python3 scripts/demo.py \ 15 | --image "./assets/astronaut.png" \ 16 | --text "an astronaut riding a horse" \ 17 | --config_path "./imagedream/configs/sd_v2_base_ipmv_local.yaml" \ 18 | --ckpt_path "./release_models/ImageDream/sd-v2.1-base-4view-ipmv-local.pt" \ 19 | --mode "local" \ 20 | --num_frames 4 21 | -------------------------------------------------------------------------------- /extern/ImageDream/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="imagedream", 5 | version="0.0.1", 6 | description="Multi-view Diffusion Models", 7 | author="ByteDance", 8 | packages=find_packages(), 9 | package_data={"imagedream": ["configs/*.yaml"]}, 10 | install_requires=[ 11 | "torch", 12 | "numpy", 13 | "tqdm", 14 | "omegaconf", 15 | "einops", 16 | "huggingface_hub", 17 | "transformers", 18 | "open-clip-torch", 19 | ], 20 | ) 21 | -------------------------------------------------------------------------------- /extern/ldm_zero123/extras.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from contextlib import contextmanager 3 | from pathlib import Path 4 | 5 | import torch 6 | from omegaconf import OmegaConf 7 | 8 | from extern.ldm_zero123.util import instantiate_from_config 9 | 10 | 11 | @contextmanager 12 | def all_logging_disabled(highest_level=logging.CRITICAL): 13 | """ 14 | A context manager that will prevent any logging messages 15 | triggered during the body from being processed. 16 | 17 | :param highest_level: the maximum logging level in use. 18 | This would only need to be changed if a custom level greater than CRITICAL 19 | is defined. 
20 | 21 | https://gist.github.com/simon-weber/7853144 22 | """ 23 | # two kind-of hacks here: 24 | # * can't get the highest logging level in effect => delegate to the user 25 | # * can't get the current module-level override => use an undocumented 26 | # (but non-private!) interface 27 | 28 | previous_level = logging.root.manager.disable 29 | 30 | logging.disable(highest_level) 31 | 32 | try: 33 | yield 34 | finally: 35 | logging.disable(previous_level) 36 | 37 | 38 | def load_training_dir(train_dir, device, epoch="last"): 39 | """Load a checkpoint and config from training directory""" 40 | train_dir = Path(train_dir) 41 | ckpt = list(train_dir.rglob(f"*{epoch}.ckpt")) 42 | assert len(ckpt) == 1, f"found {len(ckpt)} matching ckpt files" 43 | config = list(train_dir.rglob(f"*-project.yaml")) 44 | assert len(ckpt) > 0, f"didn't find any config in {train_dir}" 45 | if len(config) > 1: 46 | print(f"found {len(config)} matching config files") 47 | config = sorted(config)[-1] 48 | print(f"selecting {config}") 49 | else: 50 | config = config[0] 51 | 52 | config = OmegaConf.load(config) 53 | return load_model_from_config(config, ckpt[0], device) 54 | 55 | 56 | def load_model_from_config(config, ckpt, device="cpu", verbose=False): 57 | """Loads a model from config and a ckpt 58 | if config is a path will use omegaconf to load 59 | """ 60 | if isinstance(config, (str, Path)): 61 | config = OmegaConf.load(config) 62 | 63 | with all_logging_disabled(): 64 | print(f"Loading model from {ckpt}") 65 | pl_sd = torch.load(ckpt, map_location="cpu") 66 | global_step = pl_sd["global_step"] 67 | sd = pl_sd["state_dict"] 68 | model = instantiate_from_config(config.model) 69 | m, u = model.load_state_dict(sd, strict=False) 70 | if len(m) > 0 and verbose: 71 | print("missing keys:") 72 | print(m) 73 | if len(u) > 0 and verbose: 74 | print("unexpected keys:") 75 | model.to(device) 76 | model.eval() 77 | model.cond_stage_model.device = device 78 | return model 79 | -------------------------------------------------------------------------------- /extern/ldm_zero123/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ldm_zero123/models/diffusion/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/models/diffusion/sampling_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def append_dims(x, target_dims): 6 | """Appends dimensions to the end of a tensor until it has target_dims dimensions. 7 | From https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/utils.py""" 8 | dims_to_append = target_dims - x.ndim 9 | if dims_to_append < 0: 10 | raise ValueError( 11 | f"input has {x.ndim} dims but target_dims is {target_dims}, which is less" 12 | ) 13 | return x[(...,) + (None,) * dims_to_append] 14 | 15 | 16 | def renorm_thresholding(x0, value): 17 | # renorm 18 | pred_max = x0.max() 19 | pred_min = x0.min() 20 | pred_x0 = (x0 - pred_min) / (pred_max - pred_min) # 0 ... 1 21 | pred_x0 = 2 * pred_x0 - 1.0 # -1 ... 1 22 | 23 | s = torch.quantile(rearrange(pred_x0, "b ... 
-> b (...)").abs(), value, dim=-1) 24 | s.clamp_(min=1.0) 25 | s = s.view(-1, *((1,) * (pred_x0.ndim - 1))) 26 | 27 | # clip by threshold 28 | # pred_x0 = pred_x0.clamp(-s, s) / s # needs newer pytorch # TODO bring back to pure-gpu with min/max 29 | 30 | # temporary hack: numpy on cpu 31 | pred_x0 = ( 32 | np.clip(pred_x0.cpu().numpy(), -s.cpu().numpy(), s.cpu().numpy()) 33 | / s.cpu().numpy() 34 | ) 35 | pred_x0 = torch.tensor(pred_x0).to(self.model.device) 36 | 37 | # re.renorm 38 | pred_x0 = (pred_x0 + 1.0) / 2.0 # 0 ... 1 39 | pred_x0 = (pred_max - pred_min) * pred_x0 + pred_min # orig range 40 | return pred_x0 41 | 42 | 43 | def norm_thresholding(x0, value): 44 | s = append_dims(x0.pow(2).flatten(1).mean(1).sqrt().clamp(min=value), x0.ndim) 45 | return x0 * (value / s) 46 | 47 | 48 | def spatial_norm_thresholding(x0, value): 49 | # b c h w 50 | s = x0.pow(2).mean(1, keepdim=True).sqrt().clamp(min=value) 51 | return x0 * (value / s) 52 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ldm_zero123/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ldm_zero123/modules/distributions/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to( 34 | device=self.parameters.device 35 | ) 36 | 37 | def sample(self): 38 | x = self.mean + self.std * torch.randn(self.mean.shape).to( 39 | device=self.parameters.device 40 | ) 41 | return x 42 | 43 | def kl(self, other=None): 44 | if self.deterministic: 45 | return torch.Tensor([0.0]) 46 | else: 47 | if other is None: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, 50 | dim=[1, 2, 3], 51 | ) 52 | else: 53 | return 0.5 * torch.sum( 54 | torch.pow(self.mean - other.mean, 2) / other.var 55 | + self.var / other.var 56 | - 1.0 57 | - self.logvar 58 | + other.logvar, 59 | dim=[1, 2, 3], 60 | ) 61 | 62 | def nll(self, sample, dims=[1, 2, 3]): 63 | if self.deterministic: 64 | return torch.Tensor([0.0]) 
65 | logtwopi = np.log(2.0 * np.pi) 66 | return 0.5 * torch.sum( 67 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 68 | dim=dims, 69 | ) 70 | 71 | def mode(self): 72 | return self.mean 73 | 74 | 75 | def normal_kl(mean1, logvar1, mean2, logvar2): 76 | """ 77 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 78 | Compute the KL divergence between two gaussians. 79 | Shapes are automatically broadcasted, so batches can be compared to 80 | scalars, among other use cases. 81 | """ 82 | tensor = None 83 | for obj in (mean1, logvar1, mean2, logvar2): 84 | if isinstance(obj, torch.Tensor): 85 | tensor = obj 86 | break 87 | assert tensor is not None, "at least one argument must be a Tensor" 88 | 89 | # Force variances to be Tensors. Broadcasting helps convert scalars to 90 | # Tensors, but it does not work for torch.exp(). 91 | logvar1, logvar2 = [ 92 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 93 | for x in (logvar1, logvar2) 94 | ] 95 | 96 | return 0.5 * ( 97 | -1.0 98 | + logvar2 99 | - logvar1 100 | + torch.exp(logvar1 - logvar2) 101 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 102 | ) 103 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError("Decay must be between 0 and 1") 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer("decay", torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer( 14 | "num_updates", 15 | torch.tensor(0, dtype=torch.int) 16 | if use_num_upates 17 | else torch.tensor(-1, dtype=torch.int), 18 | ) 19 | 20 | for name, p in model.named_parameters(): 21 | if p.requires_grad: 22 | # remove as '.'-character is not allowed in buffers 23 | s_name = name.replace(".", "") 24 | self.m_name2s_name.update({name: s_name}) 25 | self.register_buffer(s_name, p.clone().detach().data) 26 | 27 | self.collected_params = [] 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_( 47 | one_minus_decay * (shadow_params[sname] - m_param[key]) 48 | ) 49 | else: 50 | assert not key in self.m_name2s_name 51 | 52 | def copy_to(self, model): 53 | m_param = dict(model.named_parameters()) 54 | shadow_params = dict(self.named_buffers()) 55 | for key in m_param: 56 | if m_param[key].requires_grad: 57 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 58 | else: 59 | assert not key in self.m_name2s_name 60 | 61 | def store(self, parameters): 62 | """ 63 | Save the current parameters for restoring later. 64 | Args: 65 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 66 | temporarily stored. 
67 | """ 68 | self.collected_params = [param.clone() for param in parameters] 69 | 70 | def restore(self, parameters): 71 | """ 72 | Restore the parameters stored with the `store` method. 73 | Useful to validate the model with EMA parameters without affecting the 74 | original optimization process. Store the parameters before the 75 | `copy_to` method. After validation (or model saving), use this to 76 | restore the former parameters. 77 | Args: 78 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 79 | updated with the stored parameters. 80 | """ 81 | for c_param, param in zip(self.collected_params, parameters): 82 | param.data.copy_(c_param.data) 83 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ldm_zero123/modules/encoders/__init__.py -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/image_degradation/__init__.py: -------------------------------------------------------------------------------- 1 | from extern.ldm_zero123.modules.image_degradation.bsrgan import ( 2 | degradation_bsrgan_variant as degradation_fn_bsr, 3 | ) 4 | from extern.ldm_zero123.modules.image_degradation.bsrgan_light import ( 5 | degradation_bsrgan_variant as degradation_fn_bsr_light, 6 | ) 7 | -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/image_degradation/utils/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/extern/ldm_zero123/modules/image_degradation/utils/test.png -------------------------------------------------------------------------------- /extern/ldm_zero123/modules/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from extern.ldm_zero123.modules.losses.contperceptual import LPIPSWithDiscriminator 2 | -------------------------------------------------------------------------------- /extern/ldm_zero123/thirdp/psp/id_loss.py: -------------------------------------------------------------------------------- 1 | # https://github.com/eladrich/pixel2style2pixel 2 | import torch 3 | from torch import nn 4 | 5 | from extern.ldm_zero123.thirdp.psp.model_irse import Backbone 6 | 7 | 8 | class IDFeatures(nn.Module): 9 | def __init__(self, model_path): 10 | super(IDFeatures, self).__init__() 11 | print("Loading ResNet ArcFace") 12 | self.facenet = Backbone( 13 | input_size=112, num_layers=50, drop_ratio=0.6, mode="ir_se" 14 | ) 15 | self.facenet.load_state_dict(torch.load(model_path, map_location="cpu")) 16 | self.face_pool = torch.nn.AdaptiveAvgPool2d((112, 112)) 17 | self.facenet.eval() 18 | 19 | def forward(self, x, crop=False): 20 | # Not sure of the image range here 21 | if crop: 22 | x = torch.nn.functional.interpolate(x, (256, 256), mode="area") 23 | x = x[:, :, 35:223, 32:220] 24 | x = self.face_pool(x) 25 | x_feats = self.facenet(x) 26 | return x_feats 27 | -------------------------------------------------------------------------------- /load/images/anya_front.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/anya_front.png -------------------------------------------------------------------------------- /load/images/anya_front_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/anya_front_depth.png -------------------------------------------------------------------------------- /load/images/anya_front_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/anya_front_normal.png -------------------------------------------------------------------------------- /load/images/anya_front_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/anya_front_rgba.png -------------------------------------------------------------------------------- /load/images/baby_phoenix_on_ice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/baby_phoenix_on_ice.png -------------------------------------------------------------------------------- /load/images/baby_phoenix_on_ice_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/baby_phoenix_on_ice_depth.png -------------------------------------------------------------------------------- /load/images/baby_phoenix_on_ice_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/baby_phoenix_on_ice_normal.png -------------------------------------------------------------------------------- /load/images/baby_phoenix_on_ice_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/baby_phoenix_on_ice_rgba.png -------------------------------------------------------------------------------- /load/images/beach_house_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_1.png -------------------------------------------------------------------------------- /load/images/beach_house_1_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_1_depth.png -------------------------------------------------------------------------------- /load/images/beach_house_1_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_1_normal.png -------------------------------------------------------------------------------- 
/load/images/beach_house_1_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_1_rgba.png -------------------------------------------------------------------------------- /load/images/beach_house_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_2.png -------------------------------------------------------------------------------- /load/images/beach_house_2_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_2_depth.png -------------------------------------------------------------------------------- /load/images/beach_house_2_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_2_normal.png -------------------------------------------------------------------------------- /load/images/beach_house_2_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/beach_house_2_rgba.png -------------------------------------------------------------------------------- /load/images/bollywood_actress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/bollywood_actress.png -------------------------------------------------------------------------------- /load/images/bollywood_actress_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/bollywood_actress_depth.png -------------------------------------------------------------------------------- /load/images/bollywood_actress_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/bollywood_actress_normal.png -------------------------------------------------------------------------------- /load/images/bollywood_actress_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/bollywood_actress_rgba.png -------------------------------------------------------------------------------- /load/images/cactus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/cactus.png -------------------------------------------------------------------------------- /load/images/cactus_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/cactus_depth.png 
-------------------------------------------------------------------------------- /load/images/cactus_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/cactus_normal.png -------------------------------------------------------------------------------- /load/images/cactus_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/cactus_rgba.png -------------------------------------------------------------------------------- /load/images/catstatue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/catstatue.png -------------------------------------------------------------------------------- /load/images/catstatue_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/catstatue_depth.png -------------------------------------------------------------------------------- /load/images/catstatue_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/catstatue_normal.png -------------------------------------------------------------------------------- /load/images/catstatue_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/catstatue_rgba.png -------------------------------------------------------------------------------- /load/images/church_ruins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/church_ruins.png -------------------------------------------------------------------------------- /load/images/church_ruins_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/church_ruins_depth.png -------------------------------------------------------------------------------- /load/images/church_ruins_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/church_ruins_normal.png -------------------------------------------------------------------------------- /load/images/church_ruins_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/church_ruins_rgba.png -------------------------------------------------------------------------------- /load/images/dog1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/dog1.png 
-------------------------------------------------------------------------------- /load/images/dragon2_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/dragon2_rgba.png -------------------------------------------------------------------------------- /load/images/firekeeper.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/firekeeper.jpg -------------------------------------------------------------------------------- /load/images/firekeeper_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/firekeeper_depth.png -------------------------------------------------------------------------------- /load/images/firekeeper_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/firekeeper_normal.png -------------------------------------------------------------------------------- /load/images/firekeeper_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/firekeeper_rgba.png -------------------------------------------------------------------------------- /load/images/futuristic_car.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/futuristic_car.png -------------------------------------------------------------------------------- /load/images/futuristic_car_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/futuristic_car_depth.png -------------------------------------------------------------------------------- /load/images/futuristic_car_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/futuristic_car_normal.png -------------------------------------------------------------------------------- /load/images/futuristic_car_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/futuristic_car_rgba.png -------------------------------------------------------------------------------- /load/images/grootplant_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/grootplant_rgba.png -------------------------------------------------------------------------------- /load/images/hamburger.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/hamburger.png -------------------------------------------------------------------------------- /load/images/hamburger_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/hamburger_depth.png -------------------------------------------------------------------------------- /load/images/hamburger_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/hamburger_rgba.png -------------------------------------------------------------------------------- /load/images/mona_lisa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/mona_lisa.png -------------------------------------------------------------------------------- /load/images/mona_lisa_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/mona_lisa_depth.png -------------------------------------------------------------------------------- /load/images/mona_lisa_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/mona_lisa_normal.png -------------------------------------------------------------------------------- /load/images/mona_lisa_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/mona_lisa_rgba.png -------------------------------------------------------------------------------- /load/images/robot_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/robot_rgba.png -------------------------------------------------------------------------------- /load/images/teddy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/teddy.png -------------------------------------------------------------------------------- /load/images/teddy_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/teddy_depth.png -------------------------------------------------------------------------------- /load/images/teddy_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/teddy_normal.png -------------------------------------------------------------------------------- /load/images/teddy_rgba.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/teddy_rgba.png -------------------------------------------------------------------------------- /load/images/thorhammer_rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/images/thorhammer_rgba.png -------------------------------------------------------------------------------- /load/lights/LICENSE.txt: -------------------------------------------------------------------------------- 1 | The mud_road_puresky.hdr HDR probe is from https://polyhaven.com/a/mud_road_puresky 2 | CC0 License. 3 | -------------------------------------------------------------------------------- /load/lights/bsdf_256_256.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/lights/bsdf_256_256.bin -------------------------------------------------------------------------------- /load/lights/mud_road_puresky_1k.hdr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/lights/mud_road_puresky_1k.hdr -------------------------------------------------------------------------------- /load/shapes/README.md: -------------------------------------------------------------------------------- 1 | # Shape Credits 2 | 3 | - `animal.obj` - Ido Richardson 4 | - `hand_prismatic.obj` - Ido Richardson 5 | - `potion.obj` - Ido Richardson 6 | - `blub.obj` - [Keenan's 3D Model Repository](https://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/) 7 | - `nascar.obj` - [Princeton ModelNet](https://modelnet.cs.princeton.edu/) 8 | - `cabin.obj` - [Princeton ModelNet](https://modelnet.cs.princeton.edu/) 9 | - `teddy.obj` - [Gal Metzer](https://galmetzer.github.io/) 10 | - `human.obj` - [TurboSquid](https://www.turbosquid.com/3d-models/3d-model-character-base/524860) 11 | -------------------------------------------------------------------------------- /load/tets/128_tets.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/tets/128_tets.npz -------------------------------------------------------------------------------- /load/tets/32_tets.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/tets/32_tets.npz -------------------------------------------------------------------------------- /load/tets/64_tets.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ImageDream/26c3972e586f0c8d2f6c6b297aa9d792d06abebb/load/tets/64_tets.npz -------------------------------------------------------------------------------- /load/tets/generate_tets.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. 
Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 9 | 10 | import os 11 | 12 | import numpy as np 13 | 14 | """ 15 | This code segment shows how to use Quartet: https://github.com/crawforddoran/quartet, 16 | to generate a tet grid 17 | 1) Download, compile and run Quartet as described in the link above. Example usage `quartet meshes/cube.obj 0.5 cube_5.tet` 18 | 2) Run the function below to generate a file `cube_32_tet.tet` 19 | """ 20 | 21 | 22 | def generate_tetrahedron_grid_file(res=32, root=".."): 23 | frac = 1.0 / res 24 | command = f"cd {root}; ./quartet meshes/cube.obj {frac} meshes/cube_{res}_tet.tet -s meshes/cube_boundary_{res}.obj" 25 | os.system(command) 26 | 27 | 28 | """ 29 | This code segment shows how to convert from a quartet .tet file to compressed npz file 30 | """ 31 | 32 | 33 | def convert_from_quartet_to_npz(quartetfile="cube_32_tet.tet", npzfile="32_tets"): 34 | file1 = open(quartetfile, "r") 35 | header = file1.readline() 36 | numvertices = int(header.split(" ")[1]) 37 | numtets = int(header.split(" ")[2]) 38 | print(numvertices, numtets) 39 | 40 | # load vertices 41 | vertices = np.loadtxt(quartetfile, skiprows=1, max_rows=numvertices) 42 | print(vertices.shape) 43 | 44 | # load indices 45 | indices = np.loadtxt( 46 | quartetfile, dtype=int, skiprows=1 + numvertices, max_rows=numtets 47 | ) 48 | print(indices.shape) 49 | 50 | np.savez_compressed(npzfile, vertices=vertices, indices=indices) 51 | 52 | 53 | root = "/home/gyc/quartet" 54 | for res in [300, 350, 400]: 55 | generate_tetrahedron_grid_file(res, root) 56 | convert_from_quartet_to_npz( 57 | os.path.join(root, f"meshes/cube_{res}_tet.tet"), npzfile=f"{res}_tets" 58 | ) 59 | -------------------------------------------------------------------------------- /load/zero123/download.sh: -------------------------------------------------------------------------------- 1 | wget https://huggingface.co/cvlab/zero123-weights/resolve/main/105000.ckpt 2 | -------------------------------------------------------------------------------- /load/zero123/sd-objaverse-finetune-c_concat-256.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: extern.ldm_zero123.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "image_target" 11 | cond_stage_key: "image_cond" 12 | image_size: 32 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: hybrid 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | 19 | scheduler_config: # 10000 warmup steps 20 | target: extern.ldm_zero123.lr_scheduler.LambdaLinearScheduler 21 | params: 22 | warm_up_steps: [ 100 ] 23 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 24 | f_start: [ 1.e-6 ] 25 | f_max: [ 1. ] 26 | f_min: [ 1. 
] 27 | 28 | unet_config: 29 | target: extern.ldm_zero123.modules.diffusionmodules.openaimodel.UNetModel 30 | params: 31 | image_size: 32 # unused 32 | in_channels: 8 33 | out_channels: 4 34 | model_channels: 320 35 | attention_resolutions: [ 4, 2, 1 ] 36 | num_res_blocks: 2 37 | channel_mult: [ 1, 2, 4, 4 ] 38 | num_heads: 8 39 | use_spatial_transformer: True 40 | transformer_depth: 1 41 | context_dim: 768 42 | use_checkpoint: True 43 | legacy: False 44 | 45 | first_stage_config: 46 | target: extern.ldm_zero123.models.autoencoder.AutoencoderKL 47 | params: 48 | embed_dim: 4 49 | monitor: val/rec_loss 50 | ddconfig: 51 | double_z: true 52 | z_channels: 4 53 | resolution: 256 54 | in_channels: 3 55 | out_ch: 3 56 | ch: 128 57 | ch_mult: 58 | - 1 59 | - 2 60 | - 4 61 | - 4 62 | num_res_blocks: 2 63 | attn_resolutions: [] 64 | dropout: 0.0 65 | lossconfig: 66 | target: torch.nn.Identity 67 | 68 | cond_stage_config: 69 | target: extern.ldm_zero123.modules.encoders.modules.FrozenCLIPImageEmbedder 70 | 71 | 72 | # data: 73 | # target: extern.ldm_zero123.data.simple.ObjaverseDataModuleFromConfig 74 | # params: 75 | # root_dir: 'views_whole_sphere' 76 | # batch_size: 192 77 | # num_workers: 16 78 | # total_view: 4 79 | # train: 80 | # validation: False 81 | # image_transforms: 82 | # size: 256 83 | 84 | # validation: 85 | # validation: True 86 | # image_transforms: 87 | # size: 256 88 | 89 | 90 | # lightning: 91 | # find_unused_parameters: false 92 | # metrics_over_trainsteps_checkpoint: True 93 | # modelcheckpoint: 94 | # params: 95 | # every_n_train_steps: 5000 96 | # callbacks: 97 | # image_logger: 98 | # target: main.ImageLogger 99 | # params: 100 | # batch_frequency: 500 101 | # max_images: 32 102 | # increase_log_steps: False 103 | # log_first_step: True 104 | # log_images_kwargs: 105 | # use_ema_scope: False 106 | # inpaint: False 107 | # plot_progressive_rows: False 108 | # plot_diffusion_rows: False 109 | # N: 32 110 | # unconditional_scale: 3.0 111 | # unconditional_label: [""] 112 | 113 | # trainer: 114 | # benchmark: True 115 | # val_check_interval: 5000000 # really sorry 116 | # num_sanity_val_steps: 0 117 | # accumulate_grad_batches: 1 118 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | mypy 3 | pylint 4 | pre-commit 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | lightning==2.0.0 2 | omegaconf==2.3.0 3 | jaxtyping 4 | typeguard 5 | git+https://github.com/KAIR-BAIR/nerfacc.git@v0.5.2 6 | git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch 7 | diffusers 8 | transformers 9 | accelerate 10 | opencv-python 11 | tensorboard 12 | matplotlib 13 | imageio>=2.28.0 14 | imageio[ffmpeg] 15 | git+https://github.com/NVlabs/nvdiffrast.git 16 | libigl 17 | xatlas 18 | trimesh[easy] 19 | networkx 20 | pysdf 21 | PyMCubes 22 | wandb 23 | gradio 24 | git+https://github.com/ashawkey/envlight.git 25 | torchmetrics 26 | 27 | # deepfloyd 28 | xformers 29 | bitsandbytes 30 | sentencepiece 31 | safetensors 32 | huggingface_hub 33 | 34 | # for zero123 35 | einops 36 | kornia 37 | taming-transformers-rom1504 38 | git+https://github.com/openai/CLIP.git 39 | 40 | #controlnet 41 | controlnet_aux 42 | 43 | # imagedream 44 | open-clip-torch==2.7.0 
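The sd-objaverse-finetune-c_concat-256.yaml above sits alongside the 105000.ckpt weights fetched by load/zero123/download.sh. A minimal sketch of loading that config/checkpoint pair with the load_model_from_config helper from extern/ldm_zero123/extras.py shown earlier; the paths are assumptions based on download.sh and the file layout, not commands taken from the repo:

from extern.ldm_zero123.extras import load_model_from_config

# Config as dumped above; checkpoint as downloaded by load/zero123/download.sh.
# load_model_from_config accepts a path for `config` and loads it via OmegaConf.
model = load_model_from_config(
    config="load/zero123/sd-objaverse-finetune-c_concat-256.yaml",
    ckpt="load/zero123/105000.ckpt",
    device="cuda",
)

Within threestudio, the zero123 guidance module presumably handles this loading internally, so the sketch is mainly useful for inspecting the diffusion model on its own.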
-------------------------------------------------------------------------------- /threestudio/__init__.py: -------------------------------------------------------------------------------- 1 | __modules__ = {} 2 | 3 | 4 | def register(name): 5 | def decorator(cls): 6 | __modules__[name] = cls 7 | return cls 8 | 9 | return decorator 10 | 11 | 12 | def find(name): 13 | return __modules__[name] 14 | 15 | 16 | ### grammar sugar for logging utilities ### 17 | import logging 18 | 19 | logger = logging.getLogger("pytorch_lightning") 20 | 21 | from pytorch_lightning.utilities.rank_zero import ( 22 | rank_zero_debug, 23 | rank_zero_info, 24 | rank_zero_only, 25 | ) 26 | 27 | debug = rank_zero_debug 28 | info = rank_zero_info 29 | 30 | 31 | @rank_zero_only 32 | def warn(*args, **kwargs): 33 | logger.warn(*args, **kwargs) 34 | 35 | 36 | from . import data, models, systems 37 | -------------------------------------------------------------------------------- /threestudio/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import co3d, image, multiview, uncond, random_multiview 2 | -------------------------------------------------------------------------------- /threestudio/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | background, 3 | exporters, 4 | geometry, 5 | guidance, 6 | materials, 7 | prompt_processors, 8 | renderers, 9 | ) 10 | -------------------------------------------------------------------------------- /threestudio/models/background/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | base, 3 | neural_environment_map_background, 4 | solid_color_background, 5 | textured_background, 6 | ) 7 | -------------------------------------------------------------------------------- /threestudio/models/background/base.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.utils.base import BaseModule 10 | from threestudio.utils.typing import * 11 | 12 | 13 | class BaseBackground(BaseModule): 14 | @dataclass 15 | class Config(BaseModule.Config): 16 | pass 17 | 18 | cfg: Config 19 | 20 | def configure(self): 21 | pass 22 | 23 | def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]: 24 | raise NotImplementedError 25 | -------------------------------------------------------------------------------- /threestudio/models/background/neural_environment_map_background.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.background.base import BaseBackground 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("neural-environment-map-background") 16 | class NeuralEnvironmentMapBackground(BaseBackground): 17 | @dataclass 18 | class Config(BaseBackground.Config): 19 | n_output_dims: int = 3 20 | color_activation: str = "sigmoid" 21 | dir_encoding_config: dict = field( 22 | default_factory=lambda: 
{"otype": "SphericalHarmonics", "degree": 3} 23 | ) 24 | mlp_network_config: dict = field( 25 | default_factory=lambda: { 26 | "otype": "VanillaMLP", 27 | "activation": "ReLU", 28 | "n_neurons": 16, 29 | "n_hidden_layers": 2, 30 | } 31 | ) 32 | random_aug: bool = False 33 | random_aug_prob: float = 0.5 34 | share_aug_bg: bool = False 35 | eval_color: Optional[Tuple[float, float, float]] = None 36 | 37 | cfg: Config 38 | 39 | def configure(self) -> None: 40 | self.encoding = get_encoding(3, self.cfg.dir_encoding_config) 41 | self.network = get_mlp( 42 | self.encoding.n_output_dims, 43 | self.cfg.n_output_dims, 44 | self.cfg.mlp_network_config, 45 | ) 46 | 47 | def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]: 48 | if not self.training and self.cfg.eval_color is not None: 49 | return torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to( 50 | dirs 51 | ) * torch.as_tensor(self.cfg.eval_color).to(dirs) 52 | # viewdirs must be normalized before passing to this function 53 | dirs = (dirs + 1.0) / 2.0 # (-1, 1) => (0, 1) 54 | dirs_embd = self.encoding(dirs.view(-1, 3)) 55 | color = self.network(dirs_embd).view(*dirs.shape[:-1], self.cfg.n_output_dims) 56 | color = get_activation(self.cfg.color_activation)(color) 57 | if ( 58 | self.training 59 | and self.cfg.random_aug 60 | and random.random() < self.cfg.random_aug_prob 61 | ): 62 | # use random background color with probability random_aug_prob 63 | n_color = 1 if self.cfg.share_aug_bg else dirs.shape[0] 64 | color = color * 0 + ( # prevent checking for unused parameters in DDP 65 | torch.rand(n_color, 1, 1, self.cfg.n_output_dims) 66 | .to(dirs) 67 | .expand(*dirs.shape[:-1], -1) 68 | ) 69 | return color 70 | -------------------------------------------------------------------------------- /threestudio/models/background/solid_color_background.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.background.base import BaseBackground 10 | from threestudio.utils.typing import * 11 | 12 | 13 | @threestudio.register("solid-color-background") 14 | class SolidColorBackground(BaseBackground): 15 | @dataclass 16 | class Config(BaseBackground.Config): 17 | n_output_dims: int = 3 18 | color: Tuple = (1.0, 1.0, 1.0) 19 | learned: bool = False 20 | random_aug: bool = False 21 | random_aug_prob: float = 0.5 22 | 23 | cfg: Config 24 | 25 | def configure(self) -> None: 26 | self.env_color: Float[Tensor, "Nc"] 27 | if self.cfg.learned: 28 | self.env_color = nn.Parameter( 29 | torch.as_tensor(self.cfg.color, dtype=torch.float32) 30 | ) 31 | else: 32 | self.register_buffer( 33 | "env_color", torch.as_tensor(self.cfg.color, dtype=torch.float32) 34 | ) 35 | 36 | def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]: 37 | color = ( 38 | torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to(dirs) 39 | * self.env_color 40 | ) 41 | if ( 42 | self.training 43 | and self.cfg.random_aug 44 | and random.random() < self.cfg.random_aug_prob 45 | ): 46 | # use random background color with probability random_aug_prob 47 | color = color * 0 + ( # prevent checking for unused parameters in DDP 48 | torch.rand(dirs.shape[0], 1, 1, self.cfg.n_output_dims) 49 | .to(dirs) 50 | .expand(*dirs.shape[:-1], -1) 51 | ) 52 | return color 53 | 
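The background modules above, like every pluggable component in threestudio, rely on the small registry defined in threestudio/__init__.py: @threestudio.register(name) records the class under a string key and threestudio.find(name) retrieves it, which is how components get looked up by the string names used elsewhere in the codebase. A minimal sketch of that pattern with a made-up background; only registration and lookup are exercised, since the config/instantiation machinery (BaseModule) is not part of this excerpt:

import torch

import threestudio
from threestudio.models.background.base import BaseBackground


@threestudio.register("constant-gray-background")  # hypothetical name, for illustration only
class ConstantGrayBackground(BaseBackground):
    def forward(self, dirs):
        # Same contract as the backgrounds above: (B, H, W, 3) view directions in,
        # (B, H, W, 3) colors out; here just a constant mid-gray.
        return torch.full(
            (*dirs.shape[:-1], 3), 0.5, device=dirs.device, dtype=dirs.dtype
        )


# find() returns whatever class was registered under the given name.
assert threestudio.find("constant-gray-background") is ConstantGrayBackground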
-------------------------------------------------------------------------------- /threestudio/models/background/textured_background.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | import threestudio 8 | from threestudio.models.background.base import BaseBackground 9 | from threestudio.utils.ops import get_activation 10 | from threestudio.utils.typing import * 11 | 12 | 13 | @threestudio.register("textured-background") 14 | class TexturedBackground(BaseBackground): 15 | @dataclass 16 | class Config(BaseBackground.Config): 17 | n_output_dims: int = 3 18 | height: int = 64 19 | width: int = 64 20 | color_activation: str = "sigmoid" 21 | 22 | cfg: Config 23 | 24 | def configure(self) -> None: 25 | self.texture = nn.Parameter( 26 | torch.randn((1, self.cfg.n_output_dims, self.cfg.height, self.cfg.width)) 27 | ) 28 | 29 | def spherical_xyz_to_uv(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B 2"]: 30 | x, y, z = dirs[..., 0], dirs[..., 1], dirs[..., 2] 31 | xy = (x**2 + y**2) ** 0.5 32 | u = torch.atan2(xy, z) / torch.pi 33 | v = torch.atan2(y, x) / (torch.pi * 2) + 0.5 34 | uv = torch.stack([u, v], -1) 35 | return uv 36 | 37 | def forward(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B Nc"]: 38 | dirs_shape = dirs.shape[:-1] 39 | uv = self.spherical_xyz_to_uv(dirs.reshape(-1, dirs.shape[-1])) 40 | uv = 2 * uv - 1 # rescale to [-1, 1] for grid_sample 41 | uv = uv.reshape(1, -1, 1, 2) 42 | color = ( 43 | F.grid_sample( 44 | self.texture, 45 | uv, 46 | mode="bilinear", 47 | padding_mode="reflection", 48 | align_corners=False, 49 | ) 50 | .reshape(self.cfg.n_output_dims, -1) 51 | .T.reshape(*dirs_shape, self.cfg.n_output_dims) 52 | ) 53 | color = get_activation(self.cfg.color_activation)(color) 54 | return color 55 | -------------------------------------------------------------------------------- /threestudio/models/exporters/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import base, mesh_exporter 2 | -------------------------------------------------------------------------------- /threestudio/models/exporters/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import threestudio 4 | from threestudio.models.background.base import BaseBackground 5 | from threestudio.models.geometry.base import BaseImplicitGeometry 6 | from threestudio.models.materials.base import BaseMaterial 7 | from threestudio.utils.base import BaseObject 8 | from threestudio.utils.typing import * 9 | 10 | 11 | @dataclass 12 | class ExporterOutput: 13 | save_name: str 14 | save_type: str 15 | params: Dict[str, Any] 16 | 17 | 18 | class Exporter(BaseObject): 19 | @dataclass 20 | class Config(BaseObject.Config): 21 | save_video: bool = False 22 | 23 | cfg: Config 24 | 25 | def configure( 26 | self, 27 | geometry: BaseImplicitGeometry, 28 | material: BaseMaterial, 29 | background: BaseBackground, 30 | ) -> None: 31 | @dataclass 32 | class SubModules: 33 | geometry: BaseImplicitGeometry 34 | material: BaseMaterial 35 | background: BaseBackground 36 | 37 | self.sub_modules = SubModules(geometry, material, background) 38 | 39 | @property 40 | def geometry(self) -> BaseImplicitGeometry: 41 | return self.sub_modules.geometry 42 | 43 | @property 44 | def material(self) -> BaseMaterial: 45 | return self.sub_modules.material 46 | 47 | @property 48 | def background(self) -> BaseBackground: 49 | return self.sub_modules.background 50 | 51 | def __call__(self, *args, **kwargs) -> List[ExporterOutput]: 52 | raise NotImplementedError 53 | 54 | 55 | @threestudio.register("dummy-exporter") 56 | class DummyExporter(Exporter): 57 | def __call__(self, *args, **kwargs) -> List[ExporterOutput]: 58 | # DummyExporter does not export anything 59 | return [] 60 | -------------------------------------------------------------------------------- /threestudio/models/geometry/__init__.py: -------------------------------------------------------------------------------- 1 | from . import base, implicit_sdf, implicit_volume, tetrahedra_sdf_grid, volume_grid 2 | -------------------------------------------------------------------------------- /threestudio/models/guidance/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | controlnet_guidance, 3 | deep_floyd_guidance, 4 | instructpix2pix_guidance, 5 | stable_diffusion_guidance, 6 | stable_diffusion_vsd_guidance, 7 | zero123_guidance, 8 | multiview_diffusion_guidance, 9 | ) 10 | -------------------------------------------------------------------------------- /threestudio/models/materials/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | base, 3 | diffuse_with_point_light_material, 4 | hybrid_rgb_latent_material, 5 | neural_radiance_material, 6 | no_material, 7 | pbr_material, 8 | sd_latent_adapter_material, 9 | ) 10 | -------------------------------------------------------------------------------- /threestudio/models/materials/base.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.utils.base import BaseModule 10 | from threestudio.utils.typing import * 11 | 12 | 13 | class BaseMaterial(BaseModule): 14 | @dataclass 15 | class Config(BaseModule.Config): 16 | pass 17 | 18 | cfg: Config 19 | requires_normal: bool = False 20 | requires_tangent: bool = False 21 | 22 | def configure(self): 23 | pass 24 | 25 | def forward(self, *args, **kwargs) -> Float[Tensor, "*B 3"]: 26 | raise NotImplementedError 27 | 28 | def export(self, *args, **kwargs) -> Dict[str, Any]: 29 | return {} 30 | -------------------------------------------------------------------------------- /threestudio/models/materials/hybrid_rgb_latent_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import dot, get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("hybrid-rgb-latent-material") 16 | class HybridRGBLatentMaterial(BaseMaterial): 17 | @dataclass 18 | class Config(BaseMaterial.Config): 19 | n_output_dims: int = 3 20 | color_activation: str = "sigmoid" 21 | requires_normal: bool = True 22 | 23 | cfg: Config 24 | 25 | def configure(self) -> None: 26 | self.requires_normal = self.cfg.requires_normal 27 | 28 | def forward( 29 | self, features: Float[Tensor, "B ... Nf"], **kwargs 30 | ) -> Float[Tensor, "B ... Nc"]: 31 | assert ( 32 | features.shape[-1] == self.cfg.n_output_dims 33 | ), f"Expected {self.cfg.n_output_dims} output dims, only got {features.shape[-1]} dims input." 
34 | color = features 35 | color[..., :3] = get_activation(self.cfg.color_activation)(color[..., :3]) 36 | return color 37 | -------------------------------------------------------------------------------- /threestudio/models/materials/neural_radiance_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import dot, get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("neural-radiance-material") 16 | class NeuralRadianceMaterial(BaseMaterial): 17 | @dataclass 18 | class Config(BaseMaterial.Config): 19 | input_feature_dims: int = 8 20 | color_activation: str = "sigmoid" 21 | dir_encoding_config: dict = field( 22 | default_factory=lambda: {"otype": "SphericalHarmonics", "degree": 3} 23 | ) 24 | mlp_network_config: dict = field( 25 | default_factory=lambda: { 26 | "otype": "FullyFusedMLP", 27 | "activation": "ReLU", 28 | "n_neurons": 16, 29 | "n_hidden_layers": 2, 30 | } 31 | ) 32 | 33 | cfg: Config 34 | 35 | def configure(self) -> None: 36 | self.encoding = get_encoding(3, self.cfg.dir_encoding_config) 37 | self.n_input_dims = self.cfg.input_feature_dims + self.encoding.n_output_dims # type: ignore 38 | self.network = get_mlp(self.n_input_dims, 3, self.cfg.mlp_network_config) 39 | 40 | def forward( 41 | self, 42 | features: Float[Tensor, "*B Nf"], 43 | viewdirs: Float[Tensor, "*B 3"], 44 | **kwargs, 45 | ) -> Float[Tensor, "*B 3"]: 46 | # viewdirs and normals must be normalized before passing to this function 47 | viewdirs = (viewdirs + 1.0) / 2.0 # (-1, 1) => (0, 1) 48 | viewdirs_embd = self.encoding(viewdirs.view(-1, 3)) 49 | network_inp = torch.cat( 50 | [features.view(-1, features.shape[-1]), viewdirs_embd], dim=-1 51 | ) 52 | color = self.network(network_inp).view(*features.shape[:-1], 3) 53 | color = get_activation(self.cfg.color_activation)(color) 54 | return color 55 | -------------------------------------------------------------------------------- /threestudio/models/materials/no_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.models.networks import get_encoding, get_mlp 11 | from threestudio.utils.ops import dot, get_activation 12 | from threestudio.utils.typing import * 13 | 14 | 15 | @threestudio.register("no-material") 16 | class NoMaterial(BaseMaterial): 17 | @dataclass 18 | class Config(BaseMaterial.Config): 19 | n_output_dims: int = 3 20 | color_activation: str = "sigmoid" 21 | input_feature_dims: Optional[int] = None 22 | mlp_network_config: Optional[dict] = None 23 | 24 | cfg: Config 25 | 26 | def configure(self) -> None: 27 | self.use_network = False 28 | if ( 29 | self.cfg.input_feature_dims is not None 30 | and self.cfg.mlp_network_config is not None 31 | ): 32 | self.network = get_mlp( 33 | self.cfg.input_feature_dims, 34 | self.cfg.n_output_dims, 35 | self.cfg.mlp_network_config, 36 | ) 37 | self.use_network = True 38 | 39 | def forward( 40 | self, features: 
Float[Tensor, "B ... Nf"], **kwargs 41 | ) -> Float[Tensor, "B ... Nc"]: 42 | if not self.use_network: 43 | assert ( 44 | features.shape[-1] == self.cfg.n_output_dims 45 | ), f"Expected {self.cfg.n_output_dims} output dims, only got {features.shape[-1]} dims input." 46 | color = get_activation(self.cfg.color_activation)(features) 47 | else: 48 | color = self.network(features.view(-1, features.shape[-1])).view( 49 | *features.shape[:-1], self.cfg.n_output_dims 50 | ) 51 | color = get_activation(self.cfg.color_activation)(color) 52 | return color 53 | 54 | def export(self, features: Float[Tensor, "*N Nf"], **kwargs) -> Dict[str, Any]: 55 | color = self(features, **kwargs).clamp(0, 1) 56 | assert color.shape[-1] >= 3, "Output color must have at least 3 channels" 57 | if color.shape[-1] > 3: 58 | threestudio.warn( 59 | "Output color has >3 channels, treating the first 3 as RGB" 60 | ) 61 | return {"albedo": color[..., :3]} 62 | -------------------------------------------------------------------------------- /threestudio/models/materials/sd_latent_adapter_material.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import threestudio 9 | from threestudio.models.materials.base import BaseMaterial 10 | from threestudio.utils.typing import * 11 | 12 | 13 | @threestudio.register("sd-latent-adapter-material") 14 | class StableDiffusionLatentAdapterMaterial(BaseMaterial): 15 | @dataclass 16 | class Config(BaseMaterial.Config): 17 | pass 18 | 19 | cfg: Config 20 | 21 | def configure(self) -> None: 22 | adapter = nn.Parameter( 23 | torch.as_tensor( 24 | [ 25 | # R G B 26 | [0.298, 0.207, 0.208], # L1 27 | [0.187, 0.286, 0.173], # L2 28 | [-0.158, 0.189, 0.264], # L3 29 | [-0.184, -0.271, -0.473], # L4 30 | ] 31 | ) 32 | ) 33 | self.register_parameter("adapter", adapter) 34 | 35 | def forward( 36 | self, features: Float[Tensor, "B ... 4"], **kwargs 37 | ) -> Float[Tensor, "B ... 3"]: 38 | assert features.shape[-1] == 4 39 | color = features @ self.adapter 40 | color = (color + 1) / 2 41 | color = color.clamp(0.0, 1.0) 42 | return color 43 | -------------------------------------------------------------------------------- /threestudio/models/prompt_processors/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | base, 3 | deepfloyd_prompt_processor, 4 | dummy_prompt_processor, 5 | stable_diffusion_prompt_processor, 6 | ) 7 | -------------------------------------------------------------------------------- /threestudio/models/prompt_processors/deepfloyd_prompt_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from dataclasses import dataclass 4 | 5 | import torch 6 | import torch.nn as nn 7 | from diffusers import IFPipeline 8 | from transformers import T5EncoderModel, T5Tokenizer 9 | 10 | import threestudio 11 | from threestudio.models.prompt_processors.base import PromptProcessor, hash_prompt 12 | from threestudio.utils.misc import cleanup 13 | from threestudio.utils.typing import * 14 | 15 | 16 | @threestudio.register("deep-floyd-prompt-processor") 17 | class DeepFloydPromptProcessor(PromptProcessor): 18 | @dataclass 19 | class Config(PromptProcessor.Config): 20 | pretrained_model_name_or_path: str = "DeepFloyd/IF-I-XL-v1.0" 21 | 22 | cfg: Config 23 | 24 | ### these functions are unused, kept for debugging ### 25 | def configure_text_encoder(self) -> None: 26 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 27 | self.text_encoder = T5EncoderModel.from_pretrained( 28 | self.cfg.pretrained_model_name_or_path, 29 | subfolder="text_encoder", 30 | load_in_8bit=True, 31 | variant="8bit", 32 | device_map="auto", 33 | ) # FIXME: behavior of auto device map in multi-GPU training 34 | self.pipe = IFPipeline.from_pretrained( 35 | self.cfg.pretrained_model_name_or_path, 36 | text_encoder=self.text_encoder, # pass the previously instantiated 8bit text encoder 37 | unet=None, 38 | ) 39 | 40 | def destroy_text_encoder(self) -> None: 41 | del self.text_encoder 42 | del self.pipe 43 | cleanup() 44 | 45 | def get_text_embeddings( 46 | self, prompt: Union[str, List[str]], negative_prompt: Union[str, List[str]] 47 | ) -> Tuple[Float[Tensor, "B 77 4096"], Float[Tensor, "B 77 4096"]]: 48 | text_embeddings, uncond_text_embeddings = self.pipe.encode_prompt( 49 | prompt=prompt, negative_prompt=negative_prompt, device=self.device 50 | ) 51 | return text_embeddings, uncond_text_embeddings 52 | 53 | ### 54 | 55 | @staticmethod 56 | def spawn_func(pretrained_model_name_or_path, prompts, cache_dir): 57 | max_length = 77 58 | tokenizer = T5Tokenizer.from_pretrained( 59 | pretrained_model_name_or_path, subfolder="tokenizer" 60 | ) 61 | text_encoder = T5EncoderModel.from_pretrained( 62 | pretrained_model_name_or_path, 63 | subfolder="text_encoder", 64 | torch_dtype=torch.float16, # suppress warning 65 | load_in_8bit=True, 66 | variant="8bit", 67 | device_map="auto", 68 | ) 69 | with torch.no_grad(): 70 | text_inputs = tokenizer( 71 | prompts, 72 | padding="max_length", 73 | max_length=max_length, 74 | truncation=True, 75 | add_special_tokens=True, 76 | return_tensors="pt", 77 | ) 78 | text_input_ids = text_inputs.input_ids 79 | attention_mask = text_inputs.attention_mask 80 | text_embeddings = text_encoder( 81 | text_input_ids, 82 | attention_mask=attention_mask, 83 | ) 84 | text_embeddings = text_embeddings[0] 85 | 86 | for prompt, embedding in zip(prompts, text_embeddings): 87 | torch.save( 88 | embedding, 89 | os.path.join( 90 | cache_dir, 91 | f"{hash_prompt(pretrained_model_name_or_path, prompt)}.pt", 92 | ), 93 | ) 94 | 95 | del text_encoder 96 | -------------------------------------------------------------------------------- /threestudio/models/prompt_processors/dummy_prompt_processor.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from dataclasses import dataclass 4 | 5 | import threestudio 6 | from threestudio.models.prompt_processors.base import PromptProcessor, hash_prompt 7 | from threestudio.utils.misc import cleanup 8 | from threestudio.utils.typing import * 9 | 10 | 11 | @threestudio.register("dummy-prompt-processor") 12 | class DummyPromptProcessor(PromptProcessor): 13 | @dataclass 14 | class Config(PromptProcessor.Config): 15 | pretrained_model_name_or_path: str = "" 16 | prompt: str = "" 17 | 18 | cfg: Config 19 | -------------------------------------------------------------------------------- /threestudio/models/renderers/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | base, 3 | deferred_volume_renderer, 4 | gan_volume_renderer, 5 | nerf_volume_renderer, 6 | neus_volume_renderer, 7 | nvdiff_rasterizer, 8 | patch_renderer, 9 | ) 10 | -------------------------------------------------------------------------------- /threestudio/models/renderers/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import nerfacc 4 | import torch 5 | import torch.nn.functional as F 6 | 7 | import threestudio 8 | from threestudio.models.background.base import BaseBackground 9 | from threestudio.models.geometry.base import BaseImplicitGeometry 10 | from threestudio.models.materials.base import BaseMaterial 11 | from threestudio.utils.base import BaseModule 12 | from threestudio.utils.typing import * 13 | 14 | 15 | class Renderer(BaseModule): 16 | @dataclass 17 | class Config(BaseModule.Config): 18 | radius: float = 1.0 19 | 20 | cfg: Config 21 | 22 | def configure( 23 | self, 24 | geometry: BaseImplicitGeometry, 25 | material: BaseMaterial, 26 | background: BaseBackground, 27 | ) -> None: 28 | # keep references to submodules using namedtuple, avoid being registered as modules 29 | @dataclass 30 | class SubModules: 31 | geometry: BaseImplicitGeometry 32 | material: BaseMaterial 33 | background: BaseBackground 34 | 35 | self.sub_modules = SubModules(geometry, material, background) 36 | 37 | # set up bounding box 38 | self.bbox: Float[Tensor, "2 3"] 39 | self.register_buffer( 40 | "bbox", 41 | torch.as_tensor( 42 | [ 43 | [-self.cfg.radius, -self.cfg.radius, -self.cfg.radius], 44 | [self.cfg.radius, self.cfg.radius, self.cfg.radius], 45 | ], 46 | dtype=torch.float32, 47 | ), 48 | ) 49 | 50 | def forward(self, *args, **kwargs) -> Dict[str, Any]: 51 | raise NotImplementedError 52 | 53 | @property 54 | def geometry(self) -> BaseImplicitGeometry: 55 | return self.sub_modules.geometry 56 | 57 | @property 58 | def material(self) -> BaseMaterial: 59 | return self.sub_modules.material 60 | 61 | @property 62 | def background(self) -> BaseBackground: 63 | return self.sub_modules.background 64 | 65 | def set_geometry(self, geometry: BaseImplicitGeometry) -> None: 66 | self.sub_modules.geometry = geometry 67 | 68 | def set_material(self, material: BaseMaterial) -> None: 69 | self.sub_modules.material = material 70 | 71 | def set_background(self, background: BaseBackground) -> None: 72 | self.sub_modules.background = background 73 | 74 | 75 | class VolumeRenderer(Renderer): 76 | pass 77 | 78 | 79 | class Rasterizer(Renderer): 80 | pass 81 | -------------------------------------------------------------------------------- /threestudio/models/renderers/deferred_volume_renderer.py: 
-------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | import threestudio 7 | from threestudio.models.renderers.base import VolumeRenderer 8 | 9 | 10 | class DeferredVolumeRenderer(VolumeRenderer): 11 | pass 12 | -------------------------------------------------------------------------------- /threestudio/scripts/make_training_vid.py: -------------------------------------------------------------------------------- 1 | # make_training_vid("outputs/zero123/64_teddy_rgba.png@20230627-195615", frames_per_vid=30, fps=20, max_iters=200) 2 | import argparse 3 | import glob 4 | import os 5 | 6 | import imageio 7 | import numpy as np 8 | from PIL import Image, ImageDraw 9 | from tqdm import tqdm 10 | 11 | 12 | def draw_text_in_image(img, texts): 13 | img = Image.fromarray(img) 14 | draw = ImageDraw.Draw(img) 15 | black, white = (0, 0, 0), (255, 255, 255) 16 | for i, text in enumerate(texts): 17 | draw.text((2, (img.size[1] // len(texts)) * i + 1), f"{text}", white) 18 | draw.text((0, (img.size[1] // len(texts)) * i + 1), f"{text}", white) 19 | draw.text((2, (img.size[1] // len(texts)) * i - 1), f"{text}", white) 20 | draw.text((0, (img.size[1] // len(texts)) * i - 1), f"{text}", white) 21 | draw.text((1, (img.size[1] // len(texts)) * i), f"{text}", black) 22 | return np.asarray(img) 23 | 24 | 25 | def make_training_vid(exp, frames_per_vid=1, fps=3, max_iters=None, max_vids=None): 26 | # exp = "/admin/home-vikram/git/threestudio/outputs/zero123/64_teddy_rgba.png@20230627-195615" 27 | files = glob.glob(os.path.join(exp, "save", "*.mp4")) 28 | if os.path.join(exp, "save", "training_vid.mp4") in files: 29 | files.remove(os.path.join(exp, "save", "training_vid.mp4")) 30 | its = [int(os.path.basename(file).split("-")[0].split("it")[-1]) for file in files] 31 | it_sort = np.argsort(its) 32 | files = list(np.array(files)[it_sort]) 33 | its = list(np.array(its)[it_sort]) 34 | max_vids = max_iters // its[0] if max_iters is not None else max_vids 35 | files, its = files[:max_vids], its[:max_vids] 36 | frames, i = [], 0 37 | for it, file in tqdm(zip(its, files), total=len(files)): 38 | vid = imageio.mimread(file) 39 | for _ in range(frames_per_vid): 40 | frame = vid[i % len(vid)] 41 | frame = draw_text_in_image(frame, [str(it)]) 42 | frames.append(frame) 43 | i += 1 44 | # Save 45 | imageio.mimwrite(os.path.join(exp, "save", "training_vid.mp4"), frames, fps=fps) 46 | 47 | 48 | def join(file1, file2, name): 49 | # file1 = "/admin/home-vikram/git/threestudio/outputs/zero123/OLD_64_dragon2_rgba.png@20230629-023028/save/it200-val.mp4" 50 | # file2 = "/admin/home-vikram/git/threestudio/outputs/zero123/64_dragon2_rgba.png@20230628-152734/save/it200-val.mp4" 51 | vid1 = imageio.mimread(file1) 52 | vid2 = imageio.mimread(file2) 53 | frames = [] 54 | for f1, f2 in zip(vid1, vid2): 55 | frames.append( 56 | np.concatenate([f1[:, : f1.shape[0]], f2[:, : f2.shape[0]]], axis=1) 57 | ) 58 | imageio.mimwrite(name, frames) 59 | 60 | 61 | if __name__ == "__main__": 62 | parser = argparse.ArgumentParser() 63 | parser.add_argument("--exp", help="directory of experiment") 64 | parser.add_argument( 65 | "--frames_per_vid", type=int, default=1, help="# of frames from each val vid" 66 | ) 67 | parser.add_argument("--fps", type=int, help="fps of the output video") 68 | parser.add_argument("--max_iters", type=int, help="max # of iters to save") 69 | parser.add_argument( 70 | "--max_vids", 71 | type=int, 72 | 
help="max # of val videos to save. Will be overridden by max_iters", 73 | ) 74 | args = parser.parse_args() 75 | make_training_vid( 76 | args.exp, args.frames_per_vid, args.fps, args.max_iters, args.max_vids 77 | ) 78 | -------------------------------------------------------------------------------- /threestudio/scripts/run_imagedream.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH=$PYTHONPATH:./extern/ImageDream 2 | 3 | gpu=0 4 | method=imagedream-sd21-shading 5 | name="astronaut" 6 | prompt="an astronaut riding a horse" 7 | image_path="./extern/ImageDream/assets/astronaut.png" 8 | 9 | # for pixel [ImageDream-P] 10 | ckpt_path="./extern/ImageDream/release_models/ImageDream/sd-v2.1-base-4view-ipmv.pt" 11 | config_path="./extern/ImageDream/imagedream/configs/sd_v2_base_ipmv.yaml" 12 | python3 launch.py \ 13 | --config configs/$method.yaml \ 14 | --train \ 15 | --gpu $gpu \ 16 | name="${method}" \ 17 | tag=${name} \ 18 | system.prompt_processor.prompt="$prompt" \ 19 | system.prompt_processor.image_path="$image_path" \ 20 | system.guidance.ckpt_path="$ckpt_path" \ 21 | system.guidance.config_path="$config_path" 22 | 23 | # for local [ImageDream-G] 24 | ckpt_path="./extern/ImageDream/release_models/ImageDream/sd-v2.1-base-4view-ipmv-local.pt" 25 | config_path="./extern/ImageDream/imagedream/configs/sd_v2_base_ipmv_local.yaml" 26 | python3 launch.py \ 27 | --config configs/$method.yaml \ 28 | --train \ 29 | --gpu $gpu \ 30 | name="${method}" \ 31 | tag=${name} \ 32 | system.prompt_processor.prompt="$prompt" \ 33 | system.prompt_processor.image_path="$image_path" \ 34 | system.guidance.ckpt_path="$ckpt_path" \ 35 | system.guidance.config_path="$config_path" \ 36 | system.guidance.ip_mode="local" -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123.sh: -------------------------------------------------------------------------------- 1 | NAME="dragon2" 2 | 3 | # Phase 1 - 64x64 4 | python launch.py --config configs/zero123.yaml --train --gpu 7 data.image_path=./load/images/${NAME}_rgba.png use_timestamp=False name=${NAME} tag=Phase1 # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase1 5 | 6 | # Phase 1.5 - 512 refine 7 | python launch.py --config configs/zero123-geometry.yaml --train --gpu 4 data.image_path=./load/images/${NAME}_rgba.png system.geometry_convert_from=./outputs/${NAME}/Phase1/ckpts/last.ckpt use_timestamp=False name=${NAME} tag=Phase1p5 # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase1p5 8 | 9 | # Phase 2 - dreamfusion 10 | python launch.py --config configs/experimental/imagecondition_zero123nerf.yaml --train --gpu 5 data.image_path=./load/images/${NAME}_rgba.png system.prompt_processor.prompt="A 3D model of a friendly dragon" system.weights="/admin/home-vikram/git/threestudio/outputs/${NAME}/Phase1/ckpts/last.ckpt" name=${NAME} tag=Phase2 # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase2 11 | 12 | # Phase 2 - SDF + dreamfusion 13 | python launch.py --config configs/experimental/imagecondition_zero123nerf_refine.yaml --train --gpu 5 data.image_path=./load/images/${NAME}_rgba.png system.prompt_processor.prompt="A 3D model of a friendly dragon" 
system.geometry_convert_from="/admin/home-vikram/git/threestudio/outputs/${NAME}/Phase1/ckpts/last.ckpt" name=${NAME} tag=Phase2_refine # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="zero123" system.loggers.wandb.name=${NAME}_Phase2_refine 14 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_comparison.sh: -------------------------------------------------------------------------------- 1 | # with standard zero123 2 | threestudio/scripts/run_zero123_phase.sh 6 anya_front 105000 0 3 | 4 | # with zero123XL (not released yet!) 5 | threestudio/scripts/run_zero123_phase.sh 1 anya_front XL_20230604 0 6 | threestudio/scripts/run_zero123_phase.sh 2 baby_phoenix_on_ice XL_20230604 20 7 | threestudio/scripts/run_zero123_phase.sh 3 beach_house_1 XL_20230604 50 8 | threestudio/scripts/run_zero123_phase.sh 4 bollywood_actress XL_20230604 0 9 | threestudio/scripts/run_zero123_phase.sh 5 beach_house_2 XL_20230604 30 10 | threestudio/scripts/run_zero123_phase.sh 6 hamburger XL_20230604 10 11 | threestudio/scripts/run_zero123_phase.sh 7 cactus XL_20230604 8 12 | threestudio/scripts/run_zero123_phase.sh 0 catstatue XL_20230604 50 13 | threestudio/scripts/run_zero123_phase.sh 1 church_ruins XL_20230604 0 14 | threestudio/scripts/run_zero123_phase.sh 2 firekeeper XL_20230604 10 15 | threestudio/scripts/run_zero123_phase.sh 3 futuristic_car XL_20230604 20 16 | threestudio/scripts/run_zero123_phase.sh 4 mona_lisa XL_20230604 10 17 | threestudio/scripts/run_zero123_phase.sh 5 teddy XL_20230604 20 18 | 19 | # set guidance_eval to 0, to greatly speed up training 20 | threestudio/scripts/run_zero123_phase.sh 7 anya_front XL_20230604 0 system.freq.guidance_eval=0 21 | 22 | # disable wandb for faster training (or if you don't want to use it) 23 | threestudio/scripts/run_zero123_phase.sh 7 anya_front XL_20230604 0 system.loggers.wandb.enable=false system.freq.guidance_eval=0 24 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_phase.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=$1 # e.g. 0 3 | IMAGE_PREFIX=$2 # e.g. "anya_front" 4 | ZERO123_PREFIX=$3 # e.g. "zero123-xl" 5 | ELEVATION=$4 # e.g. 0 6 | REST=${@:5:99} # e.g. "system.guidance.min_step_percent=0.1 system.guidance.max_step_percent=0.9" 7 | 8 | # change this config if you don't use wandb or want to speed up training 9 | python launch.py --config configs/zero123.yaml --train --gpu $GPU_ID system.loggers.wandb.enable=true system.loggers.wandb.project="claforte-noise_atten" \ 10 | system.loggers.wandb.name="${IMAGE_PREFIX}_zero123_${ZERO123_PREFIX}...fov20_${REST}" \ 11 | data.image_path=./load/images/${IMAGE_PREFIX}_rgba.png system.freq.guidance_eval=37 \ 12 | system.guidance.pretrained_model_name_or_path="./load/zero123/${ZERO123_PREFIX}.ckpt" \ 13 | system.guidance.cond_elevation_deg=$ELEVATION \ 14 | ${REST} 15 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_phase2.sh: -------------------------------------------------------------------------------- 1 | # Reconstruct Anya using latest Zero123XL, in <2000 steps. 
2 | python launch.py --config configs/zero123.yaml --train --gpu 0 system.loggers.wandb.enable=true system.loggers.wandb.project="voletiv-anya-new" system.loggers.wandb.name="claforte_params" data.image_path=./load/images/anya_front_rgba.png system.freq.ref_or_zero123="accumulate" system.freq.guidance_eval=13 system.guidance.pretrained_model_name_or_path="./load/zero123/zero123-xl.ckpt" 3 | 4 | # PHASE 2 5 | python launch.py --config configs/experimental/imagecondition_zero123nerf.yaml --train --gpu 0 system.prompt_processor.prompt="A DSLR 3D photo of a cute anime schoolgirl stands proudly with her arms in the air, pink hair ( unreal engine 5 trending on Artstation Ghibli 4k )" system.weights=outputs/zero123/128_anya_front_rgba.png@20230623-145711/ckpts/last.ckpt system.freq.guidance_eval=13 system.loggers.wandb.enable=true system.loggers.wandb.project="voletiv-anya-new" data.image_path=./load/images/anya_front_rgba.png system.loggers.wandb.name="anya" data.random_camera.progressive_until=500 6 | -------------------------------------------------------------------------------- /threestudio/scripts/run_zero123_sbatch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | files = [ 5 | "~/git/threestudio/load/images/dog1_rgba.png", 6 | "~/git/threestudio/load/images/dragon2_rgba.png", 7 | ] 8 | 9 | for file in files: 10 | name = os.path.basename(file).split("_rgba.png")[0] 11 | with open( 12 | os.path.expanduser("~/git/threestudio/threestudio/scripts/zero123_sbatch.sh"), 13 | "w", 14 | ) as f: 15 | f.write("#!/bin/bash\n") 16 | f.write(f"#SBATCH --job-name=vikky_{name}\n") 17 | f.write("#SBATCH --account=mod3d\n") 18 | f.write("#SBATCH --partition=g40\n") 19 | f.write("#SBATCH --gpus=1\n") 20 | f.write("#SBATCH --time=0-00:07:00\n") 21 | f.write("conda activate three\n") 22 | f.write("cd ~/git/threestudio/\n") 23 | f.write(f"NAME={name}\n") 24 | # Phase 1 25 | f.write( 26 | "python launch.py --config configs/zero123.yaml --train data.image_path=./load/images/${NAME}_rgba.png use_timestamp=true name=${NAME} tag=Phase1 system.loggers.wandb.enable=false system.loggers.wandb.project='zero123' system.loggers.wandb.name=${NAME}_Phase1\n" 27 | ) 28 | # # Phase 1.5 29 | # f.write( 30 | # "python launch.py --config configs/zero123-geometry.yaml --train data.image_path=./load/images/${NAME}_rgba.png system.geometry_convert_from=./outputs/${NAME}/Phase1/ckpts/last.ckpt use_timestamp=False name=${NAME} tag=Phase1p5 system.loggers.wandb.enable=true system.loggers.wandb.project='zero123' system.loggers.wandb.name=${NAME}_Phase1p5\n" 31 | # ) 32 | os.system("sbatch ~/git/threestudio/threestudio/scripts/zero123_sbatch.sh") 33 | time.sleep(1) 34 | -------------------------------------------------------------------------------- /threestudio/scripts/zero123_demo.py: -------------------------------------------------------------------------------- 1 | # 1. Generate using StableDiffusionXL https://clipdrop.co/stable-diffusion 2 | 3 | # 2. Remove background https://clipdrop.co/remove-background 4 | 5 | # 3. Resize to 512x512 https://www.iloveimg.com/resize-image 6 | 7 | # (OPTIONAL) 8 | # 4. Estimate depth and normal https://omnidata.vision/demo/ (I used Omnidata Normal (with X-TC & 3DCC), and MiDaS Depth) 9 | 10 | 11 | # (OPTIONAL) 12 | # 5. 
Convert depth image from RGB to greyscale 13 | def depth_rgb_to_grey(depth_filename): 14 | # depth_filename = "image_depth.png" 15 | import cv2 16 | import numpy as np 17 | 18 | # import shutil 19 | # shutil.copyfile(depth_filename, depth_filename.replace("_depth", "_depth_orig")) 20 | depth = cv2.imread(depth_filename) 21 | depth = cv2.cvtColor(depth, cv2.COLOR_BGR2GRAY) 22 | mask = ( 23 | cv2.resize( 24 | cv2.imread(depth_filename.replace("_depth", "_rgba"), cv2.IMREAD_UNCHANGED)[ 25 | :, :, -1 26 | ], 27 | depth.shape, 28 | ) 29 | > 0 30 | ) 31 | # depth[mask] = (depth[mask] - depth.min()) / (depth.max() - depth.min() + 1e-9) 32 | depth = (depth - depth.min()) / (depth.max() - depth.min() + 1e-9) 33 | depth[~mask] = 0 34 | depth = (depth * 255).astype(np.uint8) 35 | cv2.imwrite(depth_filename, depth) 36 | 37 | 38 | # (OPTIONAL) 39 | # 6. Mask normal 40 | def normal_mask(normal_filename): 41 | # filename = "image_normal.png" 42 | import cv2 43 | 44 | # import shutil 45 | # shutil.copyfile(normal_filename, normal_filename.replace("_normal", "_normal_orig")) 46 | normal = cv2.imread(normal_filename) 47 | mask = ( 48 | cv2.resize( 49 | cv2.imread( 50 | normal_filename.replace("_normal", "_rgba"), cv2.IMREAD_UNCHANGED 51 | )[:, :, -1], 52 | normal.shape[:2], 53 | ) 54 | > 0 55 | ) 56 | normal[~mask] = 0 57 | cv2.imwrite(normal_filename, normal) 58 | 59 | 60 | # 5. Run Zero123 61 | # python launch.py --config configs/zero123.yaml --train data.image_path=./load/images/grootplant_rgba.png 62 | -------------------------------------------------------------------------------- /threestudio/scripts/zero123_sbatch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=vikky 3 | #SBATCH --account=mod3d 4 | #SBATCH --partition=g40 5 | #SBATCH --gpus=1 6 | #SBATCH --time=0-00:07:00 7 | conda activate three 8 | cd ~/git/threestudio/ 9 | NAME="dog1" 10 | python launch.py --config configs/zero123.yaml --train data.image_path=./load/images/${NAME}_rgba.png use_timestamp=False name=${NAME} tag=Phase1 system.loggers.wandb.enable=true system.loggers.wandb.project='zero123' system.loggers.wandb.name=${NAME}_Phase1 11 | -------------------------------------------------------------------------------- /threestudio/systems/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ( 2 | control4d_multiview, 3 | dreamfusion, 4 | fantasia3d, 5 | imagedreamfusion, 6 | instructnerf2nerf, 7 | latentnerf, 8 | magic3d, 9 | prolificdreamer, 10 | sjc, 11 | textmesh, 12 | zero123, 13 | imagedream, 14 | ) 15 | -------------------------------------------------------------------------------- /threestudio/systems/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import warnings 3 | from bisect import bisect_right 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch.optim import lr_scheduler 8 | 9 | import threestudio 10 | 11 | 12 | def get_scheduler(name): 13 | if hasattr(lr_scheduler, name): 14 | return getattr(lr_scheduler, name) 15 | else: 16 | raise NotImplementedError 17 | 18 | 19 | def getattr_recursive(m, attr): 20 | for name in attr.split("."): 21 | m = getattr(m, name) 22 | return m 23 | 24 | 25 | def get_parameters(model, name): 26 | module = getattr_recursive(model, name) 27 | if isinstance(module, nn.Module): 28 | return module.parameters() 29 | elif isinstance(module, nn.Parameter): 30 | return module 31 | return [] 32 | 33 | 34 | def parse_optimizer(config, model): 35 | if hasattr(config, "params"): 36 | params = [ 37 | {"params": get_parameters(model, name), "name": name, **args} 38 | for name, args in config.params.items() 39 | ] 40 | threestudio.debug(f"Specify optimizer params: {config.params}") 41 | else: 42 | params = model.parameters() 43 | if config.name in ["FusedAdam"]: 44 | import apex 45 | 46 | optim = getattr(apex.optimizers, config.name)(params, **config.args) 47 | elif config.name in ["Adan"]: 48 | from threestudio.systems import optimizers 49 | 50 | optim = getattr(optimizers, config.name)(params, **config.args) 51 | else: 52 | optim = getattr(torch.optim, config.name)(params, **config.args) 53 | return optim 54 | 55 | 56 | def parse_scheduler(config, optimizer): 57 | interval = config.get("interval", "epoch") 58 | assert interval in ["epoch", "step"] 59 | if config.name == "SequentialLR": 60 | scheduler = { 61 | "scheduler": lr_scheduler.SequentialLR( 62 | optimizer, 63 | [ 64 | parse_scheduler(conf, optimizer)["scheduler"] 65 | for conf in config.schedulers 66 | ], 67 | milestones=config.milestones, 68 | ), 69 | "interval": interval, 70 | } 71 | elif config.name == "ChainedScheduler": 72 | scheduler = { 73 | "scheduler": lr_scheduler.ChainedScheduler( 74 | [ 75 | parse_scheduler(conf, optimizer)["scheduler"] 76 | for conf in config.schedulers 77 | ] 78 | ), 79 | "interval": interval, 80 | } 81 | else: 82 | scheduler = { 83 | "scheduler": get_scheduler(config.name)(optimizer, **config.args), 84 | "interval": interval, 85 | } 86 | return scheduler 87 | -------------------------------------------------------------------------------- /threestudio/utils/GAN/distribution.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | 
self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to( 34 | device=self.parameters.device 35 | ) 36 | 37 | def sample(self): 38 | x = self.mean + self.std * torch.randn(self.mean.shape).to( 39 | device=self.parameters.device 40 | ) 41 | return x 42 | 43 | def kl(self, other=None): 44 | if self.deterministic: 45 | return torch.Tensor([0.0]) 46 | else: 47 | if other is None: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, 50 | dim=[1, 2, 3], 51 | ) 52 | else: 53 | return 0.5 * torch.sum( 54 | torch.pow(self.mean - other.mean, 2) / other.var 55 | + self.var / other.var 56 | - 1.0 57 | - self.logvar 58 | + other.logvar, 59 | dim=[1, 2, 3], 60 | ) 61 | 62 | def nll(self, sample, dims=[1, 2, 3]): 63 | if self.deterministic: 64 | return torch.Tensor([0.0]) 65 | logtwopi = np.log(2.0 * np.pi) 66 | return 0.5 * torch.sum( 67 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 68 | dim=dims, 69 | ) 70 | 71 | def mode(self): 72 | return self.mean 73 | 74 | 75 | def normal_kl(mean1, logvar1, mean2, logvar2): 76 | """ 77 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 78 | Compute the KL divergence between two gaussians. 79 | Shapes are automatically broadcasted, so batches can be compared to 80 | scalars, among other use cases. 81 | """ 82 | tensor = None 83 | for obj in (mean1, logvar1, mean2, logvar2): 84 | if isinstance(obj, torch.Tensor): 85 | tensor = obj 86 | break 87 | assert tensor is not None, "at least one argument must be a Tensor" 88 | 89 | # Force variances to be Tensors. Broadcasting helps convert scalars to 90 | # Tensors, but it does not work for torch.exp(). 
91 | logvar1, logvar2 = [ 92 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 93 | for x in (logvar1, logvar2) 94 | ] 95 | 96 | return 0.5 * ( 97 | -1.0 98 | + logvar2 99 | - logvar1 100 | + torch.exp(logvar1 - logvar2) 101 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 102 | ) 103 | -------------------------------------------------------------------------------- /threestudio/utils/GAN/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def generator_loss(discriminator, inputs, reconstructions, cond=None): 6 | if cond is None: 7 | logits_fake = discriminator(reconstructions.contiguous()) 8 | else: 9 | logits_fake = discriminator( 10 | torch.cat((reconstructions.contiguous(), cond), dim=1) 11 | ) 12 | g_loss = -torch.mean(logits_fake) 13 | return g_loss 14 | 15 | 16 | def hinge_d_loss(logits_real, logits_fake): 17 | loss_real = torch.mean(F.relu(1.0 - logits_real)) 18 | loss_fake = torch.mean(F.relu(1.0 + logits_fake)) 19 | d_loss = 0.5 * (loss_real + loss_fake) 20 | return d_loss 21 | 22 | 23 | def discriminator_loss(discriminator, inputs, reconstructions, cond=None): 24 | if cond is None: 25 | logits_real = discriminator(inputs.contiguous().detach()) 26 | logits_fake = discriminator(reconstructions.contiguous().detach()) 27 | else: 28 | logits_real = discriminator( 29 | torch.cat((inputs.contiguous().detach(), cond), dim=1) 30 | ) 31 | logits_fake = discriminator( 32 | torch.cat((reconstructions.contiguous().detach(), cond), dim=1) 33 | ) 34 | d_loss = hinge_d_loss(logits_real, logits_fake).mean() 35 | return d_loss 36 | -------------------------------------------------------------------------------- /threestudio/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import base 2 | -------------------------------------------------------------------------------- /threestudio/utils/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from threestudio.utils.config import parse_structured 7 | from threestudio.utils.misc import get_device, load_module_weights 8 | from threestudio.utils.typing import * 9 | 10 | 11 | class Configurable: 12 | @dataclass 13 | class Config: 14 | pass 15 | 16 | def __init__(self, cfg: Optional[dict] = None) -> None: 17 | super().__init__() 18 | self.cfg = parse_structured(self.Config, cfg) 19 | 20 | 21 | class Updateable: 22 | def do_update_step( 23 | self, epoch: int, global_step: int, on_load_weights: bool = False 24 | ): 25 | for attr in self.__dir__(): 26 | if attr.startswith("_"): 27 | continue 28 | try: 29 | module = getattr(self, attr) 30 | except: 31 | continue # ignore attributes like property, which can't be retrieved using getattr? 
32 | if isinstance(module, Updateable): 33 | module.do_update_step( 34 | epoch, global_step, on_load_weights=on_load_weights 35 | ) 36 | self.update_step(epoch, global_step, on_load_weights=on_load_weights) 37 | 38 | def update_step(self, epoch: int, global_step: int, on_load_weights: bool = False): 39 | # override this method to implement custom update logic 40 | # if on_load_weights is True, you should be careful doing things related to model evaluations, 41 | # as the models and tensors are not guaranteed to be on the same device 42 | pass 43 | 44 | 45 | def update_if_possible(module: Any, epoch: int, global_step: int) -> None: 46 | if isinstance(module, Updateable): 47 | module.do_update_step(epoch, global_step) 48 | 49 | 50 | class BaseObject(Updateable): 51 | @dataclass 52 | class Config: 53 | pass 54 | 55 | cfg: Config # add this to every subclass of BaseObject to enable static type checking 56 | 57 | def __init__( 58 | self, cfg: Optional[Union[dict, DictConfig]] = None, *args, **kwargs 59 | ) -> None: 60 | super().__init__() 61 | self.cfg = parse_structured(self.Config, cfg) 62 | self.device = get_device() 63 | self.configure(*args, **kwargs) 64 | 65 | def configure(self, *args, **kwargs) -> None: 66 | pass 67 | 68 | 69 | class BaseModule(nn.Module, Updateable): 70 | @dataclass 71 | class Config: 72 | weights: Optional[str] = None 73 | 74 | cfg: Config # add this to every subclass of BaseModule to enable static type checking 75 | 76 | def __init__( 77 | self, cfg: Optional[Union[dict, DictConfig]] = None, *args, **kwargs 78 | ) -> None: 79 | super().__init__() 80 | self.cfg = parse_structured(self.Config, cfg) 81 | self.device = get_device() 82 | self.configure(*args, **kwargs) 83 | if self.cfg.weights is not None: 84 | # format: path/to/weights:module_name 85 | weights_path, module_name = self.cfg.weights.split(":") 86 | state_dict, epoch, global_step = load_module_weights( 87 | weights_path, module_name=module_name, map_location="cpu" 88 | ) 89 | self.load_state_dict(state_dict) 90 | self.do_update_step( 91 | epoch, global_step, on_load_weights=True 92 | ) # restore states 93 | # dummy tensor to indicate model state 94 | self._dummy: Float[Tensor, "..."] 95 | self.register_buffer("_dummy", torch.zeros(0).float(), persistent=False) 96 | 97 | def configure(self, *args, **kwargs) -> None: 98 | pass 99 | -------------------------------------------------------------------------------- /threestudio/utils/perceptual/__init__.py: -------------------------------------------------------------------------------- 1 | from .perceptual import PerceptualLoss 2 | -------------------------------------------------------------------------------- /threestudio/utils/rasterize.py: -------------------------------------------------------------------------------- 1 | import nvdiffrast.torch as dr 2 | import torch 3 | 4 | from threestudio.utils.typing import * 5 | 6 | 7 | class NVDiffRasterizerContext: 8 | def __init__(self, context_type: str, device: torch.device) -> None: 9 | self.device = device 10 | self.ctx = self.initialize_context(context_type, device) 11 | 12 | def initialize_context( 13 | self, context_type: str, device: torch.device 14 | ) -> Union[dr.RasterizeGLContext, dr.RasterizeCudaContext]: 15 | if context_type == "gl": 16 | return dr.RasterizeGLContext(device=device) 17 | elif context_type == "cuda": 18 | return dr.RasterizeCudaContext(device=device) 19 | else: 20 | raise ValueError(f"Unknown rasterizer context type: {context_type}") 21 | 22 | def vertex_transform( 23 | self, verts: 
Float[Tensor, "Nv 3"], mvp_mtx: Float[Tensor, "B 4 4"] 24 | ) -> Float[Tensor, "B Nv 4"]: 25 | verts_homo = torch.cat( 26 | [verts, torch.ones([verts.shape[0], 1]).to(verts)], dim=-1 27 | ) 28 | return torch.matmul(verts_homo, mvp_mtx.permute(0, 2, 1)) 29 | 30 | def rasterize( 31 | self, 32 | pos: Float[Tensor, "B Nv 4"], 33 | tri: Integer[Tensor, "Nf 3"], 34 | resolution: Union[int, Tuple[int, int]], 35 | ): 36 | # rasterize in instance mode (single topology) 37 | return dr.rasterize(self.ctx, pos.float(), tri.int(), resolution, grad_db=True) 38 | 39 | def rasterize_one( 40 | self, 41 | pos: Float[Tensor, "Nv 4"], 42 | tri: Integer[Tensor, "Nf 3"], 43 | resolution: Union[int, Tuple[int, int]], 44 | ): 45 | # rasterize one single mesh under a single viewpoint 46 | rast, rast_db = self.rasterize(pos[None, ...], tri, resolution) 47 | return rast[0], rast_db[0] 48 | 49 | def antialias( 50 | self, 51 | color: Float[Tensor, "B H W C"], 52 | rast: Float[Tensor, "B H W 4"], 53 | pos: Float[Tensor, "B Nv 4"], 54 | tri: Integer[Tensor, "Nf 3"], 55 | ) -> Float[Tensor, "B H W C"]: 56 | return dr.antialias(color.float(), rast, pos.float(), tri.int()) 57 | 58 | def interpolate( 59 | self, 60 | attr: Float[Tensor, "B Nv C"], 61 | rast: Float[Tensor, "B H W 4"], 62 | tri: Integer[Tensor, "Nf 3"], 63 | rast_db=None, 64 | diff_attrs=None, 65 | ) -> Float[Tensor, "B H W C"]: 66 | return dr.interpolate( 67 | attr.float(), rast, tri.int(), rast_db=rast_db, diff_attrs=diff_attrs 68 | ) 69 | 70 | def interpolate_one( 71 | self, 72 | attr: Float[Tensor, "Nv C"], 73 | rast: Float[Tensor, "B H W 4"], 74 | tri: Integer[Tensor, "Nf 3"], 75 | rast_db=None, 76 | diff_attrs=None, 77 | ) -> Float[Tensor, "B H W C"]: 78 | return self.interpolate(attr[None, ...], rast, tri, rast_db, diff_attrs) 79 | -------------------------------------------------------------------------------- /threestudio/utils/typing.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains type annotations for the project, using 3 | 1. Python type hints (https://docs.python.org/3/library/typing.html) for Python objects 4 | 2. jaxtyping (https://github.com/google/jaxtyping/blob/main/API.md) for PyTorch tensors 5 | 6 | Two types of typing checking can be used: 7 | 1. Static type checking with mypy (install with pip and enabled as the default linter in VSCode) 8 | 2. Runtime type checking with typeguard (install with pip and triggered at runtime, mainly for tensor dtype and shape checking) 9 | """ 10 | 11 | # Basic types 12 | from typing import ( 13 | Any, 14 | Callable, 15 | Dict, 16 | Iterable, 17 | List, 18 | Literal, 19 | NamedTuple, 20 | NewType, 21 | Optional, 22 | Sized, 23 | Tuple, 24 | Type, 25 | TypeVar, 26 | Union, 27 | ) 28 | 29 | # Tensor dtype 30 | # for jaxtyping usage, see https://github.com/google/jaxtyping/blob/main/API.md 31 | from jaxtyping import Bool, Complex, Float, Inexact, Int, Integer, Num, Shaped, UInt 32 | 33 | # Config type 34 | from omegaconf import DictConfig 35 | 36 | # PyTorch Tensor type 37 | from torch import Tensor 38 | 39 | # Runtime type checking decorator 40 | from typeguard import typechecked as typechecker 41 | --------------------------------------------------------------------------------
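To close, a small illustrative example (not from the repository) of the runtime shape/dtype checking described in the typing.py docstring above, combining the jaxtyping annotations with the typeguard decorator imported there; the function name and shapes are hypothetical, and behavior assumes a jaxtyping-compatible typeguard version:

import torch
from jaxtyping import Float
from torch import Tensor
from typeguard import typechecked as typechecker


@typechecker
def composite(
    color: Float[Tensor, "B H W 3"], alpha: Float[Tensor, "B H W 1"]
) -> Float[Tensor, "B H W 3"]:
    # typeguard checks at call time that both arguments are float tensors with
    # four dims and the annotated trailing channel sizes (3 and 1 respectively).
    return color * alpha


composite(torch.rand(2, 64, 64, 3), torch.rand(2, 64, 64, 1))  # passes
# composite(torch.rand(2, 64, 64, 4), torch.rand(2, 64, 64, 1))  # raises a shape error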