├── models
│   └── .gitkeep
├── sample_images
│   ├── dog
│   │   ├── 0.jpg
│   │   ├── 87.jpg
│   │   ├── 180.jpg
│   │   └── 249.jpg
│   └── ramen
│       └── image.jpg
├── requirements_for_check.txt
├── requirements.txt
├── panels.py
├── README.md
├── .gitignore
├── dependencies.py
├── __init__.py
├── operators.py
└── utils.py
/models/.gitkeep:
--------------------------------------------------------------------------------
1 | VGGT model will be downloaded here.
--------------------------------------------------------------------------------
/sample_images/dog/0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xy-gao/DA3-blender/HEAD/sample_images/dog/0.jpg
--------------------------------------------------------------------------------
/sample_images/dog/87.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xy-gao/DA3-blender/HEAD/sample_images/dog/87.jpg
--------------------------------------------------------------------------------
/sample_images/dog/180.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xy-gao/DA3-blender/HEAD/sample_images/dog/180.jpg
--------------------------------------------------------------------------------
/sample_images/dog/249.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xy-gao/DA3-blender/HEAD/sample_images/dog/249.jpg
--------------------------------------------------------------------------------
/sample_images/ramen/image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xy-gao/DA3-blender/HEAD/sample_images/ramen/image.jpg
--------------------------------------------------------------------------------
/requirements_for_check.txt:
--------------------------------------------------------------------------------
1 | torch==2.5.1
2 | torchvision==0.20.1
3 | numpy==1.26.4
4 | pre-commit
5 | trimesh
6 | einops
7 | huggingface_hub
8 | imageio
9 | opencv-python
10 | # xformers
11 | open3d
12 | fastapi
13 | uvicorn
14 | requests
15 | typer
16 | pillow
17 | antlr4-python3-runtime==4.9.2 # required by omegaconf 2.3.0; without this pin, installing antlr4-python3-runtime failed (reportedly a known issue with 4.9.3 on Windows)
18 | omegaconf<2.4 # when omegaconf 2.4.0 releases, it will require antlr4-python3-runtime 4.11
19 | # evo
20 | e3nn
21 | moviepy==1.0.3
22 | plyfile
23 | pillow_heif
24 | safetensors
25 | addict
26 | pycolmap
27 | ultralytics # for YOLO image segmentation
28 | lapx>=0.5.5 # lap is required by ultralytics, but its automatic installation ended up in the wrong Python environment
29 | # depth_anything_3
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | --extra-index-url https://download.pytorch.org/whl/cu121
2 | torch==2.5.1
3 | torchvision==0.20.1
4 | numpy==1.26.4
5 | pre-commit
6 | trimesh
7 | einops
8 | huggingface_hub
9 | imageio
10 | opencv-python
11 | # xformers
12 | open3d
13 | fastapi
14 | uvicorn
15 | requests
16 | typer
17 | pillow
18 | antlr4-python3-runtime==4.9.2 # required by omegaconf 2.3.0; without this pin, installing antlr4-python3-runtime failed (reportedly a known issue with 4.9.3 on Windows)
19 | omegaconf<2.4 # when omegaconf 2.4.0 releases, it will require antlr4-python3-runtime 4.11
20 | evo
21 | e3nn
22 | moviepy==1.0.3
23 | plyfile
24 | pillow_heif
25 | safetensors
26 | addict
27 | pycolmap
28 | ultralytics # for YOLO image segmentation
29 | lapx>=0.5.5 # lap is required by ultralytics, but its automatic installation ended up in the wrong Python environment
--------------------------------------------------------------------------------
/panels.py:
--------------------------------------------------------------------------------
1 | import bpy
2 | from .operators import get_model_path
3 | import os
4 |
5 | class DA3Panel(bpy.types.Panel):
6 | bl_label = "DA3"
7 | bl_idname = "VIEW3D_PT_da3"
8 | bl_space_type = 'VIEW_3D'
9 | bl_region_type = 'UI'
10 | bl_category = "DA3"
11 |
12 | def draw(self, context):
13 | layout = self.layout
14 | scene = context.scene
15 |
16 | # Model selection dropdown
17 | layout.prop(scene, "da3_model_name", text="Model")
18 |
19 | # Download button or status
20 | model_path = get_model_path(scene.da3_model_name)
21 | row = layout.row()
22 | if os.path.exists(model_path):
23 | row.label(text=f"Model {scene.da3_model_name} ready")
24 | else:
25 | row.operator("da3.download_model", text=f"Download {scene.da3_model_name}")
26 |
27 | # Metric model checkbox and download button/status
28 | if scene.da3_model_name != "da3nested-giant-large":
29 | layout.prop(scene, "da3_use_metric", text="Use Metric")
30 | if scene.da3_use_metric:
31 | # Metric combination mode
32 | layout.prop(scene, "da3_metric_mode", text="Metric Mode")
33 |
34 | metric_model_name = "da3metric-large"
35 | metric_model_path = get_model_path(metric_model_name)
36 | row = layout.row()
37 | if os.path.exists(metric_model_path):
38 | row.label(text=f"Metric model {metric_model_name} ready")
39 | else:
40 | op = row.operator("da3.download_model", text="Download Metric Model")
41 | op.da3_override_model_name = metric_model_name
42 |
43 | layout.prop(scene, "da3_input_folder", text="Input Folder")
44 | layout.prop(scene, "da3_process_res", text="Process Resolution")
45 | layout.prop(scene, "da3_process_res_method", text="Resize Method")
46 | layout.prop(scene, "da3_batch_mode", text="Batch Mode")
47 | if scene.da3_batch_mode != "ignore_batch_size":
48 | layout.prop(scene, "da3_batch_size", text="Batch Size")
49 | layout.prop(scene, "da3_use_ray_pose", text="Use Ray-based Pose")
50 | layout.prop(scene, "da3_use_half_precision", text="Use Half Precision")
51 | layout.prop(scene, "da3_filter_edges", text="Filter Edges")
52 | layout.prop(scene, "da3_min_confidence", text="Min Confidence")
53 | layout.prop(scene, "da3_detect_motion", text="Detect Motion")
54 | if scene.da3_detect_motion:
55 | layout.prop(scene, "da3_motion_threshold", text="Motion Threshold")
56 |
57 | layout.prop(scene, "da3_use_segmentation")
58 | if scene.da3_use_segmentation:
59 | layout.prop(scene, "da3_segmentation_model")
60 | layout.prop(scene, "da3_segmentation_conf")
61 |
62 | layout.separator()
63 |
64 | layout.prop(scene, "da3_generate_mesh", text="Generate Meshes")
65 | layout.prop(scene, "da3_output_debug_images", text="Output Debug Images")
66 | row = layout.row()
67 | row.operator("da3.generate_point_cloud")
68 | row = layout.row()
69 | row.operator("da3.unload_model")
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DA3-blender
2 | Blender addon for Depth-Anything-3 3D reconstruction
3 |
4 | Give it an image folder containing one or more images, and it will generate a point cloud (as geometry nodes) with a material.
5 |
6 | This blender addon is based on [Depth-Anything-3](https://github.com/ByteDance-Seed/Depth-Anything-3).
7 |
8 | ## Usage
9 | 1. Download a Depth-Anything-3 model from the addon panel (press N to toggle the Sidebar, then click the DA3 tab).
10 | 2. Select an image folder.
11 | 3. Generate.
12 |
13 | https://github.com/user-attachments/assets/6eeff6d0-a89f-4c2c-970b-47fe2b5475d3
14 |
15 |
16 | ## Installation (only the first time)
17 | 1. Download the ZIP from this GitHub repo (but don't extract it).
18 | 2. In Blender, toggle the System Console (Window > Toggle System Console) to follow the installation logs.
19 | 3. Install the addon in Blender's Preferences (Edit > Preferences > Add-ons) with "Install from Disk" (the v button in the top-right corner) and select the downloaded ZIP.
20 | 4. Wait while the Depth-Anything-3 repository is cloned and the Python dependencies are installed (a sketch of the equivalent manual steps is shown below).
21 | 5. After the addon is activated, download a Depth-Anything-3 model from the addon panel (press N to toggle the Sidebar, then click the DA3 tab).
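
If the automatic step fails, the equivalent manual commands look roughly like the sketch below. This mirrors what `dependencies.py` runs and assumes you execute it from Blender's Python console, so that `sys.executable` is Blender's bundled Python; the addon path is a placeholder.

```python
import subprocess, sys
from pathlib import Path

addon = Path("/path/to/DA3-blender")  # placeholder: wherever Blender installed the addon

# Clone Depth-Anything-3 next to the addon files
subprocess.check_call(["git", "clone",
                       "https://github.com/ByteDance-Seed/Depth-Anything-3.git",
                       str(addon / "da3_repo")])

# Install the pinned requirements into deps_public, and the DA3 package itself
# (without dependency resolution) into deps_da3; the addon prepends both
# folders to sys.path when it loads.
subprocess.check_call([sys.executable, "-m", "pip", "install",
                       "-r", str(addon / "requirements.txt"),
                       "--target", str(addon / "deps_public")])
subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-deps",
                       str(addon / "da3_repo"),
                       "--target", str(addon / "deps_da3")])
```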
22 |
23 |
24 |
25 |
26 |
27 |
28 | ## Advanced usage
29 | - There are several **Model**s to choose from. **DA3 Large** is recommended unless you have a lot of VRAM.
30 | - The scale will be very small by default. Check **Use Metric** to use the DA3 Metric model to help scale it to approximately life-size. This is twice as slow. You will have to click the button to download the Metric model if you haven't already.
31 | - The default resolution is only 504x504 (or less for non-square images). You can change the resolution of the longest side in the **Process Resolution** box, but it must be a multiple of 14. If you don't know your 14 times tables, Blender supports typing maths in the box, e.g. `504+14`. Higher resolutions use a lot more VRAM and may fail if you run out.
32 | - If you want to specify the resolution of the shortest side instead of the longest side, select **Lower Bound Resize** from the drop-down box. That will massively increase VRAM usage and is not recommended.
33 | - There is a limit to how many images the model can process at once without crashing, based on VRAM. For 4GB of VRAM at 504x280, the limit is 10. Set the **Batch Size** to whatever the limit is for your graphics card (found by experimenting). Set the **Batch Mode** to how you want to handle more images than that. **Skip Frames** will choose that many evenly spaced images from the folder for a single batch, and is the only mode with good alignment (a short sketch of the frame selection follows this list). **Last Frame Overlap** will process the images in batches, using one frame of overlap to align the batches. **First+Last Overlap** will use two frames of overlap for better alignment (in theory). **Ignore Batch Size** will try to do all the images at once and risk crashing.
34 | - **Use Ray-based Pose** uses a slower, more precise method of estimating camera poses within a batch, but doesn't help alignment between batches.
35 | - **Use Half Precision** runs the model in mixed precision (mostly 16-bit), which reduces the VRAM needed for inference (the model weights themselves still occupy the same VRAM) and makes it faster, with only a slight loss of precision.
36 | - **Generate Meshes** will create a separate textured mesh for each image instead of a single point cloud. The meshes use the original full-resolution image as a texture. You will have many meshes layered on top of each other that you need to clean up manually if you want to use them. It makes no attempt to combine meshes into a single mesh yet.
37 | - **Detect Motion** will detect moving objects that are present in one frame but absent in another where they should be visible. It then puts the moving objects into their own point clouds and animates them. Press Play in the animation panel to watch. Static geometry from all frames will always be visible. You may need to manually increase the length of the scene's animation. Detect Motion doesn't work well on feet or objects that are near other objects. It isn't optimised, so it may have problems with large numbers of frames.
38 | - Click **Unload Model** after you have finished to free VRAM for other things, otherwise the model will stay in VRAM.
39 | - To view the confidence of each point in the point cloud, select the point cloud then click on the **Shading** tab at the top of the screen. In the node editor, change the **Factor** of the yellow **Mix** node to `1.0` (or something between 0 and 1) to show the confidence of each point instead of the colour.
40 | - To change the size of each point, select the point cloud then click on the **Geometry Nodes** tab at the top of the screen. In the node editor, change the **Radius** of the green **Mesh to Points** node to the desired size.
41 | - To hide points below a certain confidence level, select the point cloud, then click on the blue spanner icon in the bottom right column of icons, and set **Threshold** to a value between 1 and about 30. Setting it to `2.0` will filter out almost all the noise, but also some of the background.
42 | - To view the scene from one of the cameras, select the camera, move the mouse over the 3D View, and press Ctrl+Numpad0.
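
For reference, the **Skip Frames** selection is just evenly spaced indices across the folder, capped at the batch size. A minimal sketch of that logic (mirroring the addon's `operators.py`, with a hypothetical file list):

```python
import numpy as np

# Hypothetical folder of 100 frames; the addon globs and sorts the real folder.
image_paths = [f"frame_{i:03d}.jpg" for i in range(100)]
batch_size = 10

# Pick `batch_size` evenly spaced frames for a single batch (Skip Frames mode).
indices = np.linspace(0, len(image_paths) - 1, batch_size, dtype=int)
selected = [image_paths[i] for i in indices]
print(selected)  # ['frame_000.jpg', 'frame_011.jpg', ..., 'frame_099.jpg']
```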
43 |
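The panel can also be driven from Blender's Python console. Below is a minimal sketch, assuming the addon is installed and enabled; the property and operator names are the ones the addon registers, but the folder path is a placeholder.

```python
import bpy

scene = bpy.context.scene
scene.da3_model_name = 'da3-large'
scene.da3_input_folder = "/path/to/your/images"   # placeholder: folder of .jpg/.png files
scene.da3_process_res = 504                       # must stay a multiple of 14
scene.da3_batch_mode = 'skip_frames'
scene.da3_batch_size = 10

bpy.ops.da3.download_model()        # no-op if the weights are already in models/
bpy.ops.da3.generate_point_cloud()
bpy.ops.da3.unload_model()          # free VRAM when you are done
```
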
44 | ## Tested on
45 | - Win10, Win11
46 | - Blender 4.2, 4.4
47 | - CUDA 12.6
48 | - 4GB VRAM GTX 970
49 |
50 | Also tested on Ubuntu 25.10, Blender 5.0, CUDA 13.0: https://github.com/xy-gao/DA3-blender/issues/1#issue-3652866452
51 |
52 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[codz]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py.cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # UV
98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | #uv.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 | #poetry.toml
110 |
111 | # pdm
112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113 | # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114 | # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115 | #pdm.lock
116 | #pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 |
120 | # pixi
121 | # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122 | #pixi.lock
123 | # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124 | # in the .venv directory. It is recommended not to include this directory in version control.
125 | .pixi
126 |
127 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128 | __pypackages__/
129 |
130 | # Celery stuff
131 | celerybeat-schedule
132 | celerybeat.pid
133 |
134 | # SageMath parsed files
135 | *.sage.py
136 |
137 | # Environments
138 | .env
139 | .envrc
140 | .venv
141 | env/
142 | venv/
143 | ENV/
144 | env.bak/
145 | venv.bak/
146 |
147 | # Spyder project settings
148 | .spyderproject
149 | .spyproject
150 |
151 | # Rope project settings
152 | .ropeproject
153 |
154 | # mkdocs documentation
155 | /site
156 |
157 | # mypy
158 | .mypy_cache/
159 | .dmypy.json
160 | dmypy.json
161 |
162 | # Pyre type checker
163 | .pyre/
164 |
165 | # pytype static type analyzer
166 | .pytype/
167 |
168 | # Cython debug symbols
169 | cython_debug/
170 |
171 | # PyCharm
172 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174 | # and can be added to the global gitignore or merged into this file. For a more nuclear
175 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176 | #.idea/
177 |
178 | # Abstra
179 | # Abstra is an AI-powered process automation framework.
180 | # Ignore directories containing user credentials, local state, and settings.
181 | # Learn more at https://abstra.io/docs
182 | .abstra/
183 |
184 | # Visual Studio Code
185 | # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186 | # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187 | # and can be added to the global gitignore or merged into this file. However, if you prefer,
188 | # you could uncomment the following to ignore the entire vscode folder
189 | # .vscode/
190 |
191 | # Ruff stuff:
192 | .ruff_cache/
193 |
194 | # PyPI configuration file
195 | .pypirc
196 |
197 | # Cursor
198 | # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199 | # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200 | # refer to https://docs.cursor.com/context/ignore-files
201 | .cursorignore
202 | .cursorindexingignore
203 |
204 | # Marimo
205 | marimo/_static/
206 | marimo/_lsp/
207 | __marimo__/
208 |
209 | # Our Blender add-on generated files
210 | /da3_repo
211 | /deps_public
212 | /deps_da3
213 | /models/*.safetensors
214 | debug_output/
215 | /models/*.pt
216 |
--------------------------------------------------------------------------------
/dependencies.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pkg_resources
3 | import subprocess
4 | import sys
5 | from pathlib import Path
6 |
7 |
8 | add_on_path = Path(__file__).parent # assuming this file is at root of add-on
9 | os.environ["ADDON_PATH"] = str(add_on_path)
10 | requirements_txt = add_on_path / 'requirements.txt' # assuming requirements.txt is at root of add-on
11 | requirements_for_check_txt = add_on_path / 'requirements_for_check.txt' # assuming requirements_for_check.txt is at root of add-on
12 | DA3_DIR = add_on_path / "da3_repo"
13 |
14 | deps_path = add_on_path / 'deps_public' # might not exist until install_deps is called
15 | deps_path_da3 = add_on_path / 'deps_da3'
16 | # Prepend dependency folders to sys.path so we can import from them
17 | # (important for Windows machines, but less so for Linux)
18 | sys.path.insert(0, os.fspath(deps_path))
19 | sys.path.insert(0, os.fspath(deps_path_da3))
20 | sys.path.insert(0, os.fspath(DA3_DIR))
21 |
22 |
23 | class Dependencies:
24 | # cache variables used to eliminate unnecessary computations
25 | _checked = None
26 | _requirements = None
27 |
28 | @staticmethod
29 | def install():
30 | if Dependencies.check():
31 | return True
32 |
33 | # Clone the Depth-Anything-3 repository if it isn't already present
34 | if not os.path.exists(DA3_DIR):
35 | try:
36 | subprocess.check_call(['git', 'clone', 'https://github.com/ByteDance-Seed/Depth-Anything-3.git', DA3_DIR])
37 | except subprocess.CalledProcessError as e:
38 | print(f'Caught Exception while trying to git clone da3')
39 | print(f' Exception: {e}')
40 | return False
41 |
42 | try:
43 | deps_path.mkdir(exist_ok=True)
44 | except Exception as e:
45 | print(f'Caught Exception while trying to create dependencies folder')
46 | print(f' Exception: {e}')
47 | print(f' Folder: {deps_path}')
48 | return False
49 | try:
50 | deps_path_da3.mkdir(exist_ok=True)
51 | except Exception as e:
52 | print(f'Caught Exception while trying to create dependencies folder')
53 | print(f' Exception: {e}')
54 | print(f' Folder: {deps_path_da3}')
55 | return False
56 | # Ensure pip is installed
57 | try:
58 | subprocess.check_call([sys.executable, "-m", "ensurepip", "--upgrade"])
59 | except subprocess.CalledProcessError as e:
60 | print(f'Caught CalledProcessError while trying to ensure pip is installed')
61 | print(f' Exception: {e}')
62 | print(f' {sys.executable=}')
63 | return False
64 |
65 | # Install dependencies from requirements.txt
66 | try:
67 | cmd = [
68 | sys.executable,
69 | "-m",
70 | "pip",
71 | "install",
72 | "-r",
73 | os.fspath(requirements_txt),
74 | "--target",
75 | os.fspath(deps_path)
76 | ]
77 | print(f'Installing: {cmd}')
78 | subprocess.check_call(cmd)
79 | except subprocess.CalledProcessError as e:
80 | print(f'Caught CalledProcessError while trying to install dependencies')
81 | print(f' Exception: {e}')
82 | print(f' Requirements: {requirements_txt}')
83 | print(f' Folder: {deps_path}')
84 | return False
85 | # Install the DA3 package itself (without its dependencies) into its own folder
86 |
87 | try:
88 | cmd = [
89 | sys.executable,
90 | "-m",
91 | "pip",
92 | "install",
93 | "--no-deps",
94 | os.fspath(DA3_DIR),
95 | "--target",
96 | os.fspath(deps_path_da3)
97 | ]
98 | print(f'Installing: {cmd}')
99 | subprocess.check_call(cmd)
100 | except subprocess.CalledProcessError as e:
101 | print(f'Caught CalledProcessError while trying to install DA3')
102 | print(f' Exception: {e}')
103 | print(f' Package: {DA3_DIR}')
104 | return False
105 | return Dependencies.check(force=True)
106 |
107 | @staticmethod
108 | def check(*, force=False):
109 | if force:
110 | Dependencies._checked = None
111 | elif Dependencies._checked is not None:
112 | # Assume everything is installed
113 | return Dependencies._checked
114 |
115 | Dependencies._checked = False
116 |
117 | if deps_path.exists() and os.path.exists(DA3_DIR):
118 | try:
119 | # Ensure all required dependencies are installed in dependencies folder
120 | ws = pkg_resources.WorkingSet(entries=[ os.fspath(deps_path) ])
121 | for dep in Dependencies.requirements(force=force):
122 | ws.require(dep)
123 |
124 | # If we get here, we found all required dependencies
125 | Dependencies._checked = True
126 |
127 | except Exception as e:
128 | print(f'Caught Exception while trying to check dependencies')
129 | print(f' Exception: {e}')
130 | Dependencies._checked = False
131 |
132 | return Dependencies._checked
133 |
134 | @staticmethod
135 | def requirements(*, force=False):
136 | if force:
137 | Dependencies._requirements = None
138 | elif Dependencies._requirements is not None:
139 | return Dependencies._requirements
140 |
141 | # load and cache requirements
142 | with requirements_for_check_txt.open() as requirements:
143 | dependencies = pkg_resources.parse_requirements(requirements)
144 | Dependencies._requirements = [ dep.project_name for dep in dependencies ]
145 | return Dependencies._requirements
146 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | bl_info = {
2 | "name": "DA3 Addon",
3 | "author": "Xiangyi Gao",
4 | "version": (1, 0),
5 | "blender": (4, 2, 0),
6 | "location": "View3D > Sidebar > DA3",
7 | "description": "Generate point clouds from images using DA3",
8 | "category": "3D View",
9 | }
10 |
11 | import bpy
12 | from .dependencies import Dependencies
13 | import os
14 |
15 | def register():
16 | # Set PyTorch CUDA allocation config to reduce fragmentation
17 | os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
18 | # Set CUDA_LAUNCH_BLOCKING for better error reporting
19 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
20 |
21 | if not Dependencies.check():
22 | Dependencies.install()
23 | if Dependencies.check():
24 | from . import operators, panels
25 | bpy.utils.register_class(operators.DownloadModelOperator)
26 | bpy.utils.register_class(operators.UnloadModelOperator)
27 | bpy.utils.register_class(operators.GeneratePointCloudOperator)
28 | bpy.utils.register_class(panels.DA3Panel)
29 | bpy.types.Scene.da3_input_folder = bpy.props.StringProperty(subtype='DIR_PATH')
30 | bpy.types.Scene.da3_model_name = bpy.props.EnumProperty(
31 | items=[
32 | ('da3-small', 'DA3 Small', 'Small model for faster inference'),
33 | ('da3-base', 'DA3 Base', 'Base model with balanced performance'),
34 | ('da3-large', 'DA3 Large', 'Large model for better quality'),
35 | ('da3-giant', 'DA3 Giant', 'Giant model for highest quality'),
36 | ("da3metric-large", "DA3 Metric Large", "Metric depth model"),
37 | ('da3mono-large', 'DA3 Mono Large', 'Single image depth estimation'),
38 | ('da3nested-giant-large', 'DA3 Nested Giant Large', 'Nested depth estimation'),
39 | ],
40 | name="Model",
41 | description="Select DA3 model variant",
42 | default='da3-large'
43 | )
44 | bpy.types.Scene.da3_use_metric = bpy.props.BoolProperty(
45 | name="Use Metric",
46 | description="Real-world scale using the metric DA3 model",
47 | default=False,
48 | )
49 | bpy.types.Scene.da3_metric_mode = bpy.props.EnumProperty(
50 | items=[
51 | ("scale_base", "Scale Base Depth", "Scale base depth using metric model"),
52 | ("metric_depth", "Use Metric Depth", "Use metric model depth with base cameras"),
53 | ],
54 | name="Metric Mode",
55 | description="How to combine base and metric model outputs",
56 | default="scale_base",
57 | )
58 | bpy.types.Scene.da3_process_res = bpy.props.IntProperty(
59 | name="Process Resolution",
60 | description="Internal resolution for processing (must be multiple of 14)",
61 | default=504,
62 | min=14
63 | )
64 | bpy.types.Scene.da3_process_res_method = bpy.props.EnumProperty(
65 | items=[
66 | ("upper_bound_resize", "Upper Bound Resize", "Resize so that the specified dimension becomes the longer side"),
67 | ("lower_bound_resize", "Lower Bound Resize", "Resize so that the specified dimension becomes the shorter side"),
68 | ],
69 | name="Resize Method",
70 | description="Method for resizing images to the target resolution",
71 | default="upper_bound_resize"
72 | )
73 | bpy.types.Scene.da3_use_half_precision = bpy.props.BoolProperty(
74 | name="Use Half Precision",
75 | description="Use 16-bit floats for reduced VRAM usage",
76 | default=False,
77 | )
78 | bpy.types.Scene.da3_use_ray_pose = bpy.props.BoolProperty(
79 | name="Use Ray-based Pose",
80 | description="Use ray-based camera pose estimation instead of the camera decoder (slower but potentially more accurate)",
81 | default=False,
82 | )
83 | bpy.types.Scene.da3_batch_size = bpy.props.IntProperty(
84 | name="Batch Size",
85 | description="Number of images to process in batch mode",
86 | default=10,
87 | min=1
88 | )
89 | bpy.types.Scene.da3_batch_mode = bpy.props.EnumProperty(
90 | items=[
91 | ("ignore_batch_size", "Ignore Batch Size", "Process all images (may use excessive VRAM)"),
92 | ("skip_frames", "Skip Frames", "Process evenly spaced frames"),
93 | ("last_frame_overlap", "Last Frame Overlap", "Process overlapping batches for large datasets"),
94 | ("first_last_overlap", "First+Last Overlap", "Use first and last frame of previous batch plus new frames"),
95 | ],
96 | name="Batch Mode",
97 | description="How to select images for processing",
98 | default="skip_frames"
99 | )
100 | bpy.types.Scene.da3_filter_edges = bpy.props.BoolProperty(
101 | name="Filter Edges",
102 | description="Set confidence to 0 for pixels with high depth gradient",
103 | default=True,
104 | )
105 | bpy.types.Scene.da3_min_confidence = bpy.props.FloatProperty(
106 | name="Min Confidence",
107 | description="Minimum confidence threshold for points (points below this will be removed)",
108 | default=0.5,
109 | min=0.0,
110 | max=100.0,
111 | )
112 | bpy.types.Scene.da3_output_debug_images = bpy.props.BoolProperty(
113 | name="Output Debug Images",
114 | description="Save debug images (depth, confidence, etc.) to a subfolder",
115 | default=False,
116 | )
117 | bpy.types.Scene.da3_generate_mesh = bpy.props.BoolProperty(
118 | name="Generate Meshes",
119 | description="Generate independent textured meshes for each input image instead of a point cloud",
120 | default=False,
121 | )
122 | bpy.types.Scene.da3_detect_motion = bpy.props.BoolProperty(
123 | name="Detect Motion",
124 | description="Identify and animate moving objects by checking if they're missing in other frames",
125 | default=False,
126 | )
127 | bpy.types.Scene.da3_motion_threshold = bpy.props.FloatProperty(
128 | name="Motion Threshold",
129 | description="Depth difference ratio to consider as empty space (e.g. 0.1 = 10%)",
130 | default=0.1,
131 | min=0.01,
132 | max=1.0,
133 | )
134 | bpy.types.Scene.da3_use_segmentation = bpy.props.BoolProperty(
135 | name="Use Segmentation",
136 | description="Use YOLO to segment and track objects across frames",
137 | default=False,
138 | )
139 | bpy.types.Scene.da3_segmentation_model = bpy.props.EnumProperty(
140 | items=[
141 | ("yolov8n-seg", "YOLOv8 Nano", "Lowest accuracy"),
142 | ("yolov8l-seg", "YOLOv8 Large", "Balanced speed/accuracy"),
143 | ("yolov8x-seg", "YOLOv8 X-Large", "Best accuracy for v8"),
144 | ("yolo11n-seg", "YOLO11 Nano", "Newest tiny fast model"),
145 | ("yolo11l-seg", "YOLO11 Large", "Newest balanced model"),
146 | ("yolo11x-seg", "YOLO11 X-Large", "Newest best accuracy"),
147 | ("yoloe-11s-seg-pf", "YOLOE Small PF", "YOLOE Small prompt-free"),
148 | ("yoloe-11m-seg-pf", "YOLOE Medium PF", "YOLOE Medium prompt-free"),
149 | ("yoloe-11l-seg-pf", "YOLOE Large PF", "Recognise the most objects"),
150 | ],
151 | name="Seg Model",
152 | description="Select segmentation model",
153 | default="yoloe-11l-seg-pf",
154 | )
155 | bpy.types.Scene.da3_segmentation_conf = bpy.props.FloatProperty(
156 | name="Seg Confidence",
157 | description="Minimum confidence for segmentation",
158 | default=0.25,
159 | min=0.0,
160 | max=1.0,
161 | )
162 | else:
163 | raise ValueError("DA3 addon: dependency installation failed. Check the System Console for details.")
164 |
165 | def unregister():
166 | from . import operators, panels
167 | bpy.utils.unregister_class(operators.DownloadModelOperator)
168 | bpy.utils.unregister_class(operators.UnloadModelOperator)
169 | bpy.utils.unregister_class(operators.GeneratePointCloudOperator)
170 | bpy.utils.unregister_class(panels.DA3Panel)
171 | del bpy.types.Scene.da3_input_folder
172 | del bpy.types.Scene.da3_model_name
173 | del bpy.types.Scene.da3_use_metric
174 | del bpy.types.Scene.da3_metric_mode
175 | del bpy.types.Scene.da3_process_res
176 | del bpy.types.Scene.da3_process_res_method
177 | del bpy.types.Scene.da3_use_half_precision
178 | del bpy.types.Scene.da3_use_ray_pose
179 | del bpy.types.Scene.da3_batch_size
180 | del bpy.types.Scene.da3_batch_mode
181 | del bpy.types.Scene.da3_filter_edges
182 | del bpy.types.Scene.da3_min_confidence
183 | del bpy.types.Scene.da3_output_debug_images
184 | del bpy.types.Scene.da3_generate_mesh
185 | del bpy.types.Scene.da3_detect_motion
186 | del bpy.types.Scene.da3_motion_threshold
187 | del bpy.types.Scene.da3_use_segmentation
188 | del bpy.types.Scene.da3_segmentation_model
189 | del bpy.types.Scene.da3_segmentation_conf
190 |
191 | if __name__ == "__main__":
192 | register()
--------------------------------------------------------------------------------
/operators.py:
--------------------------------------------------------------------------------
1 | import bpy
2 | from pathlib import Path
3 | import os
4 | import torch
5 | import numpy as np
6 | import time
7 | import datetime
8 | from .utils import (
9 | run_model,
10 | convert_prediction_to_dict,
11 | combine_base_and_metric,
12 | combine_base_with_metric_depth,
13 | import_point_cloud,
14 | import_mesh_from_depth,
15 | create_cameras,
16 | align_batches,
17 | compute_motion_scores,
18 | )
19 |
20 | wm = None
21 | total_predicted_time = None
22 | start_time = None
23 | def start_progress_timer(total):
24 | global wm, total_predicted_time, start_time
25 | start_time = time.time()
26 | wm = bpy.context.window_manager
27 | total_predicted_time = total
28 | wm.progress_begin(0, 100)
29 |
30 | # Calculate estimated duration and finish time
31 | minutes = int(total // 60)
32 | seconds = int(total % 60)
33 | if minutes > 0:
34 | duration_str = f"{minutes} minutes {seconds} seconds"
35 | else:
36 | duration_str = f"{seconds} seconds"
37 |
38 | finish_time = datetime.datetime.now() + datetime.timedelta(seconds=total)
39 | finish_str = finish_time.strftime("%H:%M:%S")
40 | print(f"Rough estimated duration: {duration_str}, expected finish at {finish_str}")
41 |
42 | def update_progress_timer(expected_time, stage=""):
43 | global wm, total_predicted_time, start_time
44 | if not total_predicted_time or total_predicted_time <= 0:
45 | print("Warning: total_predicted_time is zero or negative, cannot update progress.")
46 | return
47 | portion = expected_time / total_predicted_time * 100
48 | wm.progress_update(int(portion))
49 | print(f"Progress: {stage}, {portion:.2f}%, elapsed: {time.time() - start_time:.2f}s")
50 |
51 | def end_progress_timer():
52 | global wm
53 | if wm is not None:
54 | wm.progress_end()
55 | wm = None
56 |
57 | add_on_path = Path(__file__).parent
58 | MODELS_DIR = os.path.join(add_on_path, 'models')
59 | _URLS = {
60 | 'da3-small': "https://huggingface.co/depth-anything/DA3-SMALL/resolve/main/model.safetensors",
61 | 'da3-base': "https://huggingface.co/depth-anything/DA3-BASE/resolve/main/model.safetensors",
62 | 'da3-large': "https://huggingface.co/depth-anything/DA3-LARGE/resolve/main/model.safetensors",
63 | 'da3-giant': "https://huggingface.co/depth-anything/DA3-GIANT/resolve/main/model.safetensors",
64 | "da3metric-large": "https://huggingface.co/depth-anything/DA3METRIC-LARGE/resolve/main/model.safetensors",
65 | "da3mono-large": "https://huggingface.co/depth-anything/DA3MONO-LARGE/resolve/main/model.safetensors",
66 | "da3nested-giant-large": "https://huggingface.co/depth-anything/DA3NESTED-GIANT-LARGE/resolve/main/model.safetensors",
67 |
68 | "yolov8n-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n-seg.pt",
69 | "yolov8s-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s-seg.pt",
70 | "yolov8m-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m-seg.pt",
71 | "yolov8l-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8l-seg.pt",
72 | "yolov8x-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x-seg.pt",
73 | "yolo11n-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-seg.pt",
74 | "yolo11s-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-seg.pt",
75 | "yolo11m-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-seg.pt",
76 | "yolo11l-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-seg.pt",
77 | "yolo11x-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-seg.pt",
78 | "yoloe-11s-seg-pf": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yoloe-11s-seg-pf.pt",
79 | "yoloe-11m-seg-pf": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yoloe-11m-seg-pf.pt",
80 | "yoloe-11l-seg-pf": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yoloe-11l-seg-pf.pt",
81 | }
82 | model = None
83 | current_model_name = None
84 |
85 | def get_model_path(model_name):
86 | return os.path.join(MODELS_DIR, f'{model_name}.safetensors')
87 |
88 | def display_VRAM_usage(stage: str, include_peak=False):
89 | if torch.cuda.is_available():
90 | allocated = torch.cuda.memory_allocated() / 1024**2
91 | free, total = torch.cuda.mem_get_info()
92 | free_mb = free / 1024**2
93 | total_mb = total / 1024**2
94 | msg = f"VRAM {stage}: {allocated:.1f} MB (free: {free_mb:.1f} MB / {total_mb:.1f} MB)"
95 | if include_peak:
96 | peak = torch.cuda.max_memory_allocated() / 1024**2
97 | msg += f" (peak: {peak:.1f} MB)"
98 | print(msg)
99 |
100 |
101 | def get_model(model_name):
102 | global model, current_model_name
103 | if model is None or current_model_name != model_name:
104 | from depth_anything_3.api import DepthAnything3
105 | if torch.cuda.is_available():
106 | torch.cuda.reset_peak_memory_stats()
107 | display_VRAM_usage(f"before loading {model_name}")
108 | model = DepthAnything3(model_name=model_name)
109 | model_path = get_model_path(model_name)
110 | if os.path.exists(model_path):
111 | from safetensors.torch import load_file
112 | weight = load_file(model_path)
113 | model.load_state_dict(weight, strict=False)
114 | else:
115 | raise FileNotFoundError(f"Model file {model_name} not found. Please download it first.")
116 | device = "cuda" if torch.cuda.is_available() else "cpu"
117 | model.to(device)
118 | model.eval()
119 | current_model_name = model_name
120 | display_VRAM_usage(f"after loading {model_name}", include_peak=True)
121 | return model
122 |
123 | def unload_current_model():
124 | global model, current_model_name
125 | if model is not None:
126 | display_VRAM_usage("before unload")
127 | # Drop references so PyTorch can free memory
128 | del model
129 | model = None
130 | current_model_name = None
131 | if torch.cuda.is_available():
132 | torch.cuda.empty_cache()
133 | display_VRAM_usage("after unload")
134 |
135 | def run_segmentation(image_paths, conf_threshold=0.25, model_name="yolo11x-seg"):
136 | print(f"Loading {model_name} model...")
137 | display_VRAM_usage("before loading YOLO")
138 | try:
139 | from ultralytics import YOLO
140 | except ImportError:
141 | print("Error: ultralytics not installed. Please install it to use segmentation.")
142 | return None, None
143 |
144 | # Use selected model
145 | # model_name passed as argument
146 | model_path = os.path.join(MODELS_DIR, f"{model_name}.pt")
147 |
148 | if not os.path.exists(model_path):
149 | print(f"Downloading {model_name} to {model_path}...")
150 | url = _URLS.get(model_name, "")
151 | if not url:
152 | print(f"Error: No URL known for {model_name}. Please download {model_name}.pt manually to {MODELS_DIR}")
153 | return None, None
154 |
155 | try:
156 | torch.hub.download_url_to_file(url, model_path)
157 | except Exception as e:
158 | print(f"Failed to download {model_name}: {e}")
159 | return None, None
160 |
161 | # Load model from specific path
162 | seg_model = YOLO(model_path)
163 | display_VRAM_usage("after loading YOLO", include_peak=True)
164 |
165 | print(f"Running segmentation on {len(image_paths)} images...")
166 |
167 | # Run tracking
168 | # persist=True is important for video tracking
169 | # stream=True returns a generator, good for memory
170 | results = seg_model.track(source=image_paths, conf=conf_threshold, persist=True, stream=True, verbose=False)
171 |
172 | segmentation_data = []
173 |
174 | for i, r in enumerate(results):
175 | # r is a Results object
176 | # We need masks and track IDs
177 | frame_data = {
178 | "masks": [],
179 | "ids": [],
180 | "classes": [],
181 | "orig_shape": r.orig_shape
182 | }
183 |
184 | if r.masks is not None:
185 | # masks.data is a torch tensor of masks [N, H, W]
186 | masks = r.masks.data.cpu().numpy()
187 |
188 | # Crop masks to remove letterbox padding (YOLO pads to multiple of 32)
189 | # This ensures aspect ratio matches original image before we resize later
190 | h_orig, w_orig = r.orig_shape
191 | if len(masks.shape) == 3:
192 | _, h_mask, w_mask = masks.shape
193 |
194 | # Calculate scale factor that was used to fit image into mask
195 | scale = min(w_mask / w_orig, h_mask / h_orig)
196 |
197 | # Compute expected dimensions of the valid image area in the mask
198 | new_w = int(round(w_orig * scale))
199 | new_h = int(round(h_orig * scale))
200 |
201 | # Compute start offsets (centering)
202 | x_off = (w_mask - new_w) // 2
203 | y_off = (h_mask - new_h) // 2
204 |
205 | # Crop
206 | masks = masks[:, y_off : y_off + new_h, x_off : x_off + new_w]
207 |
208 | # Fix edge artifacts (sometimes edges are black)
209 | if len(masks.shape) == 3:
210 | for k in range(masks.shape[0]):
211 | m = masks[k]
212 | h_m, w_m = m.shape
213 |
214 | # Fix bottom edge
215 | if h_m >= 3:
216 | if np.max(m[-1, :]) == 0:
217 | if np.max(m[-2, :]) == 0:
218 | m[-2:, :] = m[-3, :]
219 | else:
220 | m[-1, :] = m[-2, :]
221 |
222 | # Fix top edge
223 | if h_m >= 3:
224 | if np.max(m[0, :]) == 0:
225 | if np.max(m[1, :]) == 0:
226 | m[:2, :] = m[2, :]
227 | else:
228 | m[0, :] = m[1, :]
229 |
230 | # Fix left edge
231 | if w_m >= 3:
232 | if np.max(m[:, 0]) == 0:
233 | if np.max(m[:, 1]) == 0:
234 | m[:, :2] = m[:, 2:3]
235 | else:
236 | m[:, 0] = m[:, 1]
237 |
238 | frame_data["masks"] = masks
239 |
240 | if r.boxes is not None and r.boxes.id is not None:
241 | frame_data["ids"] = r.boxes.id.int().cpu().numpy()
242 | else:
243 | # Tracking IDs can be missing even though we called track() (e.g. the
244 | # tracker lost the object or returned detection-only results).
245 | # Mark such boxes with -1 so downstream code can treat them as untracked.
248 | if r.boxes is not None:
249 | frame_data["ids"] = np.full(len(r.boxes), -1, dtype=int)
250 |
251 | if r.boxes is not None:
252 | frame_data["classes"] = r.boxes.cls.int().cpu().numpy()
253 |
254 | segmentation_data.append(frame_data)
255 |
256 | if i % 10 == 0:
257 | print(f"Segmented {i+1}/{len(image_paths)} images")
258 |
259 | display_VRAM_usage("after YOLO inference", include_peak=True)
260 |
261 | # Get class names
262 | class_names = seg_model.names
263 |
264 | # Cleanup
265 | del seg_model
266 | if torch.cuda.is_available():
267 | torch.cuda.empty_cache()
268 | display_VRAM_usage("after unloading YOLO")
269 |
270 | return segmentation_data, class_names
271 |
272 | class DownloadModelOperator(bpy.types.Operator):
273 | bl_idname = "da3.download_model"
274 | bl_label = "Download DA3 Model"
275 |
276 | # NEW: optional override for which model to download
277 | da3_override_model_name: bpy.props.StringProperty(
278 | name="Override Model Name",
279 | description="If set, download this model instead of the one selected in the scene",
280 | default="",
281 | )
282 |
283 | def execute(self, context):
284 | model_name = self.da3_override_model_name or context.scene.da3_model_name
285 | model_path = get_model_path(model_name)
286 |
287 | if os.path.exists(model_path):
288 | self.report({'INFO'}, f"Model {model_name} already downloaded.")
289 | return {'FINISHED'}
290 |
291 | if model_name not in _URLS:
292 | self.report({'ERROR'}, f"Unknown model: {model_name}")
293 | return {'CANCELLED'}
294 |
295 | try:
296 | print(f"Downloading model {model_name}...")
297 | os.makedirs(MODELS_DIR, exist_ok=True)
298 | torch.hub.download_url_to_file(_URLS[model_name], model_path)
299 | self.report({'INFO'}, f"Model {model_name} downloaded successfully.")
300 | except Exception as e:
301 | self.report({'ERROR'}, f"Failed to download model {model_name}: {e}")
302 | return {'CANCELLED'}
303 | return {'FINISHED'}
304 |
305 | @classmethod
306 | def poll(cls, context):
307 | # Allow the button to be clicked; existence is checked in execute()
308 | return True
309 | # model_name = context.scene.da3_model_name
310 | # model_path = get_model_path(model_name)
311 | # return not os.path.exists(model_path)
312 |
313 |
314 | class UnloadModelOperator(bpy.types.Operator):
315 | bl_idname = "da3.unload_model"
316 | bl_label = "Unload Model"
317 |
318 | def execute(self, context):
319 | unload_current_model()
320 | self.report({'INFO'}, "Model unloaded and VRAM freed.")
321 | return {'FINISHED'}
322 |
323 | @classmethod
324 | def poll(cls, context):
325 | # Enable if a model is loaded
326 | return model is not None
327 |
328 |
329 | class GeneratePointCloudOperator(bpy.types.Operator):
330 | bl_idname = "da3.generate_point_cloud"
331 | bl_label = "Generate Point Cloud"
332 |
333 | def execute(self, context):
334 | input_folder = context.scene.da3_input_folder
335 | base_model_name = context.scene.da3_model_name
336 | use_metric = context.scene.da3_use_metric
337 | metric_mode = getattr(context.scene, "da3_metric_mode", "scale_base")
338 | use_ray_pose = getattr(context.scene, "da3_use_ray_pose", False)
339 | process_res = context.scene.da3_process_res
340 | process_res_method = context.scene.da3_process_res_method
341 | use_half_precision = context.scene.da3_use_half_precision
342 | filter_edges = getattr(context.scene, "da3_filter_edges", True)
343 | min_confidence = getattr(context.scene, "da3_min_confidence", 0.5)
344 | output_debug_images = getattr(context.scene, "da3_output_debug_images", False)
345 | generate_mesh = getattr(context.scene, "da3_generate_mesh", False)
346 |
347 | if process_res % 14 != 0:
348 | self.report({'ERROR'}, "Process resolution must be a multiple of 14.")
349 | return {'CANCELLED'}
350 |
351 | if not input_folder or not os.path.isdir(input_folder):
352 | self.report({'ERROR'}, "Please select a valid input folder.")
353 | return {'CANCELLED'}
354 |
355 | # Get image paths
356 | import glob
357 | image_paths = sorted(glob.glob(os.path.join(input_folder, "*.[jJpP][pPnN][gG]")))
358 | if not image_paths:
359 | self.report({'ERROR'}, "No images found in the input folder.")
360 | return {'CANCELLED'}
361 |
362 | print(f"Total images: {len(image_paths)}")
363 |
364 | batch_mode = context.scene.da3_batch_mode
365 | batch_size = context.scene.da3_batch_size
366 | if batch_mode == "skip_frames" and len(image_paths) > batch_size:
367 | import numpy as np
368 | indices = np.linspace(0, len(image_paths) - 1, batch_size, dtype=int)
369 | image_paths = [image_paths[i] for i in indices]
370 | # For overlap modes and ignore_batch_size, use all images
371 |
372 | self.report({'INFO'}, f"Processing {len(image_paths)} images...")
373 |
374 | # Initialize progress bar
375 | LoadModelTime = 9.2 # seconds
376 | AlignBatchesTime = 0.29
377 | AddImagePointsTime = 0.27
378 | BatchTimePerImage = 4.9 # it's actually quadratic but close enough
379 | MetricLoadModelTime = 19.25
380 | MetricBatchTimePerImage = 0.62
381 | MetricCombineTime = 0.12
382 | if current_model_name == base_model_name:
383 | LoadModelTime = 0
384 | needs_alignment = batch_mode in ("last_frame_overlap", "first_last_overlap")
385 | BaseTimeEstimate = LoadModelTime + BatchTimePerImage * len(image_paths)
386 | if needs_alignment:
387 | BaseTimeEstimate += AlignBatchesTime
388 | if use_metric:
389 | MetricTimeEstimate = BaseTimeEstimate + MetricLoadModelTime
390 | if metric_mode == "scale_base":
391 | MetricTimeEstimate += MetricBatchTimePerImage * batch_size
392 | else:
393 | MetricTimeEstimate += MetricBatchTimePerImage * len(image_paths)
394 | AfterCombineTimeEstimate = MetricTimeEstimate
395 | if needs_alignment:
396 | AfterCombineTimeEstimate += AlignBatchesTime
397 | AfterCombineTimeEstimate += MetricCombineTime
398 | else:
399 | MetricTimeEstimate = BaseTimeEstimate
400 | AfterCombineTimeEstimate = BaseTimeEstimate
401 | if needs_alignment:
402 | AfterCombineTimeEstimate += AlignBatchesTime
403 | TotalTimeEstimate = AfterCombineTimeEstimate + AddImagePointsTime*len(image_paths)
404 | start_progress_timer(TotalTimeEstimate)
405 | self.report({'INFO'}, "Starting point cloud generation...")
406 |
407 | try:
408 | # 0) Run Segmentation if enabled
409 | all_segmentation_data = None
410 | segmentation_class_names = None
411 | if getattr(context.scene, "da3_use_segmentation", False):
412 | self.report({'INFO'}, "Running segmentation...")
413 | # Ensure DA3 model is unloaded
414 | unload_current_model()
415 |
416 | seg_conf = getattr(context.scene, "da3_segmentation_conf", 0.25)
417 | seg_model_name = getattr(context.scene, "da3_segmentation_model", "yolo11x-seg")
418 | all_segmentation_data, segmentation_class_names = run_segmentation(image_paths, conf_threshold=seg_conf, model_name=seg_model_name)
419 |
420 | if all_segmentation_data is None:
421 | self.report({'WARNING'}, "Segmentation failed or cancelled. Proceeding without segmentation.")
422 | else:
423 | self.report({'INFO'}, "Segmentation complete.")
424 | update_progress_timer(0, "Segmentation complete") # Timer doesn't account for seg yet
425 |
426 | # 1) run base model
427 | self.report({'INFO'}, f"Loading {base_model_name} model...")
428 | base_model = get_model(base_model_name)
429 | update_progress_timer(LoadModelTime, "Loaded base model")
430 | self.report({'INFO'}, "Running base model inference...")
431 |
432 | all_base_predictions = []
433 |
434 | if batch_mode in {"last_frame_overlap", "first_last_overlap"}:
435 | # Process in overlapping batches
436 | if batch_mode == "last_frame_overlap":
437 | # Existing scheme: last frame of previous batch overlaps with first of next
438 | step = batch_size - 1
439 | num_batches = (len(image_paths) + step - 1) // step # Ceiling division
440 | for batch_idx, start_idx in enumerate(range(0, len(image_paths), step)):
441 | end_idx = min(start_idx + batch_size, len(image_paths))
442 | batch_paths = image_paths[start_idx:end_idx]
443 | batch_indices = list(range(start_idx, end_idx))
444 | print(f"Batch {batch_idx + 1}/{num_batches}:")
445 | prediction = run_model(batch_paths, base_model, process_res, process_res_method, use_half=use_half_precision, use_ray_pose=use_ray_pose)
446 | update_progress_timer(LoadModelTime + end_idx * BatchTimePerImage, f"Base batch {batch_idx + 1}")
447 | all_base_predictions.append((prediction, batch_indices))
448 | else:
449 | # New scheme: (0..9) (0, 9, 10..17) (10, 17, 18..25)
450 | N = len(image_paths)
451 | if batch_size < 3:
452 | step = 1
453 | else:
454 | step = batch_size - 2
455 |
456 | # First batch
457 | start = 0
458 | end = min(batch_size, N)
459 | batch_indices = list(range(start, end))
460 | current_new_indices = batch_indices
461 |
462 | remaining_start = end
463 |
464 | if step > 0:
465 | num_batches = 1 + max(0, (N - end + step - 1) // step)
466 | else:
467 | num_batches = (N + batch_size - 1) // batch_size
468 |
469 | batch_idx = 0
470 | while True:
471 | batch_paths = [image_paths[i] for i in batch_indices]
472 | print(f"Batch {batch_idx + 1}/{num_batches}:")
473 | prediction = run_model(batch_paths, base_model, process_res, process_res_method, use_half=use_half_precision, use_ray_pose=use_ray_pose)
474 | end_idx = batch_indices[-1] + 1
475 | update_progress_timer(LoadModelTime + end_idx * BatchTimePerImage, f"Base batch {batch_idx + 1}")
476 | all_base_predictions.append((prediction, batch_indices.copy()))
477 |
478 | if remaining_start >= N:
479 | break
480 |
481 | # Determine overlap frames from the "new" frames of the current batch
482 | overlap_indices = [current_new_indices[0], current_new_indices[-1]]
483 | # Remove duplicates if any (e.g. if only 1 new frame)
484 | if overlap_indices[0] == overlap_indices[1]:
485 | overlap_indices = [overlap_indices[0]]
486 |
487 | next_end = min(remaining_start + step, N)
488 | next_new_indices = list(range(remaining_start, next_end))
489 |
490 | batch_indices = overlap_indices + next_new_indices
491 | current_new_indices = next_new_indices
492 |
493 | remaining_start = next_end
494 | batch_idx += 1
495 | else:
496 | prediction = run_model(image_paths, base_model, process_res, process_res_method, use_half=use_half_precision, use_ray_pose=use_ray_pose)
497 | update_progress_timer(LoadModelTime + len(image_paths) * BatchTimePerImage, "Base batch complete")
498 | all_base_predictions.append((prediction, list(range(len(image_paths)))))
499 |
500 | update_progress_timer(BaseTimeEstimate, "Base inference complete")
501 |
502 | # 2) if metric enabled and weights available:
503 | all_metric_predictions = []
504 | metric_available = False
505 |
506 | if use_metric:
507 | metric_path = get_model_path("da3metric-large")
508 | if os.path.exists(metric_path):
509 | metric_available = True
510 | # free base model from VRAM before loading metric
511 | self.report({'INFO'}, "Unloading base model and loading metric model...")
512 | base_model = None
513 | unload_current_model()
514 |
515 | metric_model = get_model("da3metric-large")
516 | update_progress_timer(BaseTimeEstimate + MetricLoadModelTime, "Loaded metric model")
517 | self.report({'INFO'}, "Running metric model inference...")
518 |
519 | if metric_mode == "scale_base":
520 | # In scale_base mode, run a single metric batch (at most batch_size images) to estimate scale.
521 | N = len(image_paths)
522 | start = 0
523 | end = min(batch_size, N)
524 | batch_indices = list(range(start, end))
525 | batch_paths = [image_paths[i] for i in batch_indices]
526 | prediction = run_model(
527 | batch_paths,
528 | metric_model,
529 | process_res,
530 | process_res_method,
531 | use_half=use_half_precision,
532 | use_ray_pose=use_ray_pose,
533 | )
534 | update_progress_timer(BaseTimeEstimate + MetricLoadModelTime + end * MetricBatchTimePerImage, "Metric batch complete")
535 | all_metric_predictions.append((prediction, batch_indices.copy()))
536 | else:
537 | # For other metric modes, keep previous batching behaviour
538 | if batch_mode in {"last_frame_overlap", "first_last_overlap"}:
539 | # Process in overlapping batches for metric too (mirror base logic)
540 | if batch_mode == "last_frame_overlap":
541 | step = batch_size - 1
542 | num_batches = (len(image_paths) + step - 1) // step
543 | for batch_idx, start_idx in enumerate(range(0, len(image_paths), step)):
544 | end_idx = min(start_idx + batch_size, len(image_paths))
545 | batch_paths = image_paths[start_idx:end_idx]
546 | batch_indices = list(range(start_idx, end_idx))
547 | print(f"Batch {batch_idx + 1}/{num_batches}:")
548 | prediction = run_model(batch_paths, metric_model, process_res, process_res_method, use_half=use_half_precision, use_ray_pose=use_ray_pose)
549 | update_progress_timer(BaseTimeEstimate + MetricLoadModelTime + end_idx * MetricBatchTimePerImage, f"Metric batch {batch_idx + 1}")
550 | all_metric_predictions.append((prediction, batch_indices))
551 | else:
552 | N = len(image_paths)
553 | if batch_size < 3:
554 | step = 1
555 | else:
556 | step = batch_size - 2
557 |
558 | start = 0
559 | end = min(batch_size, N)
560 | batch_indices = list(range(start, end))
561 | current_new_indices = batch_indices
562 |
563 | remaining_start = end
564 |
565 | if step > 0:
566 | num_batches = 1 + max(0, (N - end + step - 1) // step)
567 | else:
568 | num_batches = (N + batch_size - 1) // batch_size
569 |
570 | batch_idx = 0
571 | while True:
572 | batch_paths = [image_paths[i] for i in batch_indices]
573 | print(f"Batch {batch_idx + 1}/{num_batches}:")
574 | prediction = run_model(batch_paths, metric_model, process_res, process_res_method, use_half=use_half_precision, use_ray_pose=use_ray_pose)
575 | end_idx = batch_indices[-1] + 1
576 | update_progress_timer(BaseTimeEstimate + MetricLoadModelTime + end_idx * MetricBatchTimePerImage, f"Metric batch {batch_idx + 1}")
577 | all_metric_predictions.append((prediction, batch_indices.copy()))
578 |
579 | if remaining_start >= N:
580 | break
581 |
582 | overlap_indices = [current_new_indices[0], current_new_indices[-1]]
583 | if overlap_indices[0] == overlap_indices[1]:
584 | overlap_indices = [overlap_indices[0]]
585 |
586 | next_end = min(remaining_start + step, N)
587 | next_new_indices = list(range(remaining_start, next_end))
588 |
589 | batch_indices = overlap_indices + next_new_indices
590 | current_new_indices = next_new_indices
591 |
592 | remaining_start = next_end
593 | batch_idx += 1
594 | else:
595 | # Non-overlapping full batch
596 | prediction = run_model(image_paths, metric_model, process_res, process_res_method, use_half=use_half_precision, use_ray_pose=use_ray_pose)
597 | all_metric_predictions.append((prediction, list(range(len(image_paths)))))
598 | update_progress_timer(BaseTimeEstimate + MetricLoadModelTime + len(image_paths) * MetricBatchTimePerImage, "Metric batch complete")
599 | metric_model = None
600 | unload_current_model()
601 | else:
602 | self.report({'WARNING'}, "Metric model not downloaded; using non-metric depth only.")
603 |
604 |
605 | update_progress_timer(MetricTimeEstimate, "Metric inference complete")
606 | # Align base batches. Metric is **not** aligned in scale_base mode.
607 | if batch_mode in {"last_frame_overlap", "first_last_overlap"}:
608 | aligned_base_predictions = align_batches(all_base_predictions)
609 | # Metric depth is absolute, and has no camera poses, so alignment between batches is less important (and not implemented yet).
610 | if metric_available:
611 | aligned_metric_predictions = [p[0] for p in all_metric_predictions]
612 | else:
613 | aligned_base_predictions = [p[0] for p in all_base_predictions]
614 | if metric_available:
615 | aligned_metric_predictions = [p[0] for p in all_metric_predictions]
616 | update_progress_timer(MetricTimeEstimate + AlignBatchesTime, "Align batches complete")
617 |
618 | # Create or get a collection named after the folder
619 | folder_name = os.path.basename(os.path.normpath(input_folder))
620 | scene = context.scene
621 | collections = bpy.data.collections
622 |
623 | # Create parent collection
624 | parent_col = collections.new(folder_name)
625 | scene.collection.children.link(parent_col)
626 |
627 | # Combine the base and metric predictions
628 | if metric_available:
629 | all_combined_predictions = combine_base_and_metric(aligned_base_predictions, aligned_metric_predictions)
630 | else:
631 | all_combined_predictions = aligned_base_predictions
632 | update_progress_timer(AfterCombineTimeEstimate, "Combined predictions complete")
633 |
634 | # Detect motion
635 | detect_motion = getattr(context.scene, "da3_detect_motion", False)
636 | if detect_motion:
637 | motion_threshold = getattr(context.scene, "da3_motion_threshold", 0.1)
638 | self.report({'INFO'}, "Detecting motion...")
639 | compute_motion_scores(all_combined_predictions, threshold_ratio=motion_threshold)
640 | # update_progress_timer(AfterCombineTimeEstimate + 1.0, "Motion detection complete")
641 |
642 | # Add a point cloud for each batch
643 | for batch_number, batch_prediction in enumerate(all_combined_predictions):
644 | batch_indices = all_base_predictions[batch_number][1]
645 | batch_paths = [image_paths[j] for j in batch_indices]
646 |
647 | # Extract segmentation data for this batch
648 | batch_segmentation = None
649 | if all_segmentation_data:
650 | batch_segmentation = [all_segmentation_data[j] for j in batch_indices]
651 |
652 | combined_predictions = convert_prediction_to_dict(
653 | batch_prediction,
654 | batch_paths,
655 | output_debug_images=output_debug_images,
656 | segmentation_data=batch_segmentation,
657 | class_names=segmentation_class_names
658 | )
659 |
660 | # Create batch collection
661 | batch_col_name = f"{folder_name}_Batch_{batch_number+1}"
662 | batch_col = collections.new(batch_col_name)
663 | parent_col.children.link(batch_col)
664 |
665 | if generate_mesh:
666 | import_mesh_from_depth(combined_predictions, collection=batch_col, filter_edges=filter_edges, min_confidence=min_confidence, global_indices=batch_indices)
667 | else:
668 | import_point_cloud(combined_predictions, collection=batch_col, filter_edges=filter_edges, min_confidence=min_confidence, global_indices=batch_indices)
669 |
670 | create_cameras(combined_predictions, collection=batch_col)
671 | end_idx = batch_indices[-1] + 1
672 | update_progress_timer(AfterCombineTimeEstimate + AddImagePointsTime * end_idx, f"Added batch {batch_number + 1} to Blender")
673 |
674 | update_progress_timer(TotalTimeEstimate, "Point cloud generation complete")
675 | end_progress_timer()
676 | self.report({'INFO'}, "Point cloud generation complete.")
677 | except Exception as e:
678 | end_progress_timer()
679 | import traceback
680 | print("DA3 ERROR while generating point cloud:")
681 | traceback.print_exc()
682 | base_model = None
683 | metric_model = None
684 | base_prediction = None
685 | metric_prediction = None
686 | combined_prediction = None
687 | combined_predictions = None
688 | if torch.cuda.is_available():
689 | try:
690 | torch.cuda.empty_cache() # Force free any pending allocations
691 | except Exception as e:
692 | print(f"Warning: Failed to empty CUDA cache: {e}")
693 | import gc
694 | gc.collect() # Force garbage collection
695 | unload_current_model() # Free VRAM on error
696 | self.report({'ERROR'}, f"Failed to generate point cloud: {e}")
697 | return {'CANCELLED'}
698 | return {'FINISHED'}
699 |
700 | @classmethod
701 | def poll(cls, context):
702 | model_name = context.scene.da3_model_name
703 | model_path = get_model_path(model_name)
704 | return os.path.exists(model_path) and context.scene.da3_input_folder != ""
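# Worked example of the overlap batching in execute() above (hypothetical
# numbers): with batch_size = 5 and 10 input frames, first_last_overlap uses
# step = 3 and produces the index batches [0, 1, 2, 3, 4], [0, 4, 5, 6, 7] and
# [5, 7, 8, 9], while last_frame_overlap uses step = 4 and produces
# [0, 1, 2, 3, 4], [4, 5, 6, 7, 8] and [8, 9]; the shared indices are what
# align_batches later uses to stitch the batches together.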
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import numpy as np
4 | import bpy
5 | from mathutils import Matrix
6 | import math
7 | import torch
8 | import cv2
9 |
10 | from depth_anything_3.utils.alignment import (
11 | compute_alignment_mask,
12 | compute_sky_mask,
13 | least_squares_scale_scalar,
14 | sample_tensor_for_quantile,
15 | set_sky_regions_to_max_depth,
16 | )
17 |
18 | def unproject_depth_map_to_point_map(depth, extrinsics, intrinsics):
19 | N, H, W = depth.shape
20 | world_points = np.zeros((N, H, W, 3), dtype=np.float32)
21 | for i in range(N):
22 | u, v = np.meshgrid(np.arange(W), np.arange(H))
23 | pixels = np.stack([u, v, np.ones((H, W))], axis=-1).reshape(-1, 3) # HW, 3
24 | invK = np.linalg.inv(intrinsics[i])
25 | rays = (invK @ pixels.T).T # HW, 3
26 | depths = depth[i].reshape(-1) # HW
27 | cam_points = rays * depths[:, np.newaxis] # HW, 3
28 | cam_points_hom = np.hstack([cam_points, np.ones((len(depths), 1))]) # HW, 4
29 | E = np.vstack([extrinsics[i], [0, 0, 0, 1]]) # 4, 4
30 | cam_to_world = np.linalg.inv(E)
31 | world_points_hom = (cam_to_world @ cam_points_hom.T).T # HW, 4
32 | world_points_i = world_points_hom[:, :3] / world_points_hom[:, 3:4]
33 | world_points[i] = world_points_i.reshape(H, W, 3)
34 | return world_points
35 |
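# Hypothetical usage sketch for unproject_depth_map_to_point_map (not called by
# the add-on): with identity extrinsics and an identity intrinsic matrix, each
# pixel ray [u, v, 1] is simply scaled by its depth, so pixel (0, 0) at depth 1
# unprojects to the world point (0, 0, 1).
def _example_unproject_identity():
    depth = np.ones((1, 2, 2), dtype=np.float32)          # [N, H, W]
    extrinsics = np.eye(4, dtype=np.float32)[:3][None]    # [N, 3, 4], camera == world
    intrinsics = np.eye(3, dtype=np.float32)[None]        # [N, 3, 3]
    points = unproject_depth_map_to_point_map(depth, extrinsics, intrinsics)
    assert points.shape == (1, 2, 2, 3)
    return points
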
36 | def run_model(image_paths, model, process_res=504, process_res_method="upper_bound_resize", use_half=False, use_ray_pose=False):
37 | print(f"Processing {len(image_paths)} images")
38 | if torch.cuda.is_available():
39 | torch.cuda.reset_peak_memory_stats()
40 | allocated = torch.cuda.memory_allocated() / 1024**2
41 | free, total = torch.cuda.mem_get_info()
42 | free_mb = free / 1024**2
43 | total_mb = total / 1024**2
44 | print(f"VRAM before inference: {allocated:.1f} MB (free: {free_mb:.1f} MB / {total_mb:.1f} MB)")
45 | import torch.cuda.amp as amp
46 | if use_half:
47 | with amp.autocast():
48 | prediction = model.inference(image_paths, process_res=process_res, process_res_method=process_res_method, use_ray_pose=use_ray_pose)
49 | else:
50 | prediction = model.inference(image_paths, process_res=process_res, process_res_method=process_res_method, use_ray_pose=use_ray_pose)
51 | if torch.cuda.is_available():
52 | peak = torch.cuda.max_memory_allocated() / 1024**2
53 | allocated = torch.cuda.memory_allocated() / 1024**2
54 | free, total = torch.cuda.mem_get_info()
55 | free_mb = free / 1024**2
56 | total_mb = total / 1024**2
57 | print(f"VRAM after inference: {allocated:.1f} MB (peak: {peak:.1f} MB, free: {free_mb:.1f} MB / {total_mb:.1f} MB)")
58 | # DEBUG: inspect prediction object for this model
59 | print("DEBUG prediction type:", type(prediction))
60 | if hasattr(prediction, "__dict__"):
61 | print("DEBUG prediction.__dict__ keys:", list(prediction.__dict__.keys()))
62 | else:
63 | print("DEBUG dir(prediction):", dir(prediction))
64 | return prediction
65 |
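# Hypothetical usage sketch (the folder path and model object are placeholders
# normally supplied by the operator, not defined here): gather a folder's .jpg
# frames in name order and run a single non-overlapping inference pass.
def _example_run_model(model, folder):
    paths = sorted(glob.glob(os.path.join(folder, "*.jpg")))
    return run_model(paths, model, process_res=504,
                     process_res_method="upper_bound_resize",
                     use_half=False, use_ray_pose=False)
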
66 | # Helper functions for matrix operations and type conversion
67 | def _to_tensor(x):
68 | if isinstance(x, np.ndarray):
69 | return torch.from_numpy(x)
70 | return x
71 |
72 | def _to_numpy(x):
73 | if isinstance(x, torch.Tensor):
74 | return x.detach().cpu().float().numpy()
75 | return np.array(x)
76 |
77 | def _extrinsic_to_4x4_torch(ext_3x4):
78 | if ext_3x4.shape == (3, 4):
79 | last_row = torch.tensor([0, 0, 0, 1], device=ext_3x4.device, dtype=ext_3x4.dtype)
80 | return torch.cat([ext_3x4, last_row.unsqueeze(0)], dim=0)
81 | return ext_3x4
82 |
83 | def _invert_4x4_torch(T):
84 | R = T[:3, :3]
85 | t = T[:3, 3]
86 | T_inv = torch.eye(4, device=T.device, dtype=T.dtype)
87 | T_inv[:3, :3] = R.T
88 | T_inv[:3, 3] = -R.T @ t
89 | return T_inv
90 |
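# Illustrative check for the helpers above (not used by the add-on): for a
# rigid 3x4 extrinsic, the rotation-transpose shortcut in _invert_4x4_torch
# agrees with a general 4x4 matrix inverse.
def _example_invert_rigid():
    c, s = math.cos(0.3), math.sin(0.3)
    R = torch.tensor([[c, -s, 0.0], [s, c, 0.0], [0.0, 0.0, 1.0]])
    t = torch.tensor([[1.0], [2.0], [3.0]])
    T = _extrinsic_to_4x4_torch(torch.cat([R, t], dim=1))  # [4, 4]
    assert torch.allclose(_invert_4x4_torch(T), torch.linalg.inv(T), atol=1e-5)
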
91 | # Transform and scale each batch to align with previous batch
92 | # all_predictions is list of (prediction_for_batch, frame_indices_for_batch)
93 | # prediction_for_batch is the result returned by run_model, and is class depth_anything_3.specs.Prediction
94 | # and has these fields: ['depth', 'is_metric', 'sky', 'conf', 'extrinsics', 'intrinsics', 'processed_images', 'gaussians', 'aux', 'scale_factor']
95 | def align_batches(all_predictions):
96 | if not all_predictions:
97 | return []
98 |
99 | # result, a list of predictions with aligned extrinsics and depths
100 | aligned_predictions = []
101 |
102 | # First batch doesn't need aligning
103 | first_pred, first_indices = all_predictions[0]
104 | aligned_predictions.append(first_pred)
105 | prev_pred = first_pred
106 | prev_indices = first_indices
107 |
108 | # Loop through the rest of the batches
109 | for i in range(1, len(all_predictions)):
110 | curr_pred_orig, curr_indices = all_predictions[i]
111 |
112 | # Shallow copy to avoid modifying original
113 | import copy
114 | curr_pred = copy.copy(curr_pred_orig)
115 |
116 | curr_depth = _to_tensor(curr_pred.depth).float() # depth of every pixel in every image in the batch
117 | curr_conf = _to_tensor(curr_pred.conf).float() # confidence of depth of every pixel in every image in the batch, range 0 to more than 1
118 | curr_ext = _to_tensor(curr_pred.extrinsics) # camera position and rotation for every image in the batch (or None for Metric/Mono model)
119 | if curr_ext is not None:
120 | curr_ext = curr_ext.float()
121 |
122 | # Alignment for Metric/Mono model is not supported yet. TODO: still align the depth based on overlap images
123 | if curr_ext is None:
124 | print(f"Batch {i} has no extrinsics, skipping alignment.")
125 | aligned_predictions.append(curr_pred)
126 | prev_pred = curr_pred
127 | prev_indices = curr_indices
128 | continue
129 |
130 | # depths, depth confidences, and camera poses for all images in the previous batch
131 | prev_depth = _to_tensor(prev_pred.depth).float()
132 | prev_conf = _to_tensor(prev_pred.conf).float()
133 | prev_ext = _to_tensor(prev_pred.extrinsics).float()
134 |
135 | # Find overlapping indices
136 | common_indices = set(prev_indices) & set(curr_indices)
137 | if not common_indices:
138 | print(f"Warning: Batch {i} has no overlap with Batch {i-1}. Alignment may be poor.")
139 | aligned_predictions.append(curr_pred)
140 | prev_pred = curr_pred
141 | prev_indices = curr_indices
142 | continue
143 |
144 | # Sort common indices to ensure deterministic order
145 | common_indices = sorted(list(common_indices))
146 |
147 | # Collect valid pixels for depth scaling
148 | valid_prev_depths = []
149 | valid_curr_depths = []
150 |
151 | # Collect transforms for extrinsic alignment
152 | transforms = []
153 |
154 | # for each overlapping frame
155 | for global_idx in common_indices:
156 | # Find local index in prev and curr
157 | idx_prev = prev_indices.index(global_idx)
158 | idx_curr = curr_indices.index(global_idx)
159 |
160 | d_prev = prev_depth[idx_prev] # [H, W] depth of every pixel for this frame in the previous batch
161 | d_curr = curr_depth[idx_curr] # [H, W] depth of every pixel for this frame in the current batch
162 | c_prev = prev_conf[idx_prev] # [H, W] confidence of every pixel for this frame in the previous batch
163 |
164 | # We only want to calculate scale from pixels that aren't sky
165 | # For Metric/Mono/Nested models use the returned sky mask
166 | # For base models there is no sky mask, so assume all pixels are non-sky
167 | non_sky_mask = torch.ones_like(d_prev, dtype=torch.bool) # [H, W]
168 | if hasattr(prev_pred, 'sky') and prev_pred.sky is not None:
169 | non_sky_mask = non_sky_mask & compute_sky_mask(_to_tensor(prev_pred.sky)[idx_prev], threshold=0.3)
170 | if hasattr(curr_pred, 'sky') and curr_pred.sky is not None:
171 | non_sky_mask = non_sky_mask & compute_sky_mask(_to_tensor(curr_pred.sky)[idx_curr], threshold=0.3)
172 |
173 | # Use compute_alignment_mask for robust pixel selection
174 | # Ensure inputs are at least 3D [1, H, W] for the utils
175 | d_prev_3d = d_prev.unsqueeze(0)
176 | d_curr_3d = d_curr.unsqueeze(0)
177 | c_prev_3d = c_prev.unsqueeze(0)
178 | non_sky_mask_3d = non_sky_mask.unsqueeze(0)
179 |
180 | c_prev_ns = c_prev[non_sky_mask] # [num_non_sky_pixels]
181 | if c_prev_ns.numel() > 0:
182 | c_prev_sampled = sample_tensor_for_quantile(c_prev_ns, max_samples=100000) # if there are more than 100,000 non-sky pixels, randomly select 100,000 of them
183 | median_conf = torch.quantile(c_prev_sampled, 0.5) # calculate the median confidence (half the pixels have higher confidence than this, half have lower confidence)
184 |
185 | # DA3 helper: the mask is True for pixels that aren't sky and whose confidence is above the median of the non-sky pixels
186 | mask_3d = compute_alignment_mask(
187 | c_prev_3d, non_sky_mask_3d, d_prev_3d, d_curr_3d, median_conf
188 | ) # [1, H, W] boolean mask
189 | mask = mask_3d.squeeze(0) # [H, W]
190 | else:
191 | mask = non_sky_mask # [H, W]
192 |
193 | # require at least 11 valid pixels (i.e. roughly 22 non-sky pixels existed before keeping only the better-than-median half)
194 | if mask.sum() > 10:
195 | valid_prev_depths.append(d_prev[mask]) # [num_valid_pixels]
196 | valid_curr_depths.append(d_curr[mask]) # [num_valid_pixels]
197 |
198 | E_prev = _extrinsic_to_4x4_torch(prev_ext[idx_prev]) # 4x4 camera transform matrix for this frame in previous batch
199 | E_curr = _extrinsic_to_4x4_torch(curr_ext[idx_curr]) # 4x4 camera transform matrix for this frame in current batch
200 |
201 | transforms.append((E_prev, E_curr))
202 |
203 | # All overlap frames have now been processed
204 | # Compute global scale factor
205 | if valid_prev_depths:
206 | all_prev = torch.cat(valid_prev_depths) # [total_valid_pixels]
207 | all_curr = torch.cat(valid_curr_depths) # [total_valid_pixels]
208 | # least_squares_scale_scalar(target, source) returns scale such that source * scale ≈ target
209 | # We want curr_depth * scale ≈ prev_depth, so target=all_prev, source=all_curr
210 | scale = least_squares_scale_scalar(all_prev, all_curr)
211 | else:
212 | scale = torch.tensor(1.0) # 1x scale if there were no overlap frames with at least 22 non-sky pixels
213 |
214 | scale_val = float(scale.item())
215 | print(f"Batch {i} alignment: scale={scale_val}")
216 |
217 | # Step 1: Scale depth and extrinsic translations together (like DA3 does)
218 | # This handles all scaling in one place
219 | curr_pred.depth = _to_numpy(curr_depth * scale)
220 | curr_ext[:, :, 3] = curr_ext[:, :, 3] * scale # scale all translations
221 |
222 | # Step 2: Compute rigid alignment transform from first overlap frame
223 | # We want to find T such that: E_curr_scaled @ T ≈ E_prev
224 | # Rearranging: T = inv(E_curr_scaled) @ E_prev
225 | E_prev, E_curr_orig = transforms[0]
226 | E_curr_scaled = _extrinsic_to_4x4_torch(curr_ext[curr_indices.index(common_indices[0])])
227 | T_align = _invert_4x4_torch(E_curr_scaled) @ E_prev
228 |
229 | # Step 3: Apply rigid alignment to all extrinsics
230 | # E_new = E_curr_scaled @ T
231 | new_extrinsics = []
232 | for ext_3x4 in curr_ext:
233 | E_curr = _extrinsic_to_4x4_torch(ext_3x4)
234 | E_new = E_curr @ T_align
235 | new_extrinsics.append(E_new[:3, :4])
236 |
237 | curr_pred.extrinsics = _to_numpy(torch.stack(new_extrinsics))
238 |
239 | # Add the aligned prediction for this batch to the result list
240 | aligned_predictions.append(curr_pred)
241 | prev_pred = curr_pred
242 | prev_indices = curr_indices
243 |
244 | # We've finished all batches, return a list of aligned predictions
245 | return aligned_predictions
246 |
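# Illustrative check of the rigid-alignment step in align_batches (hypothetical
# poses, not used by the add-on): with T_align = inv(E_curr) @ E_prev, applying
# T_align to E_curr reproduces E_prev exactly, which is why the first overlap
# frame's camera lands back on its pose from the previous batch.
def _example_rigid_alignment():
    E_prev = torch.eye(4)
    E_prev[:3, 3] = torch.tensor([0.0, 0.0, 5.0])
    E_curr = torch.eye(4)
    E_curr[:3, 3] = torch.tensor([1.0, -2.0, 3.0])
    T_align = _invert_4x4_torch(E_curr) @ E_prev
    assert torch.allclose(E_curr @ T_align, E_prev, atol=1e-6)
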
247 | def compute_motion_scores(predictions, threshold_ratio=0.1):
248 | """
249 | Computes a motion score for each pixel based on consistency with other frames.
250 | Score is the number of other frames that see 'empty space' where the point should be.
251 | """
252 | import torch
253 |
254 | # Collect all data
255 | all_depths = []
256 | all_extrinsics = []
257 | all_intrinsics = []
258 | frame_mapping = [] # List of (batch_index, frame_index_in_batch)
259 |
260 | for b_idx, pred in enumerate(predictions):
261 | # Ensure we have tensors
262 | d = _to_tensor(pred.depth).float()
263 | e = _to_tensor(pred.extrinsics).float()
264 | k = _to_tensor(pred.intrinsics).float()
265 |
266 | # Initialize motion attribute on prediction object
267 | if not hasattr(pred, 'motion'):
268 | pred.motion = torch.zeros_like(d)
269 |
270 | for f_idx in range(d.shape[0]):
271 | all_depths.append(d[f_idx])
272 | all_extrinsics.append(e[f_idx])
273 | all_intrinsics.append(k[f_idx])
274 | frame_mapping.append((b_idx, f_idx))
275 |
276 | if not all_depths:
277 | return
278 |
279 | # Stack
280 | depths = torch.stack(all_depths) # [N, H, W]
281 | extrinsics = torch.stack(all_extrinsics) # [N, 3, 4]
282 | intrinsics = torch.stack(all_intrinsics) # [N, 3, 3]
283 |
284 | N, H, W = depths.shape
285 | device = depths.device
286 |
287 | print(f"Computing motion scores for {N} frames...")
288 |
289 | # Construct 4x4 matrices
290 | Es = torch.eye(4, device=device).unsqueeze(0).repeat(N, 1, 1)
291 | Es[:, :3, :4] = extrinsics
292 | Es_inv = torch.linalg.inv(Es)
293 |
294 | # Pixel grid
295 | y, x = torch.meshgrid(torch.arange(H, device=device), torch.arange(W, device=device), indexing='ij')
296 | pixels_hom = torch.stack([x.flatten(), y.flatten(), torch.ones_like(x.flatten())], dim=0).float() # [3, HW]
297 |
298 | # Loop over source frames
299 | for i in range(N):
300 | if i % 10 == 0:
301 | print(f" Processing frame {i+1}/{N}")
302 |
303 | # Unproject frame i
304 | K_i_inv = torch.linalg.inv(intrinsics[i])
305 | rays_i = K_i_inv @ pixels_hom # [3, HW]
306 | d_i = depths[i].flatten() # [HW]
307 |
308 | # Filter valid depth
309 | valid_mask = d_i > 0
310 | if not valid_mask.any():
311 | continue
312 |
313 | points_cam_i = rays_i[:, valid_mask] * d_i[valid_mask].unsqueeze(0) # [3, M]
314 | points_cam_i_hom = torch.cat([points_cam_i, torch.ones((1, points_cam_i.shape[1]), device=device)], dim=0) # [4, M]
315 |
316 | # Transform to world
317 | points_world_hom = Es_inv[i] @ points_cam_i_hom # [4, M]
318 |
319 | motion_votes = torch.zeros(points_cam_i.shape[1], device=device)
320 |
321 | # Check against all other frames j
322 | # TODO: process points in chunks if N becomes large.
323 | # For now this is a simple O(N^2) loop over frame pairs.
324 | for j in range(N):
325 | if i == j:
326 | continue
327 |
328 | # Project to frame j
329 | points_cam_j_hom = Es[j] @ points_world_hom # [4, M]
330 | # Check if in front of camera
331 | z_j = points_cam_j_hom[2]
332 | in_front = z_j > 0.1 # Near plane
333 |
334 | if not in_front.any():
335 | continue
336 |
337 | # Project to pixels
338 | points_cam_j = points_cam_j_hom[:3]
339 | proj_j = intrinsics[j] @ points_cam_j
340 | u_j = proj_j[0] / proj_j[2]
341 | v_j = proj_j[1] / proj_j[2]
342 |
343 | # Check bounds
344 | in_bounds = (u_j >= 0) & (u_j < W - 1) & (v_j >= 0) & (v_j < H - 1) & in_front
345 |
346 | if not in_bounds.any():
347 | continue
348 |
349 | # Sample depth from frame j
350 | u_j_int = torch.round(u_j).long()
351 | v_j_int = torch.round(v_j).long()
352 |
353 | # Filter indices
354 | valid_indices = torch.where(in_bounds)[0]
355 |
356 | u_sample = u_j_int[valid_indices]
357 | v_sample = v_j_int[valid_indices]
358 |
359 | d_target = depths[j, v_sample, u_sample]
360 | d_proj = z_j[valid_indices]
361 |
362 | # Check for "empty space"
363 | # If d_target > d_proj * (1 + threshold)
364 | is_empty = d_target > d_proj * (1 + threshold_ratio)
365 |
366 | # Accumulate votes
367 | motion_votes[valid_indices[is_empty]] += 1
368 |
369 | # Store result
370 | full_motion = torch.zeros(H*W, device=device)
371 | full_motion[valid_mask] = motion_votes
372 |
373 | # Save to prediction object
374 | b_idx, f_idx = frame_mapping[i]
375 | predictions[b_idx].motion[f_idx] = full_motion.reshape(H, W)
376 |
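# Hypothetical smoke test for compute_motion_scores (not used by the add-on):
# two identical frames of a static scene seen from the same camera should get a
# motion score of zero everywhere, because no frame ever observes empty space
# where another frame's points should be.
def _example_motion_scores_static():
    from types import SimpleNamespace
    K = torch.tensor([[4.0, 0.0, 2.0], [0.0, 4.0, 2.0], [0.0, 0.0, 1.0]])
    pred = SimpleNamespace(
        depth=torch.ones(2, 4, 4),
        extrinsics=torch.eye(4)[:3].repeat(2, 1, 1),
        intrinsics=K.repeat(2, 1, 1),
    )
    compute_motion_scores([pred], threshold_ratio=0.1)
    assert torch.all(pred.motion == 0)
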
377 | def convert_prediction_to_dict(prediction, image_paths=None, output_debug_images=False, segmentation_data=None, class_names=None):
378 | predictions = {}
379 |
380 | # processed_images is already a uint8 numpy array in this pipeline; normalise it to float32 in 0..1
381 | predictions['images'] = prediction.processed_images.astype(np.float32) / 255.0 # [N, H, W, 3]
382 |
383 | # depth / extrinsics / intrinsics may be torch tensors after combination; ensure numpy
384 | predictions['depth'] = _to_numpy(prediction.depth)
385 | predictions['extrinsic'] = _to_numpy(prediction.extrinsics)
386 | predictions['intrinsic'] = _to_numpy(prediction.intrinsics)
387 | predictions['conf'] = _to_numpy(prediction.conf)
388 |
389 | if hasattr(prediction, 'motion'):
390 | predictions['motion'] = _to_numpy(prediction.motion)
391 |
392 | if class_names is not None:
393 | predictions['class_names'] = class_names
394 |
395 | if segmentation_data is not None:
396 | # segmentation_data is a list of dicts (one per frame)
397 | # { "masks": [M, h, w], "ids": [M], "classes": [M] }
398 | # We need to resize masks to match depth map size [H, W]
399 |
400 | N, H, W = predictions['depth'].shape
401 |
402 | # We will store a dense ID map for each frame: [N, H, W]
403 | # Initialize with -1 (no object)
404 | seg_id_map = np.full((N, H, W), -1, dtype=np.int32)
405 |
406 | # Also store metadata about IDs (class, etc.)
407 | # Global map of ID -> Class
408 | id_to_class = {}
409 |
410 | for i in range(N):
411 | if i >= len(segmentation_data): break
412 |
413 | frame_seg = segmentation_data[i]
414 | masks = frame_seg.get("masks", [])
415 | ids = frame_seg.get("ids", [])
416 | classes = frame_seg.get("classes", [])
417 |
418 | if len(masks) == 0: continue
419 |
420 | # Pre-load image for debug if needed
421 | orig_img = None
422 | debug_dir = None
423 | if output_debug_images and image_paths is not None and i < len(image_paths):
424 | try:
425 | first_img_dir = os.path.dirname(image_paths[0])
426 | debug_dir = os.path.join(first_img_dir, "debug_output")
427 | os.makedirs(debug_dir, exist_ok=True)
428 | orig_img = cv2.imread(image_paths[i])
429 | except Exception as e:
430 | print(f"Debug image load failed: {e}")
431 |
432 | # Resize masks to H, W
433 | # masks is [M, h_small, w_small]
434 | # We iterate and resize
435 | for m_idx, mask in enumerate(masks):
436 | # YOLO masks are typically float in 0..1 (sometimes already binary); resize first, threshold to binary below
437 | # Resize to H, W
438 | # cv2.resize expects (W, H)
439 | resized_mask = cv2.resize(mask, (W, H), interpolation=cv2.INTER_LINEAR)
440 |
441 | obj_id = ids[m_idx] if len(ids) > m_idx else -1
442 | obj_cls = classes[m_idx] if len(classes) > m_idx else -1
443 |
444 | if output_debug_images and orig_img is not None:
445 | try:
446 | # Native mask
447 | h_nat, w_nat = mask.shape
448 | mask_nat_vis = (mask * 255).astype(np.uint8)
449 | cv2.imwrite(os.path.join(debug_dir, f"frame_{i}_obj_{obj_id}_mask_native.png"), mask_nat_vis)
450 |
451 | # Native image
452 | img_nat = cv2.resize(orig_img, (w_nat, h_nat))
453 | cv2.imwrite(os.path.join(debug_dir, f"frame_{i}_obj_{obj_id}_image_native.png"), img_nat)
454 |
455 | # Resized mask
456 | mask_res_vis = (resized_mask * 255).astype(np.uint8)
457 | cv2.imwrite(os.path.join(debug_dir, f"frame_{i}_obj_{obj_id}_mask_resized.png"), mask_res_vis)
458 | except Exception as e:
459 | print(f"Failed to save debug mask: {e}")
460 |
461 | # Threshold to binary
462 | binary_mask = resized_mask > 0.5
463 |
464 | if obj_id != -1:
465 | id_to_class[obj_id] = obj_cls
466 | # Assign ID to map
467 | # Note: overlapping masks overwrite each other in this dense ID map.
468 | # Ideally masks would be written largest-first so smaller objects stay on top, but YOLO's NMS usually keeps overlap small.
469 | seg_id_map[i][binary_mask] = obj_id
470 |
471 | predictions['seg_id_map'] = seg_id_map
472 | predictions['id_to_class'] = id_to_class
473 |
474 | if image_paths is not None and output_debug_images:
475 | predictions['image_paths'] = image_paths
476 |
477 | # Save debug images
478 | try:
479 | # Create debug directory
480 | first_img_dir = os.path.dirname(image_paths[0])
481 | debug_dir = os.path.join(first_img_dir, "debug_output")
482 | os.makedirs(debug_dir, exist_ok=True)
483 |
484 | for i, img_path in enumerate(image_paths):
485 | base_name = os.path.splitext(os.path.basename(img_path))[0]
486 |
487 | # Depth
488 | depth_map = predictions['depth'][i]
489 | # Normalize depth for visualization: 0-255
490 | d_min = np.nanmin(depth_map)
491 | d_max = np.nanmax(depth_map)
492 | if d_max > d_min:
493 | depth_norm = ((depth_map - d_min) / (d_max - d_min) * 255.0).astype(np.uint8)
494 | else:
495 | depth_norm = np.zeros_like(depth_map, dtype=np.uint8)
496 |
497 | depth_filename = os.path.join(debug_dir, f"{base_name}_depth.png")
498 | cv2.imwrite(depth_filename, depth_norm)
499 |
500 | # Confidence
501 | conf_map = predictions['conf'][i]
502 | # Scale confidence: * 10, clip to 255
503 | conf_scaled = np.clip(conf_map * 10.0, 0, 255).astype(np.uint8)
504 |
505 | conf_filename = os.path.join(debug_dir, f"{base_name}_conf.png")
506 | cv2.imwrite(conf_filename, conf_scaled)
507 |
508 | # Color Image
509 | color_img = predictions['images'][i]
510 | color_img_uint8 = (np.clip(color_img, 0, 1) * 255).astype(np.uint8)
511 | color_img_bgr = cv2.cvtColor(color_img_uint8, cv2.COLOR_RGB2BGR)
512 | color_filename = os.path.join(debug_dir, f"{base_name}_color.png")
513 | cv2.imwrite(color_filename, color_img_bgr)
514 |
515 | # Bad Confidence Overlay
516 | H, W = conf_map.shape
517 | bad_img = np.zeros((H, W, 4), dtype=np.uint8) # BGRA
518 |
519 | # Yellow for conf <= 2.0
520 | mask_yellow = (conf_map <= 2.0)
521 | bad_img[mask_yellow] = [0, 255, 255, 255] # Yellow
522 |
523 | # Red for conf <= 1.0
524 | mask_red = (conf_map <= 1.0)
525 | bad_img[mask_red] = [0, 0, 255, 255] # Red
526 |
527 | # Magenta for conf <= 1.0 adjacent to conf > 1.0
528 | mask_good = (conf_map > 1.0)
529 | kernel = np.ones((3,3), np.uint8)
530 | # Dilate good area to find neighbors
531 | dilated_good = cv2.dilate(mask_good.astype(np.uint8), kernel, iterations=1).astype(bool)
532 | # Intersection: Is red AND is touched by good
533 | mask_magenta = mask_red & dilated_good
534 | bad_img[mask_magenta] = [255, 0, 255, 255] # Magenta
535 |
536 | bad_filename = os.path.join(debug_dir, f"{base_name}_bad.png")
537 | cv2.imwrite(bad_filename, bad_img)
538 |
539 | # Depth Gradient
540 | grad_x = cv2.Sobel(depth_map, cv2.CV_64F, 1, 0, ksize=3)
541 | grad_y = cv2.Sobel(depth_map, cv2.CV_64F, 0, 1, ksize=3)
542 | grad_mag = np.sqrt(grad_x**2 + grad_y**2)
543 |
544 | g_min = np.nanmin(grad_mag)
545 | g_max = np.nanmax(grad_mag)
546 | if g_max > g_min:
547 | grad_norm = ((grad_mag - g_min) / (g_max - g_min) * 255.0).astype(np.uint8)
548 | else:
549 | grad_norm = np.zeros_like(grad_mag, dtype=np.uint8)
550 |
551 | grad_filename = os.path.join(debug_dir, f"{base_name}_grad.png")
552 | cv2.imwrite(grad_filename, grad_norm)
553 |
554 | except ImportError:
555 | print("Warning: cv2 not found, skipping debug image output.")
556 | except Exception as e:
557 | print(f"Warning: Failed to save debug images: {e}")
558 | elif image_paths is not None:
559 | predictions['image_paths'] = image_paths
560 |
561 | print("DEBUG shapes:")
562 | print(" images:", predictions['images'].shape)
563 | print(" depth:", predictions['depth'].shape)
564 | print(" extrinsic:", np.array(predictions['extrinsic']).shape)
565 | print(" intrinsic:", np.array(predictions['intrinsic']).shape)
566 | print("Computing world points from depth map...")
567 |
568 | if prediction.extrinsics is None or prediction.intrinsics is None:
569 | raise ValueError("Prediction has no camera parameters; cannot create world-space point cloud.")
570 |
571 | world_points = unproject_depth_map_to_point_map(
572 | predictions['depth'],
573 | predictions['extrinsic'],
574 | predictions['intrinsic'],
575 | )
576 | predictions["world_points_from_depth"] = world_points
577 | return predictions
578 |
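# Layout of the dictionary returned above, for a batch of N frames at processed
# resolution H x W:
#   images                   [N, H, W, 3] float32 in 0..1
#   depth, conf              [N, H, W]
#   extrinsic                [N, 3, 4]
#   intrinsic                [N, 3, 3]
#   world_points_from_depth  [N, H, W, 3]
#   motion                   [N, H, W]    (only when motion detection ran)
#   seg_id_map, id_to_class  (only when segmentation data was supplied)
#   image_paths, class_names (optional passthrough)
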
579 | # Based on da3_repo/src/depth_anything_3/model/da3.py
580 | def combine_base_and_metric(base_list, metric_list):
581 | """Combine base predictions (with poses) with metric predictions (no poses).
582 |
583 | This version operates purely on [N, H, W] tensors per batch and
584 | re-implements the metric scaling logic from DA3 so that batches may
585 | have different sizes (e.g. a shorter last batch).
586 |
587 | Args:
588 | base_list: list of base `Prediction` objects (one per batch), each with
589 | depth [N_b, H, W], conf [N_b, H, W], intrinsics [N_b, 3, 3],
590 | extrinsics [N_b, 3, 4].
591 | metric_list: list of metric `Prediction` objects (one per batch), each with
592 | depth [N_m, H, W], sky [N_m, H, W]. For scale_base you typically
593 | pass a single-element list and let total metric frames
594 | be <= total base frames.
595 |
596 | Returns:
597 | List of base `Prediction` objects (same length as base_list) whose
598 | depths and extrinsics have been globally scaled to metric units.
599 | """
600 |
601 | if not base_list:
602 | return []
603 |
604 | # Concatenate all base frames into a single [Nb_total, H, W]
605 | base_depth_all = []
606 | base_conf_all = []
607 | base_intr_all = []
608 |
609 | for pred in base_list:
610 | d = _to_tensor(pred.depth).float() # [N_b, H, W]
611 | c = _to_tensor(pred.conf).float() # [N_b, H, W]
612 | K = _to_tensor(pred.intrinsics).float() # [N_b, 3, 3]
613 | if d.ndim != 3 or c.ndim != 3:
614 | raise ValueError(f"Base depth/conf must be [N,H,W], got depth={d.shape}, conf={c.shape}")
615 | base_depth_all.append(d)
616 | base_conf_all.append(c)
617 | base_intr_all.append(K)
618 |
619 | depth_all = torch.cat(base_depth_all, dim=0) # [Nb_total, H, W]
620 | conf_all = torch.cat(base_conf_all, dim=0) # [Nb_total, H, W]
621 | intr_all = torch.cat(base_intr_all, dim=0) # [Nb_total, 3, 3]
622 |
623 | # Concatenate all metric frames similarly
624 | metric_depth_all = []
625 | sky_all = []
626 | for pred in metric_list:
627 | md = _to_tensor(pred.depth).float() # [Nm, H, W]
628 | sky = _to_tensor(pred.sky).float() # [Nm, H, W]
629 | if md.ndim != 3 or sky.ndim != 3:
630 | raise ValueError(f"Metric depth/sky must be [N,H,W], got depth={md.shape}, sky={sky.shape}")
631 | metric_depth_all.append(md)
632 | sky_all.append(sky)
633 |
634 | if not metric_depth_all:
635 | raise ValueError("Metric prediction list is empty or missing required fields")
636 |
637 | metric_all = torch.cat(metric_depth_all, dim=0) # [Nm_total, H, W]
638 | sky_all = torch.cat(sky_all, dim=0) # [Nm_total, H, W]
639 |
640 | Nb_total = depth_all.shape[0]
641 | Nm_total = metric_all.shape[0]
642 |
643 | # Restrict to overlapping frames in the sequence sense
644 | N_overlap = min(Nb_total, Nm_total)
645 | if N_overlap <= 0:
646 | raise ValueError("Metric prediction has no frames; cannot compute scale.")
647 |
648 | depth_overlap = depth_all[:N_overlap] # [N_overlap, H, W]
649 | metric_overlap = metric_all[:N_overlap] # [N_overlap, H, W]
650 | sky_overlap = sky_all[:N_overlap] # [N_overlap, H, W]
651 | ixt_overlap = intr_all[:N_overlap] # [N_overlap, 3, 3]
652 |
653 | # Inline metric scaling logic from DA3's apply_metric_scaling for [N, H, W]
654 | # focal_length = (fx + fy) / 2, depth_scaled = depth * (f / scale_factor)
655 | scale_factor_metric = 300.0
656 | focal_length = (ixt_overlap[:, 0, 0] + ixt_overlap[:, 1, 1]) / 2.0 # [N_overlap]
657 | metric_scaled = metric_overlap * (focal_length[:, None, None] / scale_factor_metric)
658 |
659 | # Non-sky mask and alignment only on overlapping frames
660 | non_sky_mask = compute_sky_mask(sky_overlap, threshold=0.3) # [N_overlap, H, W]
661 | if non_sky_mask.sum() <= 10:
662 | raise ValueError("Insufficient non-sky pixels for alignment")
663 |
664 | depth_conf_overlap = conf_all[:N_overlap] # [N_overlap, H, W]
665 | depth_conf_ns = depth_conf_overlap[non_sky_mask]
666 | depth_conf_sampled = sample_tensor_for_quantile(depth_conf_ns, max_samples=100000)
667 | median_conf = torch.quantile(depth_conf_sampled, 0.5)
668 |
669 | align_mask = compute_alignment_mask(
670 | depth_conf_overlap, non_sky_mask, depth_overlap, metric_scaled, median_conf
671 | )
672 |
673 | valid_depth = depth_overlap[align_mask]
674 | valid_metric_depth = metric_scaled[align_mask]
675 | scale_factor = least_squares_scale_scalar(valid_metric_depth, valid_depth)
676 |
677 | # Scale depth and extrinsics for each base batch
678 | scaled_base_list = []
679 | for pred in base_list:
680 | ext = _to_tensor(pred.extrinsics)
681 | if ext is not None:
682 | if ext.ndim != 3 or ext.shape[1:] != (3, 4):
683 | raise ValueError(f"Expected extrinsics [N,3,4], got {ext.shape}")
684 | ext = ext.float()
685 | ext[:, :, 3] *= scale_factor
686 |
687 | pred.depth = _to_tensor(pred.depth) * scale_factor
688 | if ext is not None:
689 | pred.extrinsics = ext
690 | pred.is_metric = 1
691 | pred.scale_factor = float(scale_factor.item())
692 | scaled_base_list.append(pred)
693 |
694 | return scaled_base_list
695 |
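# Worked example of the inlined DA3 metric scaling above (hypothetical numbers,
# not used by the add-on): with a focal length of 600 px and the fixed
# scale_factor of 300, a raw metric depth of 2.0 becomes 2.0 * (600 / 300) = 4.0
# before the least-squares scale between base and metric depths is estimated.
def _example_metric_scaling():
    intrinsics = torch.tensor([[[600.0, 0.0, 320.0],
                                [0.0, 600.0, 240.0],
                                [0.0, 0.0, 1.0]]])             # [1, 3, 3]
    metric_depth = torch.full((1, 4, 4), 2.0)                  # [1, H, W]
    focal = (intrinsics[:, 0, 0] + intrinsics[:, 1, 1]) / 2.0  # 600.0
    scaled = metric_depth * (focal[:, None, None] / 300.0)
    assert torch.allclose(scaled, torch.full((1, 4, 4), 4.0))
    return scaled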
696 |
697 | def combine_base_with_metric_depth(base, metric):
698 | """Combine base prediction cameras with raw metric model depth.
699 |
700 | This variant keeps **base intrinsics/extrinsics/conf** but **replaces
701 | depth with metric.depth in metres**, then applies the same sky-handling
702 | logic as `combine_base_and_metric`.
703 |
704 | Assumes shapes:
705 | - base.depth: [N, H, W]
706 | - metric.depth: [N, H, W]
707 | - base.intrinsics: [N, 3, 3]
708 | - base.extrinsics: [N, 3, 4]
709 | - metric.sky: [N, H, W]
710 | """
711 | output = base
712 |
713 | # Base / metric depths and sky mask
714 | base_depth = _to_tensor(base.depth).float() # [B, H, W]
715 | metric_depth = _to_tensor(metric.depth).float() # [B, H, W]
716 | sky = _to_tensor(metric.sky).float() # [B, H, W]
717 |
718 | if base_depth.ndim != 3 or metric_depth.ndim != 3:
719 | raise ValueError(
720 | f"Unexpected depth shapes: base={base_depth.shape}, metric={metric_depth.shape}"
721 | )
722 |
723 | # Non-sky mask and basic sanity check
724 | non_sky_mask = compute_sky_mask(sky, threshold=0.3)
725 | if non_sky_mask.sum() <= 10:
726 | raise ValueError("Insufficient non-sky pixels for metric depth sky handling")
727 |
728 | # Compute global scale factor aligning base depth to metric depth
729 | # Use robust alignment mask - convert conf to tensor if needed
730 | depth_conf = _to_tensor(output.conf).float()
731 | depth_conf_ns = depth_conf[non_sky_mask]
732 | depth_conf_sampled = sample_tensor_for_quantile(depth_conf_ns, max_samples=100000)
733 | median_conf = torch.quantile(depth_conf_sampled, 0.5)
734 |
735 | align_mask = compute_alignment_mask(
736 | depth_conf, non_sky_mask, base_depth, metric_depth, median_conf
737 | )
738 |
739 | valid_base = base_depth[align_mask]
740 | valid_metric = metric_depth[align_mask]
741 | scale_factor = least_squares_scale_scalar(valid_metric, valid_base)
742 |
743 | # Use metric depth as final depth (in metres)
744 | depth = metric_depth
745 |
746 | # Estimate a far depth for sky regions
747 | non_sky_depth = depth[non_sky_mask]
748 | if non_sky_depth.numel() > 100000:
749 | idx = torch.randint(0, non_sky_depth.numel(), (100000,), device=non_sky_depth.device)
750 | sampled_depth = non_sky_depth[idx]
751 | else:
752 | sampled_depth = non_sky_depth
753 |
754 | non_sky_max = torch.quantile(sampled_depth, 0.99)
755 | non_sky_max = torch.minimum(non_sky_max, torch.tensor(200.0, device=depth.device))
756 |
757 | depth_4d = depth.unsqueeze(1)
758 | dummy_conf = torch.ones_like(depth_4d)
759 | depth_4d, _ = set_sky_regions_to_max_depth(
760 | depth_4d, dummy_conf, non_sky_mask.unsqueeze(1), max_depth=non_sky_max
761 | )
762 | depth = depth_4d.squeeze(1)
763 |
764 | # Scale base extrinsics translation so cameras match metric scale
765 | extrinsics = _to_tensor(output.extrinsics)
766 | print("DEBUG combine_base_with_metric_depth: extrinsics shape:", extrinsics.shape)
767 |
768 | if extrinsics.ndim != 3 or extrinsics.shape[1:] != (3, 4):
769 | raise ValueError(f"Expected extrinsics [N,3,4], got {extrinsics.shape}")
770 |
771 | extrinsics = extrinsics.float()
772 | extrinsics[:, :, 3] = extrinsics[:, :, 3] * scale_factor
773 |
774 | # Write back into output: metric depth + scaled base cameras
775 | output.depth = depth
776 | output.extrinsics = extrinsics
777 | output.is_metric = 1
778 | output.scale_factor = float(scale_factor.item())
779 |
780 | return output
781 |
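# Hedged summary of the two combine strategies above: combine_base_and_metric
# keeps the base depths and only rescales them (plus the camera translations)
# into metric units, whereas combine_base_with_metric_depth swaps in the metric
# model's depth maps directly and rescales the base cameras to match them.
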
782 | def get_or_create_point_material():
783 | mat = bpy.data.materials.get("PointMaterial")
784 | if mat is None:
785 | mat = bpy.data.materials.new(name="PointMaterial")
786 | mat.use_nodes = True
787 | nodes = mat.node_tree.nodes
788 | links = mat.node_tree.links
789 | for node in nodes:
790 | nodes.remove(node)
791 |
792 | # Image color attribute
793 | attr_node = nodes.new('ShaderNodeAttribute')
794 | attr_node.attribute_name = "point_color"
795 | attr_node.location = (-600, 200)
796 |
797 | # Confidence attribute (raw values)
798 | conf_attr_node = nodes.new('ShaderNodeAttribute')
799 | conf_attr_node.attribute_name = "conf"
800 | conf_attr_node.location = (-600, -200)
801 |
802 | # Map Range: 0-10 -> 0-1 (so conf values map to reasonable ramp positions)
803 | map_range = nodes.new('ShaderNodeMapRange')
804 | map_range.location = (-400, -200)
805 | map_range.clamp = True
806 | map_range.inputs['From Min'].default_value = 0.0
807 | map_range.inputs['From Max'].default_value = 10.0
808 | map_range.inputs['To Min'].default_value = 0.0
809 | map_range.inputs['To Max'].default_value = 1.0
810 |
811 | # Color Ramp: red (low) -> green (mid) -> blue (high)
812 | # Positions: 0.2 = conf 2, 0.5 = conf 5, 0.6 = conf 6
813 | color_ramp = nodes.new('ShaderNodeValToRGB')
814 | color_ramp.location = (-150, -200)
815 | # Clear default elements and set up: red at 0, green at 0.5-0.6, blue at 1
816 | ramp = color_ramp.color_ramp
817 | ramp.elements[0].position = 0.0
818 | ramp.elements[0].color = (1, 0, 0, 1) # Red (conf < 2)
819 | ramp.elements[1].position = 0.2
820 | ramp.elements[1].color = (1, 0, 0, 1) # Still red at conf=2
821 | # Add green zone
822 | green_start = ramp.elements.new(0.5)
823 | green_start.color = (0, 1, 0, 1) # Green at conf=5
824 | green_end = ramp.elements.new(0.6)
825 | green_end.color = (0, 1, 0, 1) # Green at conf=6
826 | # Add blue
827 | blue_elem = ramp.elements.new(1.0)
828 | blue_elem.color = (0, 0, 1, 1) # Blue at conf=10
829 |
830 | # Mix shader to switch between image color and confidence color
831 | mix_node = nodes.new('ShaderNodeMix')
832 | mix_node.data_type = 'RGBA'
833 | mix_node.location = (100, 100)
834 | mix_node.inputs['Factor'].default_value = 0.0 # 0 = image color, 1 = confidence color
835 |
836 | bsdf = nodes.new('ShaderNodeBsdfPrincipled')
837 | bsdf.location = (300, 100)
838 |
839 | output_node_material = nodes.new('ShaderNodeOutputMaterial')
840 | output_node_material.location = (550, 100)
841 |
842 | # Connect nodes
843 | links.new(conf_attr_node.outputs['Fac'], map_range.inputs['Value'])
844 | links.new(map_range.outputs['Result'], color_ramp.inputs['Fac'])
845 | links.new(attr_node.outputs['Color'], mix_node.inputs['A'])
846 | links.new(color_ramp.outputs['Color'], mix_node.inputs['B'])
847 | links.new(mix_node.outputs['Result'], bsdf.inputs['Base Color'])
848 | links.new(bsdf.outputs['BSDF'], output_node_material.inputs['Surface'])
849 | return mat
850 |
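# Confidence-to-colour mapping produced by the material above (conf is first
# mapped from 0..10 to ramp position 0..1):
#   conf <= 2    red
#   conf 2..5    red -> green gradient
#   conf 5..6    green
#   conf 6..10   green -> blue gradient
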
851 | def add_point_cloud_geo_nodes(obj, mat):
852 | geo_mod = obj.modifiers.new(name="GeometryNodes", type='NODES')
853 | node_group = bpy.data.node_groups.new(name="PointCloud", type='GeometryNodeTree')
854 |
855 | # Inputs
856 | node_group.interface.new_socket(name="Geometry", in_out="INPUT", socket_type="NodeSocketGeometry")
857 | node_group.interface.new_socket(name="Threshold", in_out="INPUT", socket_type="NodeSocketFloat")
858 | node_group.interface.items_tree[-1].default_value = 0.5
859 | node_group.interface.new_socket(name="Scale", in_out="INPUT", socket_type="NodeSocketFloat")
860 | node_group.interface.items_tree[-1].default_value = 1.0
861 | node_group.interface.items_tree[-1].min_value = 0.0
862 |
863 | # Outputs
864 | node_group.interface.new_socket(name="Geometry", in_out="OUTPUT", socket_type="NodeSocketGeometry")
865 |
866 | geo_mod.node_group = node_group
867 |
868 | # Nodes
869 | input_node = node_group.nodes.new('NodeGroupInput')
870 | output_node = node_group.nodes.new('NodeGroupOutput')
871 |
872 | mesh_to_points = node_group.nodes.new('GeometryNodeMeshToPoints')
873 | # Radius controlled by Scale input * 0.002
874 | math_node = node_group.nodes.new('ShaderNodeMath')
875 | math_node.operation = 'MULTIPLY'
876 | math_node.inputs[1].default_value = 0.002
877 |
878 | named_attr = node_group.nodes.new('GeometryNodeInputNamedAttribute')
879 | named_attr.inputs['Name'].default_value = "conf"
880 | named_attr.data_type = 'FLOAT'
881 |
882 | compare = node_group.nodes.new('FunctionNodeCompare')
883 | compare.data_type = 'FLOAT'
884 | compare.operation = 'LESS_THAN'
885 |
886 | delete_geo = node_group.nodes.new('GeometryNodeDeleteGeometry')
887 | delete_geo.domain = 'POINT'
888 |
889 | set_material_node = node_group.nodes.new('GeometryNodeSetMaterial')
890 | set_material_node.inputs['Material'].default_value = mat
891 |
892 | # Links
893 | node_group.links.new(input_node.outputs['Geometry'], mesh_to_points.inputs['Mesh'])
894 |
895 | # Scale logic
896 | node_group.links.new(input_node.outputs['Scale'], math_node.inputs[0])
897 | node_group.links.new(math_node.outputs['Value'], mesh_to_points.inputs['Radius'])
898 |
899 | node_group.links.new(mesh_to_points.outputs['Points'], delete_geo.inputs['Geometry'])
900 | node_group.links.new(named_attr.outputs['Attribute'], compare.inputs['A'])
901 | node_group.links.new(input_node.outputs['Threshold'], compare.inputs['B'])
902 | node_group.links.new(compare.outputs['Result'], delete_geo.inputs['Selection'])
903 | node_group.links.new(delete_geo.outputs['Geometry'], set_material_node.inputs['Geometry'])
904 | node_group.links.new(set_material_node.outputs['Geometry'], output_node.inputs['Geometry'])
905 |
906 | def create_point_cloud_object(name, points, colors, confs, motions=None, collection=None):
907 | mesh = bpy.data.meshes.new(name=name)
908 | mesh.from_pydata(points.tolist(), [], [])
909 |
910 | # Image colors
911 | attribute = mesh.attributes.new(name="point_color", type="FLOAT_COLOR", domain="POINT")
912 | attribute.data.foreach_set("color", colors.flatten().tolist())
913 |
914 | # Raw confidence value
915 | attribute_conf = mesh.attributes.new(name="conf", type="FLOAT", domain="POINT")
916 | attribute_conf.data.foreach_set("value", confs.tolist())
917 |
918 | # Motion score
919 | if motions is not None:
920 | attribute_motion = mesh.attributes.new(name="motion", type="FLOAT", domain="POINT")
921 | attribute_motion.data.foreach_set("value", motions.tolist())
922 |
923 | obj = bpy.data.objects.new(name, mesh)
924 |
925 | # Link to the provided collection, or fallback to active collection
926 | if collection is not None:
927 | collection.objects.link(obj)
928 | else:
929 | bpy.context.collection.objects.link(obj)
930 |
931 | # Reuse existing PointMaterial or create new one
932 | mat = get_or_create_point_material()
933 |
934 | # Add material to object so it shows up in Shading mode
935 | obj.data.materials.append(mat)
936 |
937 | # Geometry nodes setup
938 | add_point_cloud_geo_nodes(obj, mat)
939 | return obj
940 |
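# Coordinate convention used by the importers below: the predicted points
# appear to follow the usual computer-vision convention (x right, y down,
# z forward), so the [x, z, -y] reshuffle applied before object creation turns
# them into Blender's x-right, y-forward, z-up frame so the scene sits upright.
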
941 | def import_point_cloud(d, collection=None, filter_edges=True, min_confidence=0.5, global_indices=None):
942 | points = d["world_points_from_depth"]
943 | images = d["images"]
944 | conf = d["conf"]
945 |
946 | # Filter confidence based on depth gradient
947 | if filter_edges and "depth" in d:
948 | try:
949 | depth = d["depth"]
950 | for i in range(len(depth)):
951 | dm = depth[i]
952 | gx = cv2.Sobel(dm, cv2.CV_64F, 1, 0, ksize=3)
953 | gy = cv2.Sobel(dm, cv2.CV_64F, 0, 1, ksize=3)
954 | mag = np.sqrt(gx**2 + gy**2)
955 | mn, mx = np.nanmin(mag), np.nanmax(mag)
956 | if mx > mn:
957 | norm = (mag - mn) / (mx - mn)
958 | else:
959 | norm = np.zeros_like(mag)
960 |
961 | # Set confidence to 0 if normalized gradient >= 12/255
962 | mask = norm >= (12.0 / 255.0)
963 | conf[i][mask] = 0.0
964 | except Exception as e:
965 | print(f"Failed to filter confidence by gradient: {e}")
966 |
967 | if 'seg_id_map' in d:
968 | seg_id_map = d['seg_id_map'] # [N, H, W]
969 | id_to_class = d.get('id_to_class', {})
970 | class_names = d.get('class_names', {})
971 |
972 | # Get all unique IDs across the batch
973 | unique_ids = np.unique(seg_id_map)
974 |
975 | # Create a collection for segmented objects
976 | seg_collection = None
977 | if collection:
978 | seg_collection = bpy.data.collections.new("Segmented")
979 | collection.children.link(seg_collection)
980 |
981 | # Process each ID
982 | for obj_id in unique_ids:
983 | # ID -1 is background/unsegmented
984 | is_background = (obj_id == -1)
985 |
986 | # Collect points for this ID across all frames
987 | obj_points = []
988 | obj_colors = []
989 | obj_confs = []
990 | obj_motions = []
991 |
992 | N = points.shape[0]
993 | for i in range(N):
994 | # Mask for this ID in this frame
995 | mask = (seg_id_map[i] == obj_id)
996 |
997 | # Also apply confidence filter
998 | if min_confidence > 0:
999 | mask = mask & (conf[i] >= min_confidence)
1000 |
1001 | if not mask.any(): continue
1002 |
1003 | p = points[i][mask]
1004 | c = images[i][mask]
1005 | cf = conf[i][mask]
1006 |
1007 | # Transform points
1008 | p_trans = p.copy()
1009 | p_trans[:, [0, 1, 2]] = p[:, [0, 2, 1]]
1010 | p_trans[:, 2] = -p_trans[:, 2]
1011 |
1012 | # Colors RGBA
1013 | c = np.hstack((c, np.ones((c.shape[0], 1))))
1014 |
1015 | obj_points.append(p_trans)
1016 | obj_colors.append(c)
1017 | obj_confs.append(cf)
1018 |
1019 | if 'motion' in d:
1020 | m = d['motion'][i][mask]
1021 | obj_motions.append(m)
1022 |
1023 | if not obj_points: continue
1024 |
1025 | # Concatenate
1026 | all_points = np.vstack(obj_points)
1027 | all_colors = np.vstack(obj_colors)
1028 | all_confs = np.concatenate(obj_confs)
1029 | all_motions = np.concatenate(obj_motions) if obj_motions else None
1030 |
1031 | # Name
1032 | if is_background:
1033 | name = "Background"
1034 | else:
1035 | cls_id = id_to_class.get(obj_id, -1)
1036 | cls_name = class_names.get(cls_id, f"Class_{cls_id}")
1037 | name = f"{cls_name}_{obj_id}"
1038 |
1039 | target_col = seg_collection if seg_collection else collection
1040 | create_point_cloud_object(name, all_points, all_colors, all_confs, all_motions, target_col)
1041 |
1042 | elif 'motion' in d:
1043 | motion = d['motion']
1044 | stationary_points = []
1045 | stationary_colors = []
1046 | stationary_confs = []
1047 | stationary_motions = []
1048 |
1049 | # Create Moving collection
1050 | moving_collection = None
1051 | if collection:
1052 | moving_collection = bpy.data.collections.new("Moving")
1053 | collection.children.link(moving_collection)
1054 |
1055 | N = points.shape[0]
1056 | for i in range(N):
1057 | p = points[i].reshape(-1, 3)
1058 | c = images[i].reshape(-1, 3)
1059 | cf = conf[i].reshape(-1)
1060 | m = motion[i].reshape(-1)
1061 |
1062 | # Transform
1063 | p_trans = p.copy()
1064 | p_trans[:, [0, 1, 2]] = p[:, [0, 2, 1]]
1065 | p_trans[:, 2] = -p_trans[:, 2]
1066 |
1067 | c = np.hstack((c, np.ones((c.shape[0], 1)))) # RGBA
1068 |
1069 | if min_confidence > 0:
1070 | mask = cf >= min_confidence
1071 | p_trans = p_trans[mask]
1072 | c = c[mask]
1073 | cf = cf[mask]
1074 | m = m[mask]
1075 |
1076 | if len(p_trans) == 0: continue
1077 |
1078 | is_moving = m > 0
1079 | is_stationary = ~is_moving
1080 |
1081 | if is_stationary.any():
1082 | stationary_points.append(p_trans[is_stationary])
1083 | stationary_colors.append(c[is_stationary])
1084 | stationary_confs.append(cf[is_stationary])
1085 | stationary_motions.append(m[is_stationary])
1086 |
1087 | if is_moving.any():
1088 | if "image_paths" in d and i < len(d["image_paths"]):
1089 | base_name = os.path.splitext(os.path.basename(d["image_paths"][i]))[0]
1090 | obj_name = f"Moving_{base_name}"
1091 | else:
1092 | obj_name = f"Moving_Frame_{i}"
1093 |
1094 | target_col = moving_collection if moving_collection else collection
1095 | obj = create_point_cloud_object(obj_name, p_trans[is_moving], c[is_moving], cf[is_moving], m[is_moving], target_col)
1096 |
1097 | # Animate Visibility
1098 | spacing = 15
1099 | duration = 15
1100 |
1101 | global_idx = global_indices[i] if global_indices is not None else i
1102 | start_frame = 1 + global_idx * spacing
1103 | end_frame = start_frame + duration
1104 |
1105 | # Ensure we start hidden
1106 | obj.hide_viewport = True
1107 | obj.hide_render = True
1108 | obj.keyframe_insert(data_path="hide_viewport", frame=0)
1109 | obj.keyframe_insert(data_path="hide_render", frame=0)
1110 |
1111 | # Show
1112 | obj.hide_viewport = False
1113 | obj.hide_render = False
1114 | obj.keyframe_insert(data_path="hide_viewport", frame=start_frame)
1115 | obj.keyframe_insert(data_path="hide_render", frame=start_frame)
1116 |
1117 | # Hide
1118 | obj.hide_viewport = True
1119 | obj.hide_render = True
1120 | obj.keyframe_insert(data_path="hide_viewport", frame=end_frame)
1121 | obj.keyframe_insert(data_path="hide_render", frame=end_frame)
1122 |
1123 | if stationary_points:
1124 | create_point_cloud_object("Points", np.vstack(stationary_points), np.vstack(stationary_colors), np.concatenate(stationary_confs), np.concatenate(stationary_motions), collection)
1125 |
1126 | else:
1127 | points_batch = points.reshape(-1, 3)
1128 | reordered_points_batch = points_batch.copy()
1129 | reordered_points_batch[:, [0, 1, 2]] = points_batch[:, [0, 2, 1]]
1130 | reordered_points_batch[:, 2] = -reordered_points_batch[:, 2]
1131 | points_batch = reordered_points_batch
1132 | colors_batch = images.reshape(-1, 3)
1133 | colors_batch = np.hstack((colors_batch, np.ones((colors_batch.shape[0], 1))))
1134 | conf_batch = conf.reshape(-1)
1135 |
1136 | # Remove points with low confidence
1137 | if min_confidence > 0:
1138 | valid_mask = conf_batch >= min_confidence
1139 | points_batch = points_batch[valid_mask]
1140 | colors_batch = colors_batch[valid_mask]
1141 | conf_batch = conf_batch[valid_mask]
1142 |
1143 | if len(conf_batch) > 0:
1144 | print(f"DEBUG confidence: min={conf_batch.min():.4f}, max={conf_batch.max():.4f}")
1145 |
1146 | create_point_cloud_object("Points", points_batch, colors_batch, conf_batch, None, collection)
1147 |
1148 | def create_cameras(predictions, collection=None, image_width=None, image_height=None):
1149 | scene = bpy.context.scene
1150 | if image_width is None or image_height is None:
1151 | H, W = predictions['images'].shape[1:3]
1152 | image_width = W
1153 | image_height = H
1154 | K0 = predictions["intrinsic"][0]
1155 | pixel_aspect_y = K0[1,1] / K0[0,0]
1156 | scene.render.pixel_aspect_x = 1.0
1157 | scene.render.pixel_aspect_y = float(pixel_aspect_y)
1158 | num_cameras = len(predictions["extrinsic"])
1159 | if len(predictions["intrinsic"]) != num_cameras:
1160 | raise ValueError("Extrinsic and intrinsic lists must have the same length")
1161 |
1162 | # Optional: get image paths from predictions, if available
1163 | image_paths = predictions.get("image_paths", None)
1164 |
1165 | # Create Cameras collection
1166 | target_collection = collection
1167 | if collection:
1168 | cameras_col = bpy.data.collections.new("Cameras")
1169 | collection.children.link(cameras_col)
1170 | target_collection = cameras_col
1171 |
1172 | T = np.diag([1.0, -1.0, -1.0, 1.0])
1173 | for i in range(num_cameras):
1174 | # Name from image file if available
1175 | if image_paths and i < len(image_paths):
1176 | import os
1177 | base_name = os.path.splitext(os.path.basename(image_paths[i]))[0]
1178 | cam_name = base_name
1179 | else:
1180 | cam_name = f"Camera_{i}"
1181 |
1182 | cam_data = bpy.data.cameras.new(name=cam_name)
1183 | K = predictions["intrinsic"][i]
1184 | f_x = K[0,0]
1185 | c_x = K[0,2]
1186 | c_y = K[1,2]
1187 | sensor_width = 36.0
1188 | cam_data.sensor_width = sensor_width
1189 | cam_data.lens = (f_x / image_width) * sensor_width
1190 | cam_data.shift_x = (c_x - image_width / 2.0) / image_width
1191 | cam_data.shift_y = (c_y - image_height / 2.0) / image_height
1192 | cam_obj = bpy.data.objects.new(name=cam_name, object_data=cam_data)
1193 |
1194 | if target_collection is not None:
1195 | target_collection.objects.link(cam_obj)
1196 | else:
1197 | scene.collection.objects.link(cam_obj)
1198 |
1199 | ext = predictions["extrinsic"][i]
1200 | E = np.vstack((ext, [0, 0, 0, 1]))
1201 | E_inv = np.linalg.inv(E)
1202 | M = np.dot(E_inv, T)
1203 | cam_obj.matrix_world = Matrix(M.tolist())
1204 | R = Matrix.Rotation(math.radians(-90), 4, 'X')
1205 | cam_obj.matrix_world = R @ cam_obj.matrix_world
1206 |
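# Worked example of the intrinsics-to-Blender conversion above (hypothetical
# numbers): with f_x = 800 px, an image width of 1600 px and the assumed 36 mm
# sensor width, the camera lens becomes (800 / 1600) * 36 = 18 mm, and a
# principal point exactly at half the width gives shift_x = 0.
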
1207 | def get_or_create_image_material(image_path):
1208 | name = os.path.basename(image_path)
1209 | mat = bpy.data.materials.get(name)
1210 | if mat is None:
1211 | mat = bpy.data.materials.new(name=name)
1212 | mat.use_nodes = True
1213 | nodes = mat.node_tree.nodes
1214 | links = mat.node_tree.links
1215 | nodes.clear()
1216 |
1217 | tex_coord = nodes.new('ShaderNodeTexCoord')
1218 | tex_coord.location = (-800, 200)
1219 |
1220 | tex_image = nodes.new('ShaderNodeTexImage')
1221 | tex_image.location = (-500, 200)
1222 | try:
1223 | # Check if image is already loaded
1224 | img_name = os.path.basename(image_path)
1225 | img = bpy.data.images.get(img_name)
1226 | if img is None:
1227 | img = bpy.data.images.load(image_path)
1228 | tex_image.image = img
1229 | except Exception as e:
1230 | print(f"Could not load image {image_path}: {e}")
1231 |
1232 | bsdf = nodes.new('ShaderNodeBsdfPrincipled')
1233 | bsdf.location = (-200, 200)
1234 | bsdf.inputs['Roughness'].default_value = 1.0
1235 | # Try to set specular to 0 to avoid shiny photos
1236 | if 'Specular IOR Level' in bsdf.inputs:
1237 | bsdf.inputs['Specular IOR Level'].default_value = 0.0
1238 | elif 'Specular' in bsdf.inputs:
1239 | bsdf.inputs['Specular'].default_value = 0.0
1240 |
1241 | output = nodes.new('ShaderNodeOutputMaterial')
1242 | output.location = (100, 200)
1243 |
1244 | links.new(tex_coord.outputs['UV'], tex_image.inputs['Vector'])
1245 | links.new(tex_image.outputs['Color'], bsdf.inputs['Base Color'])
1246 | links.new(bsdf.outputs['BSDF'], output.inputs['Surface'])
1247 | return mat
1248 |
1249 | def add_filter_mesh_modifier(obj, min_confidence):
1250 | mod = obj.modifiers.new(name="FilterMesh", type='NODES')
1251 | group_name = "FilterDepthMesh"
1252 | group = bpy.data.node_groups.get(group_name)
1253 | if not group:
1254 | group = bpy.data.node_groups.new(group_name, 'GeometryNodeTree')
1255 | group.interface.new_socket(name="Geometry", in_out="INPUT", socket_type="NodeSocketGeometry")
1256 | group.interface.new_socket(name="Geometry", in_out="OUTPUT", socket_type="NodeSocketGeometry")
1257 |
1258 | # Nodes
1259 | in_node = group.nodes.new('NodeGroupInput')
1260 | out_node = group.nodes.new('NodeGroupOutput')
1261 |
1262 | # 1. Filter by Confidence (Delete Points)
1263 | del_conf = group.nodes.new('GeometryNodeDeleteGeometry')
1264 | del_conf.domain = 'POINT'
1265 | named_attr = group.nodes.new('GeometryNodeInputNamedAttribute')
1266 | named_attr.data_type = 'FLOAT'
1267 | named_attr.inputs['Name'].default_value = "conf"
1268 | compare_conf = group.nodes.new('FunctionNodeCompare')
1269 | compare_conf.operation = 'LESS_THAN'
1270 | compare_conf.inputs['B'].default_value = min_confidence
1271 |
1272 | group.links.new(named_attr.outputs['Attribute'], compare_conf.inputs['A'])
1273 | group.links.new(compare_conf.outputs['Result'], del_conf.inputs['Selection'])
1274 |
1275 | # 2. Filter by Edge Length (Delete Edges)
1276 | del_edge = group.nodes.new('GeometryNodeDeleteGeometry')
1277 | del_edge.domain = 'EDGE'
1278 |
1279 | # Calculate Edge Length manually (Edge Length node name varies)
1280 | edge_verts = group.nodes.new('GeometryNodeInputMeshEdgeVertices')
1281 | pos = group.nodes.new('GeometryNodeInputPosition')
1282 |
1283 | sample_pos1 = group.nodes.new('GeometryNodeSampleIndex')
1284 | sample_pos1.data_type = 'FLOAT_VECTOR'
1285 | sample_pos1.domain = 'POINT'
1286 |
1287 | sample_pos2 = group.nodes.new('GeometryNodeSampleIndex')
1288 | sample_pos2.data_type = 'FLOAT_VECTOR'
1289 | sample_pos2.domain = 'POINT'
1290 |
1291 | dist = group.nodes.new('ShaderNodeVectorMath')
1292 | dist.operation = 'DISTANCE'
1293 |
1294 | compare_edge = group.nodes.new('FunctionNodeCompare')
1295 | compare_edge.operation = 'GREATER_THAN'
1296 | compare_edge.inputs['B'].default_value = 0.1 # Threshold for jump (meters)
1297 |
1298 | # Connect Geometry (from del_conf)
1299 | group.links.new(del_conf.outputs['Geometry'], sample_pos1.inputs['Geometry'])
1300 | group.links.new(del_conf.outputs['Geometry'], sample_pos2.inputs['Geometry'])
1301 |
1302 | # Connect Indices and Values
1303 | group.links.new(edge_verts.outputs['Vertex Index 1'], sample_pos1.inputs['Index'])
1304 | group.links.new(pos.outputs['Position'], sample_pos1.inputs['Value'])
1305 |
1306 | group.links.new(edge_verts.outputs['Vertex Index 2'], sample_pos2.inputs['Index'])
1307 | group.links.new(pos.outputs['Position'], sample_pos2.inputs['Value'])
1308 |
1309 | # Calculate Distance
1310 | group.links.new(sample_pos1.outputs['Value'], dist.inputs[0])
1311 | group.links.new(sample_pos2.outputs['Value'], dist.inputs[1])
1312 |
1313 | # Compare
1314 | group.links.new(dist.outputs['Value'], compare_edge.inputs['A'])
1315 | group.links.new(compare_edge.outputs['Result'], del_edge.inputs['Selection'])
1316 |
1317 | # Connect Main Flow
1318 | group.links.new(in_node.outputs['Geometry'], del_conf.inputs['Geometry'])
1319 | group.links.new(del_conf.outputs['Geometry'], del_edge.inputs['Geometry'])
1320 | group.links.new(del_edge.outputs['Geometry'], out_node.inputs['Geometry'])
1321 |
1322 | mod.node_group = group
1323 |
1324 | def import_mesh_from_depth(d, collection=None, filter_edges=True, min_confidence=0.5, global_indices=None):
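     |     """Build textured grid meshes from per-image depth predictions.
     | 
     |     Expects d["world_points_from_depth"] of shape [N, H, W, 3], d["images"]
     |     [N, H, W, 3] and d["conf"] [N, H, W]. Depending on the optional keys,
     |     meshes are split per segmented object ("seg_id_map"), into moving and
     |     stationary parts ("motion"), or created as one mesh per image. Colors,
     |     confidence and motion are stored as point attributes, and UVs map each
     |     vertex back to its source pixel.
     | 
     |     Minimal usage sketch (assuming a prediction dict d produced elsewhere in
     |     this add-on, with an illustrative collection name):
     | 
     |         col = bpy.data.collections.new("DA3_Result")
     |         bpy.context.scene.collection.children.link(col)
     |         import_mesh_from_depth(d, collection=col, min_confidence=0.5)
     |     """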
1325 | points = d["world_points_from_depth"] # [N, H, W, 3]
1326 | images = d["images"] # [N, H, W, 3]
1327 | conf = d["conf"] # [N, H, W]
1328 |
1329 | # Filter confidence based on depth gradient (Same as import_point_cloud)
1330 | if filter_edges and "depth" in d:
1331 | try:
1332 | depth = d["depth"]
1333 | for i in range(len(depth)):
1334 | dm = depth[i]
1335 | gx = cv2.Sobel(dm, cv2.CV_64F, 1, 0, ksize=3)
1336 | gy = cv2.Sobel(dm, cv2.CV_64F, 0, 1, ksize=3)
1337 | mag = np.sqrt(gx**2 + gy**2)
1338 | mn, mx = np.nanmin(mag), np.nanmax(mag)
1339 | if mx > mn:
1340 | norm = (mag - mn) / (mx - mn)
1341 | else:
1342 | norm = np.zeros_like(mag)
1343 |
1344 |                 # Zero out confidence where the normalized gradient >= 12/255 (depth edges)
1345 |                 mask = norm >= (12.0 / 255.0)
1346 |                 conf[i][mask] = 0.0  # note: writes into d["conf"] in place
1347 | except Exception as e:
1348 | print(f"Failed to filter confidence by gradient: {e}")
1349 |
1350 | N, H, W, _ = points.shape
1351 |
1352 | # Generate grid faces once (shared for all images in batch)
1353 | # Grid indices: (r, c) -> r*W + c
1354 | # Quad: (r, c), (r, c+1), (r+1, c+1), (r+1, c)
1355 | r = np.arange(H - 1)
1356 | c = np.arange(W - 1)
1357 | rr, cc = np.meshgrid(r, c, indexing='ij')
1358 | v0 = rr * W + cc
1359 | v1 = rr * W + (cc + 1)
1360 | v2 = (rr + 1) * W + (cc + 1)
1361 | v3 = (rr + 1) * W + cc
1362 | # Blender expects counter-clockwise winding for front faces
1363 | faces = np.stack([v0, v1, v2, v3], axis=-1).reshape(-1, 4)
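     |     # Example with W = 3: the quad at (r, c) = (0, 0) uses vertices
     |     # 0, 1, 4, 3, matching the v0..v3 order above.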
1364 |
1365 | # Generate UVs once
1366 | u_coords = np.linspace(0, 1, W, dtype=np.float32)
1367 | v_coords = np.linspace(1, 0, H, dtype=np.float32) # Top is 1, Bottom is 0
1368 | uu, vv = np.meshgrid(u_coords, v_coords)
1369 | uvs = np.stack([uu, vv], axis=-1).reshape(-1, 2)
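     |     # Vertex (r, c) gets UV (c / (W - 1), 1 - r / (H - 1)), so the image texture
     |     # maps onto the grid without any flipping.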
1370 |
1371 | if 'seg_id_map' in d:
1372 | seg_id_map = d['seg_id_map']
1373 | id_to_class = d.get('id_to_class', {})
1374 | class_names = d.get('class_names', {})
1375 |
1376 | # Create Segmented collection
1377 | seg_collection = None
1378 | obj_collections = {} # Cache for object collections
1379 |
1380 | if collection:
1381 | seg_collection = bpy.data.collections.new("Segmented_Meshes")
1382 | collection.children.link(seg_collection)
1383 |
1384 | for i in range(N):
1385 | # Prepare data for this image
1386 | pts = points[i].reshape(-1, 3)
1387 | # Apply the same coordinate transform as import_point_cloud
1388 | pts_transformed = pts.copy()
1389 | pts_transformed[:, [0, 1, 2]] = pts[:, [0, 2, 1]]
1390 | pts_transformed[:, 2] = -pts_transformed[:, 2]
1391 |
1392 | cols = images[i].reshape(-1, 3)
1393 | cols = np.hstack((cols, np.ones((cols.shape[0], 1)))) # RGBA
1394 | confs = conf[i].reshape(-1)
1395 |
1396 | motion_vals = None
1397 | if 'motion' in d:
1398 | motion_vals = d['motion'][i].reshape(-1)
1399 |
1400 | # Flatten IDs for this frame
1401 | frame_ids = seg_id_map[i].flatten()
1402 | unique_frame_ids = np.unique(frame_ids)
1403 |
1404 | for obj_id in unique_frame_ids:
1405 | # Mask vertices
1406 | vert_mask = (frame_ids == obj_id)
1407 |
1408 | # Mask faces (strict inclusion)
1409 | face_mask = vert_mask[faces[:, 0]] & vert_mask[faces[:, 1]] & vert_mask[faces[:, 2]] & vert_mask[faces[:, 3]]
1410 |
1411 | if not face_mask.any():
1412 | continue
1413 |
1414 | # Extract sub-mesh
1415 | sub_faces = faces[face_mask]
1416 | unique_v = np.unique(sub_faces)
1417 |
1418 |                 remap = np.full(len(pts_transformed), -1, dtype=np.int32)  # old vertex index -> compact index (-1 = unused)
1419 | remap[unique_v] = np.arange(len(unique_v))
1420 |
1421 | new_faces = remap[sub_faces]
1422 | new_pts = pts_transformed[unique_v]
1423 | new_cols = cols[unique_v]
1424 | new_confs = confs[unique_v]
1425 |                 new_uvs = uvs[unique_v]
1426 |
1427 | # Naming
1428 | if obj_id == -1:
1429 | base_obj_name = "Background"
1430 | else:
1431 | cls_id = id_to_class.get(obj_id, -1)
1432 | cls_name = class_names.get(cls_id, f"Class_{cls_id}")
1433 | base_obj_name = f"{cls_name}_{obj_id}"
1434 |
1435 | if "image_paths" in d and i < len(d["image_paths"]):
1436 | base_name = os.path.splitext(os.path.basename(d["image_paths"][i]))[0]
1437 | mesh_name = f"{base_name}_{base_obj_name}"
1438 | else:
1439 | mesh_name = f"Mesh_{i}_{base_obj_name}"
1440 |
1441 | # Create Mesh object
1442 | mesh = bpy.data.meshes.new(name=mesh_name)
1443 | mesh.from_pydata(new_pts.tolist(), [], new_faces.tolist())
1444 |
1445 | # UVs
1446 | uv_layer = mesh.uv_layers.new(name="UVMap")
1447 | loop_vert_indices = np.zeros(len(mesh.loops), dtype=np.int32)
1448 | mesh.loops.foreach_get("vertex_index", loop_vert_indices)
1449 | loop_uvs = new_uvs[loop_vert_indices]
1450 | uv_layer.data.foreach_set("uv", loop_uvs.flatten())
1451 |
1452 | # Attributes
1453 | col_attr = mesh.attributes.new(name="point_color", type="FLOAT_COLOR", domain="POINT")
1454 | col_attr.data.foreach_set("color", new_cols.flatten())
1455 | conf_attr = mesh.attributes.new(name="conf", type="FLOAT", domain="POINT")
1456 | conf_attr.data.foreach_set("value", new_confs)
1457 |
1458 | if motion_vals is not None:
1459 | new_motion = motion_vals[unique_v]
1460 | motion_attr = mesh.attributes.new(name="motion", type="FLOAT", domain="POINT")
1461 | motion_attr.data.foreach_set("value", new_motion)
1462 |
1463 | obj = bpy.data.objects.new(mesh_name, mesh)
1464 |
1465 | # Determine target collection
1466 | target_col = collection
1467 | if seg_collection:
1468 | if base_obj_name not in obj_collections:
1469 | new_col = bpy.data.collections.new(base_obj_name)
1470 | seg_collection.children.link(new_col)
1471 | obj_collections[base_obj_name] = new_col
1472 | target_col = obj_collections[base_obj_name]
1473 |
1474 |                 (target_col or bpy.context.collection).objects.link(obj)  # fall back to the active collection if none was provided
1475 |
1476 | # Material (Image)
1477 | if "image_paths" in d:
1478 | img_path = d["image_paths"][i]
1479 | mat = get_or_create_image_material(img_path)
1480 | else:
1481 | mat = get_or_create_point_material()
1482 | obj.data.materials.append(mat)
1483 |
1484 | if filter_edges:
1485 | add_filter_mesh_modifier(obj, min_confidence)
1486 |
1487 | elif 'motion' in d:
1488 | motion = d['motion']
1489 |
1490 | # Create Moving collection
1491 | moving_collection = None
1492 | if collection:
1493 | moving_collection = bpy.data.collections.new("Moving")
1494 | collection.children.link(moving_collection)
1495 |
1496 | for i in range(N):
1497 | # Prepare data for this image
1498 | pts = points[i].reshape(-1, 3)
1499 | # Apply the same coordinate transform as import_point_cloud
1500 | pts_transformed = pts.copy()
1501 | pts_transformed[:, [0, 1, 2]] = pts[:, [0, 2, 1]]
1502 | pts_transformed[:, 2] = -pts_transformed[:, 2]
1503 |
1504 | cols = images[i].reshape(-1, 3)
1505 | cols = np.hstack((cols, np.ones((cols.shape[0], 1)))) # RGBA
1506 | confs = conf[i].reshape(-1)
1507 | m = motion[i].reshape(-1)
1508 |
1509 | is_moving = m > 0
1510 |
1511 | # --- Moving Mesh ---
1512 | # Face is moving if ANY vertex is moving
1513 | face_moving_mask = is_moving[faces[:, 0]] | is_moving[faces[:, 1]] | is_moving[faces[:, 2]] | is_moving[faces[:, 3]]
1514 |
1515 | if face_moving_mask.any():
1516 | sub_faces = faces[face_moving_mask]
1517 | unique_v = np.unique(sub_faces)
1518 |
1519 |                 remap = np.full(len(pts_transformed), -1, dtype=np.int32)
1520 | remap[unique_v] = np.arange(len(unique_v))
1521 |
1522 | new_faces = remap[sub_faces]
1523 | new_pts = pts_transformed[unique_v]
1524 | new_cols = cols[unique_v]
1525 | new_confs = confs[unique_v]
1526 |                 new_uvs = uvs[unique_v]
1527 | new_motion = m[unique_v]
1528 |
1529 | if "image_paths" in d and i < len(d["image_paths"]):
1530 | base_name = os.path.splitext(os.path.basename(d["image_paths"][i]))[0]
1531 | mesh_name = f"Moving_Mesh_{base_name}"
1532 | else:
1533 | mesh_name = f"Moving_Mesh_{i}"
1534 |
1535 | mesh = bpy.data.meshes.new(name=mesh_name)
1536 | mesh.from_pydata(new_pts.tolist(), [], new_faces.tolist())
1537 |
1538 | # UVs
1539 | uv_layer = mesh.uv_layers.new(name="UVMap")
1540 | loop_vert_indices = np.zeros(len(mesh.loops), dtype=np.int32)
1541 | mesh.loops.foreach_get("vertex_index", loop_vert_indices)
1542 | loop_uvs = new_uvs[loop_vert_indices]
1543 | uv_layer.data.foreach_set("uv", loop_uvs.flatten())
1544 |
1545 | # Attributes
1546 | col_attr = mesh.attributes.new(name="point_color", type="FLOAT_COLOR", domain="POINT")
1547 | col_attr.data.foreach_set("color", new_cols.flatten())
1548 | conf_attr = mesh.attributes.new(name="conf", type="FLOAT", domain="POINT")
1549 | conf_attr.data.foreach_set("value", new_confs)
1550 | motion_attr = mesh.attributes.new(name="motion", type="FLOAT", domain="POINT")
1551 | motion_attr.data.foreach_set("value", new_motion)
1552 |
1553 |                 target_col = moving_collection or collection or bpy.context.collection
1554 | obj = bpy.data.objects.new(mesh_name, mesh)
1555 | target_col.objects.link(obj)
1556 |
1557 | # Material (Image)
1558 | if "image_paths" in d:
1559 | img_path = d["image_paths"][i]
1560 | mat = get_or_create_image_material(img_path)
1561 | else:
1562 | mat = get_or_create_point_material()
1563 | obj.data.materials.append(mat)
1564 |
1565 | if filter_edges:
1566 | add_filter_mesh_modifier(obj, min_confidence)
1567 |
1568 |                 # Animation: keep this moving mesh visible only during its 15-frame window on the timeline
1569 | spacing = 15
1570 | duration = 15
1571 | global_idx = global_indices[i] if global_indices is not None else i
1572 | start_frame = 1 + global_idx * spacing
1573 | end_frame = start_frame + duration
1574 |
1575 | obj.hide_viewport = True
1576 | obj.hide_render = True
1577 | obj.keyframe_insert(data_path="hide_viewport", frame=0)
1578 | obj.keyframe_insert(data_path="hide_render", frame=0)
1579 |
1580 | obj.hide_viewport = False
1581 | obj.hide_render = False
1582 | obj.keyframe_insert(data_path="hide_viewport", frame=start_frame)
1583 | obj.keyframe_insert(data_path="hide_render", frame=start_frame)
1584 |
1585 | obj.hide_viewport = True
1586 | obj.hide_render = True
1587 | obj.keyframe_insert(data_path="hide_viewport", frame=end_frame)
1588 | obj.keyframe_insert(data_path="hide_render", frame=end_frame)
1589 |
1590 | # --- Stationary Mesh ---
1591 | # Face is stationary if ALL vertices are stationary (NOT moving)
1592 | # This ensures no overlap with Moving mesh faces
1593 | face_stationary_mask = ~face_moving_mask
1594 |
1595 | if face_stationary_mask.any():
1596 | sub_faces = faces[face_stationary_mask]
1597 | unique_v = np.unique(sub_faces)
1598 |
1599 |                 remap = np.full(len(pts_transformed), -1, dtype=np.int32)
1600 | remap[unique_v] = np.arange(len(unique_v))
1601 |
1602 | new_faces = remap[sub_faces]
1603 | new_pts = pts_transformed[unique_v]
1604 | new_cols = cols[unique_v]
1605 | new_confs = confs[unique_v]
1606 |                 new_uvs = uvs[unique_v]
1607 |
1608 | if "image_paths" in d and i < len(d["image_paths"]):
1609 | base_name = os.path.splitext(os.path.basename(d["image_paths"][i]))[0]
1610 | mesh_name = f"Mesh_{base_name}"
1611 | else:
1612 | mesh_name = f"Mesh_Img_{i}"
1613 |
1614 | mesh = bpy.data.meshes.new(name=mesh_name)
1615 | mesh.from_pydata(new_pts.tolist(), [], new_faces.tolist())
1616 |
1617 | # UVs
1618 | uv_layer = mesh.uv_layers.new(name="UVMap")
1619 | loop_vert_indices = np.zeros(len(mesh.loops), dtype=np.int32)
1620 | mesh.loops.foreach_get("vertex_index", loop_vert_indices)
1621 | loop_uvs = new_uvs[loop_vert_indices]
1622 | uv_layer.data.foreach_set("uv", loop_uvs.flatten())
1623 |
1624 | # Attributes
1625 | col_attr = mesh.attributes.new(name="point_color", type="FLOAT_COLOR", domain="POINT")
1626 | col_attr.data.foreach_set("color", new_cols.flatten())
1627 | conf_attr = mesh.attributes.new(name="conf", type="FLOAT", domain="POINT")
1628 | conf_attr.data.foreach_set("value", new_confs)
1629 |
1630 | obj = bpy.data.objects.new(mesh_name, mesh)
1631 | if collection:
1632 | collection.objects.link(obj)
1633 | else:
1634 | bpy.context.collection.objects.link(obj)
1635 |
1636 | # Material (Image)
1637 | if "image_paths" in d:
1638 | img_path = d["image_paths"][i]
1639 | mat = get_or_create_image_material(img_path)
1640 | else:
1641 | mat = get_or_create_point_material()
1642 | obj.data.materials.append(mat)
1643 |
1644 | if filter_edges:
1645 | add_filter_mesh_modifier(obj, min_confidence)
1646 |
1647 | else:
1648 | for i in range(N):
1649 | # Prepare data for this image
1650 | pts = points[i].reshape(-1, 3)
1651 | # Apply the same coordinate transform as import_point_cloud
1652 | pts_transformed = pts.copy()
1653 | pts_transformed[:, [0, 1, 2]] = pts[:, [0, 2, 1]]
1654 | pts_transformed[:, 2] = -pts_transformed[:, 2]
1655 |
1656 | cols = images[i].reshape(-1, 3)
1657 | cols = np.hstack((cols, np.ones((cols.shape[0], 1)))) # RGBA
1658 | confs = conf[i].reshape(-1)
1659 |
1660 | motion_vals = None
1661 | if 'motion' in d:
1662 | motion_vals = d['motion'][i].reshape(-1)
1663 |
1664 | # Create Mesh
1665 | if "image_paths" in d and i < len(d["image_paths"]):
1666 | base_name = os.path.splitext(os.path.basename(d["image_paths"][i]))[0]
1667 | mesh_name = f"Mesh_{base_name}"
1668 | else:
1669 | mesh_name = f"Mesh_Img_{i}"
1670 |
1671 | mesh = bpy.data.meshes.new(name=mesh_name)
1672 | mesh.from_pydata(pts_transformed.tolist(), [], faces.tolist())
1673 |
1674 | # Add UVs
1675 | uv_layer = mesh.uv_layers.new(name="UVMap")
1676 | loop_vert_indices = np.zeros(len(mesh.loops), dtype=np.int32)
1677 | mesh.loops.foreach_get("vertex_index", loop_vert_indices)
1678 | loop_uvs = uvs[loop_vert_indices]
1679 | uv_layer.data.foreach_set("uv", loop_uvs.flatten())
1680 |
1681 | # Add Attributes
1682 | # Color
1683 | col_attr = mesh.attributes.new(name="point_color", type="FLOAT_COLOR", domain="POINT")
1684 | col_attr.data.foreach_set("color", cols.flatten())
1685 |
1686 | # Confidence
1687 | conf_attr = mesh.attributes.new(name="conf", type="FLOAT", domain="POINT")
1688 | conf_attr.data.foreach_set("value", confs)
1689 |
1690 | # Motion
1691 | if motion_vals is not None:
1692 | motion_attr = mesh.attributes.new(name="motion", type="FLOAT", domain="POINT")
1693 | motion_attr.data.foreach_set("value", motion_vals)
1694 |
1695 | obj = bpy.data.objects.new(mesh_name, mesh)
1696 | if collection:
1697 | collection.objects.link(obj)
1698 | else:
1699 | bpy.context.collection.objects.link(obj)
1700 |
1701 | # Add Material
1702 | if "image_paths" in d:
1703 | img_path = d["image_paths"][i]
1704 | mat = get_or_create_image_material(img_path)
1705 | else:
1706 | mat = get_or_create_point_material()
1707 | obj.data.materials.append(mat)
1708 |
1709 | # Add Geometry Nodes to filter stretched edges and low confidence
1710 | if filter_edges:
1711 | add_filter_mesh_modifier(obj, min_confidence)
1712 |
1713 |
--------------------------------------------------------------------------------