├── models └── .gitkeep ├── sample_images ├── dog │ ├── 0.jpg │ ├── 87.jpg │ ├── 180.jpg │ └── 249.jpg └── ramen │ └── image.jpg ├── requirements_for_check.txt ├── requirements.txt ├── panels.py ├── README.md ├── .gitignore ├── dependencies.py ├── __init__.py ├── operators.py └── utils.py /models/.gitkeep: -------------------------------------------------------------------------------- 1 | VGGT model will be downloaded here. -------------------------------------------------------------------------------- /sample_images/dog/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xy-gao/DA3-blender/HEAD/sample_images/dog/0.jpg -------------------------------------------------------------------------------- /sample_images/dog/87.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xy-gao/DA3-blender/HEAD/sample_images/dog/87.jpg -------------------------------------------------------------------------------- /sample_images/dog/180.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xy-gao/DA3-blender/HEAD/sample_images/dog/180.jpg -------------------------------------------------------------------------------- /sample_images/dog/249.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xy-gao/DA3-blender/HEAD/sample_images/dog/249.jpg -------------------------------------------------------------------------------- /sample_images/ramen/image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xy-gao/DA3-blender/HEAD/sample_images/ramen/image.jpg -------------------------------------------------------------------------------- /requirements_for_check.txt: -------------------------------------------------------------------------------- 1 | torch==2.5.1 2 | torchvision==0.20.1 3 | numpy==1.26.4 4 | pre-commit 5 | trimesh 6 | einops 7 | huggingface_hub 8 | imageio 9 | opencv-python 10 | # xformers 11 | open3d 12 | fastapi 13 | unicorn 14 | requests 15 | typer 16 | pillow 17 | antlr4-python3-runtime==4.9.2 # required by omegaconf 2.3.0, without this line installing antlr4-python3-runtime failed for me, Google Gemini claims it's a known issue in 4.9.3 on Windows 18 | omegaconf<2.4 # when omegaconf 2.4.0 releases, it will require antlr4-python3-runtime 4.11 19 | # evo 20 | e3nn 21 | moviepy==1.0.3 22 | plyfile 23 | pillow_heif 24 | safetensors 25 | addict 26 | pycolmap 27 | ultralytics # for YOLO image segmentation 28 | lapx>=0.5.5 # lap is required by ultralytics, but when lap attempted to automatically install, it installed to the wrong python environment 29 | # depth_anything_3 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://download.pytorch.org/whl/cu121 2 | torch==2.5.1 3 | torchvision==0.20.1 4 | numpy==1.26.4 5 | pre-commit 6 | trimesh 7 | einops 8 | huggingface_hub 9 | imageio 10 | opencv-python 11 | # xformers 12 | open3d 13 | fastapi 14 | unicorn 15 | requests 16 | typer 17 | pillow 18 | antlr4-python3-runtime==4.9.2 # required by omegaconf 2.3.0, without this line installing antlr4-python3-runtime failed for me, Google Gemini claims it's a known issue in 4.9.3 on Windows 19 | omegaconf<2.4 # when 
omegaconf 2.4.0 releases, it will require antlr4-python3-runtime 4.11 20 | evo 21 | e3nn 22 | moviepy==1.0.3 23 | plyfile 24 | pillow_heif 25 | safetensors 26 | addict 27 | pycolmap 28 | ultralytics # for YOLO image segmentation 29 | lapx>=0.5.5 # lap is required by ultralytics, but when lap attempted to automatically install, it installed to the wrong python environment -------------------------------------------------------------------------------- /panels.py: -------------------------------------------------------------------------------- 1 | import bpy 2 | from .operators import get_model_path 3 | import os 4 | 5 | class DA3Panel(bpy.types.Panel): 6 | bl_label = "DA3" 7 | bl_idname = "VIEW3D_PT_da3" 8 | bl_space_type = 'VIEW_3D' 9 | bl_region_type = 'UI' 10 | bl_category = "DA3" 11 | 12 | def draw(self, context): 13 | layout = self.layout 14 | scene = context.scene 15 | 16 | # Model selection dropdown 17 | layout.prop(scene, "da3_model_name", text="Model") 18 | 19 | # Download button or status 20 | model_path = get_model_path(scene.da3_model_name) 21 | row = layout.row() 22 | if os.path.exists(model_path): 23 | row.label(text=f"Model {scene.da3_model_name} ready") 24 | else: 25 | row.operator("da3.download_model", text=f"Download {scene.da3_model_name}") 26 | 27 | # Metric model checkbox and download button/status 28 | if scene.da3_model_name != "da3nested-giant-large": 29 | layout.prop(scene, "da3_use_metric", text="Use Metric") 30 | if scene.da3_use_metric: 31 | # Metric combination mode 32 | layout.prop(scene, "da3_metric_mode", text="Metric Mode") 33 | 34 | metric_model_name = "da3metric-large" 35 | metric_model_path = get_model_path(metric_model_name) 36 | row = layout.row() 37 | if os.path.exists(metric_model_path): 38 | row.label(text=f"Metric model {metric_model_name} ready") 39 | else: 40 | op = row.operator("da3.download_model", text="Download Metric Model") 41 | op.da3_override_model_name = metric_model_name 42 | 43 | layout.prop(scene, "da3_input_folder", text="Input Folder") 44 | layout.prop(scene, "da3_process_res", text="Process Resolution") 45 | layout.prop(scene, "da3_process_res_method", text="Resize Method") 46 | layout.prop(scene, "da3_batch_mode", text="Batch Mode") 47 | if scene.da3_batch_mode != "ignore_batch_size": 48 | layout.prop(scene, "da3_batch_size", text="Batch Size") 49 | layout.prop(scene, "da3_use_ray_pose", text="Use Ray-based Pose") 50 | layout.prop(scene, "da3_use_half_precision", text="Use Half Precision") 51 | layout.prop(scene, "da3_filter_edges", text="Filter Edges") 52 | layout.prop(scene, "da3_min_confidence", text="Min Confidence") 53 | layout.prop(scene, "da3_detect_motion", text="Detect Motion") 54 | if scene.da3_detect_motion: 55 | layout.prop(scene, "da3_motion_threshold", text="Motion Threshold") 56 | 57 | layout.prop(scene, "da3_use_segmentation") 58 | if scene.da3_use_segmentation: 59 | layout.prop(scene, "da3_segmentation_model") 60 | layout.prop(scene, "da3_segmentation_conf") 61 | 62 | layout.separator() 63 | 64 | layout.prop(scene, "da3_generate_mesh", text="Generate Meshes") 65 | layout.prop(scene, "da3_output_debug_images", text="Output Debug Images") 66 | row = layout.row() 67 | row.operator("da3.generate_point_cloud") 68 | row = layout.row() 69 | row.operator("da3.unload_model") -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DA3-blender 2 | Blender addon for Depth-Anything-3 3D reconstruction 3 | 
4 | Input an image folder containing one or more images, and you will get a point cloud as geometry nodes with a material. 5 | 6 | This Blender addon is based on [Depth-Anything-3](https://github.com/ByteDance-Seed/Depth-Anything-3). 7 | 8 | ## Usage 9 | 1. Download a Depth-Anything-3 model from the operator panel (press N to toggle the Sidebar, then click the DA3 tab). 10 | 2. Select an image folder. 11 | 3. Generate. 12 | 13 | https://github.com/user-attachments/assets/6eeff6d0-a89f-4c2c-970b-47fe2b5475d3 14 | 15 | 16 | ## Installation (only the first time) 17 | 1. Download the ZIP from this GitHub repo (but don't extract it). 18 | 2. In Blender, toggle the System Console (Window > Toggle System Console) to follow the installation logs. 19 | 3. Install the addon in Blender Preferences (Edit > Preferences > Add-ons) using "Install from Disk" (the v button in the top right corner) and select the downloaded ZIP. 20 | 4. Wait for the Depth-Anything-3 repository to be cloned and the Python dependencies to be installed. 21 | 5. After the addon is activated, download a Depth-Anything-3 model from the operator panel (press N to toggle the Sidebar, then click the DA3 tab). 22 | 23 | 517991309-15df7535-7177-4d9f-9a25-3dc3d6990ee4 24 | 25 | 26 | 517991330-436125db-a8ee-4c7f-a84b-b5b18ad6ef86 27 | 28 | ## Advanced usage 29 | - There are several **Model**s to choose from. **DA3 Large** is recommended unless you have a lot of VRAM. 30 | - The scale will be very small by default. Check **Use Metric** to use the DA3 Metric model to help scale the scene to approximately life-size. This is twice as slow, and you will have to click the button to download the Metric model if you haven't already. 31 | - The default resolution is only 504x504 (or less for non-square images). You can change the resolution of the longest side in the **Process Resolution** box, but it must be a multiple of 14. If you don't know your 14 times table, Blender supports typing maths in the box, e.g. `504+14`. Higher resolutions use a lot more VRAM and will fail. 32 | - If you want to specify the resolution of the shortest side instead of the longest side, select **Lower Bound Resize** from the drop-down box. That will massively increase VRAM usage and is not recommended. 33 | - There is a limit to how many images the model can process at once without crashing, determined by VRAM. For 4GB of VRAM at 504x280, the limit is 10. Set the **Batch Size** to whatever the limit is for your graphics card (found by experimenting). Set the **Batch Mode** to control how more images than that are handled. **Skip Frames** will choose that many evenly spaced images from the folder for a single batch (see the snippet after this list), and is the only mode with good alignment. **Last Frame Overlap** will process the images in batches, using one frame of overlap to align the batches. **First-Last Overlap** will use two frames of overlap for better alignment (in theory). **Ignore Batch Size** will try to process all the images at once and risks crashing. 34 | - **Use Ray-based Pose** will use a slower, more precise method of aligning cameras within a batch, but doesn't help alignment between batches. 35 | - **Use Half Precision** will run the model in mixed precision (mostly 16-bit), reducing the VRAM used to run the model (the model weights themselves still use the same VRAM) and making it faster, with only a slight loss of precision. 36 | - **Generate Meshes** will create a separate textured mesh for each image instead of a single point cloud. The meshes use the original full-resolution image as a texture. You will end up with many meshes layered on top of each other, which you will need to clean up manually if you want to use them. The addon makes no attempt to combine them into a single mesh yet. 37 | - **Detect Motion** will detect moving objects that are present in one frame but absent in another frame where they should be visible. It then puts the moving objects into their own point clouds and animates them. Press Play in the animation panel to watch. Static geometry from all frames will always be visible. You may need to manually increase the length of the scene's animation. Detect Motion doesn't work well on feet or on objects that are near other objects. It isn't optimised, so it may have problems with large numbers of frames. 38 | - Click **Unload Model** after you have finished to free VRAM for other things; otherwise the model will stay in VRAM. 39 | - To view the confidence of each point in the point cloud, select the point cloud, then click on the **Shading** tab at the top of the screen. In the node editor, change the **Factor** of the yellow **Mix** node to `1.0` (or something between 0 and 1) to show the confidence of each point instead of the colour. 40 | - To change the size of each point, select the point cloud, then click on the **Geometry Nodes** tab at the top of the screen. In the node editor, change the **Radius** of the green **Mesh to Points** node to the desired size. 41 | - To hide points below a certain confidence level, select the point cloud, then click on the blue spanner icon in the bottom-right column of icons, and set **Threshold** to a value between 1 and about 30. Setting it to `2.0` will filter out almost all the noise, but also some of the background. 42 | - To view the scene from one of the cameras, select the camera, move the mouse over the 3D View, and press Ctrl+Numpad0.
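
For reference, **Skip Frames** mode boils down to an evenly spaced index pick over the sorted image list, mirroring the selection logic in `operators.py`. Here is a minimal sketch; the folder path and batch size are placeholder values:

```python
import glob
import os

import numpy as np

input_folder = "/path/to/images"  # placeholder: the folder chosen in the panel
batch_size = 10                   # placeholder: the Batch Size set in the panel

# Collect images the same way the operator does (sorted .jpg/.png files)
image_paths = sorted(glob.glob(os.path.join(input_folder, "*.[jJpP][pPnN][gG]")))

if len(image_paths) > batch_size:
    # Choose batch_size evenly spaced frames; the first and last image are always kept
    indices = np.linspace(0, len(image_paths) - 1, batch_size, dtype=int)
    image_paths = [image_paths[i] for i in indices]
```

The overlap modes instead walk through the full list in windows of Batch Size, reusing one (Last Frame Overlap) or two (First-Last Overlap) frames from the previous window so consecutive batches can be aligned.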
43 | 44 | ## Tested on 45 | - Win10, Win11 46 | - Blender 4.2, 4.4 47 | - CUDA 12.6 48 | - 4GB VRAM GTX 970 49 | 50 | Also tested on Ubuntu 25.10, Blender 5.0, and CUDA 13.0: https://github.com/xy-gao/DA3-blender/issues/1#issue-3652866452 51 | 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[codz] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | #poetry.toml 110 | 111 | # pdm 112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 113 | # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. 114 | # https://pdm-project.org/en/latest/usage/project/#working-with-version-control 115 | #pdm.lock 116 | #pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # pixi 121 | # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. 122 | #pixi.lock 123 | # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one 124 | # in the .venv directory. It is recommended not to include this directory in version control. 125 | .pixi 126 | 127 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 128 | __pypackages__/ 129 | 130 | # Celery stuff 131 | celerybeat-schedule 132 | celerybeat.pid 133 | 134 | # SageMath parsed files 135 | *.sage.py 136 | 137 | # Environments 138 | .env 139 | .envrc 140 | .venv 141 | env/ 142 | venv/ 143 | ENV/ 144 | env.bak/ 145 | venv.bak/ 146 | 147 | # Spyder project settings 148 | .spyderproject 149 | .spyproject 150 | 151 | # Rope project settings 152 | .ropeproject 153 | 154 | # mkdocs documentation 155 | /site 156 | 157 | # mypy 158 | .mypy_cache/ 159 | .dmypy.json 160 | dmypy.json 161 | 162 | # Pyre type checker 163 | .pyre/ 164 | 165 | # pytype static type analyzer 166 | .pytype/ 167 | 168 | # Cython debug symbols 169 | cython_debug/ 170 | 171 | # PyCharm 172 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 173 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 174 | # and can be added to the global gitignore or merged into this file. For a more nuclear 175 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 176 | #.idea/ 177 | 178 | # Abstra 179 | # Abstra is an AI-powered process automation framework. 180 | # Ignore directories containing user credentials, local state, and settings. 181 | # Learn more at https://abstra.io/docs 182 | .abstra/ 183 | 184 | # Visual Studio Code 185 | # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 186 | # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore 187 | # and can be added to the global gitignore or merged into this file. However, if you prefer, 188 | # you could uncomment the following to ignore the entire vscode folder 189 | # .vscode/ 190 | 191 | # Ruff stuff: 192 | .ruff_cache/ 193 | 194 | # PyPI configuration file 195 | .pypirc 196 | 197 | # Cursor 198 | # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to 199 | # exclude from AI features like autocomplete and code analysis. 
Recommended for sensitive data 200 | # refer to https://docs.cursor.com/context/ignore-files 201 | .cursorignore 202 | .cursorindexingignore 203 | 204 | # Marimo 205 | marimo/_static/ 206 | marimo/_lsp/ 207 | __marimo__/ 208 | 209 | # Our Blender add-on generated files 210 | /da3_repo 211 | /deps_public 212 | /deps_da3 213 | /models/*.safetensors 214 | debug_output/ 215 | /models/*.pt 216 | -------------------------------------------------------------------------------- /dependencies.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pkg_resources 3 | import subprocess 4 | import sys 5 | from pathlib import Path 6 | 7 | 8 | add_on_path = Path(__file__).parent # assuming this file is at root of add-on 9 | os.environ["ADDON_PATH"] = str(add_on_path) 10 | requirements_txt = add_on_path / 'requirements.txt' # assuming requirements.txt is at root of add-on 11 | requirements_for_check_txt = add_on_path / 'requirements_for_check.txt' # assuming requirements.txt is at root of add-on 12 | DA3_DIR = add_on_path / "da3_repo" 13 | 14 | deps_path = add_on_path / 'deps_public' # might not exist until install_deps is called 15 | deps_path_da3 = add_on_path / 'deps_da3' 16 | # Append dependencies folder to system path so we can import 17 | # (important for Windows machines, but less so for Linux) 18 | sys.path.insert(0, os.fspath(deps_path)) 19 | sys.path.insert(0, os.fspath(deps_path_da3)) 20 | sys.path.insert(0, os.fspath(DA3_DIR)) 21 | 22 | 23 | class Dependencies: 24 | # cache variables used to eliminate unnecessary computations 25 | _checked = None 26 | _requirements = None 27 | 28 | @staticmethod 29 | def install(): 30 | if Dependencies.check(): 31 | return True 32 | 33 | # Create folder into which pip will install dependencies 34 | if not os.path.exists(DA3_DIR): 35 | try: 36 | subprocess.check_call(['git', 'clone', 'https://github.com/ByteDance-Seed/Depth-Anything-3.git', DA3_DIR]) 37 | except subprocess.CalledProcessError as e: 38 | print(f'Caught Exception while trying to git clone da3') 39 | print(f' Exception: {e}') 40 | return False 41 | 42 | try: 43 | deps_path.mkdir(exist_ok=True) 44 | except Exception as e: 45 | print(f'Caught Exception while trying to create dependencies folder') 46 | print(f' Exception: {e}') 47 | print(f' Folder: {deps_path}') 48 | return False 49 | try: 50 | deps_path_da3.mkdir(exist_ok=True) 51 | except Exception as e: 52 | print(f'Caught Exception while trying to create dependencies folder') 53 | print(f' Exception: {e}') 54 | print(f' Folder: {deps_path_da3}') 55 | return False 56 | # Ensure pip is installed 57 | try: 58 | subprocess.check_call([sys.executable, "-m", "ensurepip", "--upgrade"]) 59 | except subprocess.CalledProcessError as e: 60 | print(f'Caught CalledProcessError while trying to ensure pip is installed') 61 | print(f' Exception: {e}') 62 | print(f' {sys.executable=}') 63 | return False 64 | 65 | # Install dependencies from requirements.txt 66 | try: 67 | cmd = [ 68 | sys.executable, 69 | "-m", 70 | "pip", 71 | "install", 72 | "-r", 73 | os.fspath(requirements_txt), 74 | "--target", 75 | os.fspath(deps_path) 76 | ] 77 | print(f'Installing: {cmd}') 78 | subprocess.check_call(cmd) 79 | except subprocess.CalledProcessError as e: 80 | print(f'Caught CalledProcessError while trying to install dependencies') 81 | print(f' Exception: {e}') 82 | print(f' Requirements: {requirements_txt}') 83 | print(f' Folder: {deps_path}') 84 | return False 85 | # Install dependencies from requirements.txt 86 | 87 | 
try: 88 | cmd = [ 89 | sys.executable, 90 | "-m", 91 | "pip", 92 | "install", 93 | "--no-deps", 94 | os.fspath(DA3_DIR), 95 | "--target", 96 | os.fspath(deps_path_da3) 97 | ] 98 | print(f'Installing: {cmd}') 99 | subprocess.check_call(cmd) 100 | except subprocess.CalledProcessError as e: 101 | print(f'Caught CalledProcessError while trying to install DA3') 102 | print(f' Exception: {e}') 103 | print(f' Requirements: {DA3_DIR}') 104 | return False 105 | return Dependencies.check(force=True) 106 | 107 | @staticmethod 108 | def check(*, force=False): 109 | if force: 110 | Dependencies._checked = None 111 | elif Dependencies._checked is not None: 112 | # Assume everything is installed 113 | return Dependencies._checked 114 | 115 | Dependencies._checked = False 116 | 117 | if deps_path.exists() and os.path.exists(DA3_DIR): 118 | try: 119 | # Ensure all required dependencies are installed in dependencies folder 120 | ws = pkg_resources.WorkingSet(entries=[ os.fspath(deps_path) ]) 121 | for dep in Dependencies.requirements(force=force): 122 | ws.require(dep) 123 | 124 | # If we get here, we found all required dependencies 125 | Dependencies._checked = True 126 | 127 | except Exception as e: 128 | print(f'Caught Exception while trying to check dependencies') 129 | print(f' Exception: {e}') 130 | Dependencies._checked = False 131 | 132 | return Dependencies._checked 133 | 134 | @staticmethod 135 | def requirements(*, force=False): 136 | if force: 137 | Dependencies._requirements = None 138 | elif Dependencies._requirements is not None: 139 | return Dependencies._requirements 140 | 141 | # load and cache requirements 142 | with requirements_for_check_txt.open() as requirements: 143 | dependencies = pkg_resources.parse_requirements(requirements) 144 | Dependencies._requirements = [ dep.project_name for dep in dependencies ] 145 | return Dependencies._requirements 146 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | bl_info = { 2 | "name": "DA3 Addon", 3 | "author": "Xiangyi Gao", 4 | "version": (1, 0), 5 | "blender": (4, 2, 0), 6 | "location": "View3D > Sidebar > DA3", 7 | "description": "Generate point clouds from images using DA3", 8 | "category": "3D View", 9 | } 10 | 11 | import bpy 12 | from .dependencies import Dependencies 13 | import os 14 | 15 | def register(): 16 | # Set PyTorch CUDA allocation config to reduce fragmentation 17 | os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True' 18 | # Set CUDA_LAUNCH_BLOCKING for better error reporting 19 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1' 20 | 21 | if not Dependencies.check(): 22 | Dependencies.install() 23 | if Dependencies.check(): 24 | from . 
import operators, panels 25 | bpy.utils.register_class(operators.DownloadModelOperator) 26 | bpy.utils.register_class(operators.UnloadModelOperator) 27 | bpy.utils.register_class(operators.GeneratePointCloudOperator) 28 | bpy.utils.register_class(panels.DA3Panel) 29 | bpy.types.Scene.da3_input_folder = bpy.props.StringProperty(subtype='DIR_PATH') 30 | bpy.types.Scene.da3_model_name = bpy.props.EnumProperty( 31 | items=[ 32 | ('da3-small', 'DA3 Small', 'Small model for faster inference'), 33 | ('da3-base', 'DA3 Base', 'Base model with balanced performance'), 34 | ('da3-large', 'DA3 Large', 'Large model for better quality'), 35 | ('da3-giant', 'DA3 Giant', 'Giant model for highest quality'), 36 | ("da3metric-large", "DA3 Metric Large", "Metric depth model"), 37 | ('da3mono-large', 'DA3 Mono Large', 'Single image depth estimation'), 38 | ('da3nested-giant-large', 'DA3 Nested Giant Large', 'Nested depth estimation'), 39 | ], 40 | name="Model", 41 | description="Select DA3 model variant", 42 | default='da3-large' 43 | ) 44 | bpy.types.Scene.da3_use_metric = bpy.props.BoolProperty( 45 | name="Use Metric", 46 | description="Real-world scale using the metric DA3 model", 47 | default=False, 48 | ) 49 | bpy.types.Scene.da3_metric_mode = bpy.props.EnumProperty( 50 | items=[ 51 | ("scale_base", "Scale Base Depth", "Scale base depth using metric model"), 52 | ("metric_depth", "Use Metric Depth", "Use metric model depth with base cameras"), 53 | ], 54 | name="Metric Mode", 55 | description="How to combine base and metric model outputs", 56 | default="scale_base", 57 | ) 58 | bpy.types.Scene.da3_process_res = bpy.props.IntProperty( 59 | name="Process Resolution", 60 | description="Internal resolution for processing (must be multiple of 14)", 61 | default=504, 62 | min=14 63 | ) 64 | bpy.types.Scene.da3_process_res_method = bpy.props.EnumProperty( 65 | items=[ 66 | ("upper_bound_resize", "Upper Bound Resize", "Resize so that the specified dimension becomes the longer side"), 67 | ("lower_bound_resize", "Lower Bound Resize", "Resize so that the specified dimension becomes the shorter side"), 68 | ], 69 | name="Resize Method", 70 | description="Method for resizing images to the target resolution", 71 | default="upper_bound_resize" 72 | ) 73 | bpy.types.Scene.da3_use_half_precision = bpy.props.BoolProperty( 74 | name="Use Half Precision", 75 | description="Use 16-bit floats for reduced VRAM usage", 76 | default=False, 77 | ) 78 | bpy.types.Scene.da3_use_ray_pose = bpy.props.BoolProperty( 79 | name="Use Ray-based Pose", 80 | description="Use ray-based camera pose estimation instead of the camera decoder (slower but potentially more accurate)", 81 | default=False, 82 | ) 83 | bpy.types.Scene.da3_batch_size = bpy.props.IntProperty( 84 | name="Batch Size", 85 | description="Number of images to process in batch mode", 86 | default=10, 87 | min=1 88 | ) 89 | bpy.types.Scene.da3_batch_mode = bpy.props.EnumProperty( 90 | items=[ 91 | ("ignore_batch_size", "Ignore Batch Size", "Process all images (may use excessive VRAM)"), 92 | ("skip_frames", "Skip Frames", "Process evenly spaced frames"), 93 | ("last_frame_overlap", "Last Frame Overlap", "Process overlapping batches for large datasets"), 94 | ("first_last_overlap", "First+Last Overlap", "Use first and last frame of previous batch plus new frames"), 95 | ], 96 | name="Batch Mode", 97 | description="How to select images for processing", 98 | default="skip_frames" 99 | ) 100 | bpy.types.Scene.da3_filter_edges = bpy.props.BoolProperty( 101 | name="Filter Edges", 102 | 
description="Set confidence to 0 for pixels with high depth gradient", 103 | default=True, 104 | ) 105 | bpy.types.Scene.da3_min_confidence = bpy.props.FloatProperty( 106 | name="Min Confidence", 107 | description="Minimum confidence threshold for points (points below this will be removed)", 108 | default=0.5, 109 | min=0.0, 110 | max=100.0, 111 | ) 112 | bpy.types.Scene.da3_output_debug_images = bpy.props.BoolProperty( 113 | name="Output Debug Images", 114 | description="Save debug images (depth, confidence, etc.) to a subfolder", 115 | default=False, 116 | ) 117 | bpy.types.Scene.da3_generate_mesh = bpy.props.BoolProperty( 118 | name="Generate Meshes", 119 | description="Generate independent textured meshes for each input image instead of a point cloud", 120 | default=False, 121 | ) 122 | bpy.types.Scene.da3_detect_motion = bpy.props.BoolProperty( 123 | name="Detect Motion", 124 | description="Identify and animate moving objects by checking if they're missing in other frames", 125 | default=False, 126 | ) 127 | bpy.types.Scene.da3_motion_threshold = bpy.props.FloatProperty( 128 | name="Motion Threshold", 129 | description="Depth difference ratio to consider as empty space (e.g. 0.1 = 10%)", 130 | default=0.1, 131 | min=0.01, 132 | max=1.0, 133 | ) 134 | bpy.types.Scene.da3_use_segmentation = bpy.props.BoolProperty( 135 | name="Use Segmentation", 136 | description="Use YOLO to segment and track objects across frames", 137 | default=False, 138 | ) 139 | bpy.types.Scene.da3_segmentation_model = bpy.props.EnumProperty( 140 | items=[ 141 | ("yolov8n-seg", "YOLOv8 Nano", "Lowest accuracy"), 142 | ("yolov8l-seg", "YOLOv8 Large", "Balanced speed/accuracy"), 143 | ("yolov8x-seg", "YOLOv8 X-Large", "Best accuracy for v8"), 144 | ("yolo11n-seg", "YOLO11 Nano", "Newest tiny fast model"), 145 | ("yolo11l-seg", "YOLO11 Large", "Newest balanced model"), 146 | ("yolo11x-seg", "YOLO11 X-Large", "Newest best accuracy"), 147 | ("yoloe-11s-seg-pf", "YOLOE Small PF", "YOLOE Small prompt-free"), 148 | ("yoloe-11m-seg-pf", "YOLOE Medium PF", "YOLOE Medium prompt-free"), 149 | ("yoloe-11l-seg-pf", "YOLOE Large PF", "Recognise the most objects"), 150 | ], 151 | name="Seg Model", 152 | description="Select segmentation model", 153 | default="yoloe-11l-seg-pf", 154 | ) 155 | bpy.types.Scene.da3_segmentation_conf = bpy.props.FloatProperty( 156 | name="Seg Confidence", 157 | description="Minimum confidence for segmentation", 158 | default=0.25, 159 | min=0.0, 160 | max=1.0, 161 | ) 162 | else: 163 | raise ValueError("installation failed.") 164 | 165 | def unregister(): 166 | from . 
import operators, panels 167 | bpy.utils.unregister_class(operators.DownloadModelOperator) 168 | bpy.utils.unregister_class(operators.UnloadModelOperator) 169 | bpy.utils.unregister_class(operators.GeneratePointCloudOperator) 170 | bpy.utils.unregister_class(panels.DA3Panel) 171 | del bpy.types.Scene.da3_input_folder 172 | del bpy.types.Scene.da3_model_name 173 | del bpy.types.Scene.da3_use_metric 174 | del bpy.types.Scene.da3_metric_mode 175 | del bpy.types.Scene.da3_process_res 176 | del bpy.types.Scene.da3_process_res_method 177 | del bpy.types.Scene.da3_use_half_precision 178 | del bpy.types.Scene.da3_use_ray_pose 179 | del bpy.types.Scene.da3_batch_size 180 | del bpy.types.Scene.da3_batch_mode 181 | del bpy.types.Scene.da3_filter_edges 182 | del bpy.types.Scene.da3_min_confidence 183 | del bpy.types.Scene.da3_output_debug_images 184 | del bpy.types.Scene.da3_generate_mesh 185 | del bpy.types.Scene.da3_detect_motion 186 | del bpy.types.Scene.da3_motion_threshold 187 | del bpy.types.Scene.da3_use_segmentation 188 | del bpy.types.Scene.da3_segmentation_model 189 | del bpy.types.Scene.da3_segmentation_conf 190 | 191 | if __name__ == "__main__": 192 | register() -------------------------------------------------------------------------------- /operators.py: -------------------------------------------------------------------------------- 1 | import bpy 2 | from pathlib import Path 3 | import os 4 | import torch 5 | import numpy as np 6 | import time 7 | import datetime 8 | from .utils import ( 9 | run_model, 10 | convert_prediction_to_dict, 11 | combine_base_and_metric, 12 | combine_base_with_metric_depth, 13 | import_point_cloud, 14 | import_mesh_from_depth, 15 | create_cameras, 16 | align_batches, 17 | compute_motion_scores, 18 | ) 19 | 20 | wm = None 21 | total_predicted_time = None 22 | start_time = None 23 | def start_progress_timer(total): 24 | global wm, total_predicted_time, start_time 25 | start_time = time.time() 26 | wm = bpy.context.window_manager 27 | total_predicted_time = total 28 | wm.progress_begin(0, 100) 29 | 30 | # Calculate estimated duration and finish time 31 | minutes = int(total // 60) 32 | seconds = int(total % 60) 33 | if minutes > 0: 34 | duration_str = f"{minutes} minutes {seconds} seconds" 35 | else: 36 | duration_str = f"{seconds} seconds" 37 | 38 | finish_time = datetime.datetime.now() + datetime.timedelta(seconds=total) 39 | finish_str = finish_time.strftime("%H:%M:%S") 40 | print(f"Rough estimated duration: {duration_str}, expected finish at {finish_str}") 41 | 42 | def update_progress_timer(expected_time, stage=""): 43 | global wm, total_predicted_time, start_time 44 | if not total_predicted_time or total_predicted_time <= 0: 45 | print("Warning: total_predicted_time is zero or negative, cannot update progress.") 46 | return 47 | portion = expected_time / total_predicted_time * 100 48 | wm.progress_update(int(portion)) 49 | print(f"Progress: {stage}, {portion:.2f}%, elapsed: {time.time() - start_time:.2f}s") 50 | 51 | def end_progress_timer(): 52 | global wm 53 | if wm is not None: 54 | wm.progress_end() 55 | wm = None 56 | 57 | add_on_path = Path(__file__).parent 58 | MODELS_DIR = os.path.join(add_on_path, 'models') 59 | _URLS = { 60 | 'da3-small': "https://huggingface.co/depth-anything/DA3-SMALL/resolve/main/model.safetensors", 61 | 'da3-base': "https://huggingface.co/depth-anything/DA3-BASE/resolve/main/model.safetensors", 62 | 'da3-large': "https://huggingface.co/depth-anything/DA3-LARGE/resolve/main/model.safetensors", 63 | 'da3-giant': 
"https://huggingface.co/depth-anything/DA3-GIANT/resolve/main/model.safetensors", 64 | "da3metric-large": "https://huggingface.co/depth-anything/DA3METRIC-LARGE/resolve/main/model.safetensors", 65 | "da3mono-large": "https://huggingface.co/depth-anything/DA3MONO-LARGE/resolve/main/model.safetensors", 66 | "da3nested-giant-large": "https://huggingface.co/depth-anything/DA3NESTED-GIANT-LARGE/resolve/main/model.safetensors", 67 | 68 | "yolov8n-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n-seg.pt", 69 | "yolov8s-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s-seg.pt", 70 | "yolov8m-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m-seg.pt", 71 | "yolov8l-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8l-seg.pt", 72 | "yolov8x-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x-seg.pt", 73 | "yolo11n-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-seg.pt", 74 | "yolo11s-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-seg.pt", 75 | "yolo11m-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-seg.pt", 76 | "yolo11l-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-seg.pt", 77 | "yolo11x-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-seg.pt", 78 | "yoloe-11s-seg-pf": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yoloe-11s-seg-pf.pt", 79 | "yoloe-11m-seg-pf": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yoloe-11m-seg-pf.pt", 80 | "yoloe-11l-seg-pf": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yoloe-11l-seg-pf.pt", 81 | } 82 | model = None 83 | current_model_name = None 84 | 85 | def get_model_path(model_name): 86 | return os.path.join(MODELS_DIR, f'{model_name}.safetensors') 87 | 88 | def display_VRAM_usage(stage: str, include_peak=False): 89 | if torch.cuda.is_available(): 90 | allocated = torch.cuda.memory_allocated() / 1024**2 91 | free, total = torch.cuda.mem_get_info() 92 | free_mb = free / 1024**2 93 | total_mb = total / 1024**2 94 | msg = f"VRAM {stage}: {allocated:.1f} MB (free: {free_mb:.1f} MB / {total_mb:.1f} MB)" 95 | if include_peak: 96 | peak = torch.cuda.max_memory_allocated() / 1024**2 97 | msg += f" (peak: {peak:.1f} MB)" 98 | print(msg) 99 | 100 | 101 | def get_model(model_name): 102 | global model, current_model_name 103 | if model is None or current_model_name != model_name: 104 | from depth_anything_3.api import DepthAnything3 105 | if torch.cuda.is_available(): 106 | torch.cuda.reset_peak_memory_stats() 107 | display_VRAM_usage(f"before loading {model_name}") 108 | model = DepthAnything3(model_name=model_name) 109 | model_path = get_model_path(model_name) 110 | if os.path.exists(model_path): 111 | from safetensors.torch import load_file 112 | weight = load_file(model_path) 113 | model.load_state_dict(weight, strict=False) 114 | else: 115 | raise FileNotFoundError(f"Model file {model_name} not found. 
Please download it first.") 116 | device = "cuda" if torch.cuda.is_available() else "cpu" 117 | model.to(device) 118 | model.eval() 119 | current_model_name = model_name 120 | display_VRAM_usage(f"after loading {model_name}", include_peak=True) 121 | return model 122 | 123 | def unload_current_model(): 124 | global model, current_model_name 125 | if model is not None: 126 | display_VRAM_usage("before unload") 127 | # Drop references so PyTorch can free memory 128 | del model 129 | model = None 130 | current_model_name = None 131 | if torch.cuda.is_available(): 132 | torch.cuda.empty_cache() 133 | display_VRAM_usage("after unload") 134 | 135 | def run_segmentation(image_paths, conf_threshold=0.25, model_name="yolo11x-seg"): 136 | print(f"Loading {model_name} model...") 137 | display_VRAM_usage("before loading YOLO") 138 | try: 139 | from ultralytics import YOLO 140 | except ImportError: 141 | print("Error: ultralytics not installed. Please install it to use segmentation.") 142 | return None, None 143 | 144 | # Use selected model 145 | # model_name passed as argument 146 | model_path = os.path.join(MODELS_DIR, f"{model_name}.pt") 147 | 148 | if not os.path.exists(model_path): 149 | print(f"Downloading {model_name} to {model_path}...") 150 | url = _URLS.get(model_name, "") 151 | if not url: 152 | print(f"Error: No URL known for {model_name}. Please download {model_name}.pt manually to {MODELS_DIR}") 153 | return None, None 154 | 155 | try: 156 | torch.hub.download_url_to_file(url, model_path) 157 | except Exception as e: 158 | print(f"Failed to download {model_name}: {e}") 159 | return None, None 160 | 161 | # Load model from specific path 162 | seg_model = YOLO(model_path) 163 | display_VRAM_usage("after loading YOLO", include_peak=True) 164 | 165 | print(f"Running segmentation on {len(image_paths)} images...") 166 | 167 | # Run tracking 168 | # persist=True is important for video tracking 169 | # stream=True returns a generator, good for memory 170 | results = seg_model.track(source=image_paths, conf=conf_threshold, persist=True, stream=True, verbose=False) 171 | 172 | segmentation_data = [] 173 | 174 | for i, r in enumerate(results): 175 | # r is a Results object 176 | # We need masks and track IDs 177 | frame_data = { 178 | "masks": [], 179 | "ids": [], 180 | "classes": [], 181 | "orig_shape": r.orig_shape 182 | } 183 | 184 | if r.masks is not None: 185 | # masks.data is a torch tensor of masks [N, H, W] 186 | masks = r.masks.data.cpu().numpy() 187 | 188 | # Crop masks to remove letterbox padding (YOLO pads to multiple of 32) 189 | # This ensures aspect ratio matches original image before we resize later 190 | h_orig, w_orig = r.orig_shape 191 | if len(masks.shape) == 3: 192 | _, h_mask, w_mask = masks.shape 193 | 194 | # Calculate scale factor that was used to fit image into mask 195 | scale = min(w_mask / w_orig, h_mask / h_orig) 196 | 197 | # Compute expected dimensions of the valid image area in the mask 198 | new_w = int(round(w_orig * scale)) 199 | new_h = int(round(h_orig * scale)) 200 | 201 | # Compute start offsets (centering) 202 | x_off = (w_mask - new_w) // 2 203 | y_off = (h_mask - new_h) // 2 204 | 205 | # Crop 206 | masks = masks[:, y_off : y_off + new_h, x_off : x_off + new_w] 207 | 208 | # Fix edge artifacts (sometimes edges are black) 209 | if len(masks.shape) == 3: 210 | for k in range(masks.shape[0]): 211 | m = masks[k] 212 | h_m, w_m = m.shape 213 | 214 | # Fix bottom edge 215 | if h_m >= 3: 216 | if np.max(m[-1, :]) == 0: 217 | if np.max(m[-2, :]) == 0: 218 | m[-2:, 
:] = m[-3, :] 219 | else: 220 | m[-1, :] = m[-2, :] 221 | 222 | # Fix top edge 223 | if h_m >= 3: 224 | if np.max(m[0, :]) == 0: 225 | if np.max(m[1, :]) == 0: 226 | m[:2, :] = m[2, :] 227 | else: 228 | m[0, :] = m[1, :] 229 | 230 | # Fix left edge 231 | if w_m >= 3: 232 | if np.max(m[:, 0]) == 0: 233 | if np.max(m[:, 1]) == 0: 234 | m[:, :2] = m[:, 2:3] 235 | else: 236 | m[:, 0] = m[:, 1] 237 | 238 | frame_data["masks"] = masks 239 | 240 | if r.boxes is not None and r.boxes.id is not None: 241 | frame_data["ids"] = r.boxes.id.int().cpu().numpy() 242 | else: 243 | # If no tracking IDs (e.g. first frame or lost track), use -1 or generate new ones? 244 | # If tracking is on, it should return IDs. If not, maybe just detection. 245 | # But we requested track(). 246 | # If no ID, maybe it's a new object that wasn't tracked? 247 | # Let's use -1 for untracked 248 | if r.boxes is not None: 249 | frame_data["ids"] = np.full(len(r.boxes), -1, dtype=int) 250 | 251 | if r.boxes is not None: 252 | frame_data["classes"] = r.boxes.cls.int().cpu().numpy() 253 | 254 | segmentation_data.append(frame_data) 255 | 256 | if i % 10 == 0: 257 | print(f"Segmented {i+1}/{len(image_paths)} images") 258 | 259 | display_VRAM_usage("after YOLO inference", include_peak=True) 260 | 261 | # Get class names 262 | class_names = seg_model.names 263 | 264 | # Cleanup 265 | del seg_model 266 | if torch.cuda.is_available(): 267 | torch.cuda.empty_cache() 268 | display_VRAM_usage("after unloading YOLO") 269 | 270 | return segmentation_data, class_names 271 | 272 | class DownloadModelOperator(bpy.types.Operator): 273 | bl_idname = "da3.download_model" 274 | bl_label = "Download DA3 Model" 275 | 276 | # NEW: optional override for which model to download 277 | da3_override_model_name: bpy.props.StringProperty( 278 | name="Override Model Name", 279 | description="If set, download this model instead of the one selected in the scene", 280 | default="", 281 | ) 282 | 283 | def execute(self, context): 284 | model_name = self.da3_override_model_name or context.scene.da3_model_name 285 | model_path = get_model_path(model_name) 286 | 287 | if os.path.exists(model_path): 288 | self.report({'INFO'}, f"Model {model_name} already downloaded.") 289 | return {'FINISHED'} 290 | 291 | if model_name not in _URLS: 292 | self.report({'ERROR'}, f"Unknown model: {model_name}") 293 | return {'CANCELLED'} 294 | 295 | try: 296 | print(f"Downloading model {model_name}...") 297 | os.makedirs(MODELS_DIR, exist_ok=True) 298 | torch.hub.download_url_to_file(_URLS[model_name], model_path) 299 | self.report({'INFO'}, f"Model {model_name} downloaded successfully.") 300 | except Exception as e: 301 | self.report({'ERROR'}, f"Failed to download model {model_name}: {e}") 302 | return {'CANCELLED'} 303 | return {'FINISHED'} 304 | 305 | @classmethod 306 | def poll(cls, context): 307 | # Allow the button to be clicked; existence is checked in execute() 308 | return True 309 | # model_name = context.scene.da3_model_name 310 | # model_path = get_model_path(model_name) 311 | # return not os.path.exists(model_path) 312 | 313 | 314 | class UnloadModelOperator(bpy.types.Operator): 315 | bl_idname = "da3.unload_model" 316 | bl_label = "Unload Model" 317 | 318 | def execute(self, context): 319 | unload_current_model() 320 | self.report({'INFO'}, "Model unloaded and VRAM freed.") 321 | return {'FINISHED'} 322 | 323 | @classmethod 324 | def poll(cls, context): 325 | # Enable if a model is loaded 326 | return model is not None 327 | 328 | 329 | class 
GeneratePointCloudOperator(bpy.types.Operator): 330 | bl_idname = "da3.generate_point_cloud" 331 | bl_label = "Generate Point Cloud" 332 | 333 | def execute(self, context): 334 | input_folder = context.scene.da3_input_folder 335 | base_model_name = context.scene.da3_model_name 336 | use_metric = context.scene.da3_use_metric 337 | metric_mode = getattr(context.scene, "da3_metric_mode", "scale_base") 338 | use_ray_pose = getattr(context.scene, "da3_use_ray_pose", False) 339 | process_res = context.scene.da3_process_res 340 | process_res_method = context.scene.da3_process_res_method 341 | use_half_precision = context.scene.da3_use_half_precision 342 | filter_edges = getattr(context.scene, "da3_filter_edges", True) 343 | min_confidence = getattr(context.scene, "da3_min_confidence", 0.5) 344 | output_debug_images = getattr(context.scene, "da3_output_debug_images", False) 345 | generate_mesh = getattr(context.scene, "da3_generate_mesh", False) 346 | 347 | if process_res % 14 != 0: 348 | self.report({'ERROR'}, "Process resolution must be a multiple of 14.") 349 | return {'CANCELLED'} 350 | 351 | if not input_folder or not os.path.isdir(input_folder): 352 | self.report({'ERROR'}, "Please select a valid input folder.") 353 | return {'CANCELLED'} 354 | 355 | # Get image paths 356 | import glob 357 | image_paths = sorted(glob.glob(os.path.join(input_folder, "*.[jJpP][pPnN][gG]"))) 358 | if not image_paths: 359 | self.report({'ERROR'}, "No images found in the input folder.") 360 | return {'CANCELLED'} 361 | 362 | print(f"Total images: {len(image_paths)}") 363 | 364 | batch_mode = context.scene.da3_batch_mode 365 | batch_size = context.scene.da3_batch_size 366 | if batch_mode == "skip_frames" and len(image_paths) > batch_size: 367 | import numpy as np 368 | indices = np.linspace(0, len(image_paths) - 1, batch_size, dtype=int) 369 | image_paths = [image_paths[i] for i in indices] 370 | # For overlap modes and ignore_batch_size, use all images 371 | 372 | self.report({'INFO'}, f"Processing {len(image_paths)} images...") 373 | 374 | # Initialize progress bar 375 | LoadModelTime = 9.2 # seconds 376 | AlignBatchesTime = 0.29 377 | AddImagePointsTime = 0.27 378 | BatchTimePerImage = 4.9 # it's actually quadratic but close enough 379 | MetricLoadModelTime = 19.25 380 | MetricBatchTimePerImage = 0.62 381 | MetricCombineTime = 0.12 382 | if current_model_name == base_model_name: 383 | LoadModelTime = 0 384 | needs_alignment = batch_mode in ("last_frame_overlap", "first_last_overlap") 385 | BaseTimeEstimate = LoadModelTime + BatchTimePerImage * len(image_paths) 386 | if needs_alignment: 387 | BaseTimeEstimate += AlignBatchesTime 388 | if use_metric: 389 | MetricTimeEstimate = BaseTimeEstimate + MetricLoadModelTime 390 | if metric_mode == "scale_base": 391 | MetricTimeEstimate += MetricBatchTimePerImage * batch_size 392 | else: 393 | MetricTimeEstimate += MetricBatchTimePerImage * len(image_paths) 394 | AfterCombineTimeEstimate = MetricTimeEstimate 395 | if needs_alignment: 396 | AfterCombineTimeEstimate += AlignBatchesTime 397 | AfterCombineTimeEstimate += MetricCombineTime 398 | else: 399 | MetricTimeEstimate = BaseTimeEstimate 400 | AfterCombineTimeEstimate = BaseTimeEstimate 401 | if needs_alignment: 402 | AfterCombineTimeEstimate += AlignBatchesTime 403 | TotalTimeEstimate = AfterCombineTimeEstimate + AddImagePointsTime*len(image_paths) 404 | start_progress_timer(TotalTimeEstimate) 405 | self.report({'INFO'}, "Starting point cloud generation...") 406 | 407 | try: 408 | # 0) Run Segmentation if enabled 409 | 
all_segmentation_data = None 410 | segmentation_class_names = None 411 | if getattr(context.scene, "da3_use_segmentation", False): 412 | self.report({'INFO'}, "Running segmentation...") 413 | # Ensure DA3 model is unloaded 414 | unload_current_model() 415 | 416 | seg_conf = getattr(context.scene, "da3_segmentation_conf", 0.25) 417 | seg_model_name = getattr(context.scene, "da3_segmentation_model", "yolo11x-seg") 418 | all_segmentation_data, segmentation_class_names = run_segmentation(image_paths, conf_threshold=seg_conf, model_name=seg_model_name) 419 | 420 | if all_segmentation_data is None: 421 | self.report({'WARNING'}, "Segmentation failed or cancelled. Proceeding without segmentation.") 422 | else: 423 | self.report({'INFO'}, "Segmentation complete.") 424 | update_progress_timer(0, "Segmentation complete") # Timer doesn't account for seg yet 425 | 426 | # 1) run base model 427 | self.report({'INFO'}, f"Loading {base_model_name} model...") 428 | base_model = get_model(base_model_name) 429 | update_progress_timer(LoadModelTime, "Loaded base model") 430 | self.report({'INFO'}, "Running base model inference...") 431 | 432 | all_base_predictions = [] 433 | 434 | if batch_mode in {"last_frame_overlap", "first_last_overlap"}: 435 | # Process in overlapping batches 436 | if batch_mode == "last_frame_overlap": 437 | # Existing scheme: last frame of previous batch overlaps with first of next 438 | step = batch_size - 1 439 | num_batches = (len(image_paths) + step - 1) // step # Ceiling division 440 | for batch_idx, start_idx in enumerate(range(0, len(image_paths), step)): 441 | end_idx = min(start_idx + batch_size, len(image_paths)) 442 | batch_paths = image_paths[start_idx:end_idx] 443 | batch_indices = list(range(start_idx, end_idx)) 444 | print(f"Batch {batch_idx + 1}/{num_batches}:") 445 | prediction = run_model(batch_paths, base_model, process_res, process_res_method, use_half=use_half_precision, use_ray_pose=use_ray_pose) 446 | update_progress_timer(LoadModelTime + end_idx * BatchTimePerImage, f"Base batch {batch_idx + 1}") 447 | all_base_predictions.append((prediction, batch_indices)) 448 | else: 449 | # New scheme: (0..9) (0, 9, 10..17) (10, 17, 18..25) 450 | N = len(image_paths) 451 | if batch_size < 3: 452 | step = 1 453 | else: 454 | step = batch_size - 2 455 | 456 | # First batch 457 | start = 0 458 | end = min(batch_size, N) 459 | batch_indices = list(range(start, end)) 460 | current_new_indices = batch_indices 461 | 462 | remaining_start = end 463 | 464 | if step > 0: 465 | num_batches = 1 + max(0, (N - end + step - 1) // step) 466 | else: 467 | num_batches = (N + batch_size - 1) // batch_size 468 | 469 | batch_idx = 0 470 | while True: 471 | batch_paths = [image_paths[i] for i in batch_indices] 472 | print(f"Batch {batch_idx + 1}/{num_batches}:") 473 | prediction = run_model(batch_paths, base_model, process_res, process_res_method, use_half=use_half_precision, use_ray_pose=use_ray_pose) 474 | end_idx = batch_indices[-1] + 1 475 | update_progress_timer(LoadModelTime + end_idx * BatchTimePerImage, f"Base batch {batch_idx + 1}") 476 | all_base_predictions.append((prediction, batch_indices.copy())) 477 | 478 | if remaining_start >= N: 479 | break 480 | 481 | # Determine overlap frames from the "new" frames of the current batch 482 | overlap_indices = [current_new_indices[0], current_new_indices[-1]] 483 | # Remove duplicates if any (e.g. 
if only 1 new frame) 484 | if overlap_indices[0] == overlap_indices[1]: 485 | overlap_indices = [overlap_indices[0]] 486 | 487 | next_end = min(remaining_start + step, N) 488 | next_new_indices = list(range(remaining_start, next_end)) 489 | 490 | batch_indices = overlap_indices + next_new_indices 491 | current_new_indices = next_new_indices 492 | 493 | remaining_start = next_end 494 | batch_idx += 1 495 | else: 496 | prediction = run_model(image_paths, base_model, process_res, process_res_method, use_half=use_half_precision, use_ray_pose=use_ray_pose) 497 | update_progress_timer(LoadModelTime + len(image_paths) * BatchTimePerImage, "Base batch complete") 498 | all_base_predictions.append((prediction, list(range(len(image_paths))))) 499 | 500 | update_progress_timer(BaseTimeEstimate, "Base inference complete") 501 | 502 | # 2) if metric enabled and weights available: 503 | all_metric_predictions = [] 504 | metric_available = False 505 | 506 | if use_metric: 507 | metric_path = get_model_path("da3metric-large") 508 | if os.path.exists(metric_path): 509 | metric_available = True 510 | # free base model from VRAM before loading metric 511 | self.report({'INFO'}, "Unloading base model and loading metric model...") 512 | base_model = None 513 | unload_current_model() 514 | 515 | metric_model = get_model("da3metric-large") 516 | update_progress_timer(BaseTimeEstimate + MetricLoadModelTime, "Loaded metric model") 517 | self.report({'INFO'}, "Running metric model inference...") 518 | 519 | if metric_mode == "scale_base": 520 | # In scale_base mode, run **one** metric batch over all images. 521 | N = len(image_paths) 522 | start = 0 523 | end = min(batch_size, N) 524 | batch_indices = list(range(start, end)) 525 | batch_paths = [image_paths[i] for i in batch_indices] 526 | prediction = run_model( 527 | batch_paths, 528 | metric_model, 529 | process_res, 530 | process_res_method, 531 | use_half=use_half_precision, 532 | use_ray_pose=use_ray_pose, 533 | ) 534 | update_progress_timer(BaseTimeEstimate + MetricLoadModelTime + end * MetricBatchTimePerImage, "Metric batch complete") 535 | all_metric_predictions.append((prediction, batch_indices.copy())) 536 | else: 537 | # For other metric modes, keep previous batching behaviour 538 | if batch_mode in {"last_frame_overlap", "first_last_overlap"}: 539 | # Process in overlapping batches for metric too (mirror base logic) 540 | if batch_mode == "last_frame_overlap": 541 | step = batch_size - 1 542 | num_batches = (len(image_paths) + step - 1) // step 543 | for batch_idx, start_idx in enumerate(range(0, len(image_paths), step)): 544 | end_idx = min(start_idx + batch_size, len(image_paths)) 545 | batch_paths = image_paths[start_idx:end_idx] 546 | batch_indices = list(range(start_idx, end_idx)) 547 | print(f"Batch {batch_idx + 1}/{num_batches}:") 548 | prediction = run_model(batch_paths, metric_model, process_res, process_res_method, use_half=use_half_precision, use_ray_pose=use_ray_pose) 549 | update_progress_timer(BaseTimeEstimate + MetricLoadModelTime + end_idx * MetricBatchTimePerImage, f"Metric batch {batch_idx + 1}") 550 | all_metric_predictions.append((prediction, batch_indices)) 551 | else: 552 | N = len(image_paths) 553 | if batch_size < 3: 554 | step = 1 555 | else: 556 | step = batch_size - 2 557 | 558 | start = 0 559 | end = min(batch_size, N) 560 | batch_indices = list(range(start, end)) 561 | current_new_indices = batch_indices 562 | 563 | remaining_start = end 564 | 565 | if step > 0: 566 | num_batches = 1 + max(0, (N - end + step - 1) // step) 
567 | else: 568 | num_batches = (N + batch_size - 1) // batch_size 569 | 570 | batch_idx = 0 571 | while True: 572 | batch_paths = [image_paths[i] for i in batch_indices] 573 | print(f"Batch {batch_idx + 1}/{num_batches}:") 574 | prediction = run_model(batch_paths, metric_model, process_res, process_res_method, use_half=use_half_precision, use_ray_pose=use_ray_pose) 575 | end_idx = batch_indices[-1] + 1 576 | update_progress_timer(BaseTimeEstimate + MetricLoadModelTime + end_idx * MetricBatchTimePerImage, f"Metric batch {batch_idx + 1}") 577 | all_metric_predictions.append((prediction, batch_indices.copy())) 578 | 579 | if remaining_start >= N: 580 | break 581 | 582 | overlap_indices = [current_new_indices[0], current_new_indices[-1]] 583 | if overlap_indices[0] == overlap_indices[1]: 584 | overlap_indices = [overlap_indices[0]] 585 | 586 | next_end = min(remaining_start + step, N) 587 | next_new_indices = list(range(remaining_start, next_end)) 588 | 589 | batch_indices = overlap_indices + next_new_indices 590 | current_new_indices = next_new_indices 591 | 592 | remaining_start = next_end 593 | batch_idx += 1 594 | else: 595 | # Non-overlapping full batch 596 | prediction = run_model(image_paths, metric_model, process_res, process_res_method, use_half=use_half_precision, use_ray_pose=use_ray_pose) 597 | all_metric_predictions.append((prediction, list(range(len(image_paths))))) 598 | update_progress_timer(BaseTimeEstimate + MetricLoadModelTime + len(image_paths) * MetricBatchTimePerImage, "Metric batch complete") 599 | metric_model = None 600 | unload_current_model() 601 | else: 602 | self.report({'WARNING'}, "Metric model not downloaded; using non-metric depth only.") 603 | 604 | 605 | update_progress_timer(MetricTimeEstimate, "Metric inference complete") 606 | # Align base batches. Metric is **not** aligned in scale_base mode. 607 | if batch_mode in {"last_frame_overlap", "first_last_overlap"}: 608 | aligned_base_predictions = align_batches(all_base_predictions) 609 | # Metric depth is absolute, and has no camera poses, so alignment between batches is less important (and not implemented yet). 
610 | if metric_available: 611 | aligned_metric_predictions = [p[0] for p in all_metric_predictions] 612 | else: 613 | aligned_base_predictions = [p[0] for p in all_base_predictions] 614 | if metric_available: 615 | aligned_metric_predictions = [p[0] for p in all_metric_predictions] 616 | update_progress_timer(MetricTimeEstimate + AlignBatchesTime, "Align batches complete") 617 | 618 | # Create or get a collection named after the folder 619 | folder_name = os.path.basename(os.path.normpath(input_folder)) 620 | scene = context.scene 621 | collections = bpy.data.collections 622 | 623 | # Create parent collection 624 | parent_col = collections.new(folder_name) 625 | scene.collection.children.link(parent_col) 626 | 627 | # Combine the base and metric predictions 628 | if metric_available: 629 | all_combined_predictions = combine_base_and_metric(aligned_base_predictions, aligned_metric_predictions) 630 | else: 631 | all_combined_predictions = aligned_base_predictions 632 | update_progress_timer(AfterCombineTimeEstimate, "Combined predictions complete") 633 | 634 | # Detect motion 635 | detect_motion = getattr(context.scene, "da3_detect_motion", False) 636 | if detect_motion: 637 | motion_threshold = getattr(context.scene, "da3_motion_threshold", 0.1) 638 | self.report({'INFO'}, "Detecting motion...") 639 | compute_motion_scores(all_combined_predictions, threshold_ratio=motion_threshold) 640 | # update_progress_timer(AfterCombineTimeEstimate + 1.0, "Motion detection complete") 641 | 642 | # Add a point cloud for each batch 643 | for batch_number, batch_prediction in enumerate(all_combined_predictions): 644 | batch_indices = all_base_predictions[batch_number][1] 645 | batch_paths = [image_paths[j] for j in batch_indices] 646 | 647 | # Extract segmentation data for this batch 648 | batch_segmentation = None 649 | if all_segmentation_data: 650 | batch_segmentation = [all_segmentation_data[j] for j in batch_indices] 651 | 652 | combined_predictions = convert_prediction_to_dict( 653 | batch_prediction, 654 | batch_paths, 655 | output_debug_images=output_debug_images, 656 | segmentation_data=batch_segmentation, 657 | class_names=segmentation_class_names 658 | ) 659 | 660 | # Create batch collection 661 | batch_col_name = f"{folder_name}_Batch_{batch_number+1}" 662 | batch_col = collections.new(batch_col_name) 663 | parent_col.children.link(batch_col) 664 | 665 | if generate_mesh: 666 | import_mesh_from_depth(combined_predictions, collection=batch_col, filter_edges=filter_edges, min_confidence=min_confidence, global_indices=batch_indices) 667 | else: 668 | import_point_cloud(combined_predictions, collection=batch_col, filter_edges=filter_edges, min_confidence=min_confidence, global_indices=batch_indices) 669 | 670 | create_cameras(combined_predictions, collection=batch_col) 671 | end_idx = batch_indices[-1] + 1 672 | update_progress_timer(AfterCombineTimeEstimate + AddImagePointsTime * end_idx, f"Added batch {batch_number + 1} to Blender") 673 | 674 | update_progress_timer(TotalTimeEstimate, "Point cloud generation complete") 675 | end_progress_timer() 676 | self.report({'INFO'}, "Point cloud generation complete.") 677 | except Exception as e: 678 | end_progress_timer() 679 | import traceback 680 | print("DA3 ERROR while generating point cloud:") 681 | traceback.print_exc() 682 | base_model = None 683 | metric_model = None 684 | base_prediction = None 685 | metric_prediction = None 686 | combined_prediction = None 687 | combined_predictions = None 688 | if torch.cuda.is_available(): 689 | try: 690 | 
torch.cuda.empty_cache() # Force free any pending allocations 691 | except Exception as cache_err: 692 | print(f"Warning: Failed to empty CUDA cache: {cache_err}") 693 | import gc 694 | gc.collect() # Force garbage collection 695 | unload_current_model() # Free VRAM on error 696 | self.report({'ERROR'}, f"Failed to generate point cloud: {e}") 697 | return {'CANCELLED'} 698 | return {'FINISHED'} 699 | 700 | @classmethod 701 | def poll(cls, context): 702 | model_name = context.scene.da3_model_name 703 | model_path = get_model_path(model_name) 704 | return os.path.exists(model_path) and context.scene.da3_input_folder != "" -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import numpy as np 4 | import bpy 5 | from mathutils import Matrix 6 | import math 7 | import torch 8 | import cv2 9 | 10 | from depth_anything_3.utils.alignment import ( 11 | compute_alignment_mask, 12 | compute_sky_mask, 13 | least_squares_scale_scalar, 14 | sample_tensor_for_quantile, 15 | set_sky_regions_to_max_depth, 16 | ) 17 | 18 | def unproject_depth_map_to_point_map(depth, extrinsics, intrinsics): 19 | N, H, W = depth.shape 20 | world_points = np.zeros((N, H, W, 3), dtype=np.float32) 21 | for i in range(N): 22 | u, v = np.meshgrid(np.arange(W), np.arange(H)) 23 | pixels = np.stack([u, v, np.ones((H, W))], axis=-1).reshape(-1, 3) # HW, 3 24 | invK = np.linalg.inv(intrinsics[i]) 25 | rays = (invK @ pixels.T).T # HW, 3 26 | depths = depth[i].reshape(-1) # HW 27 | cam_points = rays * depths[:, np.newaxis] # HW, 3 28 | cam_points_hom = np.hstack([cam_points, np.ones((len(depths), 1))]) # HW, 4 29 | E = np.vstack([extrinsics[i], [0, 0, 0, 1]]) # 4, 4 30 | cam_to_world = np.linalg.inv(E) 31 | world_points_hom = (cam_to_world @ cam_points_hom.T).T # HW, 4 32 | world_points_i = world_points_hom[:, :3] / world_points_hom[:, 3:4] 33 | world_points[i] = world_points_i.reshape(H, W, 3) 34 | return world_points 35 | 36 | def run_model(image_paths, model, process_res=504, process_res_method="upper_bound_resize", use_half=False, use_ray_pose=False): 37 | print(f"Processing {len(image_paths)} images") 38 | if torch.cuda.is_available(): 39 | torch.cuda.reset_peak_memory_stats() 40 | allocated = torch.cuda.memory_allocated() / 1024**2 41 | free, total = torch.cuda.mem_get_info() 42 | free_mb = free / 1024**2 43 | total_mb = total / 1024**2 44 | print(f"VRAM before inference: {allocated:.1f} MB (free: {free_mb:.1f} MB / {total_mb:.1f} MB)") 45 | import torch.cuda.amp as amp 46 | if use_half: 47 | with amp.autocast(): 48 | prediction = model.inference(image_paths, process_res=process_res, process_res_method=process_res_method, use_ray_pose=use_ray_pose) 49 | else: 50 | prediction = model.inference(image_paths, process_res=process_res, process_res_method=process_res_method, use_ray_pose=use_ray_pose) 51 | if torch.cuda.is_available(): 52 | peak = torch.cuda.max_memory_allocated() / 1024**2 53 | allocated = torch.cuda.memory_allocated() / 1024**2 54 | free, total = torch.cuda.mem_get_info() 55 | free_mb = free / 1024**2 56 | total_mb = total / 1024**2 57 | print(f"VRAM after inference: {allocated:.1f} MB (peak: {peak:.1f} MB, free: {free_mb:.1f} MB / {total_mb:.1f} MB)") 58 | # DEBUG: inspect prediction object for this model 59 | print("DEBUG prediction type:", type(prediction)) 60 | if hasattr(prediction, "__dict__"): 61 | print("DEBUG prediction.__dict__ keys:", list(prediction.__dict__.keys())) 62
| else: 63 | print("DEBUG dir(prediction):", dir(prediction)) 64 | return prediction 65 | 66 | # Helper functions for matrix operations and type conversion 67 | def _to_tensor(x): 68 | if isinstance(x, np.ndarray): 69 | return torch.from_numpy(x) 70 | return x 71 | 72 | def _to_numpy(x): 73 | if isinstance(x, torch.Tensor): 74 | return x.detach().cpu().float().numpy() 75 | return np.array(x) 76 | 77 | def _extrinsic_to_4x4_torch(ext_3x4): 78 | if ext_3x4.shape == (3, 4): 79 | last_row = torch.tensor([0, 0, 0, 1], device=ext_3x4.device, dtype=ext_3x4.dtype) 80 | return torch.cat([ext_3x4, last_row.unsqueeze(0)], dim=0) 81 | return ext_3x4 82 | 83 | def _invert_4x4_torch(T): 84 | R = T[:3, :3] 85 | t = T[:3, 3] 86 | T_inv = torch.eye(4, device=T.device, dtype=T.dtype) 87 | T_inv[:3, :3] = R.T 88 | T_inv[:3, 3] = -R.T @ t 89 | return T_inv 90 | 91 | # Transform and scale each batch to align with previous batch 92 | # all_predictions is list of (prediction_for_batch, frame_indices_for_batch) 93 | # prediction_for_batch is the result returned by run_model, and is class depth_anything_3.specs.Prediction 94 | # and has these fields: ['depth', 'is_metric', 'sky', 'conf', 'extrinsics', 'intrinsics', 'processed_images', 'gaussians', 'aux', 'scale_factor'] 95 | def align_batches(all_predictions): 96 | if not all_predictions: 97 | return [] 98 | 99 | # result, a list of predictions with aligned extrinsics and depths 100 | aligned_predictions = [] 101 | 102 | # First batch doesn't need aligning 103 | first_pred, first_indices = all_predictions[0] 104 | aligned_predictions.append(first_pred) 105 | prev_pred = first_pred 106 | prev_indices = first_indices 107 | 108 | # Loop through the rest of the batches 109 | for i in range(1, len(all_predictions)): 110 | curr_pred_orig, curr_indices = all_predictions[i] 111 | 112 | # Shallow copy to avoid modifying original 113 | import copy 114 | curr_pred = copy.copy(curr_pred_orig) 115 | 116 | curr_depth = _to_tensor(curr_pred.depth).float() # depth of every pixel in every image in the batch 117 | curr_conf = _to_tensor(curr_pred.conf).float() # confidence of depth of every pixel in every image in the batch, range 0 to more than 1 118 | curr_ext = _to_tensor(curr_pred.extrinsics) # camera position and rotation for every image in the batch (or None for Metric/Mono model) 119 | if curr_ext is not None: 120 | curr_ext = curr_ext.float() 121 | 122 | # Alignment for Metric/Mono model is not supported yet. TODO: still align the depth based on overlap images 123 | if curr_ext is None: 124 | print(f"Batch {i} has no extrinsics, skipping alignment.") 125 | aligned_predictions.append(curr_pred) 126 | prev_pred = curr_pred 127 | prev_indices = curr_indices 128 | continue 129 | 130 | # depths, depth confidences, and camera poses for all images in the previous batch 131 | prev_depth = _to_tensor(prev_pred.depth).float() 132 | prev_conf = _to_tensor(prev_pred.conf).float() 133 | prev_ext = _to_tensor(prev_pred.extrinsics).float() 134 | 135 | # Find overlapping indices 136 | common_indices = set(prev_indices) & set(curr_indices) 137 | if not common_indices: 138 | print(f"Warning: Batch {i} has no overlap with Batch {i-1}. 
Alignment may be poor.") 139 | aligned_predictions.append(curr_pred) 140 | prev_pred = curr_pred 141 | prev_indices = curr_indices 142 | continue 143 | 144 | # Sort common indices to ensure deterministic order 145 | common_indices = sorted(list(common_indices)) 146 | 147 | # Collect valid pixels for depth scaling 148 | valid_prev_depths = [] 149 | valid_curr_depths = [] 150 | 151 | # Collect transforms for extrinsic alignment 152 | transforms = [] 153 | 154 | # for each overlapping frame 155 | for global_idx in common_indices: 156 | # Find local index in prev and curr 157 | idx_prev = prev_indices.index(global_idx) 158 | idx_curr = curr_indices.index(global_idx) 159 | 160 | d_prev = prev_depth[idx_prev] # [H, W] depth of every pixel for this frame in the previous batch 161 | d_curr = curr_depth[idx_curr] # [H, W] depth of every pixel for this frame in the current batch 162 | c_prev = prev_conf[idx_prev] # [H, W] confidence of every pixel for this frame in the previous batch 163 | 164 | # We only want to calculate scale from pixels that aren't sky 165 | # For Metric/Mono/Nested models use the returned sky mask 166 | # For base models there is no sky mask, so assume all pixels are non-sky 167 | non_sky_mask = torch.ones_like(d_prev, dtype=torch.bool) # [H, W] 168 | if hasattr(prev_pred, 'sky') and prev_pred.sky is not None: 169 | non_sky_mask = non_sky_mask & compute_sky_mask(_to_tensor(prev_pred.sky)[idx_prev], threshold=0.3) 170 | if hasattr(curr_pred, 'sky') and curr_pred.sky is not None: 171 | non_sky_mask = non_sky_mask & compute_sky_mask(_to_tensor(curr_pred.sky)[idx_curr], threshold=0.3) 172 | 173 | # Use compute_alignment_mask for robust pixel selection 174 | # Ensure inputs are at least 3D [1, H, W] for the utils 175 | d_prev_3d = d_prev.unsqueeze(0) 176 | d_curr_3d = d_curr.unsqueeze(0) 177 | c_prev_3d = c_prev.unsqueeze(0) 178 | non_sky_mask_3d = non_sky_mask.unsqueeze(0) 179 | 180 | c_prev_ns = c_prev[non_sky_mask] # [num_non_sky_pixels] 181 | if c_prev_ns.numel() > 0: 182 | c_prev_sampled = sample_tensor_for_quantile(c_prev_ns, max_samples=100000) # if there are more than 100,000 non-sky pixels, randomly select 100,000 of them 183 | median_conf = torch.quantile(c_prev_sampled, 0.5) # calculate the median confidence (half the pixels have higher confidence than this, half have lower confidence) 184 | 185 | # DA3 function, mask array is true for pixels that aren't sky and whose confidence is better than half the other non-sky pixels 186 | mask_3d = compute_alignment_mask( 187 | c_prev_3d, non_sky_mask_3d, d_prev_3d, d_curr_3d, median_conf 188 | ) # [1, H, W] boolean mask 189 | mask = mask_3d.squeeze(0) # [H, W] 190 | else: 191 | mask = non_sky_mask # [H, W] 192 | 193 | # make sure there are at least 11 valid pixels (ie. 
there were originally at least 22 non-sky pixels before we chose the best half) 194 | if mask.sum() > 10: 195 | valid_prev_depths.append(d_prev[mask]) # [num_valid_pixels] 196 | valid_curr_depths.append(d_curr[mask]) # [num_valid_pixels] 197 | 198 | E_prev = _extrinsic_to_4x4_torch(prev_ext[idx_prev]) # 4x4 camera transform matrix for this frame in previous batch 199 | E_curr = _extrinsic_to_4x4_torch(curr_ext[idx_curr]) # 4x4 camera transform matrix for this frame in current batch 200 | 201 | transforms.append((E_prev, E_curr)) 202 | 203 | # All overlap frames have now been processed 204 | # Compute global scale factor 205 | if valid_prev_depths: 206 | all_prev = torch.cat(valid_prev_depths) # [total_valid_pixels] 207 | all_curr = torch.cat(valid_curr_depths) # [total_valid_pixels] 208 | # least_squares_scale_scalar(target, source) returns scale such that source * scale ≈ target 209 | # We want curr_depth * scale ≈ prev_depth, so target=all_prev, source=all_curr 210 | scale = least_squares_scale_scalar(all_prev, all_curr) 211 | else: 212 | scale = torch.tensor(1.0) # 1x scale if there were no overlap frames with at least 22 non-sky pixels 213 | 214 | scale_val = float(scale.item()) 215 | print(f"Batch {i} alignment: scale={scale_val}") 216 | 217 | # Step 1: Scale depth and extrinsic translations together (like DA3 does) 218 | # This handles all scaling in one place 219 | curr_pred.depth = _to_numpy(curr_depth * scale) 220 | curr_ext[:, :, 3] = curr_ext[:, :, 3] * scale # scale all translations 221 | 222 | # Step 2: Compute rigid alignment transform from first overlap frame 223 | # We want to find T such that: E_curr_scaled @ T ≈ E_prev 224 | # Rearranging: T = inv(E_curr_scaled) @ E_prev 225 | E_prev, E_curr_orig = transforms[0] 226 | E_curr_scaled = _extrinsic_to_4x4_torch(curr_ext[curr_indices.index(common_indices[0])]) 227 | T_align = _invert_4x4_torch(E_curr_scaled) @ E_prev 228 | 229 | # Step 3: Apply rigid alignment to all extrinsics 230 | # E_new = E_curr_scaled @ T 231 | new_extrinsics = [] 232 | for ext_3x4 in curr_ext: 233 | E_curr = _extrinsic_to_4x4_torch(ext_3x4) 234 | E_new = E_curr @ T_align 235 | new_extrinsics.append(E_new[:3, :4]) 236 | 237 | curr_pred.extrinsics = _to_numpy(torch.stack(new_extrinsics)) 238 | 239 | # Add the aligned prediction for this batch to the result list 240 | aligned_predictions.append(curr_pred) 241 | prev_pred = curr_pred 242 | prev_indices = curr_indices 243 | 244 | # We've finished all batches, return a list of aligned predictions 245 | return aligned_predictions 246 | 247 | def compute_motion_scores(predictions, threshold_ratio=0.1): 248 | """ 249 | Computes a motion score for each pixel based on consistency with other frames. 250 | Score is the number of other frames that see 'empty space' where the point should be. 
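A frame is counted as seeing empty space when its own depth at the reprojected pixel exceeds the point's projected depth by more than threshold_ratio (a relative margin); higher scores therefore indicate points that likely belong to moving objects.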
251 | """ 252 | import torch 253 | 254 | # Collect all data 255 | all_depths = [] 256 | all_extrinsics = [] 257 | all_intrinsics = [] 258 | frame_mapping = [] # List of (batch_index, frame_index_in_batch) 259 | 260 | for b_idx, pred in enumerate(predictions): 261 | # Ensure we have tensors 262 | d = _to_tensor(pred.depth).float() 263 | e = _to_tensor(pred.extrinsics).float() 264 | k = _to_tensor(pred.intrinsics).float() 265 | 266 | # Initialize motion attribute on prediction object 267 | if not hasattr(pred, 'motion'): 268 | pred.motion = torch.zeros_like(d) 269 | 270 | for f_idx in range(d.shape[0]): 271 | all_depths.append(d[f_idx]) 272 | all_extrinsics.append(e[f_idx]) 273 | all_intrinsics.append(k[f_idx]) 274 | frame_mapping.append((b_idx, f_idx)) 275 | 276 | if not all_depths: 277 | return 278 | 279 | # Stack 280 | depths = torch.stack(all_depths) # [N, H, W] 281 | extrinsics = torch.stack(all_extrinsics) # [N, 3, 4] 282 | intrinsics = torch.stack(all_intrinsics) # [N, 3, 3] 283 | 284 | N, H, W = depths.shape 285 | device = depths.device 286 | 287 | print(f"Computing motion scores for {N} frames...") 288 | 289 | # Construct 4x4 matrices 290 | Es = torch.eye(4, device=device).unsqueeze(0).repeat(N, 1, 1) 291 | Es[:, :3, :4] = extrinsics 292 | Es_inv = torch.linalg.inv(Es) 293 | 294 | # Pixel grid 295 | y, x = torch.meshgrid(torch.arange(H, device=device), torch.arange(W, device=device), indexing='ij') 296 | pixels_hom = torch.stack([x.flatten(), y.flatten(), torch.ones_like(x.flatten())], dim=0).float() # [3, HW] 297 | 298 | # Loop over source frames 299 | for i in range(N): 300 | if i % 10 == 0: 301 | print(f" Processing frame {i+1}/{N}") 302 | 303 | # Unproject frame i 304 | K_i_inv = torch.linalg.inv(intrinsics[i]) 305 | rays_i = K_i_inv @ pixels_hom # [3, HW] 306 | d_i = depths[i].flatten() # [HW] 307 | 308 | # Filter valid depth 309 | valid_mask = d_i > 0 310 | if not valid_mask.any(): 311 | continue 312 | 313 | points_cam_i = rays_i[:, valid_mask] * d_i[valid_mask].unsqueeze(0) # [3, M] 314 | points_cam_i_hom = torch.cat([points_cam_i, torch.ones((1, points_cam_i.shape[1]), device=device)], dim=0) # [4, M] 315 | 316 | # Transform to world 317 | points_world_hom = Es_inv[i] @ points_cam_i_hom # [4, M] 318 | 319 | motion_votes = torch.zeros(points_cam_i.shape[1], device=device) 320 | 321 | # Check against all other frames j 322 | # Optimization: Process in chunks if N is large? 323 | # For now, simple loop. 
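# The nested loops below cost roughly O(N^2 * H * W): every valid point of frame i is reprojected into each of the other N - 1 frames and compared against that frame's depth map.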
324 | for j in range(N): 325 | if i == j: 326 | continue 327 | 328 | # Project to frame j 329 | points_cam_j_hom = Es[j] @ points_world_hom # [4, M] 330 | # Check if in front of camera 331 | z_j = points_cam_j_hom[2] 332 | in_front = z_j > 0.1 # Near plane 333 | 334 | if not in_front.any(): 335 | continue 336 | 337 | # Project to pixels 338 | points_cam_j = points_cam_j_hom[:3] 339 | proj_j = intrinsics[j] @ points_cam_j 340 | u_j = proj_j[0] / proj_j[2] 341 | v_j = proj_j[1] / proj_j[2] 342 | 343 | # Check bounds 344 | in_bounds = (u_j >= 0) & (u_j < W - 1) & (v_j >= 0) & (v_j < H - 1) & in_front 345 | 346 | if not in_bounds.any(): 347 | continue 348 | 349 | # Sample depth from frame j 350 | u_j_int = torch.round(u_j).long() 351 | v_j_int = torch.round(v_j).long() 352 | 353 | # Filter indices 354 | valid_indices = torch.where(in_bounds)[0] 355 | 356 | u_sample = u_j_int[valid_indices] 357 | v_sample = v_j_int[valid_indices] 358 | 359 | d_target = depths[j, v_sample, u_sample] 360 | d_proj = z_j[valid_indices] 361 | 362 | # Check for "empty space" 363 | # If d_target > d_proj * (1 + threshold) 364 | is_empty = d_target > d_proj * (1 + threshold_ratio) 365 | 366 | # Accumulate votes 367 | motion_votes[valid_indices[is_empty]] += 1 368 | 369 | # Store result 370 | full_motion = torch.zeros(H*W, device=device) 371 | full_motion[valid_mask] = motion_votes 372 | 373 | # Save to prediction object 374 | b_idx, f_idx = frame_mapping[i] 375 | predictions[b_idx].motion[f_idx] = full_motion.reshape(H, W) 376 | 377 | def convert_prediction_to_dict(prediction, image_paths=None, output_debug_images=False, segmentation_data=None, class_names=None): 378 | predictions = {} 379 | 380 | # images is already numpy in your current pipeline 381 | predictions['images'] = prediction.processed_images.astype(np.float32) / 255.0 # [N, H, W, 3] 382 | 383 | # depth / extrinsics / intrinsics may be torch tensors after combination; ensure numpy 384 | predictions['depth'] = _to_numpy(prediction.depth) 385 | predictions['extrinsic'] = _to_numpy(prediction.extrinsics) 386 | predictions['intrinsic'] = _to_numpy(prediction.intrinsics) 387 | predictions['conf'] = _to_numpy(prediction.conf) 388 | 389 | if hasattr(prediction, 'motion'): 390 | predictions['motion'] = _to_numpy(prediction.motion) 391 | 392 | if class_names is not None: 393 | predictions['class_names'] = class_names 394 | 395 | if segmentation_data is not None: 396 | # segmentation_data is a list of dicts (one per frame) 397 | # { "masks": [M, h, w], "ids": [M], "classes": [M] } 398 | # We need to resize masks to match depth map size [H, W] 399 | 400 | N, H, W = predictions['depth'].shape 401 | 402 | # We will store a dense ID map for each frame: [N, H, W] 403 | # Initialize with -1 (no object) 404 | seg_id_map = np.full((N, H, W), -1, dtype=np.int32) 405 | 406 | # Also store metadata about IDs (class, etc.) 
407 | # Global map of ID -> Class 408 | id_to_class = {} 409 | 410 | for i in range(N): 411 | if i >= len(segmentation_data): break 412 | 413 | frame_seg = segmentation_data[i] 414 | masks = frame_seg.get("masks", []) 415 | ids = frame_seg.get("ids", []) 416 | classes = frame_seg.get("classes", []) 417 | 418 | if len(masks) == 0: continue 419 | 420 | # Pre-load image for debug if needed 421 | orig_img = None 422 | debug_dir = None 423 | if output_debug_images and image_paths is not None and i < len(image_paths): 424 | try: 425 | first_img_dir = os.path.dirname(image_paths[0]) 426 | debug_dir = os.path.join(first_img_dir, "debug_output") 427 | os.makedirs(debug_dir, exist_ok=True) 428 | orig_img = cv2.imread(image_paths[i]) 429 | except Exception as e: 430 | print(f"Debug image load failed: {e}") 431 | 432 | # Resize masks to H, W 433 | # masks is [M, h_small, w_small] 434 | # We iterate and resize 435 | for m_idx, mask in enumerate(masks): 436 | # mask is float or bool? YOLO masks are usually float 0..1 or binary 437 | # Resize to H, W 438 | # cv2.resize expects (W, H) 439 | resized_mask = cv2.resize(mask, (W, H), interpolation=cv2.INTER_LINEAR) 440 | 441 | obj_id = ids[m_idx] if len(ids) > m_idx else -1 442 | obj_cls = classes[m_idx] if len(classes) > m_idx else -1 443 | 444 | if output_debug_images and orig_img is not None: 445 | try: 446 | # Native mask 447 | h_nat, w_nat = mask.shape 448 | mask_nat_vis = (mask * 255).astype(np.uint8) 449 | cv2.imwrite(os.path.join(debug_dir, f"frame_{i}_obj_{obj_id}_mask_native.png"), mask_nat_vis) 450 | 451 | # Native image 452 | img_nat = cv2.resize(orig_img, (w_nat, h_nat)) 453 | cv2.imwrite(os.path.join(debug_dir, f"frame_{i}_obj_{obj_id}_image_native.png"), img_nat) 454 | 455 | # Resized mask 456 | mask_res_vis = (resized_mask * 255).astype(np.uint8) 457 | cv2.imwrite(os.path.join(debug_dir, f"frame_{i}_obj_{obj_id}_mask_resized.png"), mask_res_vis) 458 | except Exception as e: 459 | print(f"Failed to save debug mask: {e}") 460 | 461 | # Threshold to binary 462 | binary_mask = resized_mask > 0.5 463 | 464 | if obj_id != -1: 465 | id_to_class[obj_id] = obj_cls 466 | # Assign ID to map 467 | # Note: Overlapping masks will overwrite. 468 | # Ideally we sort by size or something, but YOLO usually handles NMS. 
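# (One possible refinement, not implemented here: write masks in descending area order so smaller objects are painted last and survive overlaps.)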
469 | seg_id_map[i][binary_mask] = obj_id 470 | 471 | predictions['seg_id_map'] = seg_id_map 472 | predictions['id_to_class'] = id_to_class 473 | 474 | if image_paths is not None and output_debug_images: 475 | predictions['image_paths'] = image_paths 476 | 477 | # Save debug images 478 | try: 479 | # Create debug directory 480 | first_img_dir = os.path.dirname(image_paths[0]) 481 | debug_dir = os.path.join(first_img_dir, "debug_output") 482 | os.makedirs(debug_dir, exist_ok=True) 483 | 484 | for i, img_path in enumerate(image_paths): 485 | base_name = os.path.splitext(os.path.basename(img_path))[0] 486 | 487 | # Depth 488 | depth_map = predictions['depth'][i] 489 | # Normalize depth for visualization: 0-255 490 | d_min = np.nanmin(depth_map) 491 | d_max = np.nanmax(depth_map) 492 | if d_max > d_min: 493 | depth_norm = ((depth_map - d_min) / (d_max - d_min) * 255.0).astype(np.uint8) 494 | else: 495 | depth_norm = np.zeros_like(depth_map, dtype=np.uint8) 496 | 497 | depth_filename = os.path.join(debug_dir, f"{base_name}_depth.png") 498 | cv2.imwrite(depth_filename, depth_norm) 499 | 500 | # Confidence 501 | conf_map = predictions['conf'][i] 502 | # Scale confidence: * 10, clip to 255 503 | conf_scaled = np.clip(conf_map * 10.0, 0, 255).astype(np.uint8) 504 | 505 | conf_filename = os.path.join(debug_dir, f"{base_name}_conf.png") 506 | cv2.imwrite(conf_filename, conf_scaled) 507 | 508 | # Color Image 509 | color_img = predictions['images'][i] 510 | color_img_uint8 = (np.clip(color_img, 0, 1) * 255).astype(np.uint8) 511 | color_img_bgr = cv2.cvtColor(color_img_uint8, cv2.COLOR_RGB2BGR) 512 | color_filename = os.path.join(debug_dir, f"{base_name}_color.png") 513 | cv2.imwrite(color_filename, color_img_bgr) 514 | 515 | # Bad Confidence Overlay 516 | H, W = conf_map.shape 517 | bad_img = np.zeros((H, W, 4), dtype=np.uint8) # BGRA 518 | 519 | # Yellow for conf <= 2.0 520 | mask_yellow = (conf_map <= 2.0) 521 | bad_img[mask_yellow] = [0, 255, 255, 255] # Yellow 522 | 523 | # Red for conf <= 1.0 524 | mask_red = (conf_map <= 1.0) 525 | bad_img[mask_red] = [0, 0, 255, 255] # Red 526 | 527 | # Magenta for conf <= 1.0 adjacent to conf > 1.0 528 | mask_good = (conf_map > 1.0) 529 | kernel = np.ones((3,3), np.uint8) 530 | # Dilate good area to find neighbors 531 | dilated_good = cv2.dilate(mask_good.astype(np.uint8), kernel, iterations=1).astype(bool) 532 | # Intersection: Is red AND is touched by good 533 | mask_magenta = mask_red & dilated_good 534 | bad_img[mask_magenta] = [255, 0, 255, 255] # Magenta 535 | 536 | bad_filename = os.path.join(debug_dir, f"{base_name}_bad.png") 537 | cv2.imwrite(bad_filename, bad_img) 538 | 539 | # Depth Gradient 540 | grad_x = cv2.Sobel(depth_map, cv2.CV_64F, 1, 0, ksize=3) 541 | grad_y = cv2.Sobel(depth_map, cv2.CV_64F, 0, 1, ksize=3) 542 | grad_mag = np.sqrt(grad_x**2 + grad_y**2) 543 | 544 | g_min = np.nanmin(grad_mag) 545 | g_max = np.nanmax(grad_mag) 546 | if g_max > g_min: 547 | grad_norm = ((grad_mag - g_min) / (g_max - g_min) * 255.0).astype(np.uint8) 548 | else: 549 | grad_norm = np.zeros_like(grad_mag, dtype=np.uint8) 550 | 551 | grad_filename = os.path.join(debug_dir, f"{base_name}_grad.png") 552 | cv2.imwrite(grad_filename, grad_norm) 553 | 554 | except ImportError: 555 | print("Warning: cv2 not found, skipping debug image output.") 556 | except Exception as e: 557 | print(f"Warning: Failed to save debug images: {e}") 558 | elif image_paths is not None: 559 | predictions['image_paths'] = image_paths 560 | 561 | print("DEBUG shapes:") 562 | print(" images:", 
predictions['images'].shape) 563 | print(" depth:", predictions['depth'].shape) 564 | print(" extrinsic:", np.array(predictions['extrinsic']).shape) 565 | print(" intrinsic:", np.array(predictions['intrinsic']).shape) 566 | print("Computing world points from depth map...") 567 | 568 | if prediction.extrinsics is None or prediction.intrinsics is None: 569 | raise ValueError("Prediction has no camera parameters; cannot create world-space point cloud.") 570 | 571 | world_points = unproject_depth_map_to_point_map( 572 | predictions['depth'], 573 | predictions['extrinsic'], 574 | predictions['intrinsic'], 575 | ) 576 | predictions["world_points_from_depth"] = world_points 577 | return predictions 578 | 579 | # Based on da3_repo/src/depth_anything_3/model/da3.py 580 | def combine_base_and_metric(base_list, metric_list): 581 | """Combine base predictions (with poses) with metric predictions (no poses). 582 | 583 | This version operates purely on [N, H, W] tensors per batch and 584 | re-implements the metric scaling logic from DA3 so that batches may 585 | have different sizes (e.g. a shorter last batch). 586 | 587 | Args: 588 | base_list: list of base `Prediction` objects (one per batch), each with 589 | depth [N_b, H, W], conf [N_b, H, W], intrinsics [N_b, 3, 3], 590 | extrinsics [N_b, 3, 4]. 591 | metric_list: list of metric `Prediction` objects (one per batch), each with 592 | depth [N_m, H, W], sky [N_m, H, W]. For scale_base you typically 593 | pass a single-element list and let total metric frames 594 | be <= total base frames. 595 | 596 | Returns: 597 | List of base `Prediction` objects (same length as base_list) whose 598 | depths and extrinsics have been globally scaled to metric units. 599 | """ 600 | 601 | if not base_list: 602 | return [] 603 | 604 | # Concatenate all base frames into a single [Nb_total, H, W] 605 | base_depth_all = [] 606 | base_conf_all = [] 607 | base_intr_all = [] 608 | 609 | for pred in base_list: 610 | d = _to_tensor(pred.depth).float() # [N_b, H, W] 611 | c = _to_tensor(pred.conf).float() # [N_b, H, W] 612 | K = _to_tensor(pred.intrinsics).float() # [N_b, 3, 3] 613 | if d.ndim != 3 or c.ndim != 3: 614 | raise ValueError(f"Base depth/conf must be [N,H,W], got depth={d.shape}, conf={c.shape}") 615 | base_depth_all.append(d) 616 | base_conf_all.append(c) 617 | base_intr_all.append(K) 618 | 619 | depth_all = torch.cat(base_depth_all, dim=0) # [Nb_total, H, W] 620 | conf_all = torch.cat(base_conf_all, dim=0) # [Nb_total, H, W] 621 | intr_all = torch.cat(base_intr_all, dim=0) # [Nb_total, 3, 3] 622 | 623 | # Concatenate all metric frames similarly 624 | metric_depth_all = [] 625 | sky_all = [] 626 | for pred in metric_list: 627 | md = _to_tensor(pred.depth).float() # [Nm, H, W] 628 | sky = _to_tensor(pred.sky).float() # [Nm, H, W] 629 | if md.ndim != 3 or sky.ndim != 3: 630 | raise ValueError(f"Metric depth/sky must be [N,H,W], got depth={md.shape}, sky={sky.shape}") 631 | metric_depth_all.append(md) 632 | sky_all.append(sky) 633 | 634 | if not metric_depth_all: 635 | raise ValueError("Metric prediction list is empty or missing required fields") 636 | 637 | metric_all = torch.cat(metric_depth_all, dim=0) # [Nm_total, H, W] 638 | sky_all = torch.cat(sky_all, dim=0) # [Nm_total, H, W] 639 | 640 | Nb_total = depth_all.shape[0] 641 | Nm_total = metric_all.shape[0] 642 | 643 | # Restrict to overlapping frames in the sequence sense 644 | N_overlap = min(Nb_total, Nm_total) 645 | if N_overlap <= 0: 646 | raise ValueError("Metric prediction has no frames; cannot compute 
scale.") 647 | 648 | depth_overlap = depth_all[:N_overlap] # [N_overlap, H, W] 649 | metric_overlap = metric_all[:N_overlap] # [N_overlap, H, W] 650 | sky_overlap = sky_all[:N_overlap] # [N_overlap, H, W] 651 | ixt_overlap = intr_all[:N_overlap] # [N_overlap, 3, 3] 652 | 653 | # Inline metric scaling logic from DA3's apply_metric_scaling for [N, H, W] 654 | # focal_length = (fx + fy) / 2, depth_scaled = depth * (f / scale_factor) 655 | scale_factor_metric = 300.0 656 | focal_length = (ixt_overlap[:, 0, 0] + ixt_overlap[:, 1, 1]) / 2.0 # [N_overlap] 657 | metric_scaled = metric_overlap * (focal_length[:, None, None] / scale_factor_metric) 658 | 659 | # Non-sky mask and alignment only on overlapping frames 660 | non_sky_mask = compute_sky_mask(sky_overlap, threshold=0.3) # [N_overlap, H, W] 661 | if non_sky_mask.sum() <= 10: 662 | raise ValueError("Insufficient non-sky pixels for alignment") 663 | 664 | depth_conf_overlap = conf_all[:N_overlap] # [N_overlap, H, W] 665 | depth_conf_ns = depth_conf_overlap[non_sky_mask] 666 | depth_conf_sampled = sample_tensor_for_quantile(depth_conf_ns, max_samples=100000) 667 | median_conf = torch.quantile(depth_conf_sampled, 0.5) 668 | 669 | align_mask = compute_alignment_mask( 670 | depth_conf_overlap, non_sky_mask, depth_overlap, metric_scaled, median_conf 671 | ) 672 | 673 | valid_depth = depth_overlap[align_mask] 674 | valid_metric_depth = metric_scaled[align_mask] 675 | scale_factor = least_squares_scale_scalar(valid_metric_depth, valid_depth) 676 | 677 | # Scale depth and extrinsics for each base batch 678 | scaled_base_list = [] 679 | for pred in base_list: 680 | ext = _to_tensor(pred.extrinsics) 681 | if ext is not None: 682 | if ext.ndim != 3 or ext.shape[1:] != (3, 4): 683 | raise ValueError(f"Expected extrinsics [N,3,4], got {ext.shape}") 684 | ext = ext.float() 685 | ext[:, :, 3] *= scale_factor 686 | 687 | pred.depth = _to_tensor(pred.depth) * scale_factor 688 | if ext is not None: 689 | pred.extrinsics = ext 690 | pred.is_metric = 1 691 | pred.scale_factor = float(scale_factor.item()) 692 | scaled_base_list.append(pred) 693 | 694 | return scaled_base_list 695 | 696 | 697 | def combine_base_with_metric_depth(base, metric): 698 | """Combine base prediction cameras with raw metric model depth. 699 | 700 | This variant keeps **base intrinsics/extrinsics/conf** but **replaces 701 | depth with metric.depth in metres**, then applies the same sky-handling 702 | logic as `combine_base_and_metric`. 
703 | 704 | Assumes shapes: 705 | - base.depth: [N, H, W] 706 | - metric.depth: [N, H, W] 707 | - base.intrinsics: [N, 3, 3] 708 | - base.extrinsics: [N, 3, 4] 709 | - metric.sky: [N, H, W] 710 | """ 711 | output = base 712 | 713 | # Base / metric depths and sky mask 714 | base_depth = _to_tensor(base.depth).float() # [B, H, W] 715 | metric_depth = _to_tensor(metric.depth).float() # [B, H, W] 716 | sky = _to_tensor(metric.sky).float() # [B, H, W] 717 | 718 | if base_depth.ndim != 3 or metric_depth.ndim != 3: 719 | raise ValueError( 720 | f"Unexpected depth shapes: base={base_depth.shape}, metric={metric_depth.shape}" 721 | ) 722 | 723 | # Non-sky mask and basic sanity check 724 | non_sky_mask = compute_sky_mask(sky, threshold=0.3) 725 | if non_sky_mask.sum() <= 10: 726 | raise ValueError("Insufficient non-sky pixels for metric depth sky handling") 727 | 728 | # Compute global scale factor aligning base depth to metric depth 729 | # Use robust alignment mask - convert conf to tensor if needed 730 | depth_conf = _to_tensor(output.conf).float() 731 | depth_conf_ns = depth_conf[non_sky_mask] 732 | depth_conf_sampled = sample_tensor_for_quantile(depth_conf_ns, max_samples=100000) 733 | median_conf = torch.quantile(depth_conf_sampled, 0.5) 734 | 735 | align_mask = compute_alignment_mask( 736 | depth_conf, non_sky_mask, base_depth, metric_depth, median_conf 737 | ) 738 | 739 | valid_base = base_depth[align_mask] 740 | valid_metric = metric_depth[align_mask] 741 | scale_factor = least_squares_scale_scalar(valid_metric, valid_base) 742 | 743 | # Use metric depth as final depth (in metres) 744 | depth = metric_depth 745 | 746 | # Estimate a far depth for sky regions 747 | non_sky_depth = depth[non_sky_mask] 748 | if non_sky_depth.numel() > 100000: 749 | idx = torch.randint(0, non_sky_depth.numel(), (100000,), device=non_sky_depth.device) 750 | sampled_depth = non_sky_depth[idx] 751 | else: 752 | sampled_depth = non_sky_depth 753 | 754 | non_sky_max = torch.quantile(sampled_depth, 0.99) 755 | non_sky_max = torch.minimum(non_sky_max, torch.tensor(200.0, device=depth.device)) 756 | 757 | depth_4d = depth.unsqueeze(1) 758 | dummy_conf = torch.ones_like(depth_4d) 759 | depth_4d, _ = set_sky_regions_to_max_depth( 760 | depth_4d, dummy_conf, non_sky_mask.unsqueeze(1), max_depth=non_sky_max 761 | ) 762 | depth = depth_4d.squeeze(1) 763 | 764 | # Scale base extrinsics translation so cameras match metric scale 765 | extrinsics = _to_tensor(output.extrinsics) 766 | print("DEBUG combine_base_with_metric_depth: extrinsics shape:", extrinsics.shape) 767 | 768 | if extrinsics.ndim != 3 or extrinsics.shape[1:] != (3, 4): 769 | raise ValueError(f"Expected extrinsics [N,3,4], got {extrinsics.shape}") 770 | 771 | extrinsics = extrinsics.float() 772 | extrinsics[:, :, 3] = extrinsics[:, :, 3] * scale_factor 773 | 774 | # Write back into output: metric depth + scaled base cameras 775 | output.depth = depth 776 | output.extrinsics = extrinsics 777 | output.is_metric = 1 778 | output.scale_factor = float(scale_factor.item()) 779 | 780 | return output 781 | 782 | def get_or_create_point_material(): 783 | mat = bpy.data.materials.get("PointMaterial") 784 | if mat is None: 785 | mat = bpy.data.materials.new(name="PointMaterial") 786 | mat.use_nodes = True 787 | nodes = mat.node_tree.nodes 788 | links = mat.node_tree.links 789 | for node in nodes: 790 | nodes.remove(node) 791 | 792 | # Image color attribute 793 | attr_node = nodes.new('ShaderNodeAttribute') 794 | attr_node.attribute_name = "point_color" 795 | 
attr_node.location = (-600, 200) 796 | 797 | # Confidence attribute (raw values) 798 | conf_attr_node = nodes.new('ShaderNodeAttribute') 799 | conf_attr_node.attribute_name = "conf" 800 | conf_attr_node.location = (-600, -200) 801 | 802 | # Map Range: 0-10 -> 0-1 (so conf values map to reasonable ramp positions) 803 | map_range = nodes.new('ShaderNodeMapRange') 804 | map_range.location = (-400, -200) 805 | map_range.clamp = True 806 | map_range.inputs['From Min'].default_value = 0.0 807 | map_range.inputs['From Max'].default_value = 10.0 808 | map_range.inputs['To Min'].default_value = 0.0 809 | map_range.inputs['To Max'].default_value = 1.0 810 | 811 | # Color Ramp: red (low) -> green (mid) -> blue (high) 812 | # Positions: 0.2 = conf 2, 0.5 = conf 5, 0.6 = conf 6 813 | color_ramp = nodes.new('ShaderNodeValToRGB') 814 | color_ramp.location = (-150, -200) 815 | # Clear default elements and set up: red at 0, green at 0.5-0.6, blue at 1 816 | ramp = color_ramp.color_ramp 817 | ramp.elements[0].position = 0.0 818 | ramp.elements[0].color = (1, 0, 0, 1) # Red (conf < 2) 819 | ramp.elements[1].position = 0.2 820 | ramp.elements[1].color = (1, 0, 0, 1) # Still red at conf=2 821 | # Add green zone 822 | green_start = ramp.elements.new(0.5) 823 | green_start.color = (0, 1, 0, 1) # Green at conf=5 824 | green_end = ramp.elements.new(0.6) 825 | green_end.color = (0, 1, 0, 1) # Green at conf=6 826 | # Add blue 827 | blue_elem = ramp.elements.new(1.0) 828 | blue_elem.color = (0, 0, 1, 1) # Blue at conf=10 829 | 830 | # Mix shader to switch between image color and confidence color 831 | mix_node = nodes.new('ShaderNodeMix') 832 | mix_node.data_type = 'RGBA' 833 | mix_node.location = (100, 100) 834 | mix_node.inputs['Factor'].default_value = 0.0 # 0 = image color, 1 = confidence color 835 | 836 | bsdf = nodes.new('ShaderNodeBsdfPrincipled') 837 | bsdf.location = (300, 100) 838 | 839 | output_node_material = nodes.new('ShaderNodeOutputMaterial') 840 | output_node_material.location = (550, 100) 841 | 842 | # Connect nodes 843 | links.new(conf_attr_node.outputs['Fac'], map_range.inputs['Value']) 844 | links.new(map_range.outputs['Result'], color_ramp.inputs['Fac']) 845 | links.new(attr_node.outputs['Color'], mix_node.inputs['A']) 846 | links.new(color_ramp.outputs['Color'], mix_node.inputs['B']) 847 | links.new(mix_node.outputs['Result'], bsdf.inputs['Base Color']) 848 | links.new(bsdf.outputs['BSDF'], output_node_material.inputs['Surface']) 849 | return mat 850 | 851 | def add_point_cloud_geo_nodes(obj, mat): 852 | geo_mod = obj.modifiers.new(name="GeometryNodes", type='NODES') 853 | node_group = bpy.data.node_groups.new(name="PointCloud", type='GeometryNodeTree') 854 | 855 | # Inputs 856 | node_group.interface.new_socket(name="Geometry", in_out="INPUT", socket_type="NodeSocketGeometry") 857 | node_group.interface.new_socket(name="Threshold", in_out="INPUT", socket_type="NodeSocketFloat") 858 | node_group.interface.items_tree[-1].default_value = 0.5 859 | node_group.interface.new_socket(name="Scale", in_out="INPUT", socket_type="NodeSocketFloat") 860 | node_group.interface.items_tree[-1].default_value = 1.0 861 | node_group.interface.items_tree[-1].min_value = 0.0 862 | 863 | # Outputs 864 | node_group.interface.new_socket(name="Geometry", in_out="OUTPUT", socket_type="NodeSocketGeometry") 865 | 866 | geo_mod.node_group = node_group 867 | 868 | # Nodes 869 | input_node = node_group.nodes.new('NodeGroupInput') 870 | output_node = node_group.nodes.new('NodeGroupOutput') 871 | 872 | mesh_to_points = 
node_group.nodes.new('GeometryNodeMeshToPoints') 873 | # Radius controlled by Scale input * 0.002 874 | math_node = node_group.nodes.new('ShaderNodeMath') 875 | math_node.operation = 'MULTIPLY' 876 | math_node.inputs[1].default_value = 0.002 877 | 878 | named_attr = node_group.nodes.new('GeometryNodeInputNamedAttribute') 879 | named_attr.inputs['Name'].default_value = "conf" 880 | named_attr.data_type = 'FLOAT' 881 | 882 | compare = node_group.nodes.new('FunctionNodeCompare') 883 | compare.data_type = 'FLOAT' 884 | compare.operation = 'LESS_THAN' 885 | 886 | delete_geo = node_group.nodes.new('GeometryNodeDeleteGeometry') 887 | delete_geo.domain = 'POINT' 888 | 889 | set_material_node = node_group.nodes.new('GeometryNodeSetMaterial') 890 | set_material_node.inputs['Material'].default_value = mat 891 | 892 | # Links 893 | node_group.links.new(input_node.outputs['Geometry'], mesh_to_points.inputs['Mesh']) 894 | 895 | # Scale logic 896 | node_group.links.new(input_node.outputs['Scale'], math_node.inputs[0]) 897 | node_group.links.new(math_node.outputs['Value'], mesh_to_points.inputs['Radius']) 898 | 899 | node_group.links.new(mesh_to_points.outputs['Points'], delete_geo.inputs['Geometry']) 900 | node_group.links.new(named_attr.outputs['Attribute'], compare.inputs['A']) 901 | node_group.links.new(input_node.outputs['Threshold'], compare.inputs['B']) 902 | node_group.links.new(compare.outputs['Result'], delete_geo.inputs['Selection']) 903 | node_group.links.new(delete_geo.outputs['Geometry'], set_material_node.inputs['Geometry']) 904 | node_group.links.new(set_material_node.outputs['Geometry'], output_node.inputs['Geometry']) 905 | 906 | def create_point_cloud_object(name, points, colors, confs, motions=None, collection=None): 907 | mesh = bpy.data.meshes.new(name=name) 908 | mesh.from_pydata(points.tolist(), [], []) 909 | 910 | # Image colors 911 | attribute = mesh.attributes.new(name="point_color", type="FLOAT_COLOR", domain="POINT") 912 | attribute.data.foreach_set("color", colors.flatten().tolist()) 913 | 914 | # Raw confidence value 915 | attribute_conf = mesh.attributes.new(name="conf", type="FLOAT", domain="POINT") 916 | attribute_conf.data.foreach_set("value", confs.tolist()) 917 | 918 | # Motion score 919 | if motions is not None: 920 | attribute_motion = mesh.attributes.new(name="motion", type="FLOAT", domain="POINT") 921 | attribute_motion.data.foreach_set("value", motions.tolist()) 922 | 923 | obj = bpy.data.objects.new(name, mesh) 924 | 925 | # Link to the provided collection, or fallback to active collection 926 | if collection is not None: 927 | collection.objects.link(obj) 928 | else: 929 | bpy.context.collection.objects.link(obj) 930 | 931 | # Reuse existing PointMaterial or create new one 932 | mat = get_or_create_point_material() 933 | 934 | # Add material to object so it shows up in Shading mode 935 | obj.data.materials.append(mat) 936 | 937 | # Geometry nodes setup 938 | add_point_cloud_geo_nodes(obj, mat) 939 | return obj 940 | 941 | def import_point_cloud(d, collection=None, filter_edges=True, min_confidence=0.5, global_indices=None): 942 | points = d["world_points_from_depth"] 943 | images = d["images"] 944 | conf = d["conf"] 945 | 946 | # Filter confidence based on depth gradient 947 | if filter_edges and "depth" in d: 948 | try: 949 | depth = d["depth"] 950 | for i in range(len(depth)): 951 | dm = depth[i] 952 | gx = cv2.Sobel(dm, cv2.CV_64F, 1, 0, ksize=3) 953 | gy = cv2.Sobel(dm, cv2.CV_64F, 0, 1, ksize=3) 954 | mag = np.sqrt(gx**2 + gy**2) 955 | mn, mx = 
np.nanmin(mag), np.nanmax(mag) 956 | if mx > mn: 957 | norm = (mag - mn) / (mx - mn) 958 | else: 959 | norm = np.zeros_like(mag) 960 | 961 | # Set confidence to 0 if normalized gradient >= 12/255 962 | mask = norm >= (12.0 / 255.0) 963 | conf[i][mask] = 0.0 964 | except Exception as e: 965 | print(f"Failed to filter confidence by gradient: {e}") 966 | 967 | if 'seg_id_map' in d: 968 | seg_id_map = d['seg_id_map'] # [N, H, W] 969 | id_to_class = d.get('id_to_class', {}) 970 | class_names = d.get('class_names', {}) 971 | 972 | # Get all unique IDs across the batch 973 | unique_ids = np.unique(seg_id_map) 974 | 975 | # Create a collection for segmented objects 976 | seg_collection = None 977 | if collection: 978 | seg_collection = bpy.data.collections.new("Segmented") 979 | collection.children.link(seg_collection) 980 | 981 | # Process each ID 982 | for obj_id in unique_ids: 983 | # ID -1 is background/unsegmented 984 | is_background = (obj_id == -1) 985 | 986 | # Collect points for this ID across all frames 987 | obj_points = [] 988 | obj_colors = [] 989 | obj_confs = [] 990 | obj_motions = [] 991 | 992 | N = points.shape[0] 993 | for i in range(N): 994 | # Mask for this ID in this frame 995 | mask = (seg_id_map[i] == obj_id) 996 | 997 | # Also apply confidence filter 998 | if min_confidence > 0: 999 | mask = mask & (conf[i] >= min_confidence) 1000 | 1001 | if not mask.any(): continue 1002 | 1003 | p = points[i][mask] 1004 | c = images[i][mask] 1005 | cf = conf[i][mask] 1006 | 1007 | # Transform points 1008 | p_trans = p.copy() 1009 | p_trans[:, [0, 1, 2]] = p[:, [0, 2, 1]] 1010 | p_trans[:, 2] = -p_trans[:, 2] 1011 | 1012 | # Colors RGBA 1013 | c = np.hstack((c, np.ones((c.shape[0], 1)))) 1014 | 1015 | obj_points.append(p_trans) 1016 | obj_colors.append(c) 1017 | obj_confs.append(cf) 1018 | 1019 | if 'motion' in d: 1020 | m = d['motion'][i][mask] 1021 | obj_motions.append(m) 1022 | 1023 | if not obj_points: continue 1024 | 1025 | # Concatenate 1026 | all_points = np.vstack(obj_points) 1027 | all_colors = np.vstack(obj_colors) 1028 | all_confs = np.concatenate(obj_confs) 1029 | all_motions = np.concatenate(obj_motions) if obj_motions else None 1030 | 1031 | # Name 1032 | if is_background: 1033 | name = "Background" 1034 | else: 1035 | cls_id = id_to_class.get(obj_id, -1) 1036 | cls_name = class_names.get(cls_id, f"Class_{cls_id}") 1037 | name = f"{cls_name}_{obj_id}" 1038 | 1039 | target_col = seg_collection if seg_collection else collection 1040 | create_point_cloud_object(name, all_points, all_colors, all_confs, all_motions, target_col) 1041 | 1042 | elif 'motion' in d: 1043 | motion = d['motion'] 1044 | stationary_points = [] 1045 | stationary_colors = [] 1046 | stationary_confs = [] 1047 | stationary_motions = [] 1048 | 1049 | # Create Moving collection 1050 | moving_collection = None 1051 | if collection: 1052 | moving_collection = bpy.data.collections.new("Moving") 1053 | collection.children.link(moving_collection) 1054 | 1055 | N = points.shape[0] 1056 | for i in range(N): 1057 | p = points[i].reshape(-1, 3) 1058 | c = images[i].reshape(-1, 3) 1059 | cf = conf[i].reshape(-1) 1060 | m = motion[i].reshape(-1) 1061 | 1062 | # Transform 1063 | p_trans = p.copy() 1064 | p_trans[:, [0, 1, 2]] = p[:, [0, 2, 1]] 1065 | p_trans[:, 2] = -p_trans[:, 2] 1066 | 1067 | c = np.hstack((c, np.ones((c.shape[0], 1)))) # RGBA 1068 | 1069 | if min_confidence > 0: 1070 | mask = cf >= min_confidence 1071 | p_trans = p_trans[mask] 1072 | c = c[mask] 1073 | cf = cf[mask] 1074 | m = m[mask] 1075 | 1076 | if 
len(p_trans) == 0: continue 1077 | 1078 | is_moving = m > 0 1079 | is_stationary = ~is_moving 1080 | 1081 | if is_stationary.any(): 1082 | stationary_points.append(p_trans[is_stationary]) 1083 | stationary_colors.append(c[is_stationary]) 1084 | stationary_confs.append(cf[is_stationary]) 1085 | stationary_motions.append(m[is_stationary]) 1086 | 1087 | if is_moving.any(): 1088 | if "image_paths" in d and i < len(d["image_paths"]): 1089 | base_name = os.path.splitext(os.path.basename(d["image_paths"][i]))[0] 1090 | obj_name = f"Moving_{base_name}" 1091 | else: 1092 | obj_name = f"Moving_Frame_{i}" 1093 | 1094 | target_col = moving_collection if moving_collection else collection 1095 | obj = create_point_cloud_object(obj_name, p_trans[is_moving], c[is_moving], cf[is_moving], m[is_moving], target_col) 1096 | 1097 | # Animate Visibility 1098 | spacing = 15 1099 | duration = 15 1100 | 1101 | global_idx = global_indices[i] if global_indices is not None else i 1102 | start_frame = 1 + global_idx * spacing 1103 | end_frame = start_frame + duration 1104 | 1105 | # Ensure we start hidden 1106 | obj.hide_viewport = True 1107 | obj.hide_render = True 1108 | obj.keyframe_insert(data_path="hide_viewport", frame=0) 1109 | obj.keyframe_insert(data_path="hide_render", frame=0) 1110 | 1111 | # Show 1112 | obj.hide_viewport = False 1113 | obj.hide_render = False 1114 | obj.keyframe_insert(data_path="hide_viewport", frame=start_frame) 1115 | obj.keyframe_insert(data_path="hide_render", frame=start_frame) 1116 | 1117 | # Hide 1118 | obj.hide_viewport = True 1119 | obj.hide_render = True 1120 | obj.keyframe_insert(data_path="hide_viewport", frame=end_frame) 1121 | obj.keyframe_insert(data_path="hide_render", frame=end_frame) 1122 | 1123 | if stationary_points: 1124 | create_point_cloud_object("Points", np.vstack(stationary_points), np.vstack(stationary_colors), np.concatenate(stationary_confs), np.concatenate(stationary_motions), collection) 1125 | 1126 | else: 1127 | points_batch = points.reshape(-1, 3) 1128 | reordered_points_batch = points_batch.copy() 1129 | reordered_points_batch[:, [0, 1, 2]] = points_batch[:, [0, 2, 1]] 1130 | reordered_points_batch[:, 2] = -reordered_points_batch[:, 2] 1131 | points_batch = reordered_points_batch 1132 | colors_batch = images.reshape(-1, 3) 1133 | colors_batch = np.hstack((colors_batch, np.ones((colors_batch.shape[0], 1)))) 1134 | conf_batch = conf.reshape(-1) 1135 | 1136 | # Remove points with low confidence 1137 | if min_confidence > 0: 1138 | valid_mask = conf_batch >= min_confidence 1139 | points_batch = points_batch[valid_mask] 1140 | colors_batch = colors_batch[valid_mask] 1141 | conf_batch = conf_batch[valid_mask] 1142 | 1143 | if len(conf_batch) > 0: 1144 | print(f"DEBUG confidence: min={conf_batch.min():.4f}, max={conf_batch.max():.4f}") 1145 | 1146 | create_point_cloud_object("Points", points_batch, colors_batch, conf_batch, None, collection) 1147 | 1148 | def create_cameras(predictions, collection=None, image_width=None, image_height=None): 1149 | scene = bpy.context.scene 1150 | if image_width is None or image_height is None: 1151 | H, W = predictions['images'].shape[1:3] 1152 | image_width = W 1153 | image_height = H 1154 | K0 = predictions["intrinsic"][0] 1155 | pixel_aspect_y = K0[1,1] / K0[0,0] 1156 | scene.render.pixel_aspect_x = 1.0 1157 | scene.render.pixel_aspect_y = float(pixel_aspect_y) 1158 | num_cameras = len(predictions["extrinsic"]) 1159 | if len(predictions["intrinsic"]) != num_cameras: 1160 | raise ValueError("Extrinsic and intrinsic lists must 
have the same length") 1161 | 1162 | # Optional: get image paths from predictions, if available 1163 | image_paths = predictions.get("image_paths", None) 1164 | 1165 | # Create Cameras collection 1166 | target_collection = collection 1167 | if collection: 1168 | cameras_col = bpy.data.collections.new("Cameras") 1169 | collection.children.link(cameras_col) 1170 | target_collection = cameras_col 1171 | 1172 | T = np.diag([1.0, -1.0, -1.0, 1.0]) 1173 | for i in range(num_cameras): 1174 | # Name from image file if available 1175 | if image_paths and i < len(image_paths): 1176 | import os 1177 | base_name = os.path.splitext(os.path.basename(image_paths[i]))[0] 1178 | cam_name = base_name 1179 | else: 1180 | cam_name = f"Camera_{i}" 1181 | 1182 | cam_data = bpy.data.cameras.new(name=cam_name) 1183 | K = predictions["intrinsic"][i] 1184 | f_x = K[0,0] 1185 | c_x = K[0,2] 1186 | c_y = K[1,2] 1187 | sensor_width = 36.0 1188 | cam_data.sensor_width = sensor_width 1189 | cam_data.lens = (f_x / image_width) * sensor_width 1190 | cam_data.shift_x = (c_x - image_width / 2.0) / image_width 1191 | cam_data.shift_y = (c_y - image_height / 2.0) / image_height 1192 | cam_obj = bpy.data.objects.new(name=cam_name, object_data=cam_data) 1193 | 1194 | if target_collection is not None: 1195 | target_collection.objects.link(cam_obj) 1196 | else: 1197 | scene.collection.objects.link(cam_obj) 1198 | 1199 | ext = predictions["extrinsic"][i] 1200 | E = np.vstack((ext, [0, 0, 0, 1])) 1201 | E_inv = np.linalg.inv(E) 1202 | M = np.dot(E_inv, T) 1203 | cam_obj.matrix_world = Matrix(M.tolist()) 1204 | R = Matrix.Rotation(math.radians(-90), 4, 'X') 1205 | cam_obj.matrix_world = R @ cam_obj.matrix_world 1206 | 1207 | def get_or_create_image_material(image_path): 1208 | name = os.path.basename(image_path) 1209 | mat = bpy.data.materials.get(name) 1210 | if mat is None: 1211 | mat = bpy.data.materials.new(name=name) 1212 | mat.use_nodes = True 1213 | nodes = mat.node_tree.nodes 1214 | links = mat.node_tree.links 1215 | nodes.clear() 1216 | 1217 | tex_coord = nodes.new('ShaderNodeTexCoord') 1218 | tex_coord.location = (-800, 200) 1219 | 1220 | tex_image = nodes.new('ShaderNodeTexImage') 1221 | tex_image.location = (-500, 200) 1222 | try: 1223 | # Check if image is already loaded 1224 | img_name = os.path.basename(image_path) 1225 | img = bpy.data.images.get(img_name) 1226 | if img is None: 1227 | img = bpy.data.images.load(image_path) 1228 | tex_image.image = img 1229 | except Exception as e: 1230 | print(f"Could not load image {image_path}: {e}") 1231 | 1232 | bsdf = nodes.new('ShaderNodeBsdfPrincipled') 1233 | bsdf.location = (-200, 200) 1234 | bsdf.inputs['Roughness'].default_value = 1.0 1235 | # Try to set specular to 0 to avoid shiny photos 1236 | if 'Specular IOR Level' in bsdf.inputs: 1237 | bsdf.inputs['Specular IOR Level'].default_value = 0.0 1238 | elif 'Specular' in bsdf.inputs: 1239 | bsdf.inputs['Specular'].default_value = 0.0 1240 | 1241 | output = nodes.new('ShaderNodeOutputMaterial') 1242 | output.location = (100, 200) 1243 | 1244 | links.new(tex_coord.outputs['UV'], tex_image.inputs['Vector']) 1245 | links.new(tex_image.outputs['Color'], bsdf.inputs['Base Color']) 1246 | links.new(bsdf.outputs['BSDF'], output.inputs['Surface']) 1247 | return mat 1248 | 1249 | def add_filter_mesh_modifier(obj, min_confidence): 1250 | mod = obj.modifiers.new(name="FilterMesh", type='NODES') 1251 | group_name = "FilterDepthMesh" 1252 | group = bpy.data.node_groups.get(group_name) 1253 | if not group: 1254 | group = 
bpy.data.node_groups.new(group_name, 'GeometryNodeTree') 1255 | group.interface.new_socket(name="Geometry", in_out="INPUT", socket_type="NodeSocketGeometry") 1256 | group.interface.new_socket(name="Geometry", in_out="OUTPUT", socket_type="NodeSocketGeometry") 1257 | 1258 | # Nodes 1259 | in_node = group.nodes.new('NodeGroupInput') 1260 | out_node = group.nodes.new('NodeGroupOutput') 1261 | 1262 | # 1. Filter by Confidence (Delete Points) 1263 | del_conf = group.nodes.new('GeometryNodeDeleteGeometry') 1264 | del_conf.domain = 'POINT' 1265 | named_attr = group.nodes.new('GeometryNodeInputNamedAttribute') 1266 | named_attr.data_type = 'FLOAT' 1267 | named_attr.inputs['Name'].default_value = "conf" 1268 | compare_conf = group.nodes.new('FunctionNodeCompare') 1269 | compare_conf.operation = 'LESS_THAN' 1270 | compare_conf.inputs['B'].default_value = min_confidence 1271 | 1272 | group.links.new(named_attr.outputs['Attribute'], compare_conf.inputs['A']) 1273 | group.links.new(compare_conf.outputs['Result'], del_conf.inputs['Selection']) 1274 | 1275 | # 2. Filter by Edge Length (Delete Edges) 1276 | del_edge = group.nodes.new('GeometryNodeDeleteGeometry') 1277 | del_edge.domain = 'EDGE' 1278 | 1279 | # Calculate Edge Length manually (Edge Length node name varies) 1280 | edge_verts = group.nodes.new('GeometryNodeInputMeshEdgeVertices') 1281 | pos = group.nodes.new('GeometryNodeInputPosition') 1282 | 1283 | sample_pos1 = group.nodes.new('GeometryNodeSampleIndex') 1284 | sample_pos1.data_type = 'FLOAT_VECTOR' 1285 | sample_pos1.domain = 'POINT' 1286 | 1287 | sample_pos2 = group.nodes.new('GeometryNodeSampleIndex') 1288 | sample_pos2.data_type = 'FLOAT_VECTOR' 1289 | sample_pos2.domain = 'POINT' 1290 | 1291 | dist = group.nodes.new('ShaderNodeVectorMath') 1292 | dist.operation = 'DISTANCE' 1293 | 1294 | compare_edge = group.nodes.new('FunctionNodeCompare') 1295 | compare_edge.operation = 'GREATER_THAN' 1296 | compare_edge.inputs['B'].default_value = 0.1 # Threshold for jump (meters) 1297 | 1298 | # Connect Geometry (from del_conf) 1299 | group.links.new(del_conf.outputs['Geometry'], sample_pos1.inputs['Geometry']) 1300 | group.links.new(del_conf.outputs['Geometry'], sample_pos2.inputs['Geometry']) 1301 | 1302 | # Connect Indices and Values 1303 | group.links.new(edge_verts.outputs['Vertex Index 1'], sample_pos1.inputs['Index']) 1304 | group.links.new(pos.outputs['Position'], sample_pos1.inputs['Value']) 1305 | 1306 | group.links.new(edge_verts.outputs['Vertex Index 2'], sample_pos2.inputs['Index']) 1307 | group.links.new(pos.outputs['Position'], sample_pos2.inputs['Value']) 1308 | 1309 | # Calculate Distance 1310 | group.links.new(sample_pos1.outputs['Value'], dist.inputs[0]) 1311 | group.links.new(sample_pos2.outputs['Value'], dist.inputs[1]) 1312 | 1313 | # Compare 1314 | group.links.new(dist.outputs['Value'], compare_edge.inputs['A']) 1315 | group.links.new(compare_edge.outputs['Result'], del_edge.inputs['Selection']) 1316 | 1317 | # Connect Main Flow 1318 | group.links.new(in_node.outputs['Geometry'], del_conf.inputs['Geometry']) 1319 | group.links.new(del_conf.outputs['Geometry'], del_edge.inputs['Geometry']) 1320 | group.links.new(del_edge.outputs['Geometry'], out_node.inputs['Geometry']) 1321 | 1322 | mod.node_group = group 1323 | 1324 | def import_mesh_from_depth(d, collection=None, filter_edges=True, min_confidence=0.5, global_indices=None): 1325 | points = d["world_points_from_depth"] # [N, H, W, 3] 1326 | images = d["images"] # [N, H, W, 3] 1327 | conf = d["conf"] # [N, H, W] 1328 | 1329 | 
# Filter confidence based on depth gradient (Same as import_point_cloud) 1330 | if filter_edges and "depth" in d: 1331 | try: 1332 | depth = d["depth"] 1333 | for i in range(len(depth)): 1334 | dm = depth[i] 1335 | gx = cv2.Sobel(dm, cv2.CV_64F, 1, 0, ksize=3) 1336 | gy = cv2.Sobel(dm, cv2.CV_64F, 0, 1, ksize=3) 1337 | mag = np.sqrt(gx**2 + gy**2) 1338 | mn, mx = np.nanmin(mag), np.nanmax(mag) 1339 | if mx > mn: 1340 | norm = (mag - mn) / (mx - mn) 1341 | else: 1342 | norm = np.zeros_like(mag) 1343 | 1344 | # Set confidence to 0 if normalized gradient >= 12/255 1345 | mask = norm >= (12.0 / 255.0) 1346 | conf[i][mask] = 0.0 1347 | except Exception as e: 1348 | print(f"Failed to filter confidence by gradient: {e}") 1349 | 1350 | N, H, W, _ = points.shape 1351 | 1352 | # Generate grid faces once (shared for all images in batch) 1353 | # Grid indices: (r, c) -> r*W + c 1354 | # Quad: (r, c), (r, c+1), (r+1, c+1), (r+1, c) 1355 | r = np.arange(H - 1) 1356 | c = np.arange(W - 1) 1357 | rr, cc = np.meshgrid(r, c, indexing='ij') 1358 | v0 = rr * W + cc 1359 | v1 = rr * W + (cc + 1) 1360 | v2 = (rr + 1) * W + (cc + 1) 1361 | v3 = (rr + 1) * W + cc 1362 | # Blender expects counter-clockwise winding for front faces 1363 | faces = np.stack([v0, v1, v2, v3], axis=-1).reshape(-1, 4) 1364 | 1365 | # Generate UVs once 1366 | u_coords = np.linspace(0, 1, W, dtype=np.float32) 1367 | v_coords = np.linspace(1, 0, H, dtype=np.float32) # Top is 1, Bottom is 0 1368 | uu, vv = np.meshgrid(u_coords, v_coords) 1369 | uvs = np.stack([uu, vv], axis=-1).reshape(-1, 2) 1370 | 1371 | if 'seg_id_map' in d: 1372 | seg_id_map = d['seg_id_map'] 1373 | id_to_class = d.get('id_to_class', {}) 1374 | class_names = d.get('class_names', {}) 1375 | 1376 | # Create Segmented collection 1377 | seg_collection = None 1378 | obj_collections = {} # Cache for object collections 1379 | 1380 | if collection: 1381 | seg_collection = bpy.data.collections.new("Segmented_Meshes") 1382 | collection.children.link(seg_collection) 1383 | 1384 | for i in range(N): 1385 | # Prepare data for this image 1386 | pts = points[i].reshape(-1, 3) 1387 | # Apply the same coordinate transform as import_point_cloud 1388 | pts_transformed = pts.copy() 1389 | pts_transformed[:, [0, 1, 2]] = pts[:, [0, 2, 1]] 1390 | pts_transformed[:, 2] = -pts_transformed[:, 2] 1391 | 1392 | cols = images[i].reshape(-1, 3) 1393 | cols = np.hstack((cols, np.ones((cols.shape[0], 1)))) # RGBA 1394 | confs = conf[i].reshape(-1) 1395 | 1396 | motion_vals = None 1397 | if 'motion' in d: 1398 | motion_vals = d['motion'][i].reshape(-1) 1399 | 1400 | # Flatten IDs for this frame 1401 | frame_ids = seg_id_map[i].flatten() 1402 | unique_frame_ids = np.unique(frame_ids) 1403 | 1404 | for obj_id in unique_frame_ids: 1405 | # Mask vertices 1406 | vert_mask = (frame_ids == obj_id) 1407 | 1408 | # Mask faces (strict inclusion) 1409 | face_mask = vert_mask[faces[:, 0]] & vert_mask[faces[:, 1]] & vert_mask[faces[:, 2]] & vert_mask[faces[:, 3]] 1410 | 1411 | if not face_mask.any(): 1412 | continue 1413 | 1414 | # Extract sub-mesh 1415 | sub_faces = faces[face_mask] 1416 | unique_v = np.unique(sub_faces) 1417 | 1418 | remap = np.zeros(len(pts_transformed), dtype=np.int32) - 1 1419 | remap[unique_v] = np.arange(len(unique_v)) 1420 | 1421 | new_faces = remap[sub_faces] 1422 | new_pts = pts_transformed[unique_v] 1423 | new_cols = cols[unique_v] 1424 | new_confs = confs[unique_v] 1425 | new_uvs = uvs.reshape(-1, 2)[unique_v] 1426 | 1427 | # Naming 1428 | if obj_id == -1: 1429 | base_obj_name = 
"Background" 1430 | else: 1431 | cls_id = id_to_class.get(obj_id, -1) 1432 | cls_name = class_names.get(cls_id, f"Class_{cls_id}") 1433 | base_obj_name = f"{cls_name}_{obj_id}" 1434 | 1435 | if "image_paths" in d and i < len(d["image_paths"]): 1436 | base_name = os.path.splitext(os.path.basename(d["image_paths"][i]))[0] 1437 | mesh_name = f"{base_name}_{base_obj_name}" 1438 | else: 1439 | mesh_name = f"Mesh_{i}_{base_obj_name}" 1440 | 1441 | # Create Mesh object 1442 | mesh = bpy.data.meshes.new(name=mesh_name) 1443 | mesh.from_pydata(new_pts.tolist(), [], new_faces.tolist()) 1444 | 1445 | # UVs 1446 | uv_layer = mesh.uv_layers.new(name="UVMap") 1447 | loop_vert_indices = np.zeros(len(mesh.loops), dtype=np.int32) 1448 | mesh.loops.foreach_get("vertex_index", loop_vert_indices) 1449 | loop_uvs = new_uvs[loop_vert_indices] 1450 | uv_layer.data.foreach_set("uv", loop_uvs.flatten()) 1451 | 1452 | # Attributes 1453 | col_attr = mesh.attributes.new(name="point_color", type="FLOAT_COLOR", domain="POINT") 1454 | col_attr.data.foreach_set("color", new_cols.flatten()) 1455 | conf_attr = mesh.attributes.new(name="conf", type="FLOAT", domain="POINT") 1456 | conf_attr.data.foreach_set("value", new_confs) 1457 | 1458 | if motion_vals is not None: 1459 | new_motion = motion_vals[unique_v] 1460 | motion_attr = mesh.attributes.new(name="motion", type="FLOAT", domain="POINT") 1461 | motion_attr.data.foreach_set("value", new_motion) 1462 | 1463 | obj = bpy.data.objects.new(mesh_name, mesh) 1464 | 1465 | # Determine target collection 1466 | target_col = collection 1467 | if seg_collection: 1468 | if base_obj_name not in obj_collections: 1469 | new_col = bpy.data.collections.new(base_obj_name) 1470 | seg_collection.children.link(new_col) 1471 | obj_collections[base_obj_name] = new_col 1472 | target_col = obj_collections[base_obj_name] 1473 | 1474 | target_col.objects.link(obj) 1475 | 1476 | # Material (Image) 1477 | if "image_paths" in d: 1478 | img_path = d["image_paths"][i] 1479 | mat = get_or_create_image_material(img_path) 1480 | else: 1481 | mat = get_or_create_point_material() 1482 | obj.data.materials.append(mat) 1483 | 1484 | if filter_edges: 1485 | add_filter_mesh_modifier(obj, min_confidence) 1486 | 1487 | elif 'motion' in d: 1488 | motion = d['motion'] 1489 | 1490 | # Create Moving collection 1491 | moving_collection = None 1492 | if collection: 1493 | moving_collection = bpy.data.collections.new("Moving") 1494 | collection.children.link(moving_collection) 1495 | 1496 | for i in range(N): 1497 | # Prepare data for this image 1498 | pts = points[i].reshape(-1, 3) 1499 | # Apply the same coordinate transform as import_point_cloud 1500 | pts_transformed = pts.copy() 1501 | pts_transformed[:, [0, 1, 2]] = pts[:, [0, 2, 1]] 1502 | pts_transformed[:, 2] = -pts_transformed[:, 2] 1503 | 1504 | cols = images[i].reshape(-1, 3) 1505 | cols = np.hstack((cols, np.ones((cols.shape[0], 1)))) # RGBA 1506 | confs = conf[i].reshape(-1) 1507 | m = motion[i].reshape(-1) 1508 | 1509 | is_moving = m > 0 1510 | 1511 | # --- Moving Mesh --- 1512 | # Face is moving if ANY vertex is moving 1513 | face_moving_mask = is_moving[faces[:, 0]] | is_moving[faces[:, 1]] | is_moving[faces[:, 2]] | is_moving[faces[:, 3]] 1514 | 1515 | if face_moving_mask.any(): 1516 | sub_faces = faces[face_moving_mask] 1517 | unique_v = np.unique(sub_faces) 1518 | 1519 | remap = np.zeros(len(pts_transformed), dtype=np.int32) - 1 1520 | remap[unique_v] = np.arange(len(unique_v)) 1521 | 1522 | new_faces = remap[sub_faces] 1523 | new_pts = 
pts_transformed[unique_v] 1524 | new_cols = cols[unique_v] 1525 | new_confs = confs[unique_v] 1526 | new_uvs = uvs.reshape(-1, 2)[unique_v] 1527 | new_motion = m[unique_v] 1528 | 1529 | if "image_paths" in d and i < len(d["image_paths"]): 1530 | base_name = os.path.splitext(os.path.basename(d["image_paths"][i]))[0] 1531 | mesh_name = f"Moving_Mesh_{base_name}" 1532 | else: 1533 | mesh_name = f"Moving_Mesh_{i}" 1534 | 1535 | mesh = bpy.data.meshes.new(name=mesh_name) 1536 | mesh.from_pydata(new_pts.tolist(), [], new_faces.tolist()) 1537 | 1538 | # UVs 1539 | uv_layer = mesh.uv_layers.new(name="UVMap") 1540 | loop_vert_indices = np.zeros(len(mesh.loops), dtype=np.int32) 1541 | mesh.loops.foreach_get("vertex_index", loop_vert_indices) 1542 | loop_uvs = new_uvs[loop_vert_indices] 1543 | uv_layer.data.foreach_set("uv", loop_uvs.flatten()) 1544 | 1545 | # Attributes 1546 | col_attr = mesh.attributes.new(name="point_color", type="FLOAT_COLOR", domain="POINT") 1547 | col_attr.data.foreach_set("color", new_cols.flatten()) 1548 | conf_attr = mesh.attributes.new(name="conf", type="FLOAT", domain="POINT") 1549 | conf_attr.data.foreach_set("value", new_confs) 1550 | motion_attr = mesh.attributes.new(name="motion", type="FLOAT", domain="POINT") 1551 | motion_attr.data.foreach_set("value", new_motion) 1552 | 1553 | target_col = moving_collection if moving_collection else collection 1554 | obj = bpy.data.objects.new(mesh_name, mesh) 1555 | target_col.objects.link(obj) 1556 | 1557 | # Material (Image) 1558 | if "image_paths" in d: 1559 | img_path = d["image_paths"][i] 1560 | mat = get_or_create_image_material(img_path) 1561 | else: 1562 | mat = get_or_create_point_material() 1563 | obj.data.materials.append(mat) 1564 | 1565 | if filter_edges: 1566 | add_filter_mesh_modifier(obj, min_confidence) 1567 | 1568 | # Animation 1569 | spacing = 15 1570 | duration = 15 1571 | global_idx = global_indices[i] if global_indices is not None else i 1572 | start_frame = 1 + global_idx * spacing 1573 | end_frame = start_frame + duration 1574 | 1575 | obj.hide_viewport = True 1576 | obj.hide_render = True 1577 | obj.keyframe_insert(data_path="hide_viewport", frame=0) 1578 | obj.keyframe_insert(data_path="hide_render", frame=0) 1579 | 1580 | obj.hide_viewport = False 1581 | obj.hide_render = False 1582 | obj.keyframe_insert(data_path="hide_viewport", frame=start_frame) 1583 | obj.keyframe_insert(data_path="hide_render", frame=start_frame) 1584 | 1585 | obj.hide_viewport = True 1586 | obj.hide_render = True 1587 | obj.keyframe_insert(data_path="hide_viewport", frame=end_frame) 1588 | obj.keyframe_insert(data_path="hide_render", frame=end_frame) 1589 | 1590 | # --- Stationary Mesh --- 1591 | # Face is stationary if ALL vertices are stationary (NOT moving) 1592 | # This ensures no overlap with Moving mesh faces 1593 | face_stationary_mask = ~face_moving_mask 1594 | 1595 | if face_stationary_mask.any(): 1596 | sub_faces = faces[face_stationary_mask] 1597 | unique_v = np.unique(sub_faces) 1598 | 1599 | remap = np.zeros(len(pts_transformed), dtype=np.int32) - 1 1600 | remap[unique_v] = np.arange(len(unique_v)) 1601 | 1602 | new_faces = remap[sub_faces] 1603 | new_pts = pts_transformed[unique_v] 1604 | new_cols = cols[unique_v] 1605 | new_confs = confs[unique_v] 1606 | new_uvs = uvs.reshape(-1, 2)[unique_v] 1607 | 1608 | if "image_paths" in d and i < len(d["image_paths"]): 1609 | base_name = os.path.splitext(os.path.basename(d["image_paths"][i]))[0] 1610 | mesh_name = f"Mesh_{base_name}" 1611 | else: 1612 | mesh_name = 
f"Mesh_Img_{i}" 1613 | 1614 | mesh = bpy.data.meshes.new(name=mesh_name) 1615 | mesh.from_pydata(new_pts.tolist(), [], new_faces.tolist()) 1616 | 1617 | # UVs 1618 | uv_layer = mesh.uv_layers.new(name="UVMap") 1619 | loop_vert_indices = np.zeros(len(mesh.loops), dtype=np.int32) 1620 | mesh.loops.foreach_get("vertex_index", loop_vert_indices) 1621 | loop_uvs = new_uvs[loop_vert_indices] 1622 | uv_layer.data.foreach_set("uv", loop_uvs.flatten()) 1623 | 1624 | # Attributes 1625 | col_attr = mesh.attributes.new(name="point_color", type="FLOAT_COLOR", domain="POINT") 1626 | col_attr.data.foreach_set("color", new_cols.flatten()) 1627 | conf_attr = mesh.attributes.new(name="conf", type="FLOAT", domain="POINT") 1628 | conf_attr.data.foreach_set("value", new_confs) 1629 | 1630 | obj = bpy.data.objects.new(mesh_name, mesh) 1631 | if collection: 1632 | collection.objects.link(obj) 1633 | else: 1634 | bpy.context.collection.objects.link(obj) 1635 | 1636 | # Material (Image) 1637 | if "image_paths" in d: 1638 | img_path = d["image_paths"][i] 1639 | mat = get_or_create_image_material(img_path) 1640 | else: 1641 | mat = get_or_create_point_material() 1642 | obj.data.materials.append(mat) 1643 | 1644 | if filter_edges: 1645 | add_filter_mesh_modifier(obj, min_confidence) 1646 | 1647 | else: 1648 | for i in range(N): 1649 | # Prepare data for this image 1650 | pts = points[i].reshape(-1, 3) 1651 | # Apply the same coordinate transform as import_point_cloud 1652 | pts_transformed = pts.copy() 1653 | pts_transformed[:, [0, 1, 2]] = pts[:, [0, 2, 1]] 1654 | pts_transformed[:, 2] = -pts_transformed[:, 2] 1655 | 1656 | cols = images[i].reshape(-1, 3) 1657 | cols = np.hstack((cols, np.ones((cols.shape[0], 1)))) # RGBA 1658 | confs = conf[i].reshape(-1) 1659 | 1660 | motion_vals = None 1661 | if 'motion' in d: 1662 | motion_vals = d['motion'][i].reshape(-1) 1663 | 1664 | # Create Mesh 1665 | if "image_paths" in d and i < len(d["image_paths"]): 1666 | base_name = os.path.splitext(os.path.basename(d["image_paths"][i]))[0] 1667 | mesh_name = f"Mesh_{base_name}" 1668 | else: 1669 | mesh_name = f"Mesh_Img_{i}" 1670 | 1671 | mesh = bpy.data.meshes.new(name=mesh_name) 1672 | mesh.from_pydata(pts_transformed.tolist(), [], faces.tolist()) 1673 | 1674 | # Add UVs 1675 | uv_layer = mesh.uv_layers.new(name="UVMap") 1676 | loop_vert_indices = np.zeros(len(mesh.loops), dtype=np.int32) 1677 | mesh.loops.foreach_get("vertex_index", loop_vert_indices) 1678 | loop_uvs = uvs[loop_vert_indices] 1679 | uv_layer.data.foreach_set("uv", loop_uvs.flatten()) 1680 | 1681 | # Add Attributes 1682 | # Color 1683 | col_attr = mesh.attributes.new(name="point_color", type="FLOAT_COLOR", domain="POINT") 1684 | col_attr.data.foreach_set("color", cols.flatten()) 1685 | 1686 | # Confidence 1687 | conf_attr = mesh.attributes.new(name="conf", type="FLOAT", domain="POINT") 1688 | conf_attr.data.foreach_set("value", confs) 1689 | 1690 | # Motion 1691 | if motion_vals is not None: 1692 | motion_attr = mesh.attributes.new(name="motion", type="FLOAT", domain="POINT") 1693 | motion_attr.data.foreach_set("value", motion_vals) 1694 | 1695 | obj = bpy.data.objects.new(mesh_name, mesh) 1696 | if collection: 1697 | collection.objects.link(obj) 1698 | else: 1699 | bpy.context.collection.objects.link(obj) 1700 | 1701 | # Add Material 1702 | if "image_paths" in d: 1703 | img_path = d["image_paths"][i] 1704 | mat = get_or_create_image_material(img_path) 1705 | else: 1706 | mat = get_or_create_point_material() 1707 | obj.data.materials.append(mat) 1708 | 1709 | # Add 
Geometry Nodes to filter stretched edges and low confidence 1710 | if filter_edges: 1711 | add_filter_mesh_modifier(obj, min_confidence) 1712 | 1713 | --------------------------------------------------------------------------------
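A note on the sub-mesh machinery that all three branches of import_mesh_from_depth share: the quad faces are built once for the H x W pixel grid, and each branch then keeps faces via a per-vertex mask and compacts the surviving vertex indices with a remap array. A minimal NumPy-only sketch of that remapping step, on a hypothetical 3x3 grid (illustration only, not part of the addon):

import numpy as np

H, W = 3, 3
# Quad faces over an H x W pixel grid; vertex (r, c) maps to index r * W + c.
r = np.arange(H - 1)
c = np.arange(W - 1)
rr, cc = np.meshgrid(r, c, indexing='ij')
faces = np.stack([rr * W + cc,
                  rr * W + (cc + 1),
                  (rr + 1) * W + (cc + 1),
                  (rr + 1) * W + cc], axis=-1).reshape(-1, 4)

# Keep only faces whose four corners are all selected (the same rule the
# segmentation branch uses), then renumber the surviving vertices to 0..K-1.
vert_mask = np.zeros(H * W, dtype=bool)
vert_mask[[0, 1, 3, 4]] = True              # hypothetical selection: the top-left quad
face_mask = vert_mask[faces].all(axis=1)    # equivalent to the per-corner & chain above
sub_faces = faces[face_mask]
unique_v = np.unique(sub_faces)
remap = np.full(H * W, -1, dtype=np.int32)
remap[unique_v] = np.arange(len(unique_v))
new_faces = remap[sub_faces]                # -> [[0, 1, 3, 2]]

The same remap/unique_v pair is what lets the vertex-domain data (positions, colors, conf, motion, UVs) be sliced with unique_v and still line up with the renumbered faces.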
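The 0.1 edge-length cutoff in add_filter_mesh_modifier is baked into the GREATER_THAN Compare node rather than exposed as an addon property. If a scene needs a different cutoff after import, one option is to rewrite that node's B input on the object's geometry-nodes modifier. A sketch under that assumption (set_edge_length_threshold is a hypothetical helper, not part of the addon, and it edits the node group in place, so it affects every object sharing that group):

import bpy

def set_edge_length_threshold(obj, threshold):
    # Find the filter modifier's node group and rewrite the edge-length cutoff.
    # The confidence test uses LESS_THAN, so matching GREATER_THAN singles out
    # the edge-length Compare node created above.
    for mod in obj.modifiers:
        if mod.type != 'NODES' or mod.node_group is None:
            continue
        for node in mod.node_group.nodes:
            if node.bl_idname == 'FunctionNodeCompare' and node.operation == 'GREATER_THAN':
                node.inputs['B'].default_value = threshold

# Example: relax the cutoff for a large outdoor scene.
# set_edge_length_threshold(bpy.context.active_object, 0.5)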
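For quick manual testing, import_mesh_from_depth can also be driven with a synthetic prediction dict carrying only the required keys (world_points_from_depth, images, conf); the optional keys (depth, image_paths, seg_id_map, motion) just enable the extra branches above. A rough sketch, assuming it runs where import_mesh_from_depth is in scope (for example pasted at the bottom of utils.py and executed from Blender's text editor); shapes follow the comments at the top of the function:

import bpy
import numpy as np

# One synthetic 4x6 "image": a flat plane at depth 1 with random colors.
N, H, W = 1, 4, 6
yy, xx = np.meshgrid(np.linspace(-1, 1, H), np.linspace(-1, 1, W), indexing='ij')
points = np.stack([xx, yy, np.ones_like(xx)], axis=-1)[None]          # [N, H, W, 3]
images = np.random.default_rng(0).random((N, H, W, 3)).astype(np.float32)
conf = np.ones((N, H, W), dtype=np.float32)                           # [N, H, W]

col = bpy.data.collections.new("DA3_Test")
bpy.context.scene.collection.children.link(col)
import_mesh_from_depth(
    {"world_points_from_depth": points, "images": images, "conf": conf},
    collection=col,
    filter_edges=False,   # skip the geometry-nodes filter for this toy grid
    min_confidence=0.0,
)

With no image_paths in the dict, the object falls back to get_or_create_point_material() and is named Mesh_Img_0.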