├── .gitignore ├── .gitmodules ├── .vscode └── settings.json ├── LICENSE ├── README.md ├── docker ├── ubuntu16.04+miniconda.dockerfile └── ubuntu20.04+miniconda.dockerfile ├── env_v2 ├── README.md ├── download_checkpoints.sh ├── install_labelmaker_env.sh ├── install_sdfstudio_env.sh ├── requirements.txt ├── test_labelmaker_env.sh └── versions.py ├── environment.yml ├── labelmaker ├── __init__.py ├── consensus.py ├── label_data.py ├── label_mappings.py ├── lifting_3d │ ├── __init__.py │ ├── lifting.sh │ ├── lifting_points.py │ └── preprocessing.py ├── mappings │ ├── __init__.py │ └── label_mapping.csv ├── scannet_200_labels.py ├── visualisation.py └── visualization_3d.py ├── models ├── cmx.py ├── grounded_sam.py ├── hha_depth.py ├── internimage.py ├── mask3d_inst.py ├── omnidata_depth.py ├── omnidata_normal.py └── ovseg.py ├── notebooks ├── evaluation_labelmaker.ipynb ├── generate_3d_projections.ipynb ├── visualization.ipynb ├── visualization_lifting.ipynb ├── visualize_arkitscenes.ipynb └── visualize_labelmaker_3d.ipynb ├── scripts ├── __init__.py ├── arkitscenes2labelmaker.py ├── pipeline.sh ├── pipeline_arkit.sh ├── replica2labelmaker.py ├── replica_download.sh ├── replica_pipeline.sh ├── replica_singularity_slurm.sh ├── scanner3d2labelmaker.py ├── scannet2labelmaker.py ├── segmentation_eval.py └── utils_3d.py ├── setup.py └── testing ├── test_models ├── test_cmx_00_omnidata.py ├── test_cmx_01_hha.py ├── test_cmx_02_cmx.py ├── test_consensus.py ├── test_grounded_sam.py ├── test_internimage.py ├── test_mask3d.py ├── test_omnidata_normal.py └── test_ovseg.py └── test_scan ├── color └── 000000.jpg ├── depth └── 000000.png ├── intrinsic └── 000000.txt ├── mesh.ply └── pose └── 000000.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info/** 2 | **/__pycache__/** 3 | pose_refinement/notebooks/.ipynb_checkpoints/* 4 | pose_refinement/notebooks 5 | rsyncignore.txt 6 | pose_refinement/__pycache__ 7 | pose_refinement/output 8 | slurm* 9 | notebooks/** 10 | scripts_arkitscenes/.ipynb_checkpoints/* 11 | scripts_arkitscenes/*.ply 12 | transform_info_scene0575_00.npz 13 | saved/** 14 | checkpoints/ 15 | 3rdparty/nltk_data/ 16 | testing/test_scan/intermediate/ 17 | env_v2/INSTALLED_VERSIONS.sh 18 | wandb/ 19 | build/ 20 | *.simg 21 | data -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "mmseg/mmsegmentation"] 2 | path = 3rdparty/mmsegmentation 3 | url = https://github.com/open-mmlab/mmsegmentation.git 4 | [submodule "mmseg/InternImage"] 5 | path = 3rdparty/InternImage 6 | url = https://github.com/OpenGVLab/InternImage.git 7 | [submodule "3rdparty/omnidata"] 8 | path = 3rdparty/omnidata 9 | url = https://github.com/EPFL-VILAB/omnidata.git 10 | [submodule "3rdparty/ov-seg"] 11 | path = 3rdparty/ov-seg 12 | url = https://github.com/facebookresearch/ov-seg.git 13 | [submodule "3rdparty/detectron2"] 14 | path = 3rdparty/detectron2 15 | url = https://github.com/facebookresearch/detectron2.git 16 | [submodule "3rdparty/ARKitScenes"] 17 | path = 3rdparty/ARKitScenes 18 | url = https://github.com/apple/ARKitScenes.git 19 | [submodule "3rdparty/Mask3D"] 20 | path = 3rdparty/Mask3D 21 | url = https://github.com/cvg/Mask3D.git 22 | [submodule "3rdparty/RGBX_Semantic_Segmentation"] 23 | path = 3rdparty/RGBX_Semantic_Segmentation 24 | url = https://github.com/huaaaliu/RGBX_Semantic_Segmentation.git 25 | [submodule 
"3rdparty/Depth2HHA-python"] 26 | path = 3rdparty/Depth2HHA-python 27 | url = https://github.com/hermannsblum/Depth2HHA-python.git 28 | [submodule "3rdparty/Grounded-Segment-Anything"] 29 | path = 3rdparty/Grounded-Segment-Anything 30 | url = https://github.com/cvg/Grounded-Segment-Anything.git 31 | [submodule "3rdparty/recognize-anything"] 32 | path = 3rdparty/recognize-anything 33 | url = https://github.com/cvg/recognize-anything.git 34 | [submodule "3rdparty/sdfstudio"] 35 | path = 3rdparty/sdfstudio 36 | url = https://github.com/cvg/sdfstudio.git 37 | branch = devel 38 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[python]": { 3 | "editor.formatOnSaveMode": "file", 4 | "editor.formatOnSave": true, 5 | "editor.defaultFormatter": "eeyore.yapf" 6 | }, 7 | "python.defaultInterpreterPath": "${env:HOME}/.conda/envs/labelmaker/bin/python", 8 | "yapf.args": [ 9 | "--style", 10 | "{based_on_style: google, indent_width: 2}" 11 | ], 12 | "editor.tabSize": 2, 13 | "python.analysis.extraPaths": [ 14 | "./3rdparty/ov-seg", 15 | "./3rdparty/InternImage/segmentation", 16 | "./3rdparty/ov-seg/third_party/CLIP", 17 | "./3rdparty/omnidata/omnidata_tools/torch", 18 | "./3rdparty/RGBX_Semantic_Segmentation" 19 | ], 20 | "editor.detectIndentation": true, 21 | "editor.indentSize": 2, 22 | "workbench.tree.indent": 4, 23 | } 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, Computer Vision and Geometry Lab 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LabelMaker
2 |
3 | ![LabelMaker Pipeline Overview](https://labelmaker.org/static/images/labelmaker_teaser.png)
4 |
5 | ## Installation
6 |
7 | This is an example installation on Ubuntu 20.04 with CUDA 11.8.
8 |
9 | ### Environment for LabelMaker
10 | This environment is used to run the individual semantic segmentation models and to generate the consensus semantic labels.
11 |
12 | ```sh
13 | bash env_v2/install_labelmaker_env.sh 3.9 11.3 1.12.0 9.5.0
14 | ```
15 |
16 | This command creates a conda environment called `labelmaker` with Python 3.9, CUDA 11.3, PyTorch 1.12.0, and GCC 9.5.0. Here are possible sets of environment versions:
17 | | Python | CUDA toolkit | PyTorch | GCC |
18 | | ------ | ------------ | ------- | ------ |
19 | | 3.9 | 11.3 | 1.12.0 | 9.5.0 |
20 | | 3.9 | 11.6 | 1.13.0 | 10.4.0 |
21 | | 3.9 | 11.8 | 2.0.0 | 10.4.0 |
22 | | 3.10 | 11.8 | 2.0.0 | 10.4.0 |
23 |
24 | For Python 3.10, only the combination `3.10 11.8 2.0.0 10.4.0` has been tested; other combinations might also work.
25 |
26 | ```sh
27 | conda activate labelmaker
28 | ```
29 |
30 | ### Environment for SDFStudio
31 | This environment is used for generating view-consistent consensus semantic labels. It uses the per-frame consensus labels (together with the RGB-D data) to train a neural implicit surface and obtain a view-consistent consensus labeling. It relies on a modified version of SDFStudio. Because SDFStudio needs a specific version of PyTorch, it is installed as a separate environment. To install the environment, run
32 | ```sh
33 | bash env_v2/install_sdfstudio_env.sh 3.10 11.3
34 | ```
35 | Python 3.10 with CUDA toolkit 11.3 is the only tested combination. This version of SDFStudio requires torch==1.12.1, which only supports CUDA 11.3 and 11.6; it may therefore not run on newer GPUs.
36 |
37 | ```sh
38 | conda activate sdfstudio
39 | ```
40 |
41 | ### Download Model Checkpoints
42 |
43 | ```sh
44 | bash env_v2/download_checkpoints.sh
45 | ```
46 |
47 | ## Docker Image
48 |
49 | ### Docker image based on Ubuntu 16.04
50 | ```sh
51 | # Build
52 | docker build --tag labelmaker-env-16.04 -f docker/ubuntu16.04+miniconda.dockerfile .
53 |
54 | # Run
55 | docker run \
56 | --gpus all \
57 | -i --rm \
58 | -v ./env_v2:/LabelMaker/env_v2 \
59 | -v ./models:/LabelMaker/models \
60 | -v ./labelmaker:/LabelMaker/labelmaker \
61 | -v ./checkpoints:/LabelMaker/checkpoints \
62 | -v ./testing:/LabelMaker/testing \
63 | -v ./.gitmodules:/LabelMaker/.gitmodules \
64 | -t labelmaker-env-16.04 /bin/bash
65 | ```
66 |
67 | ### Docker image based on Ubuntu 20.04
68 |
69 | ```sh
70 | # Build
71 | docker build --tag labelmaker-env-20.04 -f docker/ubuntu20.04+miniconda.dockerfile .
72 |
73 | # Run
74 | docker run \
75 | --gpus all \
76 | -i --rm \
77 | -v ./env_v2:/LabelMaker/env_v2 \
78 | -v ./models:/LabelMaker/models \
79 | -v ./labelmaker:/LabelMaker/labelmaker \
80 | -v ./checkpoints:/LabelMaker/checkpoints \
81 | -v ./testing:/LabelMaker/testing \
82 | -v ./.gitmodules:/LabelMaker/.gitmodules \
83 | -t labelmaker-env-20.04 /bin/bash
84 | ```
85 |
86 |
87 | ## Setup Scene
88 |
89 | ### Download scene
90 |
91 | ```sh
92 | export TRAINING_OR_VALIDATION=Training
93 | export SCENE_ID=47333462
94 | python 3rdparty/ARKitScenes/download_data.py raw --split $TRAINING_OR_VALIDATION --video_id $SCENE_ID --download_dir /tmp/ARKitScenes/ --raw_dataset_assets lowres_depth confidence lowres_wide.traj lowres_wide lowres_wide_intrinsics vga_wide vga_wide_intrinsics
95 | ```
96 |
97 | ### Convert scene to LabelMaker workspace
98 |
99 | ```sh
100 | WORKSPACE_DIR=/home/weders/scratch/scratch/LabelMaker/arkitscenes/$SCENE_ID
101 | python scripts/arkitscenes2labelmaker.py --scan_dir /tmp/ARKitScenes/raw/$TRAINING_OR_VALIDATION/$SCENE_ID --target_dir $WORKSPACE_DIR
102 | ```
103 |
104 | ## Run Pipeline on Scene
105 |
106 | ### Run individual models
107 |
108 | 1. InternImage
109 |
110 | ```sh
111 | python models/internimage.py --workspace $WORKSPACE_DIR
112 | ```
113 |
114 | 2. OVSeg
115 |
116 | ```sh
117 | python models/ovseg.py --workspace $WORKSPACE_DIR
118 | ```
119 |
120 | 3. Grounded SAM
121 |
122 | ```sh
123 | python models/grounded_sam.py --workspace $WORKSPACE_DIR
124 | ```
125 |
126 | 4. CMX
127 |
128 | ```sh
129 | python models/omnidata_depth.py --workspace $WORKSPACE_DIR
130 | python models/hha_depth.py --workspace $WORKSPACE_DIR
131 | python models/cmx.py --workspace $WORKSPACE_DIR
132 | ```
133 |
134 | 5. Mask3D
135 |
136 | ```sh
137 | python models/mask3d_inst.py --workspace $WORKSPACE_DIR
138 | ```
139 |
140 | 6. OmniData normal (used for NeuS)
141 | ```sh
142 | python models/omnidata_normal.py --workspace $WORKSPACE_DIR
143 | ```
144 |
145 | ## Run consensus voting
146 |
147 | ```sh
148 | python labelmaker/consensus.py --workspace $WORKSPACE_DIR
149 | ```
150 |
151 |
152 | ## Run 3D Lifting
153 |
154 | Point-based lifting
155 | ```sh
156 | python -m labelmaker.lifting_3d.lifting_points --workspace $WORKSPACE_DIR
157 | ```
158 |
159 |
160 | NeRF-based lifting (required for dense 2D labels)
161 | ```sh
162 | bash labelmaker/lifting_3d/lifting.sh $WORKSPACE_DIR
163 | ```
164 |
165 | ## Visualization
166 |
167 | Visualize 3D point labels (after running point-based lifting)
168 | ```sh
169 | python -m labelmaker.visualization_3d --workspace $WORKSPACE_DIR
170 | ```
171 |
172 | # Capture your own data
173 | With any LiDAR-enabled iOS device, we provide a [script](https://github.com/cvg/LabelMaker/blob/main/scripts/scanner3d2labelmaker.py) to convert the posed keyframes and scanned mesh from the [3D Scanner App](https://apps.apple.com/de/app/3d-scanner-app/id1419913995) into the LabelMaker data format. In the app, use "export all" to export both the mesh and the RGB images with their respective poses. After running the script on the exported data, the whole pipeline can be run as described above.
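For example (a sketch, not the script's documented interface: the `--scan_dir` and `--target_dir` flags are assumed by analogy with the ARKitScenes converter above — check `python scripts/scanner3d2labelmaker.py --help` for the exact flag names):

```sh
python scripts/scanner3d2labelmaker.py --scan_dir /path/to/3d_scanner_app_export --target_dir $WORKSPACE_DIR
```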
174 |
175 |
176 | # Bibtex
177 |
178 | When using LabelMaker in academic works, please use the following reference:
179 |
180 | ```
181 | @inproceedings{Weder2024labelmaker,
182 | title = {{LabelMaker: Automatic Semantic Label Generation from RGB-D Trajectories}},
183 | author={Weder, Silvan and Blum, Hermann and Engelmann, Francis and Pollefeys, Marc},
184 | booktitle = {International Conference on 3D Vision (3DV)},
185 | year = {2024}
186 | }
187 | ```
188 |
189 | # License
190 |
191 | LabelMaker itself is released under the BSD 3-Clause License. However, individual models that can be used as part of LabelMaker may have more restrictive licenses. If a user is prohibited by license from using a specific model, they can simply leave it out of the pipeline. Here are the models and the licenses they use:
192 | - ARKitScenes: CC BY-NC-SA 4.0 license
193 | - InternImage: MIT
194 | - Mask3D: MIT
195 | - GSAM: Apache-2.0
196 | - OpenAI CLIP: MIT
197 | - Grounding DINO: Apache-2.0
198 | - Omnidata: custom license, [view](https://github.com/EPFL-VILAB/omnidata?tab=License-1-ov-file#License-1-ov-file)
199 | - CMX: MIT
200 | - OVSeg: Attribution-NonCommercial 4.0 International, [view](https://github.com/facebookresearch/ov-seg?tab=License-1-ov-file#License-1-ov-file)
201 | -------------------------------------------------------------------------------- /docker/ubuntu16.04+miniconda.dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04
2 | WORKDIR /
3 | ENV TZ=Europe/Zurich
4 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone
5 | RUN apt-get update &&\
6 | apt-get -y install git curl wget make nano libgl1 libglib2.0-0 ffmpeg libsm6 libxext6 && \
7 | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
8 | chmod +x /Miniconda3-latest-Linux-x86_64.sh && \
9 | /Miniconda3-latest-Linux-x86_64.sh -b -p /miniconda3 && \
10 | rm -rf /Miniconda3-latest-Linux-x86_64.sh && \
11 | /miniconda3/bin/conda init bash && \
12 | chmod -R 777 /miniconda3
13 | RUN export PATH="/miniconda3/bin:$PATH" && conda config --set auto_activate_base false
14 | COPY ./.git /LabelMaker/.git
15 | COPY ./.gitmodules /LabelMaker/.gitmodules
16 | COPY ./3rdparty /LabelMaker/3rdparty
17 | COPY ./env_v2 /LabelMaker/env_v2
18 | COPY ./labelmaker /LabelMaker/labelmaker
19 | COPY ./setup.py /LabelMaker/setup.py
20 | WORKDIR /LabelMaker
21 | RUN export PATH="/miniconda3/bin:$PATH" && \
22 | bash env_v2/install_labelmaker_env.sh 3.9 11.3 1.12.0 9.5.0 && \
23 | rm -rf /root/.cache/* && \
24 | chmod -R 777 /miniconda3/envs/labelmaker
25 | RUN export PATH="/miniconda3/bin:$PATH" && \
26 | bash env_v2/install_sdfstudio_env.sh 3.10 11.3 && \
27 | rm -rf /root/.cache/* && \
28 | chmod -R 777 /miniconda3/envs/sdfstudio
29 | -------------------------------------------------------------------------------- /docker/ubuntu20.04+miniconda.dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04
2 | WORKDIR /
3 | ENV TZ=Europe/Zurich
4 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone
5 | RUN apt-get update && \
6 | apt-get -y install git curl wget make nano ffmpeg libsm6 libxext6 unzip && \
7 | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
8 | chmod +x /Miniconda3-latest-Linux-x86_64.sh && \
9 | /Miniconda3-latest-Linux-x86_64.sh -b -p /miniconda3 && \
10 | rm -rf /Miniconda3-latest-Linux-x86_64.sh && \
11 |
/miniconda3/bin/conda init bash && \
12 | chmod -R 777 /miniconda3
13 | RUN export PATH="/miniconda3/bin:$PATH" && conda config --set auto_activate_base false
14 | COPY ./.git /LabelMaker/.git
15 | COPY ./.gitmodules /LabelMaker/.gitmodules
16 | COPY ./3rdparty /LabelMaker/3rdparty
17 | COPY ./env_v2 /LabelMaker/env_v2
18 | COPY ./labelmaker /LabelMaker/labelmaker
19 | COPY ./scripts /LabelMaker/scripts
20 | COPY ./setup.py /LabelMaker/setup.py
21 | WORKDIR /LabelMaker
22 | RUN export PATH="/miniconda3/bin:$PATH" && \
23 | bash env_v2/install_labelmaker_env.sh 3.9 11.3 1.12.0 9.5.0 && \
24 | rm -rf /root/.cache/* && \
25 | chmod -R 777 /miniconda3/envs/labelmaker
26 | RUN export PATH="/miniconda3/bin:$PATH" && \
27 | bash env_v2/install_sdfstudio_env.sh 3.10 11.3 && \
28 | rm -rf /root/.cache/* && \
29 | chmod -R 777 /miniconda3/envs/sdfstudio
30 | -------------------------------------------------------------------------------- /env_v2/README.md: -------------------------------------------------------------------------------- 1 | On Euler, to use the headless-rendering build of Open3D, build OSMesa according to https://github.com/quantaji/open3d-manylinux2014/blob/main/osmesa_euler_build_install.sh, load the LLVM module on Euler, and add OSMesa to LD_LIBRARY_PATH:
2 | ```sh
3 | export LD_LIBRARY_PATH=${HOME}/osmesa/lib:$LD_LIBRARY_PATH
4 | ```
5 |
6 |
7 | ## Install Grounded SAM
8 | First, update all submodules
9 | ```sh
10 | git submodule update --init --recursive
11 | ```
12 |
13 | Install PyTorch and the other required packages
14 | ```sh
15 | pip install -r ./env_v2/02_grounded_sam.txt
16 | ```
17 | Install RAM
18 | ```sh
19 | pip install ./3rdparty/recognize-anything/
20 | ```
21 |
22 | Install SAM
23 | ```sh
24 | pip install ./3rdparty/Grounded-Segment-Anything/segment_anything
25 | ```
26 |
27 | Install Grounding DINO
28 | ```sh
29 | export CUDA_HOST_COMPILER="${HOME}/.conda/envs/labelmaker/bin/gcc"
30 | export CUDA_PATH="${HOME}/.conda/envs/labelmaker"
31 | export CUDA_HOME=$CUDA_PATH
32 | pip install ./3rdparty/Grounded-Segment-Anything/GroundingDINO
33 | ```
34 |
35 | Install MinkowskiEngine
36 | First install OpenBLAS
37 | ```sh
38 | conda install -c anaconda openblas=0.3.20
39 | ```
40 | then, from the MinkowskiEngine source directory, run
41 | ```sh
42 | export CUDA_HOST_COMPILER="${HOME}/.conda/envs/labelmaker/bin/gcc"
43 | export CUDA_PATH="${HOME}/.conda/envs/labelmaker"
44 | export CUDA_HOME=$CUDA_PATH
45 | python setup.py install --force_cuda --blas=openblas
46 | ```
47 | -------------------------------------------------------------------------------- /env_v2/download_checkpoints.sh: -------------------------------------------------------------------------------- 1 | env_name=labelmaker
2 | dir_name="$(pwd)/$(dirname "$0")"
3 | eval "$(conda shell.bash hook)"
4 | conda activate $env_name
5 |
6 | echo $dir_name
7 | mkdir -p $dir_name/../checkpoints
8 |
9 | # ovseg https://drive.google.com/file/d/1cn-ohxgXDrDfkzC1QdO-fi8IjbjXmgKy/view?pli=1
10 | gdown "1cn-ohxgXDrDfkzC1QdO-fi8IjbjXmgKy" -O $dir_name/../checkpoints/ovseg_swinbase_vitL14_ft_mpt.pth
11 |
12 | # recognize-anything https://huggingface.co/spaces/xinyu1205/recognize-anything/blob/main/ram_swin_large_14m.pth
13 | gdown "https://huggingface.co/spaces/xinyu1205/recognize-anything/resolve/main/ram_swin_large_14m.pth" -O $dir_name/../checkpoints/ram_swin_large_14m.pth
14 |
15 | # grounding dino https://github.com/IDEA-Research/Grounded-Segment-Anything/tree/main/GroundingDINO#checkpoints
16 |
"https://huggingface.co/ShilongLiu/GroundingDINO/resolve/main/groundingdino_swint_ogc.pth" -O $dir_name/../checkpoints/groundingdino_swint_ogc.pth 17 | 18 | # sam-hq https://drive.google.com/file/d/1qobFYrI4eyIANfBSmYcGuWRaSIXfMOQ8/view?usp=sharing 19 | gdown 1qobFYrI4eyIANfBSmYcGuWRaSIXfMOQ8 -O $dir_name/../checkpoints/sam_hq_vit_h.pth 20 | 21 | # cmx https://drive.google.com/file/d/1hlyglGnEB0pnWXfHPtBtCGGlKMDh2K--/view 22 | gdown 1hlyglGnEB0pnWXfHPtBtCGGlKMDh2K-- -O $dir_name/../checkpoints/NYUDV2_CMX+Segformer-B2.pth 23 | 24 | # InternImage https://huggingface.co/OpenGVLab/InternImage/blob/main/upernet_internimage_h_896_160k_ade20k.pth 25 | gdown https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask2former_internimage_h_896_80k_cocostuff2ade20k.pth -O $dir_name/../checkpoints/mask2former_internimage_h_896_80k_cocostuff2ade20k.pth 26 | 27 | # Mask3D https://zenodo.org/records/10422707/files/mask3d_scannet200_demo.ckpt 28 | wget "https://zenodo.org/records/10422707/files/mask3d_scannet200_demo.ckpt?download=1" -O $dir_name/../checkpoints/mask3d_scannet200_demo.ckpt 29 | 30 | # omnidata https://drive.google.com/file/d/1Jrh-bRnJEjyMCS7f-WsaFlccfPjJPPHI/view 31 | # too many download 32 | gdown "1Jrh-bRnJEjyMCS7f-WsaFlccfPjJPPHI" -O $dir_name/../checkpoints/omnidata_dpt_depth_v2.ckpt 33 | 34 | # omnidata normal model https://drive.google.com/file/d/1wNxVO4vVbDEMEpnAi_jwQObf2MFodcBR/view 35 | gdown "1wNxVO4vVbDEMEpnAi_jwQObf2MFodcBR&confirm=t" -O $dir_name/../checkpoints/omnidata_dpt_normal_v2.ckpt 36 | -------------------------------------------------------------------------------- /env_v2/install_labelmaker_env.sh: -------------------------------------------------------------------------------- 1 | # exit when any command fails 2 | set -e 3 | 4 | # make sure submodules are updated 5 | git submodule update --init --recursive 6 | 7 | env_name=labelmaker 8 | dir_name="$(pwd)/$(dirname "$0")" 9 | 10 | echo $dir_name 11 | 12 | # decide which version of python cuda pytorch torchvision to use 13 | if [ -z "$1" ]; then 14 | target_python_version="3.10" 15 | else 16 | target_python_version=$1 17 | fi 18 | 19 | if [ -z "$2" ]; then 20 | target_cuda_version="unset" 21 | else 22 | target_cuda_version=$2 23 | fi 24 | 25 | if [ -z "$3" ]; then 26 | target_torch_version="unset" 27 | else 28 | target_torch_version=$3 29 | fi 30 | 31 | if [ -z "$4" ]; then 32 | target_gcc_version="unset" 33 | else 34 | target_gcc_version=$4 35 | fi 36 | 37 | # create env, install gcc cuda and openblas 38 | conda create --name $env_name --yes python=$target_python_version 39 | eval "$(conda shell.bash hook)" 40 | conda activate $env_name 41 | 42 | pip install packaging 43 | python $dir_name/versions.py --target_cuda_version $target_cuda_version --target_torch_version $target_torch_version --target_gcc_version $target_gcc_version 44 | 45 | source $dir_name/INSTALLED_VERSIONS.sh 46 | echo $INSTALLED_CUDA_VERSION 47 | echo $INSTALLED_CUDA_ABBREV 48 | echo $INSTALLED_PYTORCH_VERSION 49 | echo $INSTALLED_GCC_VERSION 50 | echo $INSTALLED_TORCHVISION_VERSION 51 | echo $INSTALLED_OPEN3D_URL 52 | 53 | conda install -y -c "conda-forge" gxx=$INSTALLED_GCC_VERSION 54 | conda install -y -c conda-forge sysroot_linux-64=2.17 55 | conda install -y -c "nvidia/label/cuda-$INSTALLED_CUDA_VERSION" cuda 56 | conda install -y -c anaconda openblas=0.3.20 57 | 58 | conda deactivate 59 | conda activate $env_name 60 | 61 | conda_home="$(conda info | grep "active env location : " | cut -d ":" -f2-)" 62 | 
conda_home="${conda_home#"${conda_home%%[![:space:]]*}"}" 63 | 64 | echo $conda_home 65 | 66 | which python 67 | which pip 68 | which nvcc 69 | 70 | # add cuda compiler to path 71 | export CUDA_HOST_COMPILER="$conda_home/bin/gcc" 72 | export CUDA_PATH="$conda_home" 73 | export CUDA_HOME=$CUDA_PATH 74 | export TORCH_CUDA_ARCH_LIST="6.0 6.1 6.2 7.0 7.2 7.5 8.0 8.6" 75 | export MAX_JOBS=6 76 | export AM_I_DOCKER=1 77 | export BUILD_WITH_CUDA=1 78 | export FORCE_CUDA=1 79 | 80 | # specify NLTK download location 81 | export NLTK_DATA="$dir_name/../3rdparty/nltk_data" 82 | mkdir -p $NLTK_DATA 83 | 84 | # TODO add git checkout of all repository to keep version consistent 85 | 86 | # install all dependency from pypi 87 | pip install -r "$dir_name/requirements.txt" 88 | 89 | # install open3d 90 | pip install $INSTALLED_OPEN3D_URL 91 | 92 | # install torch and torch-scater, they are cuda-version dependent 93 | # Pytorch 94 | pip install torch==$INSTALLED_PYTORCH_VERSION+$INSTALLED_CUDA_ABBREV torchvision==$INSTALLED_TORCHVISION_VERSION+$INSTALLED_CUDA_ABBREV --index-url https://download.pytorch.org/whl/$INSTALLED_CUDA_ABBREV 95 | # torch-scatter 96 | pip install torch-scatter --index-url "" -f "https://data.pyg.org/whl/torch-${INSTALLED_PYTORCH_VERSION}%2B${INSTALLED_CUDA_ABBREV}.html" 97 | pip install mmcv-full==1.6.2 -f https://download.openmmlab.com/mmcv/dist/${INSTALLED_CUDA_ABBREV}/torch${INSTALLED_PYTORCH_VERSION}/index.html 98 | 99 | # install mask3d 100 | # Step 1: install detectron 2 and minkowskiengine 101 | pip install "git+https://github.com/facebookresearch/detectron2.git@710e7795d0eeadf9def0e7ef957eea13532e34cf" 102 | cd $dir_name/../3rdparty/Mask3D/third_party 103 | rm -rf MinkowskiEngine 104 | git clone --recursive "https://github.com/NVIDIA/MinkowskiEngine" 105 | cd MinkowskiEngine 106 | git checkout 02fc608bea4c0549b0a7b00ca1bf15dee4a0b228 107 | python setup.py install --force_cuda --blas=openblas 108 | # Step 2: install scannet segmentor 109 | cd $dir_name/../3rdparty/Mask3D/third_party 110 | rm -rf ScanNet 111 | git clone https://github.com/ScanNet/ScanNet.git 112 | cd ScanNet/Segmentator 113 | git checkout 3e5726500896748521a6ceb81271b0f5b2c0e7d2 114 | make 115 | ## Step 3: pointnet2 116 | cd $dir_name/../3rdparty/Mask3D/third_party/pointnet2 117 | python setup.py install 118 | ## Step 4: install mask3d package 119 | cd $dir_name/../3rdparty/Mask3D 120 | pip install . 121 | pip install --no-deps --force-reinstall --upgrade omegaconf==2.2.0 hydra-core==1.0.5 122 | 123 | # install omnidata + hha + cmx 124 | # Step 1: create folder and install omnidata # might be deprecated as weight will be stored at other path 125 | mkdir -p $dir_name/../3rdparty/omnidata/omnidata_tools/torch/pretrained_models/ 126 | # Step 2: install HHA 127 | cd $dir_name/../3rdparty/Depth2HHA-python 128 | pip install . 129 | # Step 3: install cmx 130 | cd $dir_name/../3rdparty/mmsegmentation 131 | pip install -v -e . 
132 | # Step 4: create an empty txt for cmx eval configuration
133 | cd $dir_name/../3rdparty/RGBX_Semantic_Segmentation
134 | touch empty.txt
135 | # Step 5: replace collections.Iterable with collections.abc.Iterable
136 | sed -i 's/collections.Iterable/collections.abc.Iterable/g' $dir_name/../3rdparty/RGBX_Semantic_Segmentation/utils/transforms.py
137 |
138 | # install grounded sam
139 | pip install $dir_name/../3rdparty/recognize-anything/
140 | pip install $dir_name/../3rdparty/Grounded-Segment-Anything/segment_anything
141 | pip install $dir_name/../3rdparty/Grounded-Segment-Anything/GroundingDINO
142 |
143 | # install ovseg; ovseg customizes CLIP, so reinstall it after grounded sam
144 | cd $dir_name/../3rdparty/ov-seg/third_party/CLIP
145 | python -m pip install -Ue .
146 | python -m nltk.downloader -d $NLTK_DATA wordnet
147 |
148 | # install internimage
149 | # avoid an error when no cuda runtime is available
150 | sed -i 's/torch.cuda.is_available()/True/g' $dir_name/../3rdparty/InternImage/segmentation/ops_dcnv3/setup.py
151 | cd $dir_name/../3rdparty/InternImage/segmentation/ops_dcnv3
152 | sh ./make.sh
153 |
154 | # install labelmaker
155 | pip install -e $dir_name/../.
156 | -------------------------------------------------------------------------------- /env_v2/install_sdfstudio_env.sh: -------------------------------------------------------------------------------- 1 | # exit when any command fails
2 | set -e
3 |
4 | # make sure submodules are updated
5 | git submodule update --init --recursive
6 |
7 | env_name=sdfstudio
8 | dir_name="$(pwd)/$(dirname "$0")"
9 |
10 | echo $dir_name
11 |
12 | # decide which versions of python and cuda to use
13 | if [ -z "$1" ]; then
14 | target_python_version="3.10"
15 | else
16 | target_python_version=$1
17 | fi
18 |
19 | if [ -z "$2" ]; then
20 | target_cuda_version="unset"
21 | else
22 | target_cuda_version=$2
23 | fi
24 |
25 | # create env, install gcc, cuda and openblas
26 | conda create --name $env_name --yes python=$target_python_version
27 | eval "$(conda shell.bash hook)"
28 | conda activate $env_name
29 |
30 | pip install packaging
31 | python $dir_name/versions.py --target_cuda_version $target_cuda_version --target_torch_version 1.12.1 --target_gcc_version 9.5.0
32 |
33 | source $dir_name/INSTALLED_VERSIONS.sh
34 | echo $INSTALLED_CUDA_VERSION
35 | echo $INSTALLED_CUDA_ABBREV
36 | echo $INSTALLED_PYTORCH_VERSION
37 | echo $INSTALLED_GCC_VERSION
38 | echo $INSTALLED_TORCHVISION_VERSION
39 | echo $INSTALLED_OPEN3D_URL
40 |
41 | conda install -y -c "conda-forge" gxx=$INSTALLED_GCC_VERSION
42 | conda install -y -c conda-forge sysroot_linux-64=2.17
43 | conda install -y -c "nvidia/label/cuda-$INSTALLED_CUDA_VERSION" cuda
44 | conda install -y -c anaconda openblas=0.3.20
45 |
46 | conda deactivate
47 | conda activate $env_name
48 |
49 | conda_home="$(conda info | grep "active env location : " | cut -d ":" -f2-)"
50 | conda_home="${conda_home#"${conda_home%%[![:space:]]*}"}"
51 |
52 | echo $conda_home
53 |
54 | which python
55 | which pip
56 | which nvcc
57 |
58 | # add cuda compiler to path
59 | export CUDA_HOST_COMPILER="$conda_home/bin/gcc"
60 | export CUDA_PATH="$conda_home"
61 | export CUDA_HOME=$CUDA_PATH
62 | export LD_LIBRARY_PATH=$conda_home/lib:$LD_LIBRARY_PATH
63 | export LIBRARY_PATH="$conda_home/lib/stubs:$LIBRARY_PATH"
64 | export TCNN_CUDA_ARCHITECTURES=75
65 | export AM_I_DOCKER=1
66 | export BUILD_WITH_CUDA=1
67 | export FORCE_CUDA=1
68 |
69 | # install open3d before sdfstudio
70 |
pip install $INSTALLED_OPEN3D_URL
71 |
72 | # install other packages
73 | pip install gin-config pandas
74 |
75 | # remove open3d dependency
76 | sed -i 's/"open3d>=0.16.0"/#"open3d>=0.16.0"/g' $dir_name/../3rdparty/sdfstudio/pyproject.toml
77 |
78 | # install sdfstudio
79 | pip install $dir_name/../3rdparty/sdfstudio
80 | # ns-install-cli
81 |
82 | # install labelmaker also
83 | pip install -e $dir_name/..
84 |
85 | pip install torch==$INSTALLED_PYTORCH_VERSION+$INSTALLED_CUDA_ABBREV torchvision==$INSTALLED_TORCHVISION_VERSION+$INSTALLED_CUDA_ABBREV --index-url https://download.pytorch.org/whl/$INSTALLED_CUDA_ABBREV
86 |
87 | # install tcnn
88 | conda install -y -c anaconda git
89 | pip install "git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch"
90 | -------------------------------------------------------------------------------- /env_v2/requirements.txt: -------------------------------------------------------------------------------- 1 | yapf
2 | tqdm
3 | ninja
4 | gin-config
5 | ipykernel
6 | pytest
7 |
8 | gdown # for downloading checkpoints
9 |
10 | # ARKitScenes
11 | pandas
12 | scipy # for pose interpolation
13 |
14 | # mask3d
15 | albumentations
16 | loguru
17 |
18 | # ovseg
19 | nltk
20 | cython
21 | shapely
22 | timm
23 | h5py
24 | fire
25 | opencv-python
26 | pillow==9.5.0
27 | wandb
28 |
29 | # omnidata + hha + cmx
30 | pytorch-lightning
31 | joblib
32 | easydict
33 |
34 | # grounded sam
35 | fairscale
36 | scikit-image
37 |
38 | # sdfstudio
39 | pyquaternion
40 |
41 | # internimage
42 | mmdet==2.26.0
43 | -------------------------------------------------------------------------------- /env_v2/test_labelmaker_env.sh: -------------------------------------------------------------------------------- 1 | # exit when any command fails
2 | set -e
3 |
4 | env_name=labelmaker
5 | dir_name="$(pwd)/$(dirname "$0")"
6 |
7 | echo $dir_name
8 |
9 | eval "$(conda shell.bash hook)"
10 | conda activate $env_name
11 |
12 | conda_home="$(conda info | grep "active env location : " | cut -d ":" -f2-)"
13 | conda_home="${conda_home#"${conda_home%%[![:space:]]*}"}"
14 |
15 | echo $conda_home
16 |
17 | which python
18 | which pip
19 | which nvcc
20 |
21 | # add cuda compiler to path
22 | export CUDA_HOST_COMPILER="$conda_home/bin/gcc"
23 | export CUDA_PATH="$conda_home"
24 | export CUDA_HOME=$CUDA_PATH
25 | export TORCH_CUDA_ARCH_LIST="6.0 6.1 6.2 7.0 7.2 7.5 8.0 8.6"
26 | export MAX_JOBS=6
27 |
28 | # specify NLTK download location
29 | export NLTK_DATA="$dir_name/../3rdparty/nltk_data"
30 |
31 | # testing
32 | rm -rf $dir_name/../testing/test_scan/intermediate
33 | cd $dir_name/../testing/test_models
34 | pytest test_cmx_00_omnidata.py
35 | pytest test_cmx_01_hha.py
36 | pytest test_cmx_02_cmx.py
37 | pytest test_grounded_sam.py
38 | pytest test_internimage.py
39 | pytest test_mask3d.py
40 | pytest test_ovseg.py
41 | pytest test_omnidata_normal.py
42 | pytest test_consensus.py
43 | rm -rf $dir_name/../testing/test_scan/intermediate
44 | -------------------------------------------------------------------------------- /env_v2/versions.py: -------------------------------------------------------------------------------- 1 | # This code gives a valid set of cuda, pytorch, torchvision and gcc versions.
2 | # It takes the desired cuda version, the desired pytorch version, and the current nvidia driver's cuda version as input.
3 | import argparse
4 | import os
5 | import re
6 | import sys
7 |
8 | from packaging.version import parse
9 |
10 | CUDA_VERSIONS = ["11.3", "11.5", "11.6", "11.7", "11.8",
"12.1"] 11 | PYTORCH_VERSIONS = [ 12 | "1.10.0", "1.10.1", "1.10.2", "1.11.0", "1.12.0", "1.12.1", "1.13.0", 13 | "1.13.1", "2.0.0", "2.0.1", "2.1.0" 14 | ] 15 | CUDA_PYTORCH_COMPATIBILITY = { 16 | "11.3": [ 17 | "1.10.0", # need python 3.9 18 | "1.10.1", # does not have mmcv-full 19 | "1.10.2", # does not have mmcv-full 20 | "1.11.0", 21 | "1.12.0", 22 | "1.12.1", # does not have mmcv-full 23 | ], 24 | "11.5": ["1.11.0"], 25 | "11.6": [ 26 | "1.12.0", # does not have mmcv-full 27 | "1.12.1", # does not have mmcv-full 28 | "1.13.0", # mmcv-full no 1.6.2 29 | "1.13.1", # does not have mmcv-full 30 | ], 31 | "11.7": [ 32 | "1.13.0", # does not have mmcv-full 33 | "1.13.1", # does not have mmcv-full 34 | "2.0.0", # does not have mmcv-full 35 | "2.0.1", # does not have mmcv-full 36 | ], 37 | "11.8": [ 38 | "2.0.0", 39 | "2.0.1", 40 | "2.1.0", 41 | ], 42 | "12.1": ["2.1.0"], 43 | } 44 | PTTORCH_TORCHVISION_CORRESPONDENCE = { 45 | "2.1.0": "0.16.0", 46 | "2.0.1": "0.15.2", 47 | "2.0.0": "0.15.0", 48 | "1.13.1": "0.14.1", 49 | "1.13.0": "0.14.0", 50 | "1.12.1": "0.13.1", 51 | "1.12.0": "0.13.0", 52 | "1.11.0": "0.12.0", 53 | "1.10.2": "0.11.3", 54 | "1.10.1": "0.11.2", 55 | "1.10.0": "0.11.0", 56 | } 57 | CUDA_MAX_GCC_VERSION = { 58 | "11.3": "10.4.0", 59 | "11.5": "11.4.0", 60 | "11.6": "11.4.0", 61 | "11.7": "11.4.0", 62 | "11.8": "11.4.0", 63 | "12.1": "12.2.0", 64 | } 65 | CUDA_MIN_GCC_VERSION = "8.5.0" 66 | CONDA_AVAIL_CUDA_MAPPING = { # use the higher version 67 | "11.3": "11.3.1", 68 | "11.5": "11.5.1", 69 | "11.6": "11.6.2", 70 | "11.7": "11.7.1", 71 | "11.8": "11.8.0", 72 | "12.1": "12.1.1", 73 | } 74 | CONDA_AVAIL_GCC_VERSION = [ 75 | "12.2.0", "12.1.0", "11.4.0", "11.3.0", "11.2.0", "11.1.0", "10.4.0", 76 | "10.3.0", "9.5.0", "9.4.0", "8.5.0" 77 | ] 78 | CONDA_AVAIL_OPENBLAS_VERSION = [ 79 | "0.3.21", "0.3.20", "0.3.18", "0.3.17", "0.3.13", "0.3.10", "0.3.6", 80 | "0.3.3", "0.3.2", "0.2.20" 81 | ] 82 | 83 | OPEN3D_URLS = { 84 | "3.6": 85 | "https://github.com/cvg/open3d-manylinux2014/releases/download/0.17.0/open3d_cpu-0.17.0-cp36-cp36m-manylinux_2_17_x86_64.whl", 86 | "3.7": 87 | "https://github.com/cvg/open3d-manylinux2014/releases/download/0.17.0/open3d_cpu-0.17.0-cp37-cp37m-manylinux_2_17_x86_64.whl", 88 | "3.8": 89 | "https://github.com/cvg/open3d-manylinux2014/releases/download/0.17.0/open3d_cpu-0.17.0-cp38-cp38-manylinux_2_17_x86_64.whl", 90 | "3.9": 91 | "https://github.com/cvg/open3d-manylinux2014/releases/download/0.17.0/open3d_cpu-0.17.0-cp39-cp39-manylinux_2_17_x86_64.whl", 92 | "3.10": 93 | "https://github.com/cvg/open3d-manylinux2014/releases/download/0.17.0/open3d_cpu-0.17.0-cp310-cp310-manylinux_2_17_x86_64.whl", 94 | } 95 | 96 | if __name__ == "__main__": 97 | 98 | try: 99 | output_stream = os.popen('nvidia-smi | grep "CUDA Version:"') 100 | driver_cuda_version = parse( 101 | re.search(r"CUDA Version:( )*[0-9]+\.[0-9]", 102 | output_stream.read()).group().split(':')[-1].strip()) 103 | except: 104 | driver_cuda_version = None 105 | 106 | print(f"Found nvidia driver's cuda version: {driver_cuda_version} .") 107 | 108 | parser = argparse.ArgumentParser() 109 | parser.add_argument("--target_cuda_version", type=str) 110 | parser.add_argument("--target_torch_version", type=str) 111 | parser.add_argument("--target_gcc_version", type=str) 112 | args = parser.parse_args() 113 | 114 | print(args) 115 | 116 | # check CUDA 117 | target_cuda_version: str = None 118 | if args.target_cuda_version != 'unset': 119 | try: 120 | parse(args.target_cuda_version) 121 | except: 122 | raise 
ValueError("The cuda version should be in format of x.x !") 123 | 124 | assert args.target_cuda_version in CUDA_VERSIONS, f"The specified cuda version {args.target_cuda_version} is not supported, please use CUDA: {', '.join(CUDA_VERSIONS)}" 125 | 126 | target_cuda_version = args.target_cuda_version 127 | 128 | else: 129 | if driver_cuda_version is None: 130 | raise ValueError( 131 | "No CUDA driver detected on your machine, and no target cuda toolkit specified!" 132 | ) 133 | 134 | for ver in CUDA_VERSIONS[::-1]: 135 | if parse(ver) <= driver_cuda_version: 136 | print(f"CUDA version not specified, using highes possible cuda: {ver}") 137 | target_cuda_version = ver 138 | break 139 | 140 | if target_cuda_version is None: 141 | raise NotImplementedError( 142 | f"The cuda version ({driver_cuda_version}) of this machine is too old!" 143 | ) 144 | 145 | # check pytorch 146 | target_torch_version: str = None 147 | if args.target_torch_version != 'unset': 148 | try: 149 | parse(args.target_torch_version) 150 | except: 151 | raise ValueError("The pytorch version should be in format of x.x !") 152 | 153 | assert args.target_torch_version in PYTORCH_VERSIONS, f"The specified torch version {args.target_torch_version} is not supported, please use PyTorch: {', '.join(PYTORCH_VERSIONS)}" 154 | 155 | assert args.target_torch_version in CUDA_PYTORCH_COMPATIBILITY[ 156 | target_cuda_version], f"The specified torch version {args.target_torch_version} is not supported by the selected version of cuda {target_cuda_version}, please use PyTorch: {', '.join(CUDA_PYTORCH_COMPATIBILITY[target_cuda_version])}" 157 | 158 | target_torch_version = args.target_torch_version 159 | 160 | else: 161 | for ver in PYTORCH_VERSIONS[::-1]: 162 | if ver in CUDA_PYTORCH_COMPATIBILITY[target_cuda_version]: 163 | print(f"PyTorch version not specified, using highes possible: {ver}") 164 | target_torch_version = ver 165 | break 166 | 167 | # check gcc 168 | target_gcc_version: str = None 169 | if args.target_gcc_version != 'unset': 170 | try: 171 | parse(args.target_gcc_version) 172 | except: 173 | raise ValueError("The GCC version should be in format of x.x !") 174 | 175 | assert parse(args.target_gcc_version) >= parse( 176 | CUDA_MIN_GCC_VERSION 177 | ), f"The target GCC compiler version {args.target_gcc_version} should be higher than {CUDA_MIN_GCC_VERSION}" 178 | 179 | assert parse(args.target_gcc_version) <= parse( 180 | CUDA_MAX_GCC_VERSION[target_cuda_version] 181 | ), f"The target GCC compiler version {args.target_gcc_version} should be lower than {CUDA_MAX_GCC_VERSION[target_cuda_version]}" 182 | 183 | assert args.target_gcc_version in CONDA_AVAIL_GCC_VERSION, f"The target GCC compiler version {args.target_gcc_version} should be one of {CONDA_AVAIL_GCC_VERSION}" 184 | 185 | target_gcc_version = args.target_gcc_version 186 | 187 | else: 188 | # use the highest possible gcc compiler 189 | target_gcc_version = CUDA_MAX_GCC_VERSION[target_cuda_version] 190 | 191 | target_torchvision_version = PTTORCH_TORCHVISION_CORRESPONDENCE[ 192 | target_torch_version] 193 | 194 | # detect python version 195 | python_version = str(sys.version_info.major) + '.' 
+ str( 196 | sys.version_info.minor) 197 | target_open3d_url = OPEN3D_URLS[python_version] 198 | 199 | with open(os.path.join(os.path.dirname(__file__), 'INSTALLED_VERSIONS.sh'), 200 | 'w') as f: 201 | f.write( 202 | f'export INSTALLED_CUDA_VERSION={CONDA_AVAIL_CUDA_MAPPING[target_cuda_version]}\n' 203 | ) 204 | f.write( 205 | f'export INSTALLED_CUDA_ABBREV={"cu" + "".join(target_cuda_version.split("."))}\n' 206 | ) 207 | f.write(f'export INSTALLED_PYTORCH_VERSION={target_torch_version}\n') 208 | f.write(f'export INSTALLED_GCC_VERSION={target_gcc_version}\n') 209 | f.write( 210 | f'export INSTALLED_TORCHVISION_VERSION={target_torchvision_version}\n') 211 | f.write(f'export INSTALLED_OPEN3D_URL={target_open3d_url}\n') 212 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: scannetter 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=conda_forge 7 | - _openmp_mutex=4.5=2_gnu 8 | - appdirs=1.4.4=pyh9f0ad1d_0 9 | - brotlipy=0.7.0=py38h0a891b7_1005 10 | - bzip2=1.0.8=h7f98852_4 11 | - ca-certificates=2022.12.7=ha878542_0 12 | - certifi=2022.12.7=pyhd8ed1ab_0 13 | - cffi=1.15.1=py38h4a40e3a_3 14 | - charset-normalizer=2.1.1=pyhd8ed1ab_0 15 | - colorama=0.4.6=pyhd8ed1ab_0 16 | - cryptography=39.0.1=py38h3d167d9_0 17 | - freetype=2.12.1=hca18f0e_1 18 | - idna=3.4=pyhd8ed1ab_0 19 | - jpeg=9e=h166bdaf_2 20 | - lcms2=2.14=hfd0df8a_1 21 | - ld_impl_linux-64=2.40=h41732ed_0 22 | - lerc=4.0.0=h27087fc_0 23 | - libblas=3.9.0=16_linux64_openblas 24 | - libcblas=3.9.0=16_linux64_openblas 25 | - libdeflate=1.17=h0b41bf4_0 26 | - libffi=3.4.2=h7f98852_5 27 | - libgcc-ng=12.2.0=h65d4601_19 28 | - libgfortran-ng=12.2.0=h69a702a_19 29 | - libgfortran5=12.2.0=h337968e_19 30 | - libgomp=12.2.0=h65d4601_19 31 | - liblapack=3.9.0=16_linux64_openblas 32 | - libnsl=2.0.0=h7f98852_0 33 | - libopenblas=0.3.21=pthreads_h78a6416_3 34 | - libpng=1.6.39=h753d276_0 35 | - libsqlite=3.40.0=h753d276_0 36 | - libstdcxx-ng=12.2.0=h46fd767_19 37 | - libtiff=4.5.0=h6adf6a1_2 38 | - libuuid=2.32.1=h7f98852_1000 39 | - libwebp-base=1.2.4=h166bdaf_0 40 | - libxcb=1.13=h7f98852_1004 41 | - libzlib=1.2.13=h166bdaf_4 42 | - ncurses=6.3=h27087fc_1 43 | - numpy=1.24.2=py38h10c12cc_0 44 | - openjpeg=2.5.0=hfec8fc6_2 45 | - openssl=3.0.8=h0b41bf4_0 46 | - packaging=23.0=pyhd8ed1ab_0 47 | - pillow=9.4.0=py38hde6dc18_1 48 | - pip=23.0=pyhd8ed1ab_0 49 | - pooch=1.6.0=pyhd8ed1ab_0 50 | - pthread-stubs=0.4=h36c2ea0_1001 51 | - pycparser=2.21=pyhd8ed1ab_0 52 | - pyopenssl=23.0.0=pyhd8ed1ab_0 53 | - pyquaternion=0.9.9=pyhd8ed1ab_1 54 | - pysocks=1.7.1=pyha2e5f31_6 55 | - python=3.8.16=he550d4f_1_cpython 56 | - python_abi=3.8=3_cp38 57 | - readline=8.1.2=h0f457ee_0 58 | - requests=2.28.2=pyhd8ed1ab_0 59 | - setuptools=67.1.0=pyhd8ed1ab_0 60 | - tk=8.6.12=h27826a3_0 61 | - tqdm=4.64.1=pyhd8ed1ab_0 62 | - urllib3=1.26.14=pyhd8ed1ab_0 63 | - wheel=0.38.4=pyhd8ed1ab_0 64 | - xorg-libxau=1.0.9=h7f98852_0 65 | - xorg-libxdmcp=1.1.3=h7f98852_0 66 | - xz=5.2.6=h166bdaf_0 67 | - zstd=1.5.2=h3eb15da_6 68 | - pip: 69 | - addict==2.4.0 70 | - aiohttp==3.8.4 71 | - aiosignal==1.3.1 72 | - anyio==3.6.2 73 | - argon2-cffi==21.3.0 74 | - argon2-cffi-bindings==21.2.0 75 | - arrow==1.2.3 76 | - asttokens==2.2.1 77 | - async-timeout==4.0.2 78 | - attrs==22.2.0 79 | - backcall==0.2.0 80 | - beautifulsoup4==4.11.2 81 | - bleach==6.0.0 82 | - click==8.1.3 83 | - coloredlogs==15.0.1 84 | - 
comm==0.1.2 85 | - configargparse==1.5.3 86 | - contourpy==1.0.7 87 | - cycler==0.11.0 88 | - dash==2.8.1 89 | - dash-core-components==2.0.0 90 | - dash-html-components==2.0.0 91 | - dash-table==5.0.0 92 | - debugpy==1.6.6 93 | - decorator==5.1.1 94 | - defusedxml==0.7.1 95 | - docker-pycreds==0.4.0 96 | - executing==1.2.0 97 | - fastjsonschema==2.16.3 98 | - filelock==3.9.0 99 | - flask==2.2.3 100 | - fonttools==4.39.0 101 | - fqdn==1.5.1 102 | - frozenlist==1.3.3 103 | - fsspec==2023.3.0 104 | - gdown==4.6.4 105 | - gitdb==4.0.10 106 | - gitpython==3.1.31 107 | - h5py==3.8.0 108 | - humanfriendly==10.0 109 | - imageio==2.26.0 110 | - importlib-metadata==6.0.0 111 | - importlib-resources==5.12.0 112 | - ipykernel==6.21.3 113 | - ipython==8.11.0 114 | - ipython-genutils==0.2.0 115 | - ipywidgets==8.0.4 116 | - isoduration==20.11.0 117 | - itsdangerous==2.1.2 118 | - jedi==0.18.2 119 | - jinja2==3.1.2 120 | - joblib==1.2.0 121 | - jsonpointer==2.3 122 | - jsonschema==4.17.3 123 | - jupyter==1.0.0 124 | - jupyter-client==8.0.3 125 | - jupyter-console==6.6.3 126 | - jupyter-core==5.2.0 127 | - jupyter-events==0.6.3 128 | - jupyter-server==2.4.0 129 | - jupyter-server-terminals==0.4.4 130 | - jupyterlab-pygments==0.2.2 131 | - jupyterlab-widgets==3.0.5 132 | - kiwisolver==1.4.4 133 | - kornia==0.6.10 134 | - lazy-loader==0.1 135 | - lightning-utilities==0.8.0 136 | - markupsafe==2.1.2 137 | - matplotlib==3.7.1 138 | - matplotlib-inline==0.1.6 139 | - mistune==2.0.5 140 | - multidict==6.0.4 141 | - nbclassic==0.5.3 142 | - nbclient==0.7.2 143 | - nbconvert==7.2.9 144 | - nbformat==5.5.0 145 | - nest-asyncio==1.5.6 146 | - networkx==3.0 147 | - ninja==1.10.2.3 148 | - notebook==6.5.3 149 | - notebook-shim==0.2.2 150 | - nvidia-cublas-cu11==11.10.3.66 151 | - nvidia-cuda-nvrtc-cu11==11.7.99 152 | - nvidia-cuda-runtime-cu11==11.7.99 153 | - nvidia-cudnn-cu11==8.5.0.96 154 | - open3d==0.16.0 155 | - opencv-python==4.7.0.72 156 | - pandas==1.5.3 157 | - pandocfilters==1.5.0 158 | - parso==0.8.3 159 | - pathtools==0.1.2 160 | - pexpect==4.8.0 161 | - pickleshare==0.7.5 162 | - pkgutil-resolve-name==1.3.10 163 | - platformdirs==3.1.0 164 | - plotly==5.13.1 165 | - prometheus-client==0.16.0 166 | - prompt-toolkit==3.0.38 167 | - protobuf==4.22.1 168 | - psutil==5.9.4 169 | - ptyprocess==0.7.0 170 | - pure-eval==0.2.2 171 | - pycolmap==0.3.0 172 | - pygments==2.14.0 173 | - pyparsing==3.0.9 174 | - pyrsistent==0.19.3 175 | - python-dateutil==2.8.2 176 | - python-json-logger==2.0.7 177 | - pytorch-lightning==1.9.4 178 | - pytz==2022.7.1 179 | - pywavelets==1.4.1 180 | - pyyaml==6.0 181 | - pyzmq==25.0.0 182 | - qtconsole==5.4.0 183 | - qtpy==2.3.0 184 | - rfc3339-validator==0.1.4 185 | - rfc3986-validator==0.1.1 186 | - scikit-image==0.20.0 187 | - scikit-learn==1.2.2 188 | - scipy==1.9.1 189 | - send2trash==1.8.0 190 | - sentry-sdk==1.16.0 191 | - setproctitle==1.3.2 192 | - six==1.16.0 193 | - smmap==5.0.0 194 | - sniffio==1.3.0 195 | - soupsieve==2.4 196 | - stack-data==0.6.2 197 | - tenacity==8.2.2 198 | - terminado==0.17.1 199 | - threadpoolctl==3.1.0 200 | - tifffile==2023.2.28 201 | - tinycss2==1.2.1 202 | - torch==1.13.1 203 | - torchmetrics==0.11.3 204 | - torchvision==0.14.1 205 | - tornado==6.2 206 | - traitlets==5.9.0 207 | - trimesh==3.11.2 208 | - typing-extensions==4.5.0 209 | - uri-template==1.2.0 210 | - wandb==0.13.11 211 | - wcwidth==0.2.6 212 | - webcolors==1.12 213 | - webencodings==0.5.1 214 | - websocket-client==1.5.1 215 | - werkzeug==2.2.3 216 | - widgetsnbextension==4.0.5 217 | 
- yarl==1.8.2 218 | - zipp==3.15.0 219 | prefix: /home/weders/anaconda3/envs/scannetter 220 | -------------------------------------------------------------------------------- /labelmaker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/LabelMaker/44ff92d42ae9b0579f016bb7bc5cd4bd09a613a3/labelmaker/__init__.py -------------------------------------------------------------------------------- /labelmaker/consensus.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import logging 4 | from tqdm import tqdm 5 | import cv2 6 | import numpy as np 7 | import shutil 8 | from joblib import Parallel, delayed 9 | 10 | from labelmaker.label_mappings import LabelMatcher 11 | 12 | # clean up imports 13 | import gin 14 | from typing import Union 15 | from pathlib import Path 16 | 17 | logging.basicConfig(level="INFO") 18 | log = logging.getLogger('Segmentation Consensus') 19 | 20 | 21 | class PredictorVoting: 22 | 23 | def __init__(self, output_space='wn199-merged-v2'): 24 | #assert output_space == 'wn199' 25 | matcher_ade150 = LabelMatcher('ade20k', output_space) 26 | matcher_nyu40 = LabelMatcher('nyu40id', output_space) 27 | matcher_wn199 = LabelMatcher('wn199', output_space) 28 | matcher_scannet = LabelMatcher('id', output_space) 29 | self.output_space = output_space 30 | # build lookup tables for predictor voting 31 | # some class spaces vote for multiple options in the wordnet output space 32 | self.output_size = max(matcher_ade150.right_ids) + 1 33 | output_ids = np.arange(self.output_size) 34 | self.votes_from_ade150 = np.zeros((150, self.output_size), dtype=np.uint8) 35 | for ade150_id in range(150): 36 | multihot_matches = matcher_ade150.match( 37 | ade150_id * np.ones_like(output_ids), output_ids) 38 | multihot_matches[multihot_matches == -1] = 0 39 | multihot_matches[multihot_matches == -2] = 0 40 | self.votes_from_ade150[ade150_id] = multihot_matches 41 | 42 | self.votes_from_nyu40 = np.zeros((41, self.output_size), dtype=np.uint8) 43 | for nyu40_id in range(1, 41): 44 | multihot_matches = matcher_nyu40.match( 45 | nyu40_id * np.ones_like(output_ids), output_ids) 46 | multihot_matches[multihot_matches == -1] = 0 47 | multihot_matches[multihot_matches == -2] = 0 48 | self.votes_from_nyu40[nyu40_id] = multihot_matches 49 | 50 | self.votes_from_wn199 = np.zeros((200, self.output_size), dtype=np.uint8) 51 | for wn199_id in range(1, 189): 52 | multihot_matches = matcher_wn199.match( 53 | wn199_id * np.ones_like(output_ids), output_ids) 54 | multihot_matches[multihot_matches == -1] = 0 55 | multihot_matches[multihot_matches == -2] = 0 56 | self.votes_from_wn199[wn199_id] = multihot_matches 57 | 58 | scannet_dimensionality = max(matcher_scannet.left_ids) + 1 59 | self.votes_from_scannet = np.zeros( 60 | (scannet_dimensionality, self.output_size), dtype=np.uint8) 61 | for scannet_id in range(scannet_dimensionality): 62 | multihot_matches = matcher_scannet.match( 63 | scannet_id * np.ones_like(output_ids), output_ids) 64 | multihot_matches[multihot_matches == -1] = 0 65 | multihot_matches[multihot_matches == -2] = 0 66 | self.votes_from_scannet[scannet_id] = multihot_matches 67 | 68 | def voting(self, 69 | ade20k_predictions=[], 70 | nyu40_predictions=[], 71 | wn199_predictions=[], 72 | scannet_predictions=[]): 73 | """Voting scheme for combining multiple segmentation predictors. 
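Each input prediction votes for every output-space class its label can map to (via the precomputed multi-hot lookup tables); the class with the most votes wins.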
74 |
75 |     Args:
76 |       ade20k_predictions (list): list of ADE20K predictions
77 |       nyu40_predictions (list): list of NYU40 predictions
78 |       wn199_predictions (list): list of WordNet-199 predictions
79 |       scannet_predictions (list): list of ScanNet predictions
80 |
81 |     Returns:
82 |       tuple(np.ndarray, np.ndarray): per-pixel number of votes for the winning class, and the consensus prediction in the output space
83 |     """
84 |     shape = None
85 |     if len(ade20k_predictions) > 0:
86 |       shape = ade20k_predictions[0].shape[:2]
87 |     elif len(nyu40_predictions) > 0:
88 |       shape = nyu40_predictions[0].shape[:2]
89 |     elif len(wn199_predictions) > 0:
90 |       shape = wn199_predictions[0].shape[:2]
91 |     elif len(scannet_predictions) > 0:
92 |       shape = scannet_predictions[0].shape[:2]
93 |
94 |     # build consensus prediction
95 |     # first, each prediction votes for classes in the output space
96 |     votes = np.zeros((shape[0], shape[1], self.output_size), dtype=np.uint8)
97 |     for pred in wn199_predictions:
98 |       vote = self.votes_from_wn199[pred]
99 |       vote[pred == -1] = 0
100 |       votes += vote
101 |     for pred in ade20k_predictions:
102 |       votes += self.votes_from_ade150[pred]
103 |     for pred in nyu40_predictions:
104 |       votes += self.votes_from_nyu40[pred]
105 |
106 |     for pred in scannet_predictions:
107 |       votes += self.votes_from_scannet[pred]
108 |
109 |     pred_vote = np.argmax(votes, axis=2)
110 |     n_votes = votes[np.arange(shape[0])[:, None],
111 |                     np.arange(shape[1]), pred_vote]
112 |     #n_votes = np.amax(votes, axis=2)
113 |     # # fastest check for ambiguous prediction: take the argmax in reverse order
114 |     # alt_pred = (self.output_size - 1) - np.argmax(votes[:, :, ::-1],
115 |     #                                               axis=2)
116 |     # pred_vote[pred_vote != alt_pred] = -1
117 |     return n_votes, pred_vote
118 |
119 |
120 | VALID_LABEL_SPACES = ['ade20k', 'nyu40', 'scannet200', 'wordnet', 'scannet']
121 |
122 |
123 | def consensus(k, folders, output_dir, min_votes):
124 |
125 |   votebox = PredictorVoting(output_space='wn199-merged-v2')
126 |
127 |   predictions = {label_space: [] for label_space in VALID_LABEL_SPACES}
128 |
129 |   for folder in folders:
130 |     assert folder.exists() and folder.is_dir()
131 |
132 |     label_space = folder.name.split('_')[0]
133 |     pred = cv2.imread(str(folder / f'{k}.png'), cv2.IMREAD_UNCHANGED)
134 |     predictions[label_space].append(pred.copy())
135 |
136 |   n_votes, pred_vote = votebox.voting(
137 |       ade20k_predictions=predictions['ade20k'],
138 |       nyu40_predictions=predictions['nyu40'],
139 |       wn199_predictions=predictions['wordnet'],
140 |       scannet_predictions=predictions['scannet200']
141 |   )  # double even without flipping
142 |
143 |   pred_vote[n_votes < min_votes] = 0
144 |   pred_vote[pred_vote == -1] = 0
145 |
146 |   cv2.imwrite(str(output_dir / f'{k}.png'), pred_vote)
147 |
148 |
149 | # this is needed for parallel execution
150 | def wrapper_consensus(k, input_folders_str, output_dir_str, min_votes):
151 |   input_folders = [Path(s) for s in input_folders_str]
152 |   output_dir = Path(output_dir_str)
153 |   consensus(k, input_folders, output_dir, min_votes)
154 |   return 1
155 |
156 |
157 | @gin.configurable
158 | def run(scene_dir: Union[str, Path],
159 |         output_folder: Union[str, Path],
160 |         n_jobs=-1,
161 |         min_votes=2):
162 |
163 |   scene_dir = Path(scene_dir)
164 |   output_folder = Path(output_folder)
165 |
166 |   assert scene_dir.exists() and scene_dir.is_dir()
167 |
168 |   output_dir = scene_dir / output_folder
169 |   # remove any previous output directory, then create a fresh one
170 |   shutil.rmtree(output_dir, ignore_errors=True)
171 |   os.makedirs(str(output_dir), exist_ok=False)
172 |
173 |   log.info('[consensus] loading model predictions')
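  # gather per-model prediction folders; names follow $labelspace_$model_$version,
  # so the label space is read from the prefix before the first underscore
174 |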
input_folders = [
175 |       scene_dir / 'intermediate' / folder
176 |       for folder in os.listdir(scene_dir / 'intermediate')
177 |       if folder.split('_')[0] in VALID_LABEL_SPACES
178 |   ]
179 |
180 |   # assert that all folders have the same number of files
181 |   n_files = None
182 |   for folder in input_folders:
183 |     files = [
184 |         f for f in os.listdir(folder)
185 |         if f.endswith('.png')
186 |     ]
187 |     if n_files is None:
188 |       n_files = len(files)
189 |     else:
190 |       assert n_files == len(
191 |           files
192 |       ), f'Number of files in {folder} does not match: {n_files} vs. {len(files)}'
193 |
194 |   keys = sorted([s.stem for s in (scene_dir / 'color').iterdir()])
195 |
196 |   input_folders_str = [str(f) for f in input_folders]
197 |   output_dir_str = str(output_dir)
198 |
199 |   # Using Parallel to run the function in parallel
200 |   results = Parallel(n_jobs=n_jobs)(delayed(wrapper_consensus)(
201 |       k, input_folders_str, output_dir_str, min_votes) for k in tqdm(keys))
202 |
203 |
204 | def arg_parser():
205 |   parser = argparse.ArgumentParser(description='Run consensus segmentation')
206 |   parser.add_argument(
207 |       '--workspace',
208 |       type=str,
209 |       required=True,
210 |       help='Path to workspace directory. There should be a "color" folder.',
211 |   )
212 |   parser.add_argument(
213 |       '--output',
214 |       type=str,
215 |       default='intermediate/consensus',
216 |       help=
217 |       'Name of the output directory inside the workspace directory "intermediate". Has to follow the pattern $labelspace_$model_$version',
218 |   )
219 |   parser.add_argument("--n_jobs", type=int, default=-1)
220 |   parser.add_argument('--config', help='Name of config file')
221 |
222 |   return parser.parse_args()
223 |
224 |
225 | if __name__ == '__main__':
226 |   args = arg_parser()
227 |   if args.config is not None:
228 |     gin.parse_config_file(args.config)
229 |   run(scene_dir=args.workspace, output_folder=args.output, n_jobs=args.n_jobs)
230 | -------------------------------------------------------------------------------- /labelmaker/lifting_3d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/LabelMaker/44ff92d42ae9b0579f016bb7bc5cd4bd09a613a3/labelmaker/lifting_3d/__init__.py -------------------------------------------------------------------------------- /labelmaker/lifting_3d/lifting.sh: -------------------------------------------------------------------------------- 1 | # exit when any command fails
2 | set -e
3 |
4 | dir_name="$(pwd)/$(dirname "$0")"
5 | repo_dir="$dir_name/../.."
6 |
7 | # activate environment
8 | env_name=sdfstudio
9 | eval "$(conda shell.bash hook)"
10 | conda activate $env_name
11 |
12 | conda_home="$(conda info | grep "active env location : " | cut -d ":" -f2-)"
13 | conda_home="${conda_home#"${conda_home%%[![:space:]]*}"}"
14 |
15 | echo $conda_home
16 |
17 | which python
18 | which pip
19 | which nvcc
20 |
21 | # add cuda compiler to path
22 | export CUDA_HOST_COMPILER="$conda_home/bin/gcc"
23 | export CUDA_PATH="$conda_home"
24 | export CUDA_HOME=$CUDA_PATH
25 | export LD_LIBRARY_PATH=$conda_home/lib:$LD_LIBRARY_PATH
26 | export LIBRARY_PATH="$conda_home/lib/stubs:$LIBRARY_PATH"
27 | export TCNN_CUDA_ARCHITECTURES=75
28 |
29 | wandb online
30 |
31 | # get scene folder
32 | if [ -z "$1" ]; then
33 | echo "Workspace directory not specified!!!"
34 | exit 1
35 | else
36 | WORKSPACE=$1
37 | fi
38 | echo "Workspace is $WORKSPACE"
39 | 
40 | 
41 | 
42 | # preprocessing
43 | python "$repo_dir"/labelmaker/lifting_3d/preprocessing.py \
44 | --sampling 1 \
45 | --workspace $WORKSPACE
46 | 
47 | # train
48 | method=neus-facto
49 | experiment_name=sdfstudio_train
50 | output_dir=${WORKSPACE}/intermediate/${experiment_name}
51 | preprocess_data_dir=${WORKSPACE}/intermediate/sdfstudio_preprocessing
52 | 
53 | export WANDB_MODE=online
54 | wandb online
55 | 
56 | # about 26G gpu memory, 1207.58s
57 | # semantic loss is enabled (semantic-loss-mult 0.1, include-semantics True); mono priors (normal, depth) are loaded (include-mono-prior True), but only the normal prior contributes (mono-normal-loss-mult 0.02, mono-depth-loss-mult 0.0)
58 | ns-train ${method} \
59 | --experiment-name $experiment_name \
60 | --pipeline.model.sdf-field.use-grid-feature True \
61 | --pipeline.model.sdf-field.hidden-dim 256 \
62 | --pipeline.model.sdf-field.num-layers 2 \
63 | --pipeline.model.sdf-field.num-layers-color 2 \
64 | --pipeline.model.sdf-field.semantic-num-layers 4 \
65 | --pipeline.model.sdf-field.semantic_layer_width 512 \
66 | --pipeline.model.sdf-field.use-appearance-embedding False \
67 | --pipeline.model.sdf-field.geometric-init True \
68 | --pipeline.model.sdf-field.inside-outside True \
69 | --pipeline.model.sdf-field.bias 0.8 \
70 | --pipeline.model.sdf-field.beta-init 0.3 \
71 | --pipeline.model.sensor-depth-l1-loss-mult 10.0 \
72 | --pipeline.model.sensor-depth-sdf-loss-mult 6000.0 \
73 | --pipeline.model.sensor-depth-freespace-loss-mult 10.0 \
74 | --pipeline.model.sensor-depth-truncation 0.015 \
75 | --pipeline.model.mono-normal-loss-mult 0.02 \
76 | --pipeline.model.mono-depth-loss-mult 0.00 \
77 | --pipeline.model.semantic-loss-mult 0.1 \
78 | --pipeline.model.semantic-patch-loss-mult 0.00 \
79 | --pipeline.model.semantic-patch-loss-min-step 1000 \
80 | --pipeline.model.semantic-ignore-label 0 \
81 | --trainer.steps-per-eval-image 1000 \
82 | --trainer.steps-per-eval-all-images 100000 \
83 | --trainer.steps-per-save 10000 \
84 | --trainer.max-num-iterations 20001 \
85 | --pipeline.datamanager.train-num-rays-per-batch 2048 \
86 | --pipeline.model.eikonal-loss-mult 0.1 \
87 | --pipeline.model.background-model none \
88 | --output-dir ${WORKSPACE}/intermediate \
89 | --vis wandb \
90 | sdfstudio-data \
91 | --data ${preprocess_data_dir} \
92 | --include-sensor-depth True \
93 | --include-semantics True \
94 | --include-mono-prior True
95 | 
96 | # the job below may sometimes OOM, so we can wait here until all GPU memory is free
97 | # sleep 60
98 | 
99 | # locate results
100 | results_dir=${output_dir}/$(ls $output_dir)
101 | train_id=$(ls $results_dir)
102 | 
103 | config=$results_dir/$train_id/config.yml
104 | 
105 | # extract mesh
106 | ns-extract-mesh \
107 | --load-config $config \
108 | --create-visibility-mask True \
109 | --output-path $results_dir/$train_id/mesh_visible.ply \
110 | --resolution 512
111 | # sleep 60
112 | 
113 | # render class labels
114 | render_dir=${WORKSPACE}/neus_lifted
115 | mkdir -p $render_dir
116 | ns-render --camera-path-filename $preprocess_data_dir/camera_path.json \
117 | --traj filename \
118 | --output-format images \
119 | --rendered-output-names semantics \
120 | --output-path $render_dir \
121 | --load-config $config
122 | 
--------------------------------------------------------------------------------
/labelmaker/lifting_3d/lifting_points.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import os
4 | from pathlib import Path
5 | from typing import Union
6 | 
7 | import cv2
8 | import gin
9 | import numpy as np
10 | import open3d as o3d
11 | from PIL import Image
12 | from tqdm import tqdm
13 | 
14 | from labelmaker.label_data import get_wordnet
15 | 
16 | logging.basicConfig(level="INFO")
17 | log = logging.getLogger('3D Point Lifting')
18 | 
19 | 
20 | def project_pointcloud(points, pose, intrinsics):
21 | 
22 | points_h = np.hstack((points, np.ones_like(points[:, 0:1])))
23 | points_c = np.linalg.inv(pose) @ points_h.T
24 | points_c = points_c.T
25 | 
26 | if intrinsics.shape[-1] == 3:
27 | intrinsics = np.hstack((intrinsics, np.zeros((3, 1))))
28 | intrinsics = np.vstack((intrinsics, np.zeros((1, 4))))
29 | intrinsics[-1, -1] = 1.
30 | 
31 | points_p = intrinsics @ points_c.T
32 | points_p = points_p.T[:, :3]
33 | 
34 | points_p[:, 0] /= (points_p[:, -1] + 1.e-6)
35 | points_p[:, 1] /= (points_p[:, -1] + 1.e-6)
36 | 
37 | return points_p
38 | 
39 | 
40 | @gin.configurable
41 | def main(
42 | scene_dir: Union[str, Path],
43 | label_folder: Union[str, Path],
44 | output_file: Union[str, Path],
45 | output_mesh: Union[str, Path],
46 | maximum_label: int,
47 | ):
48 | scene_dir = Path(scene_dir)
49 | label_folder = Path(label_folder)
50 | output_file = Path(output_file)
51 | output_mesh = Path(output_mesh)
52 | 
53 | # check if scene_dir exists
54 | assert scene_dir.exists() and scene_dir.is_dir()
55 | 
56 | # define all paths
57 | input_color_dir = scene_dir / 'color'
58 | assert input_color_dir.exists() and input_color_dir.is_dir()
59 | 
60 | input_depth_dir = scene_dir / 'depth'
61 | assert input_depth_dir.exists() and input_depth_dir.is_dir()
62 | 
63 | input_intrinsic_dir = scene_dir / 'intrinsic'
64 | assert input_intrinsic_dir.exists() and input_intrinsic_dir.is_dir()
65 | 
66 | input_pose_dir = scene_dir / 'pose'
67 | assert input_pose_dir.exists() and input_pose_dir.is_dir()
68 | 
69 | input_label_dir = scene_dir / label_folder
70 | assert input_label_dir.exists() and input_label_dir.is_dir()
71 | 
72 | input_mesh_path = scene_dir / 'mesh.ply'
73 | assert input_mesh_path.exists() and input_mesh_path.is_file()
74 | 
75 | log.info('Processing {} using labels from {}'.format(
76 | str(scene_dir),
77 | str(input_label_dir),
78 | ))
79 | 
80 | # load mesh and extract vertices
81 | mesh = o3d.io.read_triangle_mesh(str(input_mesh_path))
82 | vertices = np.asarray(mesh.vertices)
83 | 
84 | # init label container
85 | labels_3d = np.zeros((vertices.shape[0], maximum_label + 1))
86 | 
87 | files = input_label_dir.glob('*.png')
88 | files = sorted(files, key=lambda x: int(x.stem.split('.')[0]))
89 | resize_image = False
90 | 
91 | for idx, file in tqdm(enumerate(files), total=len(files)):
92 | 
93 | frame_key = file.stem
94 | 
95 | intrinsics = np.loadtxt(str(input_intrinsic_dir / f'{frame_key}.txt'))
96 | image = np.asarray(Image.open(str(input_color_dir /
97 | f'{frame_key}.jpg'))).astype(np.uint8)
98 | depth = np.asarray(Image.open(str(
99 | input_depth_dir / f'{frame_key}.png'))).astype(np.float32) / 1000.
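# editorial note: depth PNGs are assumed to store depth in millimeters, so the
# division by 1000. above yields meters, matching the projected z values that
# are compared against a 0.1 m threshold below.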
100 | labels = np.asarray(Image.open(str(file)))
101 | 
102 | max_label = np.max(labels)
103 | if max_label > labels_3d.shape[-1] - 1:
104 | raise ValueError(
105 | f'Label {max_label} is outside the label range [0, {labels_3d.shape[-1] - 1}]'
106 | )
107 | 
108 | if resize_image:
109 | h, w = depth.shape
110 | image = cv2.resize(image, (w, h))
111 | labels = cv2.resize(labels, (w, h))
112 | else:
113 | h, w, _ = image.shape
114 | depth = cv2.resize(depth, (w, h))
115 | 
116 | pose_file = input_pose_dir / f'{frame_key}.txt'
117 | pose = np.loadtxt(str(pose_file))
118 | 
119 | points_p = project_pointcloud(vertices, pose, intrinsics)
120 | 
121 | xx = points_p[:, 0].astype(int)
122 | yy = points_p[:, 1].astype(int)
123 | zz = points_p[:, 2]
124 | 
125 | valid_mask = (xx >= 0) & (yy >= 0) & (xx < w) & (yy < h)
126 | 
127 | d = depth[yy[valid_mask], xx[valid_mask]]
128 | 
129 | valid_mask[valid_mask] = (zz[valid_mask] > 0) & (np.abs(zz[valid_mask] - d)
130 | <= 0.1)
131 | 
132 | labels_2d = labels[yy[valid_mask], xx[valid_mask]]
133 | labels_3d[valid_mask, labels_2d] += 1
134 | 
135 | # extract labels
136 | labels_3d = np.argmax(labels_3d, axis=-1)
137 | 
138 | # save output
139 | np.savetxt(str(scene_dir / output_file), labels_3d, fmt='%i')
140 | 
141 | # save colored mesh
142 | color_map = np.zeros(shape=(maximum_label + 1, 3), dtype=np.uint8)
143 | for item in get_wordnet():
144 | color_map[item['id']] = item['color']
145 | label_mesh_color = color_map[labels_3d]
146 | 
147 | label_mesh = o3d.geometry.TriangleMesh()
148 | label_mesh.vertices = mesh.vertices
149 | label_mesh.triangles = mesh.triangles
150 | 
151 | label_mesh.vertex_colors = o3d.utility.Vector3dVector(
152 | label_mesh_color.astype(float) / 255)
153 | 
154 | o3d.io.write_triangle_mesh(str(scene_dir / output_mesh), label_mesh)
155 | 
156 | 
157 | def arg_parser():
158 | parser = argparse.ArgumentParser(
159 | description=
160 | 'Project mesh vertices into the 2D label images, aggregate the per-frame labels, and save them as a label txt file'
161 | )
162 | parser.add_argument(
163 | '--workspace',
164 | type=str,
165 | required=True,
166 | help=
167 | 'Path to workspace directory. There should be a "color" folder inside.',
168 | )
169 | parser.add_argument(
170 | '--output',
171 | type=str,
172 | default='labels.txt',
173 | help='Name of the file to save the labels',
174 | )
175 | parser.add_argument(
176 | '--output_mesh',
177 | type=str,
178 | default='point_lifted_mesh.ply',
179 | help='Name of the file to save the colored label mesh',
180 | )
181 | parser.add_argument('--label_folder', default='intermediate/consensus')
182 | parser.add_argument(
183 | '--max_label',
184 | type=int,
185 | default=2000,
186 | help='Max label value',
187 | )
188 | parser.add_argument('--config', help='Name of config file')
189 | return parser.parse_args()
190 | 
191 | 
192 | if __name__ == '__main__':
193 | args = arg_parser()
194 | if args.config is not None:
195 | gin.parse_config_file(args.config)
196 | main(
197 | scene_dir=args.workspace,
198 | label_folder=args.label_folder,
199 | output_file=args.output,
200 | output_mesh=args.output_mesh,
201 | maximum_label=args.max_label,
202 | )
203 | 
--------------------------------------------------------------------------------
/labelmaker/mappings/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvg/LabelMaker/44ff92d42ae9b0579f016bb7bc5cd4bd09a613a3/labelmaker/mappings/__init__.py
--------------------------------------------------------------------------------
/labelmaker/scannet_200_labels.py:
--------------------------------------------------------------------------------
1 | ### ScanNet Benchmark constants ###
2 | VALID_CLASS_IDS_20 = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
3 | 34, 36, 39)
4 | 
5 | CLASS_LABELS_20 = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table',
6 | 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk',
7 | 'curtain', 'refrigerator', 'shower curtain', 'toilet',
8 | 'sink', 'bathtub', 'otherfurniture')
9 | 
10 | SCANNET_COLOR_MAP_20 = {
11 | 0: (0., 0., 0.),
12 | 1: (174., 199., 232.),
13 | 2: (152., 223., 138.),
14 | 3: (31., 119., 180.),
15 | 4: (255., 187., 120.),
16 | 5: (188., 189., 34.),
17 | 6: (140., 86., 75.),
18 | 7: (255., 152., 150.),
19 | 8: (214., 39., 40.),
20 | 9: (197., 176., 213.),
21 | 10: (148., 103., 189.),
22 | 11: (196., 156., 148.),
23 | 12: (23., 190., 207.),
24 | 14: (247., 182., 210.),
25 | 15: (66., 188., 102.),
26 | 16: (219., 219., 141.),
27 | 17: (140., 57., 197.),
28 | 18: (202., 185., 52.),
29 | 19: (51., 176., 203.),
30 | 20: (200., 54., 131.),
31 | 21: (92., 193., 61.),
32 | 22: (78., 71., 183.),
33 | 23: (172., 114., 82.),
34 | 24: (255., 127., 14.),
35 | 25: (91., 163., 138.),
36 | 26: (153., 98., 156.),
37 | 27: (140., 153., 101.),
38 | 28: (158., 218., 229.),
39 | 29: (100., 125., 154.),
40 | 30: (178., 127., 135.),
41 | 32: (146., 111., 194.),
42 | 33: (44., 160., 44.),
43 | 34: (112., 128., 144.),
44 | 35: (96., 207., 209.),
45 | 36: (227., 119., 194.),
46 | 37: (213., 92., 176.),
47 | 38: (94., 106., 211.),
48 | 39: (82., 84., 163.),
49 | 40: (100., 85., 144.),
50 | }
51 | 
52 | ### ScanNet200 Benchmark constants ###
53 | VALID_CLASS_IDS_200 = (
54 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23,
55 | 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45, 46,
56 | 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 62, 63, 64, 65, 66, 67, 68,
57 | 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 84, 86, 87, 88, 89, 90,
58 | 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 110, 112,
59 | 115, 116, 118, 120, 121, 122,
125, 128, 130, 131, 132, 134, 136, 138, 139, 60 | 140, 141, 145, 148, 154, 155, 156, 157, 159, 161, 163, 165, 166, 168, 169, 61 | 170, 177, 180, 185, 188, 191, 193, 195, 202, 208, 213, 214, 221, 229, 230, 62 | 232, 233, 242, 250, 261, 264, 276, 283, 286, 300, 304, 312, 323, 325, 331, 63 | 342, 356, 370, 392, 395, 399, 408, 417, 488, 540, 562, 570, 572, 581, 609, 64 | 748, 776, 1156, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 65 | 1173, 1174, 1175, 1176, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 66 | 1186, 1187, 1188, 1189, 1190, 1191) 67 | 68 | CLASS_LABELS_200 = ( 69 | 'wall', 'chair', 'floor', 'table', 'door', 'couch', 'cabinet', 'shelf', 70 | 'desk', 'office chair', 'bed', 'pillow', 'sink', 'picture', 'window', 71 | 'toilet', 'bookshelf', 'monitor', 'curtain', 'book', 'armchair', 72 | 'coffee table', 'box', 'refrigerator', 'lamp', 'kitchen cabinet', 'towel', 73 | 'clothes', 'tv', 'nightstand', 'counter', 'dresser', 'stool', 'cushion', 74 | 'plant', 'ceiling', 'bathtub', 'end table', 'dining table', 'keyboard', 75 | 'bag', 'backpack', 'toilet paper', 'printer', 'tv stand', 'whiteboard', 76 | 'blanket', 'shower curtain', 'trash can', 'closet', 'stairs', 'microwave', 77 | 'stove', 'shoe', 'computer tower', 'bottle', 'bin', 'ottoman', 'bench', 78 | 'board', 'washing machine', 'mirror', 'copier', 'basket', 'sofa chair', 79 | 'file cabinet', 'fan', 'laptop', 'shower', 'paper', 'person', 80 | 'paper towel dispenser', 'oven', 'blinds', 'rack', 'plate', 'blackboard', 81 | 'piano', 'suitcase', 'rail', 'radiator', 'recycling bin', 'container', 82 | 'wardrobe', 'soap dispenser', 'telephone', 'bucket', 'clock', 'stand', 83 | 'light', 'laundry basket', 'pipe', 'clothes dryer', 'guitar', 84 | 'toilet paper holder', 'seat', 'speaker', 'column', 'bicycle', 'ladder', 85 | 'bathroom stall', 'shower wall', 'cup', 'jacket', 'storage bin', 86 | 'coffee maker', 'dishwasher', 'paper towel roll', 'machine', 'mat', 87 | 'windowsill', 'bar', 'toaster', 'bulletin board', 'ironing board', 88 | 'fireplace', 'soap dish', 'kitchen counter', 'doorframe', 89 | 'toilet paper dispenser', 'mini fridge', 'fire extinguisher', 'ball', 'hat', 90 | 'shower curtain rod', 'water cooler', 'paper cutter', 'tray', 'shower door', 91 | 'pillar', 'ledge', 'toaster oven', 'mouse', 'toilet seat cover dispenser', 92 | 'furniture', 'cart', 'storage container', 'scale', 'tissue box', 93 | 'light switch', 'crate', 'power outlet', 'decoration', 'sign', 'projector', 94 | 'closet door', 'vacuum cleaner', 'candle', 'plunger', 'stuffed animal', 95 | 'headphones', 'dish rack', 'broom', 'guitar case', 'range hood', 'dustpan', 96 | 'hair dryer', 'water bottle', 'handicap bar', 'purse', 'vent', 97 | 'shower floor', 'water pitcher', 'mailbox', 'bowl', 'paper bag', 98 | 'alarm clock', 'music stand', 'projector screen', 'divider', 99 | 'laundry detergent', 'bathroom counter', 'object', 'bathroom vanity', 100 | 'closet wall', 'laundry hamper', 'bathroom stall door', 'ceiling light', 101 | 'trash bin', 'dumbbell', 'stair rail', 'tube', 'bathroom cabinet', 102 | 'cd case', 'closet rod', 'coffee kettle', 'structure', 'shower head', 103 | 'keyboard piano', 'case of water bottles', 'coat rack', 'storage organizer', 104 | 'folded chair', 'fire alarm', 'power strip', 'calendar', 'poster', 105 | 'potted plant', 'luggage', 'mattress') 106 | 107 | SCANNET_COLOR_MAP_200 = { 108 | 0: (0., 0., 0.), 109 | 1: (174., 199., 232.), 110 | 2: (188., 189., 34.), 111 | 3: (152., 223., 138.), 112 | 4: (255., 152., 150.), 113 | 5: (214., 39., 40.), 114 | 
6: (91., 135., 229.), 115 | 7: (31., 119., 180.), 116 | 8: (229., 91., 104.), 117 | 9: (247., 182., 210.), 118 | 10: (91., 229., 110.), 119 | 11: (255., 187., 120.), 120 | 13: (141., 91., 229.), 121 | 14: (112., 128., 144.), 122 | 15: (196., 156., 148.), 123 | 16: (197., 176., 213.), 124 | 17: (44., 160., 44.), 125 | 18: (148., 103., 189.), 126 | 19: (229., 91., 223.), 127 | 21: (219., 219., 141.), 128 | 22: (192., 229., 91.), 129 | 23: (88., 218., 137.), 130 | 24: (58., 98., 137.), 131 | 26: (177., 82., 239.), 132 | 27: (255., 127., 14.), 133 | 28: (237., 204., 37.), 134 | 29: (41., 206., 32.), 135 | 31: (62., 143., 148.), 136 | 32: (34., 14., 130.), 137 | 33: (143., 45., 115.), 138 | 34: (137., 63., 14.), 139 | 35: (23., 190., 207.), 140 | 36: (16., 212., 139.), 141 | 38: (90., 119., 201.), 142 | 39: (125., 30., 141.), 143 | 40: (150., 53., 56.), 144 | 41: (186., 197., 62.), 145 | 42: (227., 119., 194.), 146 | 44: (38., 100., 128.), 147 | 45: (120., 31., 243.), 148 | 46: (154., 59., 103.), 149 | 47: (169., 137., 78.), 150 | 48: (143., 245., 111.), 151 | 49: (37., 230., 205.), 152 | 50: (14., 16., 155.), 153 | 51: (196., 51., 182.), 154 | 52: (237., 80., 38.), 155 | 54: (138., 175., 62.), 156 | 55: (158., 218., 229.), 157 | 56: (38., 96., 167.), 158 | 57: (190., 77., 246.), 159 | 58: (208., 49., 84.), 160 | 59: (208., 193., 72.), 161 | 62: (55., 220., 57.), 162 | 63: (10., 125., 140.), 163 | 64: (76., 38., 202.), 164 | 65: (191., 28., 135.), 165 | 66: (211., 120., 42.), 166 | 67: (118., 174., 76.), 167 | 68: (17., 242., 171.), 168 | 69: (20., 65., 247.), 169 | 70: (208., 61., 222.), 170 | 71: (162., 62., 60.), 171 | 72: (210., 235., 62.), 172 | 73: (45., 152., 72.), 173 | 74: (35., 107., 149.), 174 | 75: (160., 89., 237.), 175 | 76: (227., 56., 125.), 176 | 77: (169., 143., 81.), 177 | 78: (42., 143., 20.), 178 | 79: (25., 160., 151.), 179 | 80: (82., 75., 227.), 180 | 82: (253., 59., 222.), 181 | 84: (240., 130., 89.), 182 | 86: (123., 172., 47.), 183 | 87: (71., 194., 133.), 184 | 88: (24., 94., 205.), 185 | 89: (134., 16., 179.), 186 | 90: (159., 32., 52.), 187 | 93: (213., 208., 88.), 188 | 95: (64., 158., 70.), 189 | 96: (18., 163., 194.), 190 | 97: (65., 29., 153.), 191 | 98: (177., 10., 109.), 192 | 99: (152., 83., 7.), 193 | 100: (83., 175., 30.), 194 | 101: (18., 199., 153.), 195 | 102: (61., 81., 208.), 196 | 103: (213., 85., 216.), 197 | 104: (170., 53., 42.), 198 | 105: (161., 192., 38.), 199 | 106: (23., 241., 91.), 200 | 107: (12., 103., 170.), 201 | 110: (151., 41., 245.), 202 | 112: (133., 51., 80.), 203 | 115: (184., 162., 91.), 204 | 116: (50., 138., 38.), 205 | 118: (31., 237., 236.), 206 | 120: (39., 19., 208.), 207 | 121: (223., 27., 180.), 208 | 122: (254., 141., 85.), 209 | 125: (97., 144., 39.), 210 | 128: (106., 231., 176.), 211 | 130: (12., 61., 162.), 212 | 131: (124., 66., 140.), 213 | 132: (137., 66., 73.), 214 | 134: (250., 253., 26.), 215 | 136: (55., 191., 73.), 216 | 138: (60., 126., 146.), 217 | 139: (153., 108., 234.), 218 | 140: (184., 58., 125.), 219 | 141: (135., 84., 14.), 220 | 145: (139., 248., 91.), 221 | 148: (53., 200., 172.), 222 | 154: (63., 69., 134.), 223 | 155: (190., 75., 186.), 224 | 156: (127., 63., 52.), 225 | 157: (141., 182., 25.), 226 | 159: (56., 144., 89.), 227 | 161: (64., 160., 250.), 228 | 163: (182., 86., 245.), 229 | 165: (139., 18., 53.), 230 | 166: (134., 120., 54.), 231 | 168: (49., 165., 42.), 232 | 169: (51., 128., 133.), 233 | 170: (44., 21., 163.), 234 | 177: (232., 93., 193.), 235 | 180: (176., 102., 54.), 236 | 185: 
(116., 217., 17.), 237 | 188: (54., 209., 150.), 238 | 191: (60., 99., 204.), 239 | 193: (129., 43., 144.), 240 | 195: (252., 100., 106.), 241 | 202: (187., 196., 73.), 242 | 208: (13., 158., 40.), 243 | 213: (52., 122., 152.), 244 | 214: (128., 76., 202.), 245 | 221: (187., 50., 115.), 246 | 229: (180., 141., 71.), 247 | 230: (77., 208., 35.), 248 | 232: (72., 183., 168.), 249 | 233: (97., 99., 203.), 250 | 242: (172., 22., 158.), 251 | 250: (155., 64., 40.), 252 | 261: (118., 159., 30.), 253 | 264: (69., 252., 148.), 254 | 276: (45., 103., 173.), 255 | 283: (111., 38., 149.), 256 | 286: (184., 9., 49.), 257 | 300: (188., 174., 67.), 258 | 304: (53., 206., 53.), 259 | 312: (97., 235., 252.), 260 | 323: (66., 32., 182.), 261 | 325: (236., 114., 195.), 262 | 331: (241., 154., 83.), 263 | 342: (133., 240., 52.), 264 | 356: (16., 205., 144.), 265 | 370: (75., 101., 198.), 266 | 392: (237., 95., 251.), 267 | 395: (191., 52., 49.), 268 | 399: (227., 254., 54.), 269 | 408: (49., 206., 87.), 270 | 417: (48., 113., 150.), 271 | 488: (125., 73., 182.), 272 | 540: (229., 32., 114.), 273 | 562: (158., 119., 28.), 274 | 570: (60., 205., 27.), 275 | 572: (18., 215., 201.), 276 | 581: (79., 76., 153.), 277 | 609: (134., 13., 116.), 278 | 748: (192., 97., 63.), 279 | 776: (108., 163., 18.), 280 | 1156: (95., 220., 156.), 281 | 1163: (98., 141., 208.), 282 | 1164: (144., 19., 193.), 283 | 1165: (166., 36., 57.), 284 | 1166: (212., 202., 34.), 285 | 1167: (23., 206., 34.), 286 | 1168: (91., 211., 236.), 287 | 1169: (79., 55., 137.), 288 | 1170: (182., 19., 117.), 289 | 1171: (134., 76., 14.), 290 | 1172: (87., 185., 28.), 291 | 1173: (82., 224., 187.), 292 | 1174: (92., 110., 214.), 293 | 1175: (168., 80., 171.), 294 | 1176: (197., 63., 51.), 295 | 1178: (175., 199., 77.), 296 | 1179: (62., 180., 98.), 297 | 1180: (8., 91., 150.), 298 | 1181: (77., 15., 130.), 299 | 1182: (154., 65., 96.), 300 | 1183: (197., 152., 11.), 301 | 1184: (59., 155., 45.), 302 | 1185: (12., 147., 145.), 303 | 1186: (54., 35., 219.), 304 | 1187: (210., 73., 181.), 305 | 1188: (221., 124., 77.), 306 | 1189: (149., 214., 66.), 307 | 1190: (72., 185., 134.), 308 | 1191: (42., 94., 198.), 309 | } 310 | 311 | ### For instance segmentation the non-object categories ### 312 | VALID_PANOPTIC_IDS = (1, 3) 313 | 314 | CLASS_LABELS_PANOPTIC = ('wall', 'floor') 315 | -------------------------------------------------------------------------------- /labelmaker/visualization_3d.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import numpy as np 5 | import open3d as o3d 6 | 7 | from labelmaker.label_data import get_nyu40, get_scannet200, get_wordnet, get_ade150 8 | 9 | COLOR_MAPS = { 10 | 'ade20k': get_ade150, 11 | 'scannet200': get_scannet200, 12 | 'nyu40': get_nyu40, 13 | 'wordnet': get_wordnet, 14 | 'consensus': get_wordnet, 15 | "sdfstudio": get_wordnet, 16 | } 17 | 18 | def read_mesh(scene_path): 19 | mesh_path = os.path.join(scene_path, 'mesh.ply') 20 | mesh = o3d.io.read_triangle_mesh(mesh_path) 21 | return mesh 22 | 23 | def save_mesh(mesh, scene_path): 24 | mesh_path = os.path.join(scene_path, 'mesh_colored.ply') 25 | o3d.io.write_triangle_mesh(mesh_path, mesh) 26 | return mesh 27 | 28 | def read_labels(scene_path): 29 | labels = np.loadtxt(os.path.join(scene_path, 'labels.txt')) 30 | return labels 31 | 32 | def colorize_labels(labels, color_map='consensus'): 33 | n = labels.shape[0] 34 | colors = np.zeros((n, 3)) 35 | cmap = COLOR_MAPS[color_map]() 36 | 37 | 
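# editorial note: each get_* color map is assumed to be a list of dicts with
# 'id' and 'color' entries whose list index equals the label id, so
# cmap[int(i)]['color'] below resolves to an (r, g, b) tuple in 0..255.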
for i in np.unique(labels):
38 | colors[labels == i] = cmap[int(i)]['color']
39 | 
40 | colors = colors / 255.
41 | return colors
42 | 
43 | def colorize_mesh(mesh, colors):
44 | mesh.vertex_colors = o3d.utility.Vector3dVector(colors)
45 | return mesh
46 | 
47 | def main(args):
48 | mesh = read_mesh(args.workspace)
49 | os.makedirs(os.path.join(args.workspace, args.output), exist_ok=True)
50 | labels = read_labels(args.workspace)
51 | colors = colorize_labels(labels)
52 | mesh = colorize_mesh(mesh, colors)
53 | save_mesh(mesh, os.path.join(args.workspace, args.output))
54 | 
55 | def arg_parser():
56 | parser = argparse.ArgumentParser(description='Visualize 3D labels as a colored mesh')
57 | parser.add_argument('--workspace', type=str)
58 | parser.add_argument('--output', type=str, default='vis_3d')
59 | return parser.parse_args()
60 | 
61 | if __name__ == '__main__':
62 | args = arg_parser()
63 | main(args)
--------------------------------------------------------------------------------
/models/cmx.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import os
4 | import random
5 | import shutil
6 | import sys
7 | from os.path import abspath, dirname, join
8 | from pathlib import Path
9 | from typing import Union
10 | 
11 | import cv2
12 | import gin
13 | import matplotlib.pyplot as plt
14 | import mmcv
15 | import numpy as np
16 | import torch
17 | import torch.backends.cudnn as cudnn
18 | from mmseg.apis import inference_segmentor, init_segmentor
19 | from mmseg.core import get_classes, get_palette
20 | from tqdm import tqdm
21 | 
22 | sys.path.append(
23 | os.path.join(os.path.dirname(__file__), '../3rdparty',
24 | 'RGBX_Semantic_Segmentation'))
25 | 
26 | from config import config
27 | from dataloader.dataloader import ValPre
28 | from dataloader.RGBXDataset import RGBXDataset
29 | from engine.evaluator import Evaluator
30 | from engine.logger import get_logger
31 | from utils.metric import compute_score, hist_info
32 | from utils.pyt_utils import ensure_dir, link_file, load_model, parse_devices
33 | from utils.visualize import print_iou, show_img
34 | 
35 | from models.builder import EncoderDecoder as segmodel
36 | 
37 | logging.basicConfig(level="INFO")
38 | log = logging.getLogger('CMX Segmentation')
39 | 
40 | 
41 | def setup_seeds(seed):
42 | 
43 | random.seed(seed)
44 | np.random.seed(seed)
45 | torch.manual_seed(seed)
46 | 
47 | cudnn.benchmark = False
48 | cudnn.deterministic = True
49 | 
50 | 
51 | def load_cmx(device: Union[str, torch.device] = 'cuda:0'):
52 | device = str(device)
53 | assert device[:4] == 'cuda'
54 | try:
55 | device_id = str(int(device.split(':')[-1]))
56 | except ValueError:
57 | raise ValueError("device should be a cuda device in the format 'cuda:<id>'.")
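# editorial example (hypothetical call): load_cmx(device='cuda:0') parses
# device_id='0'; splitting on ':' also covers multi-digit ids such as 'cuda:10'.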
58 | 
59 | log.info('loading model')
60 | checkpoint_file = abspath(
61 | join(dirname(__file__), '../checkpoints/NYUDV2_CMX+Segformer-B2.pth'))
62 | network = segmodel(cfg=config,
63 | criterion=None,
64 | norm_layer=torch.nn.BatchNorm2d)
65 | eval_source = abspath(
66 | join(dirname(__file__),
67 | '../3rdparty/RGBX_Semantic_Segmentation/empty.txt'))
68 | data_setting = {
69 | 'rgb_root': config.rgb_root_folder,
70 | 'rgb_format': config.rgb_format,
71 | 'gt_root': config.gt_root_folder,
72 | 'gt_format': config.gt_format,
73 | 'transform_gt': config.gt_transform,
74 | 'x_root': config.x_root_folder,
75 | 'x_format': config.x_format,
76 | 'x_single_channel': config.x_is_single_channel,
77 | 'class_names': config.class_names,
78 | 'train_source': config.train_source,
79 | 'eval_source': eval_source,
80 | 
81 | }
82 | val_pre = ValPre()
83 | dataset = RGBXDataset(data_setting, 'val', val_pre)
84 | evaluator = Evaluator(
85 | dataset=dataset,
86 | class_num=40,
87 | norm_mean=config.norm_mean,
88 | norm_std=config.norm_std,
89 | network=network,
90 | multi_scales=config.eval_scale_array,
91 | is_flip=config.eval_flip,
92 | devices=parse_devices(device_id),
93 | )
94 | evaluator.compute_metric = lambda x: str()
95 | evaluator.run('mmseg', checkpoint_file, '/dev/null', '/tmp/fakelog')
96 | return evaluator
97 | 
98 | 
99 | @gin.configurable
100 | def run(
101 | scene_dir: Union[str, Path],
102 | output_folder: Union[str, Path],
103 | device: Union[str, torch.device] = 'cuda:0',
104 | confidence_threshold: float = 0.995,
105 | flip: bool = False,
106 | ):
107 | 
108 | scene_dir = Path(scene_dir)
109 | output_folder = Path(output_folder)
110 | 
111 | assert scene_dir.exists() and scene_dir.is_dir()
112 | 
113 | input_hha_dir = scene_dir / 'intermediate/hha'
114 | assert input_hha_dir.exists() and input_hha_dir.is_dir()
115 | 
116 | input_color_dir = scene_dir / 'color'
117 | assert input_color_dir.exists() and input_color_dir.is_dir()
118 | 
119 | assert len(list(input_hha_dir.iterdir())) == len(
120 | list(input_color_dir.iterdir()))
121 | 
122 | output_dir = scene_dir / output_folder
123 | output_dir = Path(str(output_dir) + '_flip') if flip else output_dir
124 | shutil.rmtree(output_dir, ignore_errors=True)
125 | os.makedirs(str(output_dir), exist_ok=False)
126 | 
127 | evaluator = load_cmx(device=device)
128 | log.info('[cmx] running inference')
129 | 
130 | keys = [p.stem for p in input_color_dir.glob('*.jpg')]
131 | for k in tqdm(keys):
132 | img = cv2.imread(str(input_color_dir / f'{k}.jpg'))[..., ::-1]
133 | hha = cv2.imread(str(input_hha_dir / f'{k}.png'))
134 | 
135 | if flip:
136 | img = img[:, ::-1]
137 | hha = hha[:, ::-1]
138 | pred = evaluator.sliding_eval_rgbX(
139 | img,
140 | hha,
141 | config.eval_crop_size,
142 | config.eval_stride_rate,
143 | device=device,
144 | )
145 | 
146 | pred = pred + 1
147 | if flip:
148 | pred = pred[:, ::-1]
149 | cv2.imwrite(str(output_dir / f'{k}.png'), pred.astype(np.uint16))
150 | 
151 | 
152 | def arg_parser():
153 | parser = argparse.ArgumentParser(description='CMX Segmentation')
154 | parser.add_argument(
155 | '--workspace',
156 | type=str,
157 | required=True,
158 | help=
159 | 'Path to workspace directory. There should be "color" and "intermediate/hha" folder inside.',
160 | )
161 | parser.add_argument(
162 | '--output',
163 | type=str,
164 | default='intermediate/nyu40_cmx_1',
165 | help=
166 | 'Name of output directory in the workspace directory intermediate. Has to follow the pattern $labelspace_$model_$version'
167 | )
168 | parser.add_argument('--seed', type=int, default=42, help='random seed')
169 | parser.add_argument(
170 | '--flip',
171 | action="store_true",
172 | help='Flip the input image, this is part of test time augmentation.',
173 | )
174 | parser.add_argument('--config', help='Name of config file')
175 | return parser.parse_args()
176 | 
177 | 
178 | if __name__ == "__main__":
179 | args = arg_parser()
180 | if args.config is not None:
181 | gin.parse_config_file(args.config)
182 | 
183 | setup_seeds(seed=args.seed)
184 | run(scene_dir=args.workspace, output_folder=args.output, flip=args.flip)
185 | 
--------------------------------------------------------------------------------
/models/hha_depth.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import os
4 | import random
5 | import shutil
6 | import sys
7 | from pathlib import Path
8 | from typing import Union
9 | 
10 | import cv2
11 | import gin
12 | import numpy as np
13 | import torch
14 | import torch.backends.cudnn as cudnn
15 | from hha.getHHA import getHHA
16 | from joblib import Parallel, delayed
17 | from tqdm import tqdm
18 | 
19 | logging.basicConfig(level="INFO")
20 | log = logging.getLogger('Depth to HHA conversion')
21 | 
22 | 
23 | def setup_seeds(seed):
24 | 
25 | random.seed(seed)
26 | np.random.seed(seed)
27 | torch.manual_seed(seed)
28 | 
29 | cudnn.benchmark = False
30 | cudnn.deterministic = True
31 | 
32 | 
33 | @gin.configurable
34 | def run(
35 | scene_dir: Union[str, Path],
36 | input_folder: Union[str, Path],
37 | output_folder: Union[str, Path],
38 | n_jobs=8,
39 | ):
40 | 
41 | scene_dir = Path(scene_dir)
42 | input_folder = Path(input_folder)
43 | output_folder = Path(output_folder)
44 | 
45 | assert scene_dir.exists() and scene_dir.is_dir()
46 | 
47 | input_depth_dir = scene_dir / 'depth'
48 | assert input_depth_dir.exists() and input_depth_dir.is_dir()
49 | 
50 | input_intrinsic_dir = scene_dir / 'intrinsic'
51 | assert input_intrinsic_dir.exists() and input_intrinsic_dir.is_dir()
52 | 
53 | omnidata_depth_dir = scene_dir / input_folder
54 | assert omnidata_depth_dir.exists() and omnidata_depth_dir.is_dir()
55 | 
56 | assert len(list(input_depth_dir.iterdir())) == len(
57 | list(omnidata_depth_dir.iterdir()))
58 | 
59 | output_dir = scene_dir / output_folder
60 | shutil.rmtree(output_dir, ignore_errors=True)
61 | os.makedirs(str(output_dir), exist_ok=False)
62 | 
63 | log.info(f'running depth to hha conversion for scene {scene_dir}')
64 | 
65 | def depth_to_hha(k):
66 | intrinsics = np.loadtxt(str(input_intrinsic_dir / f'{k}.txt'))[:3, :3]
67 | orig_depth = cv2.imread(str(input_depth_dir / f'{k}.png'),
68 | cv2.IMREAD_UNCHANGED) / 1000
69 | omni_depth = cv2.imread(str(omnidata_depth_dir / f'{k}.png'),
70 | cv2.IMREAD_UNCHANGED) / 1000
71 | hha = getHHA(intrinsics, omni_depth, orig_depth)
72 | cv2.imwrite(str(output_dir / f'{k}.png'), hha)
73 | 
74 | keys = [p.stem for p in (scene_dir / 'depth').glob('*.png')]
75 | if n_jobs > 1:
76 | Parallel(n_jobs=n_jobs)(delayed(depth_to_hha)(k) for k in tqdm(keys))
77 | else:
78 | for k in tqdm(keys):
79 | depth_to_hha(k)
80 | 
81 | 
82 | def arg_parser():
83 | parser = argparse.ArgumentParser(description='HHA')
84 | parser.add_argument(
85 | '--workspace',
86 | type=str,
87 | required=True,
88 | help=
There should be a "depth" and "instrinsic" folder', 90 | ) 91 | parser.add_argument( 92 | '--input', 93 | type=str, 94 | default='intermediate/depth_omnidata_1', 95 | help='Name of input directory in the workspace directory', 96 | ) 97 | parser.add_argument( 98 | '--output', 99 | type=str, 100 | default='intermediate/hha', 101 | help= 102 | 'Name of output directory in the workspace directory intermediate. Has to follow the pattern $labelspace_$model_$version', 103 | ) 104 | parser.add_argument('--config', help='Name of config file') 105 | parser.add_argument( 106 | '--n_jobs', 107 | type=int, 108 | default=8, 109 | help='Number of parallel jobs', 110 | ) 111 | return parser.parse_args() 112 | 113 | 114 | if __name__ == "__main__": 115 | args = arg_parser() 116 | if args.config is not None: 117 | gin.parse_config_file(args.config) 118 | run( 119 | scene_dir=args.workspace, 120 | input_folder=args.input, 121 | output_folder=args.output, 122 | n_jobs=args.n_jobs, 123 | ) 124 | -------------------------------------------------------------------------------- /models/internimage.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import random 5 | import shutil 6 | import sys 7 | from pathlib import Path 8 | from typing import Union 9 | 10 | import cv2 11 | import gin 12 | import mmcv 13 | import numpy as np 14 | import torch 15 | import torch.backends.cudnn as cudnn 16 | from mmcv.runner import load_checkpoint 17 | from mmseg.apis import inference_segmentor, init_segmentor 18 | from mmseg.core import get_classes, get_palette 19 | from tqdm import tqdm 20 | 21 | sys.path.append( 22 | os.path.join(os.path.dirname(__file__), '..', '3rdparty', 'InternImage', 23 | 'segmentation')) 24 | 25 | import mmcv_custom # this is actually needed for correct model registry 26 | import mmseg_custom 27 | 28 | logging.basicConfig(level="INFO") 29 | log = logging.getLogger('InternImage Segmentation') 30 | 31 | 32 | def setup_seeds(seed): 33 | 34 | random.seed(seed) 35 | np.random.seed(seed) 36 | torch.manual_seed(seed) 37 | 38 | cudnn.benchmark = False 39 | cudnn.deterministic = True 40 | 41 | 42 | def load_internimage(device: Union[str, torch.device],): 43 | 44 | config_file = os.path.join( 45 | os.path.dirname(__file__), '..', '3rdparty', 'InternImage', 46 | 'segmentation', 'configs', 'ade20k', 47 | 'mask2former_internimage_h_896_80k_cocostuff2ade20k_ss.py') 48 | checkpoint_file = os.path.join( 49 | os.path.dirname(__file__), '..', 'checkpoints', 50 | 'mask2former_internimage_h_896_80k_cocostuff2ade20k.pth') 51 | 52 | # build the model from a config file and a checkpoint file 53 | model = init_segmentor(config_file, checkpoint=None, device=device) 54 | checkpoint = load_checkpoint(model, checkpoint_file, map_location='cpu') 55 | 56 | if 'CLASSES' in checkpoint.get('meta', {}): 57 | model.CLASSES = checkpoint['meta']['CLASSES'] 58 | else: 59 | print('"CLASSES" not found in meta, use dataset.CLASSES instead') 60 | model.CLASSES = get_classes('ade20k') 61 | 62 | if 'PALETTE' in checkpoint.get('meta', {}): 63 | model.PALETTE = checkpoint['meta']['PALETTE'] 64 | else: 65 | print('"PALETTE" not found in meta, use dataset.PALETTE instead') 66 | model.PALETTE = get_palette('ade20k') 67 | 68 | return model 69 | 70 | 71 | @gin.configurable 72 | def run( 73 | scene_dir: Union[str, Path], 74 | output_folder: Union[str, Path], 75 | device: Union[str, torch.device] = 'cuda:0', 76 | flip: bool = False, 77 | ): 78 | # convert str to Path 
object 79 | scene_dir = Path(scene_dir) 80 | output_folder = Path(output_folder) 81 | 82 | assert scene_dir.exists() and scene_dir.is_dir() 83 | 84 | input_color_dir = scene_dir / 'color' 85 | assert input_color_dir.exists() and input_color_dir.is_dir() 86 | 87 | output_dir = scene_dir / output_folder 88 | output_dir = Path(str(output_dir) + '_flip') if flip else output_dir 89 | 90 | # check if output directory exists 91 | shutil.rmtree(output_dir, ignore_errors=True) 92 | os.makedirs(str(output_dir), exist_ok=False) 93 | 94 | log.info('[internimage] loading model') 95 | model = load_internimage(device=device) 96 | log.info(f'[internimage] running inference in {str(input_color_dir)}') 97 | print(f'[internimage] running inference in {str(input_color_dir)}', 98 | flush=True) 99 | 100 | input_files = input_color_dir.glob('*') 101 | input_files = sorted(input_files, key=lambda x: int(x.stem.split('_')[-1])) 102 | 103 | for file in tqdm(input_files): 104 | img = mmcv.imread(file) 105 | 106 | if flip: 107 | img = img[:, ::-1] 108 | 109 | result = inference_segmentor(model, img)[0] 110 | if flip: 111 | result = result[:, ::-1] 112 | 113 | cv2.imwrite(str(output_dir / f'{file.stem}.png'), result.astype(np.uint16)) 114 | 115 | 116 | # all models should have this command line interface 117 | def arg_parser(): 118 | parser = argparse.ArgumentParser(description='InternImage Segmentation') 119 | parser.add_argument( 120 | '--workspace', 121 | type=str, 122 | required=True, 123 | help='Path to workspace directory. There should be a "color" folder.', 124 | ) 125 | parser.add_argument( 126 | '--output', 127 | type=str, 128 | default='intermediate/ade20k_internimage_1', 129 | help= 130 | 'Name of output directory in the workspace directory intermediate. Has to follow the pattern $labelspace_$model_$version', 131 | ) 132 | parser.add_argument('--seed', type=int, default=42, help='random seed') 133 | parser.add_argument( 134 | '--flip', 135 | action="store_true", 136 | help='Flip the input image, this is part of test time augmentation.', 137 | ) 138 | parser.add_argument('--config', help='Name of config file') 139 | return parser.parse_args() 140 | 141 | 142 | if __name__ == '__main__': 143 | args = arg_parser() 144 | 145 | if args.config is not None: 146 | gin.parse_config_file(args.config) 147 | 148 | setup_seeds(seed=args.seed) 149 | run(scene_dir=args.workspace, output_folder=args.output, flip=args.flip) 150 | -------------------------------------------------------------------------------- /models/omnidata_depth.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import random 5 | import shutil 6 | import sys 7 | from pathlib import Path 8 | from typing import Union 9 | 10 | import cv2 11 | import gin 12 | import matplotlib.pyplot as plt 13 | import mmcv 14 | import numpy as np 15 | import PIL 16 | import torch 17 | import torch.backends.cudnn as cudnn 18 | import torch.nn.functional as F 19 | from joblib import Parallel, delayed 20 | from PIL import Image 21 | from torchvision import transforms 22 | from tqdm import tqdm 23 | 24 | sys.path.insert( 25 | 0, 26 | os.path.abspath( 27 | os.path.join(os.path.dirname(__file__), '..', '3rdparty', 'omnidata', 28 | 'omnidata_tools', 'torch'))) 29 | 30 | from data.transforms import get_transform 31 | from modules.midas.dpt_depth import DPTDepthModel 32 | from modules.unet import UNet 33 | 34 | logging.basicConfig(level="INFO") 35 | log = logging.getLogger('Omnidata Depth') 36 
| 37 | 38 | def setup_seeds(seed): 39 | 40 | random.seed(seed) 41 | np.random.seed(seed) 42 | torch.manual_seed(seed) 43 | 44 | cudnn.benchmark = False 45 | cudnn.deterministic = True 46 | 47 | 48 | def load_omnidepth(device: Union[str, torch.device] = 'cuda:0',): 49 | log.info('loading model') 50 | pretrained_weights_path = Path(os.path.abspath(os.path.dirname( 51 | __file__))) / '..' / 'checkpoints' / 'omnidata_dpt_depth_v2.ckpt' 52 | model = DPTDepthModel(backbone='vitb_rn50_384') 53 | checkpoint = torch.load(pretrained_weights_path, map_location=device) 54 | if 'state_dict' in checkpoint: 55 | state_dict = {} 56 | for k, v in checkpoint['state_dict'].items(): 57 | state_dict[k[6:]] = v 58 | else: 59 | state_dict = checkpoint 60 | model.load_state_dict(state_dict) 61 | model.to(device) 62 | return model 63 | 64 | 65 | def omnidepth_completion( 66 | scene_dir: Union[str, Path], 67 | output_folder: Union[str, Path], 68 | patch_size=32, 69 | ): 70 | # convert str to Path object 71 | scene_dir = Path(scene_dir) 72 | output_folder = Path(output_folder) 73 | 74 | assert scene_dir.exists() and scene_dir.is_dir() 75 | 76 | input_depth_dir = scene_dir / 'depth' 77 | assert input_depth_dir.exists() and input_depth_dir.is_dir() 78 | 79 | output_dir = scene_dir / output_folder 80 | assert (output_dir).exists() 81 | 82 | log.info('[omnidepth] running completion') 83 | 84 | def depth_completion(k): 85 | orig_depth = cv2.imread(str(input_depth_dir / f'{k}.png'), 86 | cv2.IMREAD_UNCHANGED) 87 | omnidepth = cv2.imread(str(output_dir / f'{k}.png'), cv2.IMREAD_UNCHANGED) 88 | 89 | # now complete the original depth with omnidepth predictions, fitted to scale 90 | # within a patch around each missing pixel 91 | fused_depth = orig_depth.copy() 92 | coords_u, coords_v = np.where(fused_depth == 0) 93 | for i in range(len(coords_u)): 94 | u = coords_u[i] 95 | v = coords_v[i] 96 | window_u = max(0, u - patch_size), min(fused_depth.shape[0], 97 | u + patch_size) 98 | window_v = max(0, v - patch_size), min(fused_depth.shape[1], 99 | v + patch_size) 100 | target = orig_depth[window_u[0]:window_u[1], window_v[0]:window_v[1]] 101 | source = omnidepth[window_u[0]:window_u[1], window_v[0]:window_v[1]] 102 | source = source[target != 0] 103 | target = target[target != 0] 104 | a, b = np.linalg.lstsq(np.stack([source, np.ones_like(source)], axis=-1), 105 | target, 106 | rcond=None)[0] 107 | # for some areas this will completely break the geometry, we need to revert to omnidepth 108 | if a < 0.5 or a > 2: 109 | fused_depth[u, v] = omnidepth[u, v] 110 | else: 111 | fused_depth[u, v] = a * omnidepth[u, v] + b 112 | fused_depth[fused_depth == 0] = omnidepth[fused_depth == 0] 113 | cv2.imwrite(str(output_dir / f'{k}.png'), fused_depth) 114 | 115 | keys = [p.stem for p in input_depth_dir.glob('*.png')] 116 | Parallel(n_jobs=8)(delayed(depth_completion)(k) for k in tqdm(keys)) 117 | 118 | 119 | @gin.configurable 120 | def run( 121 | scene_dir: Union[str, Path], 122 | output_folder: Union[str, Path], 123 | device: Union[str, torch.device] = 'cuda:0', 124 | depth_size=(480, 640), 125 | completion=True, 126 | ): 127 | scene_dir = Path(scene_dir) 128 | output_folder = Path(output_folder) 129 | 130 | assert scene_dir.exists() and scene_dir.is_dir() 131 | 132 | input_color_dir = scene_dir / 'color' 133 | assert input_color_dir.exists() and input_color_dir.is_dir() 134 | 135 | input_depth_dir = scene_dir / 'depth' 136 | assert input_depth_dir.exists() and input_depth_dir.is_dir() 137 | 138 | output_dir = scene_dir / output_folder 139 
| 140 | log.info('[omnidepth] loading model') 141 | model = load_omnidepth(device=device) 142 | trans_totensor = transforms.Compose([ 143 | transforms.Resize((384, 384), interpolation=PIL.Image.BILINEAR), 144 | transforms.ToTensor(), 145 | transforms.Normalize(mean=0.5, std=0.5) 146 | ]) 147 | 148 | log.info('[omnidepth] running inference') 149 | 150 | shutil.rmtree(output_dir, ignore_errors=True) 151 | os.makedirs(str(output_dir), exist_ok=False) 152 | 153 | keys = [p.stem for p in input_color_dir.glob('*.jpg')] 154 | 155 | for k in tqdm(keys): 156 | 157 | img = Image.open(str(input_color_dir / f'{k}.jpg')) 158 | with torch.no_grad(): 159 | img_tensor = trans_totensor(img)[:3].unsqueeze(0).to(device) 160 | if img_tensor.shape[1] == 1: 161 | img_tensor = img_tensor.repeat_interleave(3, 1) 162 | output = model(img_tensor).clamp(min=0, max=1) 163 | output = F.interpolate(output.unsqueeze(0), depth_size, 164 | mode='bicubic').squeeze(0) 165 | output = output.clamp(0, 1) 166 | omnidepth = output.detach().cpu().squeeze().numpy() 167 | 168 | # find a linear scaling a * depth + b to fit to original depth 169 | orig_depth = cv2.imread(str(input_depth_dir / f'{k}.png'), 170 | cv2.IMREAD_UNCHANGED) 171 | targets = orig_depth[orig_depth != 0] 172 | source = omnidepth[orig_depth != 0] 173 | a, b = np.linalg.lstsq(np.stack([source, np.ones_like(source)], axis=-1), 174 | targets, 175 | rcond=None)[0] 176 | omnidepth = (a * omnidepth + b).astype(orig_depth.dtype) 177 | cv2.imwrite(str(output_dir / f'{k}.png'), omnidepth) 178 | if completion: 179 | omnidepth_completion(scene_dir=scene_dir, output_folder=output_folder) 180 | 181 | 182 | def arg_parser(): 183 | parser = argparse.ArgumentParser(description='Omnidata Depth Estimation') 184 | parser.add_argument( 185 | '--workspace', 186 | type=str, 187 | required=True, 188 | help= 189 | 'Path to workspace directory. There should be "color" and "depth" folder inside.', 190 | ) 191 | parser.add_argument( 192 | '--output', 193 | type=str, 194 | default='intermediate/depth_omnidata_1', 195 | help= 196 | 'Name of output directory in the workspace directory intermediate. 
Has to follow the pattern $labelspace_$model_$version', 197 | ) 198 | parser.add_argument('--seed', type=int, default=42, help='random seed') 199 | parser.add_argument('--config', help='Name of config file') 200 | return parser.parse_args() 201 | 202 | 203 | if __name__ == "__main__": 204 | args = arg_parser() 205 | if args.config is not None: 206 | gin.parse_config_file(args.config) 207 | setup_seeds(seed=args.seed) 208 | run(scene_dir=args.workspace, output_folder=args.output) 209 | -------------------------------------------------------------------------------- /models/omnidata_normal.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import random 5 | import shutil 6 | import sys 7 | from pathlib import Path 8 | from typing import Union 9 | 10 | import gin 11 | import numpy as np 12 | import PIL 13 | import torch 14 | import torch.backends.cudnn as cudnn 15 | import torch.nn.functional as F 16 | from PIL import Image 17 | from torchvision import transforms 18 | from tqdm import tqdm 19 | 20 | sys.path.insert( 21 | 0, 22 | os.path.abspath( 23 | os.path.join(os.path.dirname(__file__), "..", "3rdparty", "omnidata", 24 | "omnidata_tools", "torch"))) 25 | 26 | from data.transforms import get_transform 27 | from modules.midas.dpt_depth import DPTDepthModel 28 | from modules.unet import UNet 29 | 30 | logging.basicConfig(level="INFO") 31 | log = logging.getLogger("Omnidata Normal") 32 | 33 | 34 | def setup_seeds(seed): 35 | 36 | random.seed(seed) 37 | np.random.seed(seed) 38 | torch.manual_seed(seed) 39 | 40 | cudnn.benchmark = False 41 | cudnn.deterministic = True 42 | 43 | 44 | def load_omninormal(device: Union[str, torch.device] = 'cuda:0',): 45 | log.info('loading model') 46 | pretrained_weights_path = Path(os.path.abspath(os.path.dirname( 47 | __file__))) / '..' 
/ 'checkpoints' / 'omnidata_dpt_normal_v2.ckpt' 48 | model = DPTDepthModel(backbone='vitb_rn50_384', num_channels=3) 49 | map_location = (lambda storage, loc: storage.cuda(device=device)) 50 | checkpoint = torch.load(pretrained_weights_path, map_location=map_location) 51 | 52 | if 'state_dict' in checkpoint: 53 | state_dict = {} 54 | for k, v in checkpoint['state_dict'].items(): 55 | state_dict[k[6:]] = v 56 | else: 57 | state_dict = checkpoint 58 | 59 | model.load_state_dict(state_dict) 60 | model.to(device) 61 | return model 62 | 63 | 64 | @gin.configurable 65 | def run( 66 | scene_dir: Union[str, Path], 67 | output_folder: Union[str, Path], 68 | device: Union[str, torch.device] = 'cuda:0', 69 | size=(480, 640), 70 | ): 71 | scene_dir = Path(scene_dir) 72 | output_folder = Path(output_folder) 73 | 74 | assert scene_dir.exists() and scene_dir.is_dir() 75 | 76 | input_color_dir = scene_dir / 'color' 77 | assert input_color_dir.exists() and input_color_dir.is_dir() 78 | 79 | output_dir = scene_dir / output_folder 80 | 81 | log.info('[omninormal] loading model') 82 | model = load_omninormal(device=device) 83 | trans_totensor = transforms.Compose([ 84 | transforms.Resize((384, 384), interpolation=PIL.Image.BILINEAR), 85 | transforms.CenterCrop(384), 86 | get_transform('rgb', image_size=None) 87 | ]) 88 | 89 | log.info('[omninormal] running inference') 90 | 91 | shutil.rmtree(output_dir, ignore_errors=True) 92 | os.makedirs(str(output_dir), exist_ok=False) 93 | 94 | keys = [p.stem for p in input_color_dir.glob('*.jpg')] 95 | 96 | for k in tqdm(keys): 97 | img = Image.open(str(input_color_dir / f'{k}.jpg')) 98 | 99 | with torch.no_grad(): 100 | img_tensor = trans_totensor(img)[:3].unsqueeze(0).to(device) 101 | 102 | if img_tensor.shape[1] == 1: 103 | img_tensor = img_tensor.repeat_interleave(3, 1) 104 | 105 | output = model(img_tensor).clamp(min=0, max=1) # (1, 3, 384, 384) 106 | output = F.interpolate( 107 | output, 108 | size, 109 | mode='nearest', 110 | ).squeeze(0) # (3, H, W) 111 | 112 | omninormal = output.detach().cpu().squeeze().numpy() # (3, H, W) 113 | omninormal = omninormal.transpose(1, 2, 0) # (H, W, 3) 114 | 115 | np.save(str(output_dir / f'{k}.npy'), omninormal) 116 | 117 | 118 | def arg_parser(): 119 | parser = argparse.ArgumentParser(description='Omnidata Normal Estimation') 120 | parser.add_argument( 121 | '--workspace', 122 | type=str, 123 | required=True, 124 | help='Path to workspace directory. There should be "color" folder inside.', 125 | ) 126 | parser.add_argument( 127 | '--output', 128 | type=str, 129 | default='intermediate/normal_omnidata_1', 130 | help= 131 | 'Name of output directory in the workspace directory intermediate. 
Has to follow the pattern $labelspace_$model_$version', 132 | ) 133 | parser.add_argument('--config', help='Name of config file') 134 | return parser.parse_args() 135 | 136 | 137 | if __name__ == "__main__": 138 | args = arg_parser() 139 | if args.config is not None: 140 | gin.parse_config_file(args.config) 141 | run(scene_dir=args.workspace, output_folder=args.output) 142 | -------------------------------------------------------------------------------- /models/ovseg.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # change default download location for nltk 4 | os.environ['NLTK_DATA'] = os.path.abspath( 5 | os.path.join(os.path.dirname(__file__), '..', '3rdparty', 'nltk_data')) 6 | 7 | import argparse 8 | import logging 9 | import random 10 | import shutil 11 | import sys 12 | from pathlib import Path 13 | from typing import Union 14 | 15 | import cv2 16 | import gin 17 | import numpy as np 18 | import torch 19 | import torch.backends.cudnn as cudnn 20 | from detectron2.config import get_cfg 21 | from detectron2.data.detection_utils import read_image 22 | from detectron2.projects.deeplab import add_deeplab_config 23 | from detectron2.utils.logger import setup_logger 24 | from nltk.corpus import wordnet as wn 25 | from tqdm import tqdm 26 | 27 | from labelmaker.label_data import get_ade150, get_replica, get_wordnet 28 | 29 | sys.path.append( 30 | os.path.join(os.path.dirname(__file__), '..', '3rdparty', 'ov-seg')) 31 | from open_vocab_seg import add_ovseg_config 32 | from open_vocab_seg.utils import VisualizationDemo 33 | 34 | logging.basicConfig(level="INFO") 35 | log = logging.getLogger('OV-Seg Segmentation') 36 | 37 | 38 | def setup_seeds(seed): 39 | 40 | random.seed(seed) 41 | np.random.seed(seed) 42 | torch.manual_seed(seed) 43 | 44 | cudnn.benchmark = False 45 | cudnn.deterministic = True 46 | 47 | 48 | class WordnetPromptTemplate: 49 | 50 | def __init__(self, template, add_synonyms=True): 51 | self.template = template 52 | self.add_synonyms = add_synonyms 53 | 54 | def format(self, noun): 55 | synset = wn.synset(noun) 56 | prompt = self.template.format(noun=synset.name().split('.')[0], 57 | definition=synset.definition()) 58 | if self.add_synonyms and len(synset.lemma_names()) > 1: 59 | prompt += " It can also be called {}".format(", ".join( 60 | synset.lemma_names()[1:])) 61 | return prompt 62 | 63 | def __str__(self): 64 | return str(self.template) 65 | 66 | 67 | def load_ovseg( 68 | device: Union[str, torch.device], 69 | custom_templates=None, 70 | ): 71 | cfg = get_cfg() 72 | add_deeplab_config(cfg) 73 | add_ovseg_config(cfg) 74 | cfg.merge_from_file( 75 | str( 76 | Path(__file__).parent / '..' / '3rdparty' / 'ov-seg' / 'configs' / 77 | 'ovseg_swinB_vitL_demo.yaml')) 78 | cfg.merge_from_list([ 79 | 'MODEL.WEIGHTS', 80 | str( 81 | Path(__file__).parent / '..' 
/ 'checkpoints' / 82 | 'ovseg_swinbase_vitL14_ft_mpt.pth') 83 | ]) 84 | 85 | # add device information 86 | cfg.MODEL.DEVICE = str(device) 87 | 88 | if custom_templates is not None: 89 | cfg.MODEL.CLIP_ADAPTER.TEXT_TEMPLATES = "predefined" 90 | cfg.MODEL.CLIP_ADAPTER.PREDEFINED_PROMPT_TEMPLATES = custom_templates 91 | cfg.freeze() 92 | demo = VisualizationDemo(cfg) 93 | return demo 94 | 95 | 96 | def process_image( 97 | model, 98 | img_path, 99 | class_names, 100 | id_map, 101 | threshold=0.7, 102 | flip=False, 103 | ): 104 | # use PIL, to be consistent with evaluation 105 | img = read_image(img_path, format="BGR") 106 | if flip: 107 | img = img[:, ::-1] 108 | predictions = model.predictor(img, class_names) 109 | blank_area = (predictions['sem_seg'][0] == 0).to('cpu').numpy() 110 | product, pred = torch.max(predictions['sem_seg'], dim=0) 111 | 112 | # map unknown region to last_id + 1 113 | pred[product < threshold] = len(class_names) 114 | pred[blank_area] = len(class_names) 115 | 116 | pred = pred.to('cpu').numpy().astype(int) 117 | 118 | if flip: 119 | pred = pred[:, ::-1] 120 | 121 | # map to corresponding label space 122 | pred = id_map[pred] 123 | 124 | return pred 125 | 126 | 127 | def get_id_map(classes): 128 | """ 129 | In ovseg, the unknown class is not specified in class_names, it is temporarily mapped to the last_id + 1. However, depending on the starting point of each label scheme its mapping may be different. 130 | """ 131 | if classes == 'ade150': 132 | id_map = [x['id'] for x in get_ade150()] + [150] 133 | elif classes == 'replica': 134 | id_map = [x['id'] for x in get_replica()] + [0] 135 | elif classes in ['wordnet', 'wn_nosyn', 'wn_nodef', 'wn_nosyn_nodef']: 136 | id_map = [x['id'] for x in get_wordnet()[1:]] + [0] 137 | else: 138 | raise ValueError(f'Unknown class set {classes}') 139 | 140 | return np.array(id_map) 141 | 142 | 143 | def get_templates(classes): 144 | templates = None 145 | if classes == 'ade150': 146 | class_names = [x['name'] for x in get_ade150()] 147 | elif classes == 'replica': 148 | class_names = [x['name'] for x in get_replica()] 149 | elif classes == 'wordnet': 150 | sizeless_templates = [ 151 | "a photo of a {size}{noun}, which is {definition}.", 152 | "a photo of a {size}{noun}, which can be defined as {definition}.", 153 | "a photo of a {size}{noun}, as in {definition}.", 154 | "This is a photo of a {size}{noun}, which is {definition}", 155 | "This is a photo of a {size}{noun}, which can be defined as {definition}", 156 | "This is a photo of a {size}{noun}, as in {definition}", 157 | "There is a {size}{noun} in the scene", 158 | "There is a {size}{definition} in the scene", 159 | "There is the {size}{noun} in the scene", 160 | "There is the {size}{definition} in the scene", 161 | "a photo of a {size}{noun} in the scene", 162 | "a photo of a {size}{definition} in the scene", 163 | ] 164 | templates = [] 165 | for t in sizeless_templates: 166 | for s in ["", "small ", "medium ", "large "]: 167 | templates.append( 168 | WordnetPromptTemplate( 169 | t.format(size=s, noun="{noun}", definition="{definition}"))) 170 | # the first class is the background class 171 | class_names = [x['name'] for x in get_wordnet()[1:]] 172 | elif classes == 'wn_nosyn': 173 | sizeless_templates = [ 174 | "a photo of a {size}{noun}, which is {definition}.", 175 | "a photo of a {size}{noun}, which can be defined as {definition}.", 176 | "a photo of a {size}{noun}, as in {definition}.", 177 | "This is a photo of a {size}{noun}, which is {definition}", 178 | "This is a photo 
of a {size}{noun}, which can be defined as {definition}", 179 | "This is a photo of a {size}{noun}, as in {definition}", 180 | "There is a {size}{noun} in the scene", 181 | "There is a {size}{definition} in the scene", 182 | "There is the {size}{noun} in the scene", 183 | "There is the {size}{definition} in the scene", 184 | "a photo of a {size}{noun} in the scene", 185 | "a photo of a {size}{definition} in the scene", 186 | ] 187 | templates = [] 188 | for t in sizeless_templates: 189 | for s in ["", "small ", "medium ", "large "]: 190 | templates.append( 191 | WordnetPromptTemplate(t.format(size=s, 192 | noun="{noun}", 193 | definition="{definition}"), 194 | add_synonyms=False)) 195 | # the first class is the background class 196 | class_names = [x['name'] for x in get_wordnet()[1:]] 197 | elif classes == 'wn_nodef': 198 | sizeless_templates = [ 199 | "a photo of a {size}{noun}", 200 | "a photo of a {size}{noun}", 201 | "a photo of a {size}{noun}", 202 | "This is a photo of a {size}{noun}.", 203 | "This is a photo of a {size}{noun}.", 204 | "This is a photo of a {size}{noun}.", 205 | "There is a {size}{noun} in the scene", 206 | "There is the {size}{noun} in the scene", 207 | "a photo of a {size}{noun} in the scene", 208 | ] 209 | templates = [] 210 | for t in sizeless_templates: 211 | for s in ["", "small ", "medium ", "large "]: 212 | templates.append(WordnetPromptTemplate(t.format(size=s, noun="{noun}"))) 213 | # the first class is the background class 214 | class_names = [x['name'] for x in get_wordnet()[1:]] 215 | elif classes == 'wn_nosyn_nodef': 216 | sizeless_templates = [ 217 | "a photo of a {size}{noun}", 218 | "a photo of a {size}{noun}", 219 | "a photo of a {size}{noun}", 220 | "This is a photo of a {size}{noun}.", 221 | "This is a photo of a {size}{noun}.", 222 | "This is a photo of a {size}{noun}.", 223 | "There is a {size}{noun} in the scene", 224 | "There is the {size}{noun} in the scene", 225 | "a photo of a {size}{noun} in the scene", 226 | ] 227 | templates = [] 228 | for t in sizeless_templates: 229 | for s in ["", "small ", "medium ", "large "]: 230 | templates.append( 231 | WordnetPromptTemplate(t.format(size=s, noun="{noun}"), 232 | add_synonyms=False)) 233 | # the first class is the background class 234 | class_names = [x['name'] for x in get_wordnet()[1:]] 235 | else: 236 | raise ValueError(f'Unknown class set {classes}') 237 | 238 | return templates, class_names 239 | 240 | 241 | @gin.configurable 242 | def run( 243 | scene_dir: Union[str, Path], 244 | output_folder: Union[str, Path], 245 | device: Union[ 246 | str, torch. 247 | device] = 'cuda:0', # changing this to cuda default as all of us have it available. 
Otherwise, it will fail on machines without CUDA. 248 | classes='wn_nodef', 249 | flip=False, 250 | ): 251 | scene_dir = Path(scene_dir) 252 | output_folder = Path(output_folder) 253 | 254 | # check if scene_dir exists 255 | assert scene_dir.exists() and scene_dir.is_dir() 256 | 257 | input_color_dir = scene_dir / 'color' 258 | assert input_color_dir.exists() and input_color_dir.is_dir() 259 | 260 | output_dir = scene_dir / output_folder 261 | output_dir = Path(str(output_dir) + '_flip') if flip else output_dir 262 | if classes != 'wn_nodef': 263 | output_dir = Path(str(output_dir).replace('wn_nodef', classes)) 264 | 265 | # start from a clean output directory 266 | shutil.rmtree(output_dir, ignore_errors=True) 267 | os.makedirs(str(output_dir), exist_ok=False) 268 | 269 | input_files = input_color_dir.glob('*') 270 | input_files = sorted(input_files, key=lambda x: int(x.stem.split('_')[-1])) 271 | 272 | log.info(f'[ov-seg] using {classes} classes') 273 | log.info(f'[ov-seg] inference in {str(input_color_dir)}') 274 | 275 | templates, class_names = get_templates(classes) 276 | id_map = get_id_map(classes) 277 | 278 | log.info('[ov-seg] loading model') 279 | model = load_ovseg(device=device, custom_templates=templates) 280 | 281 | log.info('[ov-seg] inference') 282 | 283 | for file in tqdm(input_files): 284 | result = process_image(model, file, class_names, id_map, flip=flip) 285 | cv2.imwrite( 286 | str(output_dir / f'{file.stem}.png'), 287 | result.astype(np.uint16), 288 | ) 289 | 290 | 291 | def arg_parser(): 292 | parser = argparse.ArgumentParser(description='OVSeg Segmentation') 293 | parser.add_argument( 294 | '--workspace', 295 | type=str, 296 | required=True, 297 | help= 298 | 'Path to workspace directory. There should be a "color" folder inside.', 299 | ) 300 | parser.add_argument( 301 | '--output', 302 | type=str, 303 | default='intermediate/wordnet_ovseg_1', 304 | help= 305 | 'Name of output directory in the workspace directory intermediate. 
Has to follow the pattern $labelspace_$model_$version', 306 | ) 307 | parser.add_argument('--seed', type=int, default=42, help='random seed') 308 | parser.add_argument( 309 | '--flip', 310 | action="store_true", 311 | help='Flip the input image, this is part of test time augmentation.', 312 | ) 313 | parser.add_argument('--config', help='Name of config file') 314 | return parser.parse_args() 315 | 316 | 317 | if __name__ == '__main__': 318 | args = arg_parser() 319 | if args.config is not None: 320 | gin.parse_config_file(args.config) 321 | 322 | setup_seeds(seed=args.seed) 323 | run(scene_dir=args.workspace, output_folder=args.output, flip=args.flip) 324 | -------------------------------------------------------------------------------- /notebooks/generate_3d_projections.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "7d36b077", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import os\n", 11 | "import cv2\n", 12 | "\n", 13 | "import open3d as o3d\n", 14 | "import numpy as np\n", 15 | "\n", 16 | "from tqdm import tqdm\n", 17 | "from PIL import Image\n", 18 | "\n", 19 | "import matplotlib.pyplot as plt\n" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "id": "b015fd4d", 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "PATH = '/home/weders/scratch/scratch/scannetter/arkit/raw/Validation'\n", 30 | "SCENE = 'scene0458_00'\n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "id": "40012878", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "label_name = ''\n", 41 | "\n", 42 | "scene_path = os.path.join(PATH, SCENE)\n", 43 | "image_path = os.path.join(scene_path, 'color')\n", 44 | "depth_path = os.path.join(scene_path, 'depth')\n", 45 | "intrinsics_path = os.path.join(scene_path, 'intrinsic')\n", 46 | "pose_path = os.path.join(scene_path, 'pose')\n", 47 | "# label_path = os.path.join(scene_path, 'label-proc')\n", 48 | "\n", 49 | "mesh_path = os.path.join(scene_path, f'{SCENE}_vh_clean.ply')\n" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "id": "b44e1ac0", 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "# scannet label coloring\n", 60 | "def create_color_palette():\n", 61 | " return [\n", 62 | " (0, 0, 0),\n", 63 | " (174, 199, 232), # wall\n", 64 | " (152, 223, 138), # floor\n", 65 | " (31, 119, 180), # cabinet\n", 66 | " (255, 187, 120), # bed\n", 67 | " (188, 189, 34), # chair\n", 68 | " (140, 86, 75), # sofa\n", 69 | " (255, 152, 150), # table\n", 70 | " (214, 39, 40), # door\n", 71 | " (197, 176, 213), # window\n", 72 | " (148, 103, 189), # bookshelf\n", 73 | " (196, 156, 148), # picture\n", 74 | " (23, 190, 207), # counter\n", 75 | " (178, 76, 76),\n", 76 | " (247, 182, 210), # desk\n", 77 | " (66, 188, 102),\n", 78 | " (219, 219, 141), # curtain\n", 79 | " (140, 57, 197),\n", 80 | " (202, 185, 52),\n", 81 | " (51, 176, 203),\n", 82 | " (200, 54, 131),\n", 83 | " (92, 193, 61),\n", 84 | " (78, 71, 183),\n", 85 | " (172, 114, 82),\n", 86 | " (255, 127, 14), # refrigerator\n", 87 | " (91, 163, 138),\n", 88 | " (153, 98, 156),\n", 89 | " (140, 153, 101),\n", 90 | " (158, 218, 229), # shower curtain\n", 91 | " (100, 125, 154),\n", 92 | " (178, 127, 135),\n", 93 | " (120, 185, 128),\n", 94 | " (146, 111, 194),\n", 95 | " (44, 160, 44), # toilet\n", 96 | " (112, 128, 144), # sink\n", 97 | " (96, 207, 
209),\n", 98 | " (227, 119, 194), # bathtub\n", 99 | " (213, 92, 176),\n", 100 | " (94, 106, 211),\n", 101 | " (82, 84, 163), # otherfurn\n", 102 | " (100, 85, 144)\n", 103 | " ]\n", 104 | "\n", 105 | "\n", 106 | "def colorize_semantic_pointcloud(labels):\n", 107 | " colors = 255 * np.ones((labels.shape[0], 3))\n", 108 | " color_palette = np.asarray(create_color_palette())\n", 109 | "\n", 110 | " for l in np.unique(labels):\n", 111 | " colors[labels == l] = color_palette[l, :]\n", 112 | "\n", 113 | " return colors\n", 114 | "\n", 115 | "\n", 116 | "# color by label\n", 117 | "def visualize_label_image(image):\n", 118 | " height = image.shape[0]\n", 119 | " width = image.shape[1]\n", 120 | " vis_image = np.zeros([height, width, 3], dtype=np.uint8)\n", 121 | " color_palette = create_color_palette()\n", 122 | " for idx, color in enumerate(color_palette):\n", 123 | " vis_image[image == idx] = color\n", 124 | " return vis_image\n" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "id": "763a433d", 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "# load mesh\n", 135 | "mesh = o3d.io.read_triangle_mesh(mesh_path)\n", 136 | "vertices = np.asarray(mesh.vertices)\n", 137 | "colors = np.asarray(mesh.vertex_colors)\n", 138 | "labels_3d = np.zeros((vertices.shape[0], 2000))\n" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "id": "20493f14", 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "def project_pointcloud(points, pose, intrinsics):\n", 149 | "\n", 150 | " points_h = np.hstack((points, np.ones_like(points[:, 0:1])))\n", 151 | " points_c = np.linalg.inv(pose) @ points_h.T\n", 152 | " points_c = points_c.T\n", 153 | " points_p = intrinsics @ points_c.T\n", 154 | " points_p = points_p.T[:, :3]\n", 155 | "\n", 156 | " points_p[:, 0] /= (points_p[:, -1] + 1.e-6)\n", 157 | " points_p[:, 1] /= (points_p[:, -1] + 1.e-6)\n", 158 | "\n", 159 | " return points_p\n", 160 | "\n", 161 | "\n", 162 | "files = [f for f in os.listdir(label_path) if f.endswith('png')]\n", 163 | "files = sorted(files, key=lambda x: int(x.split('.')[0]))\n", 164 | "resize_image = False\n", 165 | "subsampling = 1\n", 166 | "\n", 167 | "for idx, file in tqdm(enumerate(files), total=len(files)):\n", 168 | "\n", 169 | " if idx % 10 != 0:\n", 170 | " continue\n", 171 | "\n", 172 | " frame_key = int(file.split('.')[0]) * subsampling\n", 173 | "\n", 174 | " image = np.asarray(Image.open(os.path.join(\n", 175 | " image_path, f'{frame_key}.jpg'))).astype(np.uint8)\n", 176 | " depth = np.asarray(Image.open(os.path.join(\n", 177 | " depth_path, f'{frame_key}.png'))).astype(np.float32) / 1000.\n", 178 | " labels = np.asarray(Image.open(os.path.join(label_path, file)))\n", 179 | "\n", 180 | " if resize_image:\n", 181 | " h, w = depth.shape\n", 182 | " image = cv2.resize(image, (w, h))\n", 183 | " labels = cv2.resize(labels, (w, h))\n", 184 | " else:\n", 185 | " h, w, _ = image.shape\n", 186 | " depth = cv2.resize(depth, (w, h))\n", 187 | "\n", 188 | " if not intrinsics_loaded:\n", 189 | " intrinsics = np.loadtxt(intrinsics_path + '/intrinsic_color.txt')\n", 190 | " # intrinsics = o3d.camera.PinholeCameraIntrinsic(width=w, height=h, fx=intrinsics[0, 0], fy=intrinsics[1, 1], cx=intrinsics[0, 2], cy=intrinsics[1, 2])\n", 191 | " intrinsics_loaded = False\n", 192 | "\n", 193 | " pose_file = os.path.join(pose_path, f'{frame_key}.txt')\n", 194 | " pose = np.loadtxt(pose_file)\n", 195 | "\n", 196 | " points_p = 
project_pointcloud(vertices, pose, intrinsics)\n", 197 | "\n", 198 | "    xx = points_p[:, 0].astype(int)\n", 199 | "    yy = points_p[:, 1].astype(int)\n", 200 | "    zz = points_p[:, 2]\n", 201 | "\n", 202 | "    valid_mask = (xx >= 0) & (yy >= 0) & (xx < w) & (yy < h)\n", 203 | "\n", 204 | "    d = depth[yy[valid_mask], xx[valid_mask]]\n", 205 | "\n", 206 | "    valid_mask[valid_mask] = (zz[valid_mask] > 0) & (np.abs(zz[valid_mask] - d)\n", 207 | "                                                     <= 0.1)\n", 208 | "\n", 209 | "    image_rendered = np.zeros_like(image)\n", 210 | "    image_rendered[yy[valid_mask], xx[valid_mask], :] = colors[valid_mask] * 255\n", 211 | "    print(labels.shape)\n", 212 | "    labels_2d = labels[yy[valid_mask], xx[valid_mask]]\n", 213 | "    labels_3d[valid_mask, labels_2d] += 1\n", 214 | "\n", 215 | "    fig, ax = plt.subplots(1, 3)\n", 216 | "    ax[0].imshow(image)\n", 217 | "    ax[1].imshow(image_rendered)\n", 218 | "    ax[2].imshow(visualize_label_image(labels))\n", 219 | "    plt.show()\n" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "id": "4aa91cb5", 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "labels_3d = np.argmax(labels_3d, axis=-1)\n", 230 | "label_colors = colorize_semantic_pointcloud(labels_3d)\n" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "id": "b20a4d9f", 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "from copy import deepcopy\n" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "id": "ef45acd2", 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "label_colors = label_colors.astype(np.float32) / 255.\n", 251 | "mesh_colored = deepcopy(mesh)\n", 252 | "mesh_colored.vertex_colors = o3d.utility.Vector3dVector(label_colors)\n" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "id": "37121f49", 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "o3d.io.write_triangle_mesh('label_mesh.ply', mesh_colored)\n" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "id": "31f1878f", 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "tsdf = o3d.pipelines.integration.ScalableTSDFVolume(\n", 273 | "    sdf_trunc=0.06,\n", 274 | "    voxel_length=0.02,\n", 275 | "    color_type=o3d.pipelines.integration.TSDFVolumeColorType.RGB8)\n", 276 | "\n", 277 | "intrinsics_loaded = False\n", 278 | "\n", 279 | "files = sorted(os.listdir(depth_path), key=lambda x: int(x.split('.')[0]))\n", 280 | "pcds = None\n", 281 | "resize_image = False\n", 282 | "\n", 283 | "for idx, file in tqdm(enumerate(files), total=len(files)):\n", 284 | "\n", 285 | "    if idx not in [136, 137]:\n", 286 | "        continue\n", 287 | "\n", 288 | "    if not os.path.exists(os.path.join(image_path, file.replace('.png', '.jpg'))):\n", 289 | "        print(file, 'not found')\n", 290 | "        continue\n", 291 | "\n", 292 | "    image = np.asarray(\n", 293 | "        Image.open(os.path.join(image_path,\n", 294 | "                                file.replace('.png', '.jpg')))).astype(np.uint8)\n", 295 | "    depth = np.asarray(Image.open(os.path.join(depth_path, file))).astype(\n", 296 | "        np.float32) / 1000.\n", 297 | "\n", 298 | "    if resize_image:\n", 299 | "        h, w = depth.shape\n", 300 | "        image = cv2.resize(image, (w, h))\n", 301 | "    else:\n", 302 | "        h, w, _ = image.shape\n", 303 | "        depth = cv2.resize(depth, (w, h))\n", 304 | "\n", 305 | "    if not intrinsics_loaded:\n", 306 | "        intrinsics = np.loadtxt(intrinsics_path + '/intrinsic_depth.txt')\n", 307 | "        intrinsics = 
o3d.camera.PinholeCameraIntrinsic(width=w,\n", 308 | "                                                       height=h,\n", 309 | "                                                       fx=intrinsics[0, 0],\n", 310 | "                                                       fy=intrinsics[1, 1],\n", 311 | "                                                       cx=intrinsics[0, 2],\n", 312 | "                                                       cy=intrinsics[1, 2])\n", 313 | "        intrinsics_loaded = True\n", 314 | "\n", 315 | "    pose_file = os.path.join(pose_path, file.replace('.png', '.txt'))\n", 316 | "    pose = np.loadtxt(pose_file)\n", 317 | "\n", 318 | "    image = o3d.geometry.Image(image)\n", 319 | "    depth = o3d.geometry.Image(depth)\n", 320 | "\n", 321 | "    rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(\n", 322 | "        image,\n", 323 | "        depth,\n", 324 | "        depth_scale=1.0,\n", 325 | "        depth_trunc=3.,\n", 326 | "        convert_rgb_to_intensity=False)\n", 327 | "\n", 328 | "    if pcds is None:\n", 329 | "\n", 330 | "        pcds = o3d.geometry.PointCloud.create_from_rgbd_image(\n", 331 | "            rgbd, intrinsics, np.linalg.inv(pose)).voxel_down_sample(0.04)\n", 332 | "\n", 333 | "    else:\n", 334 | "        pcds = pcds + o3d.geometry.PointCloud.create_from_rgbd_image(\n", 335 | "            rgbd, intrinsics, np.linalg.inv(pose))\n", 336 | "        pcds = pcds.voxel_down_sample(0.01)\n", 337 | "\n", 338 | "o3d.io.write_point_cloud(f'{SCENE}.ply', pcds)\n" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "id": "30a65e47", 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [] 348 | } 349 | ], 350 | "metadata": { 351 | "kernelspec": { 352 | "display_name": "scannetter", 353 | "language": "python", 354 | "name": "scannetter" 355 | }, 356 | "language_info": { 357 | "codemirror_mode": { 358 | "name": "ipython", 359 | "version": 3 360 | }, 361 | "file_extension": ".py", 362 | "mimetype": "text/x-python", 363 | "name": "python", 364 | "nbconvert_exporter": "python", 365 | "pygments_lexer": "ipython3", 366 | "version": "3.8.16" 367 | } 368 | }, 369 | "nbformat": 4, 370 | "nbformat_minor": 5 371 | } 372 | -------------------------------------------------------------------------------- /notebooks/visualization_lifting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import cv2\n", 10 | "import os\n", 11 | "import glob\n", 12 | "\n", 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "\n", 16 | "from pathlib import Path\n", 17 | "from typing import List, Tuple\n", 18 | "from PIL import Image\n", 19 | "\n", 20 | "from labelmaker.consensus import VALID_LABEL_SPACES\n", 21 | "from labelmaker.label_data import get_nyu40, get_scannet200, get_wordnet, get_ade150" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "SCENE_ID=47333462\n", 31 | "WORKSPACE_DIR = Path(f'/home/weders/scratch/scratch/LabelMaker/arkitscenes/{SCENE_ID}')\n", 32 | "# WORKSPACE_DIR = '/scratch/quanta/Experiments/LabelMaker/arkit_test_scene'\n", 33 | "LIFTING_DATA = WORKSPACE_DIR / 'intermediate/sdfstudio_preprocessing'\n" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "files = glob.glob(os.path.join(LIFTING_DATA, '*_rgb.png'))\n", 43 | "files = sorted(files, key=lambda x: int(x.split('/')[-1].split('_')[0]))" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "for idx, f in enumerate(files):\n", 53 | "    if idx > 200:\n", 54 | "        break\n", 55 | "    
\n", 56 | " \n", 57 | " image = Image.open(f)\n", 58 | " image = np.asarray(image)\n", 59 | " \n", 60 | " depth = np.load(f.replace('_rgb.png', '_sensor_depth.npy'))\n", 61 | "\n", 62 | " fig, ax = plt.subplots(1, 2)\n", 63 | " ax[0].imshow(image)\n", 64 | " ax[0].set_xticks([])\n", 65 | " ax[0].set_yticks([])\n", 66 | " ax[1].imshow(depth)\n", 67 | " ax[1].set_xticks([])\n", 68 | " ax[1].set_yticks([])\n", 69 | " plt.tight_layout()\n", 70 | " plt.show()" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "labelmaker", 84 | "language": "python", 85 | "name": "python3" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.9.18" 98 | } 99 | }, 100 | "nbformat": 4, 101 | "nbformat_minor": 2 102 | } 103 | -------------------------------------------------------------------------------- /notebooks/visualize_arkitscenes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%load_ext autoreload\n", 12 | "%autoreload 2\n", 13 | "\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import numpy as np\n", 16 | "import pandas as pd\n", 17 | "import cv2\n", 18 | "import os\n", 19 | "import sys\n", 20 | "sys.path.append('../..')\n", 21 | "\n", 22 | "from PIL import Image\n", 23 | "from segmentation_tools.visualisation import draw_sem_seg, VisImage\n", 24 | "from segmentation_tools.label_data import get_ade150, get_nyu40, get_scannet_all, get_wordnet, get_replica\n", 25 | "from segmentation_tools.label_mappings import set_ids_according_to_names, \\\n", 26 | " match_scannet_ade150, \\\n", 27 | " match_scannet_nyu40, \\\n", 28 | " match_ade150_nyu40, \\\n", 29 | " match_scannet_wordnet199, \\\n", 30 | " LabelMatcher, \\\n", 31 | " set_colors\n", 32 | "from pathlib import Path\n", 33 | "import skimage\n", 34 | "\n", 35 | "# scene 42445991\n", 36 | "# frames:\n", 37 | "# sdfstudio_path: pred_sdfstudio_2023-08-02_230529\n", 38 | "\n", 39 | "# scene 42897688\n", 40 | "# frames: 0, 74\n", 41 | "# sdfstudio_path: pred_sdfstudio_2023-08-02_230607\n", 42 | "\n", 43 | "scene = 42897688\n", 44 | "key = 50\n", 45 | "sdfstudio_path = 'pred_sdfstudio_2023-08-02_230607'\n", 46 | "\n", 47 | "scene_dir = Path(f'/home/weders/scratch/scratch/scannetter/arkit/raw/Validation/{scene}')\n", 48 | "img = cv2.imread(f'{scene_dir}/color/{key}.jpg')[..., ::-1]\n", 49 | "\n", 50 | "# label = cv2.imread(f'{scene_dir}/label-filt/{key}.png',\n", 51 | "# cv2.IMREAD_UNCHANGED)\n", 52 | "# our_label = cv2.imread(f'{scene_dir}/label_agile3d/{key}.png', cv2.IMREAD_UNCHANGED)\n", 53 | "\n", 54 | "consensus = cv2.imread(f'{scene_dir}/pred_consensus_noscannet_new/{key}.png',\n", 55 | " cv2.IMREAD_UNCHANGED)\n", 56 | "# sdfstudio = cv2.imread(f'{scene_dir}/{sdfstudio_path}/{(key):05d}.png',\n", 57 | "# cv2.IMREAD_UNCHANGED)\n", 58 | "\n", 59 | "\n", 60 | "\n", 61 | "classid2wn = {x['id']: x['name'] for x in get_wordnet(label_key='wn199-merged-v2')}\n", 62 | "scannet_id_to_name = {x['id'] : x['name'] for x in get_scannet_all()}\n", 63 | "scannet_id_to_color = 
{x['id'] : x['color'] for x in get_scannet_all()}\n", 64 | "\n", 65 | "\n", 66 | "keys = sorted(\n", 67 | " int(x.name.split('.')[0])\n", 68 | " for x in (scene_dir / 'color_old').iterdir())\n", 69 | "label_template = 'label-filt/{k}.png'\n", 70 | "label_space = 'id'\n", 71 | "\n", 72 | "\n", 73 | "vis_labelmaker = VisImage(img)\n", 74 | "draw_sem_seg(sdfstudio, vis_labelmaker, \n", 75 | " classes=[x['name'] for x in sorted(get_wordnet(), key=lambda x: x['id'])],\n", 76 | " colors=[x['color'] for x in sorted(get_wordnet(), key=lambda x: x['id'])])\n", 77 | "\n", 78 | "vis_consensus = VisImage(img)\n", 79 | "draw_sem_seg(consensus, vis_consensus, \n", 80 | " classes=[x['name'] for x in sorted(get_wordnet(), key=lambda x: x['id'])],\n", 81 | " colors=[x['color'] for x in sorted(get_wordnet(), key=lambda x: x['id'])])\n", 82 | "\n", 83 | "\n", 84 | "_, plots = plt.subplots(1, 2, figsize=(20, 10))\n", 85 | "\n", 86 | "plots[0].imshow(img)\n", 87 | "plots[0].axis('off')\n", 88 | "plots[0].set_title('LabelMaker3D')\n", 89 | "\n", 90 | "\n", 91 | "# plots[1].imshow(vis_labelmaker.get_image())\n", 92 | "# plots[1].axis('off')\n", 93 | "# plots[1].set_title('LabelMaker3D')\n", 94 | "\n", 95 | "plots[1].imshow(vis_consensus.get_image())\n", 96 | "plots[1].axis('off')\n", 97 | "plots[1].set_title('Consensus')\n", 98 | "plt.show()\n", 99 | "\n", 100 | "\n", 101 | "plt.figure(figsize=(20, 10))\n", 102 | "plt.imshow(img)\n", 103 | "plt.gca().axis('off')\n", 104 | "plt.show()\n", 105 | "\n", 106 | "# plt.figure(figsize=(20, 10))\n", 107 | "# plt.imshow(vis_labelmaker.get_image())\n", 108 | "# plt.gca().axis('off')\n", 109 | "# plt.show()\n", 110 | "\n", 111 | "plt.figure(figsize=(20, 10))\n", 112 | "plt.imshow(vis_consensus.get_image())\n", 113 | "plt.gca().axis('off')\n", 114 | "plt.show()\n", 115 | "\n" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "scrolled": false 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "# iterate over all frames for scene\n", 127 | "\n", 128 | "scene = '42897688'\n", 129 | "scene_dir = Path(\n", 130 | " f'/home/weders/scratch/scratch/scannetter/arkit/raw/Validation/{scene}')\n", 131 | "# sdfstudio_pred = 'pred_sdfstudio_2023-08-02_230529'\n", 132 | "# sdfstudio_pred = 'pred_sdfstudio_2023-08-02_230607'\n", 133 | "keys = sorted([\n", 134 | " int(k.split('/')[-1].replace('.jpg', ''))\n", 135 | " for k in os.listdir((scene_dir / 'color'))\n", 136 | "])\n", 137 | "# keys_rendering = sorted([int(k.split('/')[-1].replace('.png', '')) for k in os.listdir((scene_dir / sdfstudio_pred))])\n", 138 | "\n", 139 | "rgb_keys = sorted([\n", 140 | " round(float(k.split('_')[-1].replace('.png', '')), 3)\n", 141 | " for k in os.listdir((scene_dir / 'vga_wide'))\n", 142 | "])\n", 143 | "depth_keys = sorted([\n", 144 | " round(float(k.split('_')[-1].replace('.png', '')), 3)\n", 145 | " for k in os.listdir((scene_dir / 'highres_depth'))\n", 146 | "])\n", 147 | "print(depth_keys)\n", 148 | "\n", 149 | "\n", 150 | "def get_rgb_key(depth_key):\n", 151 | " for cj, k in enumerate(rgb_keys):\n", 152 | " if k >= depth_key:\n", 153 | " key_before = rgb_keys[cj - 1]\n", 154 | " key_after = k\n", 155 | " break\n", 156 | "\n", 157 | " delta_before = abs(key_before - depth_key)\n", 158 | " delta_after = abs(key_after - depth_key)\n", 159 | "\n", 160 | " if delta_before >= delta_after:\n", 161 | " return key_after\n", 162 | " else:\n", 163 | " return key_before\n", 164 | "\n", 165 | "\n", 166 | "for idx, key in enumerate(keys):\n", 167 | " 
print(idx, key)\n", 168 | "\n", 169 | " # rgb_key = get_rgb_key(depth_keys[idx])\n", 170 | " # print(round(rgb_key, 3))\n", 171 | " if idx % 2 != 0:\n", 172 | " continue\n", 173 | "\n", 174 | "# # print(f'{scene}_{rgb_key:.3f}.jpg')\n", 175 | "# key_rendering = keys_rendering[idx // 2]\n", 176 | "\n", 177 | " img = cv2.imread(f'{scene_dir}/color/{key}.jpg')[..., ::-1]\n", 178 | " # img = cv2.imread(f'{scene_dir}/vga_wide/{scene}_{rgb_key:.3f}.png')[..., ::-1]\n", 179 | "\n", 180 | " # label = cv2.imread(f'{scene_dir}/label-filt/{key}.png',\n", 181 | " # cv2.IMREAD_UNCHANGED)\n", 182 | " # our_label = cv2.imread(f'{scene_dir}/label_agile3d/{key}.png', cv2.IMREAD_UNCHANGED)\n", 183 | "\n", 184 | " consensus = cv2.imread(f'{scene_dir}/pred_consensus_noscannet_new/{key}.png',\n", 185 | " cv2.IMREAD_UNCHANGED)\n", 186 | " # sdfstudio = cv2.imread(f'{scene_dir}/{sdfstudio_pred}/{(key):05d}.png',\n", 187 | " # cv2.IMREAD_UNCHANGED)\n", 188 | "\n", 189 | " classid2wn = {x['id']: x['name'] for x in get_wordnet()}\n", 190 | " scannet_id_to_name = {x['id']: x['name'] for x in get_scannet_all()}\n", 191 | " scannet_id_to_color = {x['id']: x['color'] for x in get_scannet_all()}\n", 192 | "\n", 193 | " classes = [\n", 194 | " x['name'] for x in sorted(get_wordnet(label_key='wn199-merged-v2'),\n", 195 | " key=lambda x: x['id'])\n", 196 | " ]\n", 197 | "\n", 198 | " keys = sorted(\n", 199 | " int(x.name.split('.')[0]) for x in (scene_dir / 'color').iterdir())\n", 200 | " label_template = 'label-filt/{k}.png'\n", 201 | " label_space = 'id'\n", 202 | " # plt.figure(figsize=(20, 10))\n", 203 | " # plt.imshow(img)\n", 204 | " # plt.gca().axis('off')\n", 205 | " # plt.show()\n", 206 | " _, plots = plt.subplots(1, 3, figsize=(40, 10))\n", 207 | "\n", 208 | " plots[0].imshow(img)\n", 209 | " plots[0].axis('off')\n", 210 | " plots[0].set_title('Image')\n", 211 | "\n", 212 | " # vis = VisImage(img)\n", 213 | " # draw_sem_seg(our_label, vis,\n", 214 | " # classes={x['id']: x['name'] for x in sorted(get_wordnet(label_key='wn199-merged-v2'), key=lambda x: x['id'])},\n", 215 | " # colors=[x['color'] for x in sorted(get_wordnet(label_key='wn199-merged-v2'), key=lambda x: x['id'])])\n", 216 | " # plots[1].imshow(vis.get_image())\n", 217 | " # plots[1].axis('off')\n", 218 | " # plots[1].set_title('Ground Truth')\n", 219 | " # vis = VisImage(img)\n", 220 | " # draw_sem_seg(label, vis,\n", 221 | " # classes=[scannet_id_to_name[i] if i in scannet_id_to_name else 'unknown' for i in range(2000)],\n", 222 | " # colors=[scannet_id_to_color[i] if i in scannet_id_to_name else [0, 0, 0] for i in range(2000)],)\n", 223 | " # plots[2].imshow(vis.get_image())\n", 224 | " # plots[2].axis('off')\n", 225 | " # plots[2].set_title('ScanNet')\n", 226 | " vis = VisImage(img)\n", 227 | " draw_sem_seg(consensus,\n", 228 | " vis,\n", 229 | " classes=[\n", 230 | " x['name']\n", 231 | " for x in sorted(get_wordnet(label_key='wn199-merged-v2'),\n", 232 | " key=lambda x: x['id'])\n", 233 | " ],\n", 234 | " colors=[\n", 235 | " x['color']\n", 236 | " for x in sorted(get_wordnet(label_key='wn199-merged-v2'),\n", 237 | " key=lambda x: x['id'])\n", 238 | " ])\n", 239 | " plots[1].imshow(vis.get_image())\n", 240 | " plots[1].axis('off')\n", 241 | " plots[1].set_title('LabelMaker3D')\n", 242 | " vis = VisImage(img)\n", 243 | " # draw_sem_seg(consensus, vis,\n", 244 | " # classes=[x['name'] for x in sorted(get_wordnet(label_key='wn199-merged-v2'), key=lambda x: x['id'])],\n", 245 | " # colors=[x['color'] for x in 
sorted(get_wordnet(label_key='wn199-merged-v2'), key=lambda x: x['id'])])\n", 246 | " #plots[2].imshow(vis.get_image())\n", 247 | " #plots[2].axis('off')\n", 248 | " #plots[2].set_title('Consensus')\n", 249 | " #plt.tight_layout()\n", 250 | " plt.show()\n" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": { 257 | "scrolled": true 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "# colorize labelmaker arkit pointcloud\n", 262 | "# iterate over all frames for scene\n", 263 | "\n", 264 | "import open3d as o3d\n", 265 | "\n", 266 | "scenes = ['42445991', '42446527', '42897688']\n", 267 | "os.makedirs('colored_meshes', exist_ok=True)\n", 268 | "for sc in scenes:\n", 269 | " scene_dir = Path(\n", 270 | " f'/home/weders/scratch/scratch/scannetter/arkit/raw/Validation/{sc}')\n", 271 | "\n", 272 | " label_file = next(\n", 273 | " iter([f for f in scene_dir.iterdir() if 'labels_3d' in str(f)]))\n", 274 | " mesh_file = next(iter([f for f in scene_dir.iterdir() if '.ply' in str(f)]))\n", 275 | " mesh = o3d.io.read_triangle_mesh(str(mesh_file))\n", 276 | "\n", 277 | " labels_3d = np.loadtxt(label_file)\n", 278 | " mesh_colors = np.asarray(mesh.vertex_colors)\n", 279 | " colors = np.zeros_like(mesh_colors)\n", 280 | "\n", 281 | " id_to_color = {\n", 282 | " x['id']: x['color'] for x in sorted(get_wordnet(), key=lambda x: x['id'])\n", 283 | " }\n", 284 | "\n", 285 | " for l in np.unique(labels_3d):\n", 286 | " colors[labels_3d == l] = id_to_color[int(l)]\n", 287 | "\n", 288 | " colors = colors / 255.\n", 289 | " mesh.vertex_colors = o3d.utility.Vector3dVector(colors)\n", 290 | " o3d.io.write_triangle_mesh(f'colored_meshes/{sc}_label_color.ply', mesh)\n" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [ 299 | "# colorize mask3d meshes\n", 300 | "scenes = ['42445991', '42446527', '42897688']\n", 301 | "\n", 302 | "for sc in scenes:\n", 303 | " scene_dir = Path(\n", 304 | " f'/home/weders/scratch/scratch/scannetter/arkit/raw/Validation/{sc}')\n", 305 | " mask3d_dir = scene_dir / 'pred_mask3d'\n", 306 | "\n", 307 | " pred_file = next(\n", 308 | " iter([f for f in mask3d_dir.iterdir() if '3dod_mesh.txt' in str(f)]))\n", 309 | "\n", 310 | " mask_paths = []\n", 311 | " confidences = []\n", 312 | " classes = []\n", 313 | "\n", 314 | " with open(pred_file, 'r') as file:\n", 315 | " for line in file:\n", 316 | " m_file, l, c = line.rstrip().split(' ')\n", 317 | " mask_paths.append(m_file)\n", 318 | " classes.append(int(l))\n", 319 | " confidences.append(float(c))\n", 320 | "\n", 321 | " sorting_indices = np.argsort(np.asarray(confidences))[::-1]\n", 322 | "\n", 323 | " mesh_file = next(iter([f for f in scene_dir.iterdir() if '.ply' in str(f)]))\n", 324 | " mesh = o3d.io.read_triangle_mesh(str(mesh_file))\n", 325 | " mesh_colors = np.asarray(mesh.vertex_colors)\n", 326 | " colors = np.zeros_like(mesh_colors)\n", 327 | " colored_mask = np.zeros_like(colors[:, 0])\n", 328 | " id_to_color = {\n", 329 | " x['id']: x['color'] for x in sorted(get_wordnet(), key=lambda x: x['id'])\n", 330 | " }\n", 331 | "\n", 332 | " for idx in sorting_indices:\n", 333 | " m = np.loadtxt(mask3d_dir / mask_paths[idx])\n", 334 | " l = classes[idx]\n", 335 | "\n", 336 | " m = (m == 1) & (colored_mask == 0)\n", 337 | " colored_mask[m] = 1\n", 338 | "\n", 339 | " colors[m] = scannet_id_to_color[l]\n", 340 | "\n", 341 | " colors = colors / 255.\n", 342 | " mesh.vertex_colors = 
o3d.utility.Vector3dVector(colors)\n", 343 | " o3d.io.write_triangle_mesh(f'colored_meshes/{sc}_mask3d_color.ply', mesh)\n" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [] 352 | } 353 | ], 354 | "metadata": { 355 | "kernelspec": { 356 | "display_name": "scannetter", 357 | "language": "python", 358 | "name": "scannetter" 359 | }, 360 | "language_info": { 361 | "codemirror_mode": { 362 | "name": "ipython", 363 | "version": 3 364 | }, 365 | "file_extension": ".py", 366 | "mimetype": "text/x-python", 367 | "name": "python", 368 | "nbconvert_exporter": "python", 369 | "pygments_lexer": "ipython3", 370 | "version": "3.8.16" 371 | }, 372 | "vscode": { 373 | "interpreter": { 374 | "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" 375 | } 376 | } 377 | }, 378 | "nbformat": 4, 379 | "nbformat_minor": 2 380 | } 381 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/LabelMaker/44ff92d42ae9b0579f016bb7bc5cd4bd09a613a3/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/arkitscenes2labelmaker.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import os 5 | import shutil 6 | import sys 7 | from os.path import abspath, dirname, exists, join 8 | 9 | import cv2 10 | import gin 11 | import numpy as np 12 | from PIL import Image 13 | from scipy.interpolate import CubicSpline 14 | from scipy.spatial.transform import Rotation, RotationSpline 15 | from tqdm import trange 16 | 17 | sys.path.append(abspath(join(dirname(__file__), '..'))) 18 | from utils_3d import fuse_mesh 19 | 20 | 21 | def get_closest_timestamp(reference_timestamps: np.ndarray, 22 | target_timestamps: np.ndarray): 23 | """ 24 | This function returns: 25 | min_time_delta: for each time in reference_timetamps, the minimum time difference (dt) w.r.t target_timestamps 26 | target_index: the index of element in target_timestamps that gives minimum dt 27 | minimum_margin: the time difference of minimum timestamps and second minimum, used for checking uniqueness of minima 28 | """ 29 | time_delta = np.abs( 30 | reference_timestamps.reshape(-1, 1) - target_timestamps.reshape(1, -1)) 31 | 32 | min_two_idx = time_delta.argsort(axis=1)[:, :2] 33 | target_index = min_two_idx[:, 0] 34 | min_time_delta = time_delta[np.arange(target_index.shape[0]), target_index] 35 | minimum_margin = time_delta[np.arange(target_index.shape[0]), 36 | min_two_idx[:, 1]] - min_time_delta 37 | 38 | return min_time_delta, target_index, minimum_margin 39 | 40 | 41 | def load_intrinsics(file): 42 | # as define here https://github.com/apple/ARKitScenes/blob/951af73d20406acf608061c16774f770c61b1405/threedod/benchmark_scripts/utils/tenFpsDataLoader.py#L46 43 | w, h, fx, fy, hw, hh = np.loadtxt(file) 44 | return np.asarray([[fx, 0, hw], [0, fy, hh], [0, 0, 1]]) 45 | 46 | 47 | @gin.configurable 48 | def process_arkit( 49 | scan_dir: str, 50 | target_dir: str, 51 | sdf_trunc: float, 52 | voxel_length: float, 53 | depth_trunc: float, 54 | ): 55 | 56 | logger = logging.getLogger('ARKitProcess') 57 | logger.setLevel(logging.DEBUG) 58 | consoleHeader = logging.StreamHandler() 59 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 
60 | consoleHeader.setFormatter(formatter) 61 | logger.addHandler(consoleHeader) 62 | 63 | logger.info( 64 | "Processing ARKitScene scan to LabelMaker format, from {} to {}...". 65 | format(scan_dir, target_dir)) 66 | 67 | color_dir = join(scan_dir, 'vga_wide') 68 | intrinsic_dir = join(scan_dir, 'vga_wide_intrinsics') 69 | 70 | depth_dir = join(scan_dir, 'lowres_depth') 71 | confidence_dir = join(scan_dir, 'confidence') 72 | 73 | trajectory_file = join(scan_dir, 'lowres_wide.traj') 74 | 75 | assert exists(color_dir), "vga_wide attribute not downloaded!" 76 | assert exists(depth_dir), "lowres_depth attribute not downloaded!" 77 | assert exists(confidence_dir), "confidence attribute not downloaded!" 78 | assert exists(intrinsic_dir), "vga_wide_intrinsics attribute not downloaded!" 79 | assert exists(trajectory_file), "lowres_wide.traj attribute not downloaded!" 80 | 81 | color_file_list = os.listdir(color_dir) 82 | depth_file_list = os.listdir(depth_dir) 83 | confidence_file_list = os.listdir(confidence_dir) 84 | intr_file_list = os.listdir(intrinsic_dir) 85 | 86 | # ts stands for timestamps, inv stands for inverse 87 | color_ts, color_inv = np.unique( 88 | np.array([ 89 | float(name.split('_')[1].split('.png')[0]) for name in color_file_list 90 | ]), 91 | return_index=True, 92 | ) 93 | depth_ts, depth_inv = np.unique( 94 | np.array([ 95 | float(name.split('_')[1].split('.png')[0]) for name in depth_file_list 96 | ]), 97 | return_index=True, 98 | ) 99 | confidence_ts, confidence_inv = np.unique( 100 | np.array([ 101 | float(name.split('_')[1].split('.png')[0]) 102 | for name in confidence_file_list 103 | ]), 104 | return_index=True, 105 | ) 106 | intrinsic_ts, intrinsic_inv = np.unique( 107 | np.array([ 108 | float(name.split('_')[1].split('.pincam')[0]) 109 | for name in intr_file_list 110 | ]), 111 | return_index=True, 112 | ) 113 | 114 | # load trajactory 115 | trajectory_data = np.loadtxt(trajectory_file, delimiter=' ') 116 | trajectory_ts = trajectory_data[:, 0] # already sorted 117 | 118 | # synchronization 119 | logger.info("Synchronizing timestamps...") 120 | dt_max = 1 / 60 / 2 # half of frame time step 121 | 122 | # we compare all with respect to color, as color folder is sparser 123 | # if the matched timestamp and second matched timestamp have difference less than 1 milisecond, 124 | # we regard this case as the matching is not unique, and throw a warning. 125 | margin_threshold = 1e-3 126 | depth_dt, depth_idx, depth_margin = get_closest_timestamp(color_ts, depth_ts) 127 | if depth_margin.min() < margin_threshold: 128 | logger.warn( 129 | "Found multiple color timestamps matching in timestamps: {}".format( 130 | color_ts[depth_margin < margin_threshold].tolist())) 131 | 132 | confidence_dt, confidence_idx, confidence_margin = get_closest_timestamp( 133 | color_ts, confidence_ts) 134 | if confidence_margin.min() < margin_threshold: 135 | logger.warn( 136 | "Found multiple confidence timestamps matching in timestamps: {}". 
137 | format(color_ts[confidence_margin < margin_threshold].tolist())) 138 | 139 | intrinsic_dt, intrinsic_idx, intrinsic_margin = get_closest_timestamp( 140 | color_ts, intrinsic_ts) 141 | if intrinsic_margin.min() < margin_threshold: 142 | logger.warn( 143 | "Found multiple intrinsic timestamps matching in timestamps: {}".format( 144 | color_ts[intrinsic_margin < margin_threshold].tolist())) 145 | 146 | color_idx = np.arange(color_ts.shape[0]) 147 | 148 | # we also want to interpolate pose, so we have to filter out times outside trajectory timestamp 149 | timestamp_filter = (depth_dt < dt_max) * (confidence_dt < dt_max) * ( 150 | intrinsic_dt < dt_max) * (color_ts >= trajectory_ts.min()) * ( 151 | color_ts <= trajectory_ts.max()) 152 | 153 | timestamp = color_ts[timestamp_filter] 154 | logger.info("Synchronization finished!") 155 | 156 | if depth_dt[timestamp_filter].max( 157 | ) > 1e-8 or confidence_dt[timestamp_filter].max( 158 | ) > 1e-8 or intrinsic_dt[timestamp_filter].max() > 1e-8: 159 | 160 | depth_unmatched = depth_dt[timestamp_filter].max() > 1e-8 161 | intrinsic_unmatched = intrinsic_dt[timestamp_filter].max() > 1e-8 162 | confidence_unmatched = confidence_dt[timestamp_filter].max() > 1e-8 163 | 164 | unmatched_timestamp = timestamp[depth_unmatched + intrinsic_unmatched + 165 | confidence_unmatched].tolist() 166 | logger.info("There are not perfectly matched timestamps: {}".format( 167 | unmatched_timestamp)) 168 | 169 | # interpolate pose 170 | logger.info("Interpolating poses...") 171 | rots = Rotation.from_rotvec(trajectory_data[:, 1:4]) 172 | rot_spline = RotationSpline(trajectory_ts, rots) 173 | 174 | x_spline = CubicSpline(trajectory_ts, trajectory_data[:, 4]) 175 | y_spline = CubicSpline(trajectory_ts, trajectory_data[:, 5]) 176 | z_spline = CubicSpline(trajectory_ts, trajectory_data[:, 6]) 177 | 178 | num_frame = timestamp_filter.sum() 179 | 180 | extrinsics_mat = np.zeros(shape=(num_frame, 4, 4)) 181 | extrinsics_mat[:, 3, 3] = 1.0 182 | extrinsics_mat[:, :3, :3] = rot_spline(timestamp).as_matrix() 183 | extrinsics_mat[:, :3, 3] = np.stack( 184 | [x_spline(timestamp), 185 | y_spline(timestamp), 186 | z_spline(timestamp)], axis=1) 187 | pose_mat = np.linalg.inv(extrinsics_mat) 188 | logger.info("Pose interpolation finished!") 189 | 190 | # get correspondence to original file 191 | rows = [] 192 | for i in range(num_frame): 193 | frame_id = '{:06d}'.format(i) 194 | color_pth = color_file_list[color_inv[color_idx[timestamp_filter][i]]] 195 | depth_pth = depth_file_list[depth_inv[depth_idx[timestamp_filter][i]]] 196 | confdc_pth = confidence_file_list[confidence_inv[ 197 | confidence_idx[timestamp_filter][i]]] 198 | intr_pth = intr_file_list[intrinsic_inv[intrinsic_idx[timestamp_filter][i]]] 199 | rows.append([frame_id, color_pth, depth_pth, confdc_pth, intr_pth]) 200 | 201 | # write to new file 202 | shutil.rmtree(target_dir, ignore_errors=True) 203 | os.makedirs(target_dir, exist_ok=True) 204 | os.makedirs(join(target_dir, 'color'), exist_ok=True) 205 | os.makedirs(join(target_dir, 'depth'), exist_ok=True) 206 | os.makedirs(join(target_dir, 'intrinsic'), exist_ok=True) 207 | os.makedirs(join(target_dir, 'pose'), exist_ok=True) 208 | 209 | # first write correspondence list 210 | fields = [ 211 | 'frame_id', 'original_color_path', 'original_depth_path', 212 | 'original_confidence_path', 'original_intrinsic_path' 213 | ] 214 | correspondence_list = [dict(zip(fields, row)) for row in rows] 215 | json_object = json.dumps(correspondence_list, indent=4) 216 | with 
open(join(target_dir, 'correspondence.json'), 'w') as jsonfile: 217 | jsonfile.write(json_object) 218 | logger.info("Saved old and new files correspondence to {}.".format( 219 | join(target_dir, 'correspondence.json'))) 220 | 221 | logger.info("Transferring files...") 222 | for idx in trange(num_frame): 223 | frame_id, color_pth, depth_pth, confdc_pth, intr_pth = rows[idx] 224 | 225 | # save color 226 | tgt_color_pth = join(target_dir, 'color', 227 | frame_id + '.jpg') # png -> jpg, compressed 228 | color_img = Image.open(join(color_dir, color_pth)) 229 | color_img.save(tgt_color_pth) 230 | h, w, _ = np.asarray(color_img).shape 231 | 232 | # save pose 233 | tgt_pose_pth = join(target_dir, 'pose', frame_id + '.txt') 234 | np.savetxt(tgt_pose_pth, pose_mat[idx]) 235 | 236 | # process and save intr 237 | tgt_intrinsic_pth = join(target_dir, 'intrinsic', frame_id + '.txt') 238 | np.savetxt(tgt_intrinsic_pth, load_intrinsics(join(intrinsic_dir, 239 | intr_pth))) 240 | 241 | # process and save depth 242 | depth = cv2.imread(join(depth_dir, depth_pth), cv2.IMREAD_UNCHANGED) 243 | confdc = cv2.imread(join(confidence_dir, confdc_pth), cv2.IMREAD_UNCHANGED) 244 | 245 | depth[confdc < 2] = 0 246 | depth = cv2.resize(depth, (w, h), interpolation=cv2.INTER_NEAREST) 247 | 248 | tgt_depth_pth = join(target_dir, 'depth', frame_id + '.png') 249 | cv2.imwrite(tgt_depth_pth, depth) 250 | 251 | logger.info("File transfer finished!") 252 | 253 | logger.info("Fusing RGBD images into TSDF volume...") 254 | fuse_mesh( 255 | scan_dir=target_dir, 256 | sdf_trunc=sdf_trunc, 257 | voxel_length=voxel_length, 258 | depth_trunc=depth_trunc, 259 | depth_scale=1000.0, 260 | ) # depth_scale is fixed in ARKitScenes, so it is not exposed as a CLI argument 261 | logger.info("Fusion finished! Mesh saved to {}".format( 262 | join(target_dir, 'mesh.ply'))) 263 | 264 | 265 | def arg_parser(): 266 | parser = argparse.ArgumentParser() 267 | parser.add_argument("--scan_dir", type=str) 268 | parser.add_argument("--target_dir", type=str) 269 | parser.add_argument("--sdf_trunc", type=float, default=0.04) 270 | parser.add_argument("--voxel_length", type=float, default=0.008) 271 | parser.add_argument("--depth_trunc", type=float, default=3.0) 272 | parser.add_argument('--config', help='Name of config file') 273 | 274 | return parser.parse_args() 275 | 276 | 277 | if __name__ == "__main__": 278 | args = arg_parser() 279 | if args.config is not None: 280 | gin.parse_config_file(args.config) 281 | process_arkit( 282 | scan_dir=args.scan_dir, 283 | target_dir=args.target_dir, 284 | sdf_trunc=args.sdf_trunc, 285 | voxel_length=args.voxel_length, 286 | depth_trunc=args.depth_trunc, 287 | ) 288 | -------------------------------------------------------------------------------- /scripts/pipeline.sh: -------------------------------------------------------------------------------- 1 | # this script runs the whole LabelMaker pipeline on a preprocessed workspace: all individual models, the consensus step, and then 2D and 3D lifting (preprocessing from ARKitScenes into our custom format is handled by pipeline_arkit.sh). 
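# example invocation (illustrative path; the workspace must already be in our
# custom format, i.e. produced by one of the scripts/*2labelmaker.py converters,
# and contain color/, depth/, intrinsic/, pose/ and mesh.ply):
#   bash scripts/pipeline.sh /path/to/workspace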
2 | # downloading the raw data is not included 3 | # this bash file is not written for other datasets; porting it requires further modification 4 | # it also does not expose any tuning configuration yet 5 | env_name=labelmaker 6 | eval "$(conda shell.bash hook)" 7 | conda activate $env_name 8 | 9 | conda_home="$(conda info | grep "active env location : " | cut -d ":" -f2-)" 10 | conda_home="${conda_home#"${conda_home%%[![:space:]]*}"}" 11 | 12 | echo $conda_home 13 | 14 | which python 15 | which pip 16 | which nvcc 17 | 18 | # add cuda compiler to path 19 | export CUDA_HOST_COMPILER="$conda_home/bin/gcc" 20 | export CUDA_PATH="$conda_home" 21 | export CUDA_HOME=$CUDA_PATH 22 | export LD_LIBRARY_PATH=$conda_home/lib:$LD_LIBRARY_PATH 23 | export LIBRARY_PATH="$conda_home/lib/stubs:$LIBRARY_PATH" 24 | export TCNN_CUDA_ARCHITECTURES=75 25 | 26 | if [ -z "$1" ]; then 27 | echo "No target directory specified!" 28 | exit 1 29 | else 30 | target_dir=$1 31 | fi 32 | 33 | # extract mask3D 34 | python models/mask3d_inst.py \ 35 | --seed 42 \ 36 | --workspace ${target_dir} 37 | 38 | python models/mask3d_inst.py \ 39 | --seed 43 \ 40 | --output intermediate/scannet200_mask3d_2 \ 41 | --workspace ${target_dir} 42 | 43 | # extract omnidata normal 44 | python models/omnidata_normal.py \ 45 | --workspace ${target_dir} 46 | 47 | python models/omnidata_depth.py \ 48 | --workspace ${target_dir} 49 | 50 | # extract hha depth; a higher --n_jobs may lead to failure 51 | python models/hha_depth.py \ 52 | --n_jobs 4 \ 53 | --workspace ${target_dir} 54 | 55 | # cmx 56 | python models/cmx.py \ 57 | --workspace ${target_dir} 58 | 59 | python models/cmx.py --flip \ 60 | --workspace ${target_dir} 61 | 62 | # internimage 63 | python models/internimage.py \ 64 | --workspace ${target_dir} 65 | 66 | python models/internimage.py --flip \ 67 | --workspace ${target_dir} 68 | 69 | # grounded sam 70 | python models/grounded_sam.py \ 71 | --workspace ${target_dir} 72 | 73 | python models/grounded_sam.py --flip \ 74 | --workspace ${target_dir} 75 | 76 | # ovseg 77 | python models/ovseg.py \ 78 | --workspace ${target_dir} 79 | 80 | python models/ovseg.py --flip \ 81 | --workspace ${target_dir} 82 | 83 | # consensus 84 | python labelmaker/consensus.py \ 85 | --workspace ${target_dir} --n_jobs 8 86 | 87 | # point lifting 88 | python labelmaker/lifting_3d/lifting_points.py \ 89 | --workspace ${target_dir} 90 | 91 | conda deactivate 92 | 93 | # 3D lifting, mesh extraction, and rendering 94 | bash labelmaker/lifting_3d/lifting.sh ${target_dir} 95 | -------------------------------------------------------------------------------- /scripts/pipeline_arkit.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | env_name=labelmaker 4 | eval "$(conda shell.bash hook)" 5 | conda activate $env_name 6 | 7 | if [ -z "$1" ]; then 8 | echo "No ARKitScenes directory specified!" 9 | exit 1 10 | else 11 | original_dir=$1 12 | fi 13 | 14 | if [ -z "$2" ]; then 15 | echo "No target directory specified!" 
16 | exit 1 17 | else 18 | target_dir=$2 19 | fi 20 | 21 | # preprocessing 22 | python scripts/arkitscenes2labelmaker.py \ 23 | --scan_dir ${original_dir} \ 24 | --target_dir ${target_dir} 25 | 26 | # now run pipeline.sh 27 | scripts/pipeline.sh ${target_dir} 28 | 29 | -------------------------------------------------------------------------------- /scripts/replica2labelmaker.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import math 5 | import os 6 | import shutil 7 | import sys 8 | from os.path import abspath, dirname, exists, join 9 | from pathlib import Path 10 | 11 | import cv2 12 | import gin 13 | import numpy as np 14 | from PIL import Image 15 | from scipy.interpolate import CubicSpline 16 | from scipy.spatial.transform import Rotation, RotationSpline 17 | from tqdm import trange 18 | 19 | sys.path.append(abspath(join(dirname(__file__), '..'))) 20 | from utils_3d import fuse_mesh 21 | 22 | 23 | def get_intrinsics(H: int, W: int): 24 | # defined here: https://github.com/Harry-Zhi/semantic_nerf/blob/b79f9c3640b62350e9c167a66c273c2121428ce1/SSR/training/trainer.py#L55C1-L55C1 25 | # replica use a 90 degree fov camera 26 | 27 | hfov = 90 28 | 29 | fx = W / 2.0 / math.tan(math.radians(hfov / 2.0)) 30 | fy = fx 31 | cx = (W - 1.0) / 2.0 32 | cy = (H - 1.0) / 2.0 33 | 34 | return np.asarray([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) 35 | 36 | 37 | @gin.configurable 38 | def process_replica( 39 | scan_dir: str, 40 | target_dir: str, 41 | sdf_trunc: float, 42 | voxel_length: float, 43 | depth_trunc: float, 44 | ): 45 | logger = logging.getLogger('Replica Process') 46 | logger.setLevel(logging.DEBUG) 47 | consoleHeader = logging.StreamHandler() 48 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 49 | consoleHeader.setFormatter(formatter) 50 | logger.addHandler(consoleHeader) 51 | 52 | logger.info( 53 | "Processing Replica scan to LabelMaker format, from {} to {}...".format( 54 | scan_dir, target_dir)) 55 | 56 | color_dir = join(scan_dir, 'rgb') 57 | depth_dir = join(scan_dir, 'depth') 58 | label_dir = join(scan_dir, 'semantic_class') 59 | pose_file = join(scan_dir, 'traj_w_c.txt') 60 | 61 | assert exists(color_dir), "rgb attribute not downloaded!" 62 | assert exists(depth_dir), "depth attribute not downloaded!" 63 | assert exists(label_dir), "semantic_class attribute not downloaded!" 64 | assert exists(pose_file), "traj_w_c.txt attribute not downloaded!" 
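  # Worked example (illustration only, not executed by the script): with hfov
  # fixed at 90 degrees, get_intrinsics is fully determined by the image size.
  # For a 640x480 frame: fx = fy = 640 / 2 / tan(45 deg) = 320.0,
  # cx = (640 - 1) / 2 = 319.5, cy = (480 - 1) / 2 = 239.5, so
  # get_intrinsics(480, 640) yields
  #   [[320.,   0., 319.5],
  #    [  0., 320., 239.5],
  #    [  0.,   0.,   1. ]]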
65 | 66 | color_file_list = os.listdir(color_dir) 67 | depth_file_list = os.listdir(depth_dir) 68 | label_file_list = Path(label_dir).glob('semantic_class_*.png') 69 | label_file_list = [str(pth) for pth in label_file_list] 70 | 71 | # get file name idx 72 | color_idx, color_inv = np.unique( 73 | np.array([ 74 | int(name.split('_')[1].split('.png')[0]) for name in color_file_list 75 | ]), 76 | return_index=True, 77 | ) 78 | depth_idx, depth_inv = np.unique( 79 | np.array([ 80 | int(name.split('_')[1].split('.png')[0]) for name in depth_file_list 81 | ]), 82 | return_index=True, 83 | ) 84 | label_idx, label_inv = np.unique( 85 | np.array([ 86 | int(name.split('_')[-1].split('.png')[0]) for name in label_file_list 87 | ]), 88 | return_index=True, 89 | ) 90 | 91 | # load poses 92 | poses = np.loadtxt(pose_file, delimiter=' ').reshape(-1, 4, 4) 93 | 94 | # check if indexes are the same and the number is the same as poses 95 | assert (color_idx == depth_idx).all() and ( 96 | color_idx == label_idx).all() and color_idx.shape[0] == poses.shape[0] 97 | 98 | # get correspondence to original file 99 | num_frame = int(poses.shape[0]) 100 | rows = [] 101 | for i in range(num_frame): 102 | frame_id = '{:06d}'.format(i) 103 | color_pth = color_file_list[color_inv[i]] 104 | depth_pth = depth_file_list[depth_inv[i]] 105 | label_pth = label_file_list[label_inv[i]] 106 | rows.append([frame_id, color_pth, depth_pth, label_pth]) 107 | 108 | # write to new file 109 | shutil.rmtree(target_dir, ignore_errors=True) 110 | os.makedirs(target_dir, exist_ok=True) 111 | os.makedirs(join(target_dir, 'color'), exist_ok=True) 112 | os.makedirs(join(target_dir, 'depth'), exist_ok=True) 113 | os.makedirs(join(target_dir, 'intrinsic'), exist_ok=True) 114 | os.makedirs(join(target_dir, 'pose'), exist_ok=True) 115 | os.makedirs(join(target_dir, 'gt_label'), exist_ok=True) 116 | 117 | # first write correspondence list 118 | fields = [ 119 | 'frame_id', 120 | 'original_color_path', 121 | 'original_depth_path', 122 | 'original_ground_truth_label_path', 123 | ] 124 | correspondence_list = [dict(zip(fields, row)) for row in rows] 125 | json_object = json.dumps(correspondence_list, indent=4) 126 | with open(join(target_dir, 'correspondence.json'), 'w') as jsonfile: 127 | jsonfile.write(json_object) 128 | logger.info("Saved old and new files correspondence to {}.".format( 129 | join(target_dir, 'correspondence.json'))) 130 | 131 | logger.info("Transfering files...") 132 | for idx in trange(num_frame): 133 | frame_id, color_pth, depth_pth, label_pth = rows[idx] 134 | 135 | # save color 136 | tgt_color_pth = join(target_dir, 'color', frame_id + '.jpg') 137 | color_img = Image.open(join(color_dir, color_pth)) 138 | color_img.save(tgt_color_pth) 139 | h, w, _ = np.asarray(color_img).shape 140 | 141 | # save pose 142 | tgt_pose_pth = join(target_dir, 'pose', frame_id + '.txt') 143 | np.savetxt(tgt_pose_pth, poses[idx]) 144 | 145 | # process and save intr 146 | tgt_intrinsic_pth = join(target_dir, 'intrinsic', frame_id + '.txt') 147 | intrinsic = get_intrinsics(h, w) 148 | np.savetxt(tgt_intrinsic_pth, intrinsic) 149 | 150 | # process and save depth 151 | depth = cv2.imread(join(depth_dir, depth_pth), cv2.IMREAD_UNCHANGED) 152 | depth = cv2.resize(depth, (w, h), interpolation=cv2.INTER_NEAREST) 153 | tgt_depth_pth = join(target_dir, 'depth', frame_id + '.png') 154 | cv2.imwrite(tgt_depth_pth, depth) 155 | 156 | # process and save ground truth label 157 | label = cv2.imread(join(label_dir, label_pth), cv2.IMREAD_UNCHANGED) 158 | label = 
cv2.resize(label, (w, h), interpolation=cv2.INTER_NEAREST) 159 | tgt_label_pth = join(target_dir, 'gt_label', frame_id + '.png') 160 | cv2.imwrite(tgt_label_pth, label) 161 | 162 | logger.info("File transfer finished!") 163 | 164 | logger.info("Fusing RGBD images into TSDF volume...") 165 | fuse_mesh( 166 | scan_dir=target_dir, 167 | sdf_trunc=sdf_trunc, 168 | voxel_length=voxel_length, 169 | depth_trunc=depth_trunc, 170 | depth_scale=1000.0, 171 | ) # depth_scale is fixed in Replica, so it is not exposed as a CLI argument 172 | logger.info("Fusion finished! Mesh saved to {}".format( 173 | join(target_dir, 'mesh.ply'))) 174 | 175 | 176 | def arg_parser(): 177 | parser = argparse.ArgumentParser() 178 | parser.add_argument("--scan_dir", type=str) 179 | parser.add_argument("--target_dir", type=str) 180 | parser.add_argument("--sdf_trunc", type=float, default=0.04) 181 | parser.add_argument("--voxel_length", type=float, default=0.008) 182 | parser.add_argument("--depth_trunc", type=float, default=3.0) 183 | parser.add_argument('--config', help='Name of config file') 184 | 185 | return parser.parse_args() 186 | 187 | 188 | if __name__ == "__main__": 189 | args = arg_parser() 190 | if args.config is not None: 191 | gin.parse_config_file(args.config) 192 | process_replica( 193 | scan_dir=args.scan_dir, 194 | target_dir=args.target_dir, 195 | sdf_trunc=args.sdf_trunc, 196 | voxel_length=args.voxel_length, 197 | depth_trunc=args.depth_trunc, 198 | ) 199 | -------------------------------------------------------------------------------- /scripts/replica_download.sh: -------------------------------------------------------------------------------- 1 | # if no download directory is specified, use the current directory 2 | if [ -z "$1" ]; then 3 | download_dir='.' 4 | else 5 | download_dir=$1 6 | fi 7 | 8 | cd $download_dir 9 | 10 | wget -O replica_semantic_nerf.zip "https://www.dropbox.com/sh/9yu1elddll00sdl/AAC-rSJdLX0C6HhKXGKMOIija?dl=0" 11 | UNZIP_DISABLE_ZIPBOMB_DETECTION=TRUE unzip replica_semantic_nerf.zip && rm replica_semantic_nerf.zip 12 | mv Replica_Dataset Replica_Dataset_Semantic_Nerf 13 | cd Replica_Dataset_Semantic_Nerf 14 | unzip \*.zip && rm -rf *.zip 15 | 16 | cd -- 17 | cd -- 18 | -------------------------------------------------------------------------------- /scripts/replica_pipeline.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | env_name=labelmaker 4 | eval "$(conda shell.bash hook)" 5 | conda activate $env_name 6 | 7 | conda_home="$(conda info | grep "active env location : " | cut -d ":" -f2-)" 8 | conda_home="${conda_home#"${conda_home%%[![:space:]]*}"}" 9 | 10 | echo $conda_home 11 | 12 | which python 13 | which pip 14 | which nvcc 15 | 16 | # add cuda compiler to path 17 | export CUDA_HOST_COMPILER="$conda_home/bin/gcc" 18 | export CUDA_PATH="$conda_home" 19 | export CUDA_HOME=$CUDA_PATH 20 | export LD_LIBRARY_PATH=$conda_home/lib:$LD_LIBRARY_PATH 21 | export LIBRARY_PATH="$conda_home/lib/stubs:$LIBRARY_PATH" 22 | export TCNN_CUDA_ARCHITECTURES=75 23 | 24 | if [ -z "$1" ]; then 25 | echo "No Replica scan directory specified!" 26 | exit 1 27 | else 28 | original_dir=$1 29 | fi 30 | 31 | if [ -z "$2" ]; then 32 | echo "No target directory specified!" 
33 | exit 1 34 | else 35 | target_dir=$2 36 | fi 37 | 38 | # preprocessing 39 | python scripts/replica2labelmaker.py \ 40 | --scan_dir ${original_dir} \ 41 | --target_dir ${target_dir} 42 | 43 | # extract mask3D 44 | python models/mask3d_inst.py \ 45 | --seed 42 \ 46 | --workspace ${target_dir} 47 | 48 | python models/mask3d_inst.py \ 49 | --seed 43 \ 50 | --output intermediate/scannet200_mask3d_2 \ 51 | --workspace ${target_dir} 52 | 53 | # extract omnidata normal 54 | python models/omnidata_normal.py \ 55 | --workspace ${target_dir} 56 | 57 | python models/omnidata_depth.py \ 58 | --workspace ${target_dir} 59 | 60 | # extract hha depth, higher jobs may lead to failure 61 | python models/hha_depth.py \ 62 | --n_jobs 4 \ 63 | --workspace ${target_dir} 64 | 65 | # internimage 66 | python models/internimage.py \ 67 | --workspace ${target_dir} 68 | 69 | python models/internimage.py --flip \ 70 | --workspace ${target_dir} 71 | 72 | # grounded sam 73 | python models/grounded_sam.py \ 74 | --workspace ${target_dir} 75 | 76 | python models/grounded_sam.py --flip \ 77 | --workspace ${target_dir} 78 | 79 | # ovseg 80 | python models/ovseg.py \ 81 | --workspace ${target_dir} 82 | 83 | python models/ovseg.py --flip \ 84 | --workspace ${target_dir} 85 | 86 | # consensus 87 | python labelmaker/consensus.py \ 88 | --workspace ${target_dir} --n_jobs 8 89 | 90 | # point lifting 91 | python labelmaker/lifting_3d/lifting_points.py \ 92 | --workspace ${target_dir} 93 | 94 | conda deactivate 95 | 96 | # 3D lifting, mesh extracting, and rendering 97 | bash labelmaker/lifting_3d/lifting.sh ${target_dir} 98 | 99 | # rename to non_cmx versoin 100 | mv $target_dir/labels.txt $target_dir/labels_no_cmx.txt 101 | mv $target_dir/point_lifted_mesh.ply $target_dir/point_lifted_mesh_no_cmx.ply 102 | mv $target_dir/neus_lifted $target_dir/neus_lifted_no_cmx 103 | mv $target_dir/intermediate/consensus $target_dir/intermediate/consensus_no_cmx 104 | mv $target_dir/intermediate/sdfstudio_preprocessing $target_dir/intermediate/sdfstudio_preprocessing_no_cmx 105 | mv $target_dir/intermediate/sdfstudio_train $target_dir/intermediate/sdfstudio_train_no_cmx 106 | 107 | env_name=labelmaker 108 | eval "$(conda shell.bash hook)" 109 | conda activate $env_name 110 | 111 | conda_home="$(conda info | grep "active env location : " | cut -d ":" -f2-)" 112 | conda_home="${conda_home#"${conda_home%%[![:space:]]*}"}" 113 | 114 | echo $conda_home 115 | 116 | which python 117 | which pip 118 | which nvcc 119 | 120 | # add cuda compiler to path 121 | export CUDA_HOST_COMPILER="$conda_home/bin/gcc" 122 | export CUDA_PATH="$conda_home" 123 | export CUDA_HOME=$CUDA_PATH 124 | export LD_LIBRARY_PATH=$conda_home/lib:$LD_LIBRARY_PATH 125 | export LIBRARY_PATH="$conda_home/lib/stubs:$LIBRARY_PATH" 126 | export TCNN_CUDA_ARCHITECTURES=75 127 | 128 | # cmx 129 | python models/cmx.py \ 130 | --workspace ${target_dir} 131 | 132 | python models/cmx.py --flip \ 133 | --workspace ${target_dir} 134 | 135 | # consensus 136 | python labelmaker/consensus.py \ 137 | --workspace ${target_dir} --n_jobs 8 138 | 139 | # point lifting 140 | python labelmaker/lifting_3d/lifting_points.py \ 141 | --workspace ${target_dir} 142 | 143 | conda deactivate 144 | 145 | # 3D lifting, mesh extracting, and rendering 146 | bash labelmaker/lifting_3d/lifting.sh ${target_dir} 147 | -------------------------------------------------------------------------------- /scripts/replica_singularity_slurm.sh: -------------------------------------------------------------------------------- 1 
10 | module load gcc/11.4.0 cuda/12.1.1 eth_proxy
11 |
12 | LABELMAKER_REPO=/cluster/home/guanji/LabelMaker # path to your checkout of the LabelMaker repo; the code base is bind-mounted into the container below
13 |
14 | # the dataset is downloaded to scratch storage (valid for 14 days);
15 | # modify the source and target directories as needed
16 | scene=room_0
17 | sequence=1
18 | source_dir=/cluster/scratch/guanji/Replica_Dataset_Semantic_Nerf/${scene}/Sequence_${sequence}
19 | target_dir=$SCRATCH/replica_${scene}_${sequence}
20 | mkdir -p $target_dir
21 |
22 | # use wandb to monitor sdfstudio training
23 | WANDB_API_KEY="6b447b1218e7f042525c176c16b0cd32d3e58956"
24 | WANDB_ENTITY="labelmaker-sdfstudio"
25 |
26 | # make temporary directory for processing
27 | mkdir -p $TMPDIR/.cache
28 |
29 | singularity exec --nv \
30 |   --bind /cluster/project/cvg/labelmaker/checkpoints:/LabelMaker/checkpoints \
31 |   --bind $LABELMAKER_REPO/env_v2:/LabelMaker/env_v2 \
32 |   --bind $LABELMAKER_REPO/labelmaker:/LabelMaker/labelmaker \
33 |   --bind $LABELMAKER_REPO/testing:/LabelMaker/testing \
34 |   --bind $LABELMAKER_REPO/models:/LabelMaker/models \
35 |   --bind $LABELMAKER_REPO/scripts:/LabelMaker/scripts \
36 |   --bind $LABELMAKER_REPO/.gitmodules:/LabelMaker/.gitmodules \
37 |   --bind $TMPDIR/.cache:$HOME/.cache \
38 |   --bind $source_dir:/source \
39 |   --bind $target_dir:/target \
40 |   --env WANDB_ENTITY=$WANDB_ENTITY \
41 |   --env WANDB_API_KEY=$WANDB_API_KEY \
42 |   /cluster/project/cvg/labelmaker/labelmaker.simg \
43 |   bash -c "cd /LabelMaker && export PATH=/miniconda3/condabin:$PATH && bash ./scripts/replica_pipeline.sh /source /target"
--------------------------------------------------------------------------------
/scripts/scanner3d2labelmaker.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import logging
4 | import os
5 | import shutil
6 | import sys
7 | import glob
8 | from os.path import abspath, dirname, exists, join
9 | from typing import List, Optional
10 |
11 | import cv2
12 | import gin
13 | import numpy as np
14 | import open3d as o3d
15 |
16 | from PIL import Image
17 | from scipy.interpolate import CubicSpline
18 | from scipy.spatial.transform import Rotation, RotationSpline
19 | from tqdm import trange
20 | import matplotlib.pyplot as plt
21 |
22 | from copy import copy
23 |
24 | sys.path.append(abspath(join(dirname(__file__), '..')))
25 | from utils_3d import fuse_mesh
26 |
27 |
28 | def extract_pose(data):
29 |   return np.asarray(data['cameraPoseARFrame'])
30 |
31 | def extract_intrinsics(data):
32 |   return np.asarray(data['intrinsics'])
33 |
34 | def render_depth(world_to_cam, intrinsics, mesh, resolution):
35 |   rays = o3d.t.geometry.RaycastingScene.create_rays_pinhole(
36 |       width_px=resolution[1],
37 |       height_px=resolution[0],
38 |       intrinsic_matrix=intrinsics[:3, :3],
39 |       extrinsic_matrix=world_to_cam,  # world to camera
40 |   )
41 |
42 |   scene = o3d.t.geometry.RaycastingScene()
43 |   scene.add_triangles(mesh)
44 |   vis = scene.cast_rays(rays)
45 |
46 |   depth = vis['t_hit'].numpy()
47 |   return depth
48 |
49 |
50 | @gin.configurable
51 | def process_scanner3d(
52 |     scan_dir: str,
53 |     target_dir: str,
54 |     sdf_trunc: float,
55 |     voxel_length: float,
56 |     depth_trunc: float,
57 |     resize: Optional[List] = None,
58 | ):
59 |
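  # Overview (added comments): this converter
  #   1. reads the per-frame JSON files for camera pose and intrinsics,
  #   2. re-orients each pose into the LabelMaker camera convention,
  #   3. renders a per-frame depth map by raycasting the exported mesh, and
  #   4. writes color/depth/pose/intrinsic files and fuses a TSDF mesh.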
60 |   logger = logging.getLogger('Scanner3DProcess')
61 |   logger.setLevel(logging.DEBUG)
62 |   console_handler = logging.StreamHandler()
63 |   formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
64 |   console_handler.setFormatter(formatter)
65 |   logger.addHandler(console_handler)
66 |
67 |   logger.info(
68 |       "Processing Scanner3D scan to LabelMaker format, from {} to {}...".
69 |       format(scan_dir, target_dir))
70 |
71 |   color_dir = join(scan_dir)
72 |
73 |   color_file_list = glob.glob(join(color_dir, 'frame_*.jpg'))
74 |   color_file_list = sorted([os.path.basename(f) for f in color_file_list], key=lambda x: int(x.split('_')[1].split('.jpg')[0]))
75 |
76 |   traj_file_list = glob.glob(join(color_dir, 'frame_*.json'))
77 |   traj_file_list = sorted([os.path.basename(f) for f in traj_file_list], key=lambda x: int(x.split('_')[1].split('.json')[0]))
78 |
79 |   # write to new file
80 |   shutil.rmtree(target_dir, ignore_errors=True)
81 |   os.makedirs(target_dir, exist_ok=True)
82 |   os.makedirs(join(target_dir, 'color'), exist_ok=True)
83 |   os.makedirs(join(target_dir, 'depth'), exist_ok=True)
84 |   os.makedirs(join(target_dir, 'intrinsic'), exist_ok=True)
85 |   os.makedirs(join(target_dir, 'pose'), exist_ok=True)
86 |
87 |   # load scanner 3d mesh
88 |   mesh = o3d.io.read_triangle_mesh(scan_dir + '/export.obj')
89 |   mesh.compute_vertex_normals()
90 |   mesh = o3d.t.geometry.TriangleMesh.from_legacy(mesh)
91 |
92 |   num_frame = len(color_file_list)
93 |   logger.info("Transferring files...")
94 |   for idx in trange(num_frame):
95 |     color_pth, traj_pth = color_file_list[idx], traj_file_list[idx]
96 |
97 |     traj_data = json.load(open(join(color_dir, traj_pth)))
98 |
99 |     pose_mat = extract_pose(traj_data).reshape((4, 4))
100 |
101 |     rotation = pose_mat[:3, :3]
102 |
103 |     # rotate the camera and flip the axes into the LabelMaker convention
104 |     rotation[2, :] = -rotation[2, :]
105 |     rotation[1, :] = -rotation[1, :]
106 |     rotation[0, :] = -rotation[0, :]
107 |     pose_mat[:3, :3] = rotation
108 |     pose_mat[:, 0] = -pose_mat[:, 0]
109 |
110 |     R_x_90 = np.array([[1, 0, 0, 0],
111 |                        [0, np.cos(np.radians(90)), -np.sin(np.radians(90)), 0],
112 |                        [0, np.sin(np.radians(90)), np.cos(np.radians(90)), 0],
113 |                        [0, 0, 0, 1]])
114 |
115 |     R_z_90 = np.array([[np.cos(np.radians(-90)), -np.sin(np.radians(-90)), 0, 0],
116 |                        [np.sin(np.radians(-90)), np.cos(np.radians(-90)), 0, 0],
117 |                        [0, 0, 1, 0],
118 |                        [0, 0, 0, 1]])
119 |
120 |     R_z_x = np.dot(R_z_90, R_x_90)
121 |     pose_mat = np.dot(R_z_x, pose_mat)
122 |
123 |     # save color
124 |     tgt_color_pth = join(target_dir, 'color',
125 |                          str(idx) + '.jpg')  # png -> jpg, compressed
126 |     color_img = Image.open(join(color_dir, color_pth))
127 |     if resize is not None:
128 |       original_size = (color_img.width, color_img.height)
129 |       color_img = color_img.resize(resize)
130 |     color_img.save(tgt_color_pth)
131 |
132 |     intr = extract_intrinsics(traj_data).reshape((3, 3))
133 |
134 |     if resize is not None:
135 |       w, h = resize
136 |       width_factor = w / float(original_size[0])
137 |       height_factor = h / float(original_size[1])
138 |       intr[0, 0] *= width_factor
139 |       intr[1, 1] *= height_factor
140 |       intr[0, 2] *= width_factor
141 |       intr[1, 2] *= height_factor
142 |     else:
143 |       h, w, _ = np.asarray(color_img).shape
144 |     depth = render_depth(np.linalg.inv(pose_mat), intr, mesh, (h, w))
145 |     depth = depth * 1000  # meters -> millimeters
146 |     depth = depth.astype(np.uint16)
147 |
148 |     # save pose
149 |     tgt_pose_pth = join(target_dir, 'pose', str(idx) + '.txt')
150 |     np.savetxt(tgt_pose_pth, pose_mat)
151 |
152 |     # process and save intr
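    # LabelMaker consumes one intrinsic .txt per frame (compare
    # testing/test_scan/intrinsic/), so the (possibly rescaled) 3x3 matrix
    # is written out for every frame index. (added comment)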
153 |     tgt_intrinsic_pth = join(target_dir, 'intrinsic', str(idx) + '.txt')
154 |     np.savetxt(tgt_intrinsic_pth, intr)
155 |
156 |     tgt_depth_pth = join(target_dir, 'depth', str(idx) + '.png')
157 |     cv2.imwrite(tgt_depth_pth, depth)
158 |
159 |   logger.info("File transfer finished!")
160 |
161 |   logger.info("Fusing RGBD images into TSDF Volume...")
162 |   fuse_mesh(
163 |       scan_dir=target_dir,
164 |       sdf_trunc=sdf_trunc,
165 |       voxel_length=voxel_length,
166 |       depth_trunc=depth_trunc,
167 |       depth_scale=1000.0,
168 |   )  # depth_scale is fixed to 1000.0 here (depth is stored in millimeters), so it is not exposed as a CLI argument
169 |   logger.info("Fusion finished! Saving to file as {}".format(
170 |       join(target_dir, 'mesh.ply')))
171 |
172 |
173 | def arg_parser():
174 |   parser = argparse.ArgumentParser()
175 |   parser.add_argument("--scan_dir", type=str)
176 |   parser.add_argument("--target_dir", type=str)
177 |   parser.add_argument("--sdf_trunc", type=float, default=0.04)
178 |   parser.add_argument("--voxel_length", type=float, default=0.008)
179 |   parser.add_argument("--depth_trunc", type=float, default=3.0)
180 |   parser.add_argument('--config', help='Name of config file')
181 |
182 |   return parser.parse_args()
183 |
184 |
185 | if __name__ == "__main__":
186 |   args = arg_parser()
187 |   if args.config is not None:
188 |     gin.parse_config_file(args.config)
189 |   process_scanner3d(
190 |       scan_dir=args.scan_dir,
191 |       target_dir=args.target_dir,
192 |       sdf_trunc=args.sdf_trunc,
193 |       voxel_length=args.voxel_length,
194 |       depth_trunc=args.depth_trunc,
195 |       resize=[640, 480],
196 |   )
197 |
--------------------------------------------------------------------------------
/scripts/scannet2labelmaker.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os, sys
3 |
4 | # from SensorData import SensorData
5 |
6 |
7 | import os, struct
8 | import numpy as np
9 | import shutil
10 | import zlib
11 | # import imageio
12 | import imageio.v2 as imageio
13 | import cv2
14 | import png
15 |
16 | COMPRESSION_TYPE_COLOR = {-1: 'unknown', 0: 'raw', 1: 'png', 2: 'jpeg'}
17 | COMPRESSION_TYPE_DEPTH = {-1: 'unknown', 0: 'raw_ushort', 1: 'zlib_ushort', 2: 'occi_ushort'}
18 |
19 | class RGBDFrame():
20 |   def load(self, file_handle):
21 |     self.camera_to_world = np.asarray(struct.unpack('f'*16, file_handle.read(16*4)), dtype=np.float32).reshape(4, 4)
22 |     self.timestamp_color = struct.unpack('Q', file_handle.read(8))[0]
23 |     self.timestamp_depth = struct.unpack('Q', file_handle.read(8))[0]
24 |     self.color_size_bytes = struct.unpack('Q', file_handle.read(8))[0]
25 |     self.depth_size_bytes = struct.unpack('Q', file_handle.read(8))[0]
26 |     self.color_data = b''.join(struct.unpack('c'*self.color_size_bytes, file_handle.read(self.color_size_bytes)))
27 |     self.depth_data = b''.join(struct.unpack('c'*self.depth_size_bytes, file_handle.read(self.depth_size_bytes)))
28 |
29 |
30 |   def decompress_depth(self, compression_type):
31 |     if compression_type == 'zlib_ushort':
32 |       return self.decompress_depth_zlib()
33 |     else:
34 |       raise ValueError('unsupported depth compression type: ' + compression_type)
35 |
36 |
37 |   def decompress_depth_zlib(self):
38 |     return zlib.decompress(self.depth_data)
39 |
40 |
41 |   def decompress_color(self, compression_type):
42 |     if compression_type == 'jpeg':
43 |       return self.decompress_color_jpeg()
44 |     else:
45 |       raise ValueError('unsupported color compression type: ' + compression_type)
46 |
47 |
48 |   def decompress_color_jpeg(self):
49 |     return imageio.imread(self.color_data)
50 |
51 |
52 | class SensorData:
53 |
54 |   def __init__(self, filename):
55 |     self.version = 4
56 |     self.load(filename)
57 |
58 |
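  # .sens layout, as parsed below (added comment): format version, sensor
  # name, color/depth intrinsics and extrinsics (4x4 each), compression
  # types, image sizes, the depth shift, and finally the RGBD frames.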
59 |   def load(self, filename):
60 |     with open(filename, 'rb') as f:
61 |       version = struct.unpack('I', f.read(4))[0]
62 |       assert self.version == version
63 |       strlen = struct.unpack('Q', f.read(8))[0]
64 |       # self.sensor_name = ''.join(struct.unpack('c'*strlen, f.read(strlen)))
65 |       self.sensorname = f.read(strlen)
66 |       self.intrinsic_color = np.asarray(struct.unpack('f'*16, f.read(16*4)), dtype=np.float32).reshape(4, 4)
67 |       self.extrinsic_color = np.asarray(struct.unpack('f'*16, f.read(16*4)), dtype=np.float32).reshape(4, 4)
68 |       self.intrinsic_depth = np.asarray(struct.unpack('f'*16, f.read(16*4)), dtype=np.float32).reshape(4, 4)
69 |       self.extrinsic_depth = np.asarray(struct.unpack('f'*16, f.read(16*4)), dtype=np.float32).reshape(4, 4)
70 |       self.color_compression_type = COMPRESSION_TYPE_COLOR[struct.unpack('i', f.read(4))[0]]
71 |       self.depth_compression_type = COMPRESSION_TYPE_DEPTH[struct.unpack('i', f.read(4))[0]]
72 |       self.color_width = struct.unpack('I', f.read(4))[0]
73 |       self.color_height = struct.unpack('I', f.read(4))[0]
74 |       self.depth_width = struct.unpack('I', f.read(4))[0]
75 |       self.depth_height = struct.unpack('I', f.read(4))[0]
76 |       self.depth_shift = struct.unpack('f', f.read(4))[0]
77 |       num_frames = struct.unpack('Q', f.read(8))[0]
78 |       self.frames = []
79 |       for i in range(num_frames):
80 |         frame = RGBDFrame()
81 |         frame.load(f)
82 |         self.frames.append(frame)
83 |
84 |
85 |   def export_depth_images(self, output_path, image_size=None, frame_skip=1):
86 |     # if not os.path.exists(output_path):
87 |     shutil.rmtree(output_path, ignore_errors=True)
88 |     os.makedirs(output_path)
89 |     print('exporting', len(self.frames)//frame_skip, ' depth frames to', output_path)
90 |     for f in range(0, len(self.frames), frame_skip):
91 |       depth_data = self.frames[f].decompress_depth(self.depth_compression_type)
92 |       depth = np.frombuffer(depth_data, dtype=np.uint16).reshape(self.depth_height, self.depth_width)
93 |       if image_size is not None:
94 |         depth = cv2.resize(depth, (image_size[1], image_size[0]), interpolation=cv2.INTER_NEAREST)
95 |       #imageio.imwrite(os.path.join(output_path, str(f) + '.png'), depth)
96 |       with open(os.path.join(output_path, str(f).zfill(6) + '.png'), 'wb') as fout:  # write 16-bit png; do not shadow the loop variable f
97 |         writer = png.Writer(width=depth.shape[1], height=depth.shape[0], bitdepth=16)
98 |         depth = depth.reshape(-1, depth.shape[1]).tolist()
99 |         writer.write(fout, depth)
100 |
101 |   def export_color_images(self, output_path, image_size=None, frame_skip=1):
102 |     # if not os.path.exists(output_path):
103 |     shutil.rmtree(output_path, ignore_errors=True)
104 |     os.makedirs(output_path)
105 |     print('exporting', len(self.frames)//frame_skip, 'color frames to', output_path)
106 |     for f in range(0, len(self.frames), frame_skip):
107 |       color = self.frames[f].decompress_color(self.color_compression_type)
108 |       if image_size is not None:
109 |         color = cv2.resize(color, (image_size[1], image_size[0]), interpolation=cv2.INTER_NEAREST)
110 |       imageio.imwrite(os.path.join(output_path, str(f).zfill(6) + '.jpg'), color)
111 |
112 |
113 |   def save_mat_to_file(self, matrix, filename):
114 |     with open(filename, 'w') as f:
115 |       for line in matrix:
116 |         np.savetxt(f, line[np.newaxis], fmt='%f')
117 |
118 |
119 |   def export_poses(self, output_path, frame_skip=1):
120 |     # if not os.path.exists(output_path):
121 |     shutil.rmtree(output_path, ignore_errors=True)
122 |     os.makedirs(output_path)
123 |     print('exporting', len(self.frames)//frame_skip, 'camera poses to', output_path)
124 |     for f in range(0, len(self.frames), frame_skip):
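      # each pose is the frame's 4x4 camera-to-world matrix, saved as one
      # plain-text file per frame (added comment)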
125 |       self.save_mat_to_file(self.frames[f].camera_to_world, os.path.join(output_path, str(f).zfill(6) + '.txt'))
126 |
127 |
128 |   def export_intrinsics(self, output_path, original_intrinsic=None, resize=None):
129 |     # if not os.path.exists(output_path):
130 |     shutil.rmtree(output_path, ignore_errors=True)
131 |     os.makedirs(output_path)
132 |     print('exporting camera intrinsics to', output_path)
133 |     if resize is None:
134 |       self.save_mat_to_file(self.intrinsic_color, os.path.join(output_path, 'intrinsic_color.txt'))
135 |       self.save_mat_to_file(self.extrinsic_color, os.path.join(output_path, 'extrinsic_color.txt'))
136 |       self.save_mat_to_file(self.intrinsic_depth, os.path.join(output_path, 'intrinsic_depth.txt'))
137 |       self.save_mat_to_file(self.extrinsic_depth, os.path.join(output_path, 'extrinsic_depth.txt'))
138 |     else:
139 |       # if not os.path.exists(original_intrinsic):
140 |       shutil.rmtree(original_intrinsic, ignore_errors=True)
141 |       os.makedirs(original_intrinsic)
142 |       self.save_mat_to_file(self.intrinsic_color, os.path.join(original_intrinsic, 'intrinsic_color.txt'))
143 |       self.save_mat_to_file(self.extrinsic_color, os.path.join(original_intrinsic, 'extrinsic_color.txt'))
144 |       self.save_mat_to_file(self.intrinsic_depth, os.path.join(original_intrinsic, 'intrinsic_depth.txt'))
145 |       self.save_mat_to_file(self.extrinsic_depth, os.path.join(original_intrinsic, 'extrinsic_depth.txt'))
146 |       w = resize[1]/1296  # ScanNet color images are 1296x968
147 |       h = resize[0]/968
148 |       intrinsic_color = self.intrinsic_color[:3, :3]
149 |       scaled_intrinsic_color = np.diag([w, h, 1]) @ intrinsic_color
150 |       for i in range(0, len(self.frames)):
151 |         target = os.path.join(output_path, str(i).zfill(6) + '.txt')
152 |         # print(target)
153 |         self.save_mat_to_file(scaled_intrinsic_color, target)
154 |
155 |
156 | def arg_parser():
157 |   parser = argparse.ArgumentParser()
158 |   # data paths
159 |   parser.add_argument('--scan_dir', required=True, help='path to scan to read')
160 |   parser.add_argument('--target_dir', required=True, help='path to output folder')
161 |   parser.add_argument('--export_depth_images', dest='export_depth_images')
162 |   parser.add_argument('--export_color_images', dest='export_color_images')
163 |   parser.add_argument('--export_poses', dest='export_poses')
164 |   parser.add_argument('--export_intrinsics', dest='export_intrinsics')
165 |   parser.set_defaults(export_depth_images=True, export_color_images=True, export_poses=True, export_intrinsics=True)
166 |   return parser.parse_args()
167 |
168 |
169 | def main():
170 |
171 |   args = arg_parser()
172 |   if not os.path.exists(args.target_dir):
173 |     os.makedirs(args.target_dir)
174 |   # load the data
175 |   filename = os.path.join(args.scan_dir, str(os.path.basename(args.scan_dir.rstrip(os.sep))) + '.sens')
176 |   sys.stdout.write('loading %s...' % filename)
177 |   sd = SensorData(filename)
178 |   sys.stdout.write('loaded!\n')
179 |
180 |   # copy RGB mesh file
181 |   source_mesh = os.path.join(args.scan_dir, os.path.basename(args.scan_dir.rstrip(os.sep)) + '_vh_clean.ply')
182 |   destination_mesh = os.path.join(args.target_dir, 'mesh.ply')
183 |   try:
184 |     with open(source_mesh, 'rb') as src:
185 |       with open(destination_mesh, 'wb') as dst:
186 |         dst.write(src.read())
187 |     print(f"file {source_mesh} was copied to {destination_mesh}")
188 |   except FileNotFoundError:
189 |     print(f"{source_mesh} not found")
190 |
191 |   # NOTE: resize images to the resolution LabelMaker expects
192 |   resize = [480, 640]
193 |
194 |   if args.export_depth_images:
195 |     sd.export_depth_images(os.path.join(args.target_dir, 'depth'))
196 |   if args.export_color_images:
197 |     sd.export_color_images(os.path.join(args.target_dir, 'color'), image_size=resize)
198 |   if args.export_poses:
199 |     sd.export_poses(os.path.join(args.target_dir, 'pose'))
200 |   if args.export_intrinsics:
201 |     sd.export_intrinsics(os.path.join(args.target_dir, 'intrinsic'), os.path.join(args.target_dir, 'original_intrinsic'), resize=resize)
202 |
203 |
204 |
205 |
206 | if __name__ == '__main__':
207 |   main()
208 |
209 |
210 |
211 |
--------------------------------------------------------------------------------
/scripts/segmentation_eval.py:
--------------------------------------------------------------------------------
1 | import sys, os
2 | import argparse
3 | import logging
4 | from pathlib import Path
5 | from tqdm import tqdm
6 | import cv2
7 | import numpy as np
8 | from glob import glob
9 | import re
10 | from joblib import Parallel, delayed
11 | from scipy.sparse import coo_matrix
12 |
13 | from labelmaker.label_mappings import LabelMatcher
14 |
15 | logging.basicConfig(level="INFO")
16 | log = logging.getLogger('Segmentation Evaluation')
17 |
18 |
19 | def _dist_get_matcher_confmat(scene_dir, keys, pred_space, label_space,
20 |                               pred_template, label_template):
21 |   matcher = LabelMatcher(pred_space, label_space)
22 |   confmat = np.zeros((len(matcher.right_ids), len(matcher.right_ids)),
23 |                      dtype=np.int64)
24 |   for k in tqdm(keys):
25 |     pred = cv2.imread(str(scene_dir / pred_template.format(k=k)),
26 |                       cv2.IMREAD_UNCHANGED)
27 |     label = cv2.imread(str(scene_dir / label_template.format(k=k)),
28 |                        cv2.IMREAD_UNCHANGED)
29 |     if pred.shape[0] != label.shape[0] or pred.shape[1] != label.shape[1]:
30 |       pred = cv2.resize(pred, (label.shape[1], label.shape[0]),
31 |                         interpolation=cv2.INTER_NEAREST)
32 |     confmat += matcher.confusion_matrix(pred, label)
33 |   return confmat
34 |
35 |
36 | def _dist_get_unmatched_confmat(scene_dir, keys, pred_space, label_space,
37 |                                 pred_template, label_template, subsampling):
38 |   matcher = LabelMatcher(pred_space, label_space)
39 |   confmat = np.zeros((len(matcher.left_ids) + 1, len(matcher.right_ids) + 1),
40 |                      dtype=np.int64)
41 |   # we do not know whether all predictions or labels actually contain only the ids listed,
42 |   # or whether there are gaps in the data.
43 |   # Therefore, we keep 0 in each dimension as a "not in list" category.
44 |   left_id_to_confmat_idx = np.zeros(max(matcher.left_ids) + 1, dtype=np.int64)
45 |   for i, left_id in enumerate(matcher.left_ids):
46 |     left_id_to_confmat_idx[left_id] = i + 1
47 |   right_id_to_confmat_idx = np.zeros(max(matcher.right_ids) + 1, dtype=np.int64)
48 |   for i, right_id in enumerate(matcher.right_ids):
49 |     right_id_to_confmat_idx[right_id] = i + 1
50 |   for k in tqdm(keys):
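    # predictions may exist only for every subsampling-th frame, so the
    # prediction file index is derived from the label key via
    # k // subsampling (added comment)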
51 |     pred = cv2.imread(
52 |         str(scene_dir / pred_template.format(k=(k // subsampling))),
53 |         cv2.IMREAD_UNCHANGED)
54 |     label = cv2.imread(str(scene_dir / label_template.format(k=k)),
55 |                        cv2.IMREAD_UNCHANGED)
56 |     if pred.shape[0] != label.shape[0] or pred.shape[1] != label.shape[1]:
57 |       pred = cv2.resize(pred, (label.shape[1], label.shape[0]),
58 |                         interpolation=cv2.INTER_NEAREST)
59 |     sample_weights = np.ones_like(label.flatten(), dtype=np.int64)
60 |     left = left_id_to_confmat_idx[pred.flatten()]
61 |     right = right_id_to_confmat_idx[label.flatten()]
62 |     confmat += coo_matrix((sample_weights, (left, right)),
63 |                           shape=confmat.shape,
64 |                           dtype=np.int64).toarray()
65 |   return confmat
66 |
67 |
68 | """
69 | def _get_confmat(scene_dir,
70 |                  keys,
71 |                  pred_space,
72 |                  label_space,
73 |                  pred_template,
74 |                  label_template,
75 |                  n_jobs=8):
76 |   confmat_path = scene_dir / pred_template.split(
77 |       '/')[0] / f'confmat_{label_space}.txt'
78 |   if confmat_path.exists():
79 |     log.info(f'using cached {confmat_path}')
80 |     return np.loadtxt(str(confmat_path)).astype(np.int64)
81 |   # split keys into chunks for parallel execution
82 |   keys = np.array_split(keys, n_jobs)
83 |   confmats = Parallel(n_jobs=n_jobs)(
84 |       delayed(_dist_get_confmat)(scene_dir, keys[i], pred_space, label_space,
85 |                                  pred_template, label_template)
86 |       for i in range(n_jobs))
87 |   confmat = np.sum(confmats, axis=0)
88 |   np.savetxt(str(confmat_path), confmat)
89 |   return confmat.astype(np.int64)
90 | """
91 |
92 |
93 | def metrics_from_confmat(confmat):
94 |   assert confmat.shape[0] == confmat.shape[1]
95 |   assert confmat[:, 0].sum() == 0
96 |   float_confmat = confmat.astype(float)
97 |   metrics = {
98 |       'iou':
99 |           np.diag(float_confmat) /
100 |           (float_confmat.sum(axis=1) + float_confmat.sum(axis=0) -
101 |            np.diag(float_confmat)),
102 |       'acc':
103 |           np.diag(float_confmat) / (float_confmat.sum(0)),
104 |   }
105 |
106 |   nan_mask_c = confmat[1:, :].sum(axis=1) == 0  # no prediction for this class
107 |   nan_mask_r = confmat[:, 1:].sum(axis=0) == 0  # no groundtruth for this class
108 |
109 |   nan_mask = np.logical_and(nan_mask_c, nan_mask_r)
110 |   nan_mask = nan_mask_r  # only missing ground truth excludes a class from the mean
111 |
112 |   acc = np.nan_to_num(metrics['acc'][1:], nan=0.0)  # fill NaNs with 0
113 |   iou = np.nan_to_num(metrics['iou'][1:], nan=0.0)  # fill NaNs with 0
114 |
115 |   metrics['mIoU'] = (iou * (1 - nan_mask)).sum() / (1 - nan_mask).sum()
116 |   metrics['mAcc'] = (acc * (1 - nan_mask)).sum() / (1 - nan_mask).sum()
117 |
118 |   # metrics['mIoU'] = iou.mean()
119 |   # metrics['mAcc'] = acc.mean()
120 |
121 |   metrics['tAcc'] = np.diag(float_confmat).sum() / float_confmat.sum()
122 |
123 |   acc[nan_mask == 1] = np.nan
124 |   iou[nan_mask == 1] = np.nan
125 |
126 |   metrics['acc'] = acc.copy()
127 |   metrics['iou'] = iou.copy()
128 |
129 |   return metrics
130 |
131 |
132 | def _get_confmat(scene_dir,
133 |                  keys,
134 |                  pred_space,
135 |                  label_space,
136 |                  pred_template,
137 |                  label_template,
138 |                  subsampling=1,
139 |                  overwrite_confmat=False,
140 |                  n_jobs=8):
141 |   confmat_path = scene_dir / pred_template.split(
142 |       '/')[0] / f'confmat_{pred_space}_{label_space}.txt'
143 |   if confmat_path.exists() and not overwrite_confmat:
144 |     confmat = np.loadtxt(str(confmat_path)).astype(np.int64)
145 |   else:
146 |     # split keys into chunks for parallel execution
147 |     keys = np.array_split(keys, n_jobs)
148 |     confmats = Parallel(n_jobs=n_jobs)(delayed(_dist_get_unmatched_confmat)(
149 |         scene_dir, keys[i], pred_space, label_space, pred_template,
150 |         label_template, subsampling) for i in range(n_jobs))
151 |     confmat = np.sum(confmats, axis=0)
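    # cache the raw (unmatched) confusion matrix next to the predictions so
    # that repeated evaluations can skip the expensive recomputation
    # (added comment)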
152 |     np.savetxt(str(confmat_path), confmat)
153 |   matcher = LabelMatcher(pred_space, label_space)
154 |   return matcher.match_confmat(confmat)
155 |
156 |
157 | def evaluate_scene(scene_dir,
158 |                    pred_space,
159 |                    label_space,
160 |                    keys=None,
161 |                    subsampling=1,
162 |                    pred_template='pred/{k}.png',
163 |                    pred_template_glob='pred/{k}.png',
164 |                    label_template='label_filt/{k}.png',
165 |                    label_template_glob='label_filt/{k}.png',
166 |                    overwrite_confmat=False,
167 |                    n_jobs=8):
168 |   scene_dir = Path(scene_dir)
169 |   if keys is None:
170 |
171 |     files = glob(str(scene_dir / label_template_glob.format(k='*')),
172 |                  recursive=True)
173 |     keys = sorted(
174 |         int(re.search(label_template_glob.format(k=r'(\d+)'), x).group(1))
175 |         for x in files)
176 |     keys = keys[::subsampling]
177 |
178 |   log.info(f"getting confmat for {pred_template.split('/')[0]} in {scene_dir}")
179 |   confmat = _get_confmat(scene_dir,
180 |                          keys,
181 |                          pred_space,
182 |                          label_space,
183 |                          pred_template,
184 |                          label_template,
185 |                          subsampling=subsampling,
186 |                          overwrite_confmat=overwrite_confmat,
187 |                          n_jobs=n_jobs)
188 |   metrics = metrics_from_confmat(confmat)
189 |   return metrics, confmat
190 |
191 |
192 | def evaluate_scenes(scene_dirs,
193 |                     pred_space,
194 |                     label_space,
195 |                     subsampling=1,
196 |                     pred_template='pred/{k}.png',
197 |                     pred_template_glob='pred/{k}.png',
198 |                     label_template_glob='label_filt/{k}.png',
199 |                     label_template='label_filt/{k}.png',
200 |                     overwrite_confmat=False,
201 |                     n_jobs=8):
202 |   confmat = None
203 |   for k, scene_dir in enumerate(scene_dirs):
204 |
205 |     _, c = evaluate_scene(scene_dir,
206 |                           pred_space,
207 |                           label_space,
208 |                           pred_template=pred_template[k]
209 |                           if type(pred_template) is list else pred_template,
210 |                           pred_template_glob=pred_template_glob,
211 |                           label_template=label_template,
212 |                           label_template_glob=label_template_glob,
213 |                           subsampling=subsampling,
214 |                           overwrite_confmat=overwrite_confmat,
215 |                           n_jobs=n_jobs)
216 |     if confmat is None:
217 |       confmat = c
218 |     else:
219 |       confmat += c
220 |   metrics = metrics_from_confmat(confmat)
221 |   return metrics, confmat
222 |
223 |
224 | if __name__ == '__main__':
225 |   parser = argparse.ArgumentParser()
226 |   parser.add_argument('scene')
227 |   parser.add_argument('--replica', action='store_true')
228 |   parser.add_argument('--j', default=8)
229 |   flags = parser.parse_args()
230 |   scene_dir = Path(flags.scene)
231 |   assert scene_dir.exists() and scene_dir.is_dir()
232 |   if flags.replica:
233 |     label_template = 'semantic_class/semantic_class_{k}.png'
234 |     label_space = 'replicaid'
235 |   else:
236 |     label_template = 'label_agile3d/{k}.png'
237 |     label_space = 'wn199'
238 |
239 |   # check which predictors are present
240 |   for subdir in scene_dir.iterdir():
241 |     if subdir.is_dir():
242 |       if subdir.name == 'pred_internimage':
243 |         pred_space = 'ade20k'
244 |         pred_template = 'pred_internimage/{k}.png'
245 |       elif subdir.name == 'pred_cmx':
246 |         pred_space = 'nyu40id'
247 |         pred_template = 'pred_cmx/{k}.png'
248 |       elif subdir.name == 'pred_consensus':
249 |         if flags.replica:
250 |           pred_space = 'replicaid'
251 |         else:
252 |           pred_space = 'wn199'
253 |         pred_template = 'pred_consensus/{k}.png'
254 |       elif subdir.name == 'pred_wn_consensus':
255 |         pred_space = 'wn199'
256 |         pred_template = 'pred_wn_consensus/{k}.png'
257 |       elif subdir.name == 'pred_ovseg_replica':
258 |         pred_space = 'replicaid'
259 |         pred_template = 'pred_ovseg_replica/{k}.png'
260 |       elif subdir.name.startswith('pred_ovseg_w'):
261 |         pred_space = 'wn199'
262 |         pred_template = subdir.name + '/{k}.png'
263 |       elif subdir.name == 'label-filt':
264 |         pred_space = 'id'
265 |         pred_template = 'label-filt/{k}.png'
266 |       elif subdir.name == 'nerf':
267 |         pred_space = 'replicaid'
268 |         pred_template = 'nerf/pred_nerf_{k}.png'
269 |       elif subdir.name == 'pred_mask3d_rendered':
270 |         pred_space = 'id'
271 |         pred_template = 'pred_mask3d_rendered/{k}.png'
272 |       elif subdir.name.startswith('pred_sdfstudio'):
273 |         if flags.replica:
274 |           pred_space = 'replicaid'
275 |         else:
276 |           pred_space = 'wn199'
277 |         pred_template = subdir.name + '/{k:05d}.png'
278 |       else:
279 |         continue
280 |       metrics, confmat = evaluate_scene(scene_dir,
281 |                                         pred_space,
282 |                                         label_space,
283 |                                         pred_template=pred_template,
284 |                                         label_template=label_template,
285 |                                         n_jobs=int(flags.j))
--------------------------------------------------------------------------------
/scripts/utils_3d.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | from os.path import exists, join
4 |
5 | import cv2
6 | import gin
7 | import numpy as np
8 | import open3d as o3d
9 | from PIL import Image
10 | from tqdm import tqdm
11 |
12 |
13 | @gin.configurable
14 | def fuse_mesh(
15 |     scan_dir: str,
16 |     sdf_trunc: float = 0.06,
17 |     voxel_length: float = 0.02,
18 |     depth_trunc: float = 3.0,
19 |     depth_scale: float = 1000.0,
20 | ):
21 |
22 |   color_dir = join(scan_dir, 'color')
23 |   depth_dir = join(scan_dir, 'depth')
24 |   pose_dir = join(scan_dir, 'pose')
25 |   intrinsic_dir = join(scan_dir, 'intrinsic')
26 |
27 |   assert exists(color_dir)
28 |   assert exists(depth_dir)
29 |   assert exists(pose_dir)
30 |   assert exists(intrinsic_dir)
31 |
32 |   color_list = os.listdir(color_dir)
33 |   color_list.sort(key=lambda e: int(e[:-4]))
34 |
35 |   depth_list = os.listdir(depth_dir)
36 |   depth_list.sort(key=lambda e: int(e[:-4]))
37 |
38 |   pose_list = os.listdir(pose_dir)
39 |   pose_list.sort(key=lambda e: int(e[:-4]))
40 |
41 |   intr_list = os.listdir(intrinsic_dir)
42 |   intr_list.sort(key=lambda e: int(e[:-4]))
43 |
44 |   # check that every frame has matching color, depth, pose, and intrinsic files
45 |   assert all(
46 |       (a[:-4] == b[:-4]) and (a[:-4] == c[:-4]) and (a[:-4] == d[:-4])
47 |       for a, b, c, d in zip(color_list, depth_list, pose_list, intr_list))
48 |
49 |   tsdf = o3d.pipelines.integration.ScalableTSDFVolume(
50 |       sdf_trunc=sdf_trunc,
51 |       voxel_length=voxel_length,
52 |       color_type=o3d.pipelines.integration.TSDFVolumeColorType.RGB8)
53 |
54 |   for color_f, depth_f, pose_f, intr_f in tqdm(
55 |       zip(color_list, depth_list, pose_list, intr_list),
56 |       total=len(color_list),
57 |   ):
58 |
59 |     intr = np.loadtxt(join(intrinsic_dir, intr_f))
60 |     pose = np.loadtxt(join(pose_dir, pose_f))
61 |     color = np.asanyarray(Image.open(join(color_dir, color_f))).astype(np.uint8)
62 |     depth = np.asarray(Image.open(join(depth_dir, depth_f))).astype(np.uint16)
63 |
64 |     h, w, _ = color.shape
65 |     color = o3d.geometry.Image(color)
66 |     depth = o3d.geometry.Image(depth)
67 |
68 |     rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
69 |         color=color,
70 |         depth=depth,
71 |         depth_scale=depth_scale,
72 |         depth_trunc=depth_trunc,
73 |         convert_rgb_to_intensity=False)
74 |
75 |     tsdf.integrate(
76 |         image=rgbd,
77 |         intrinsic=o3d.camera.PinholeCameraIntrinsic(
78 |             height=h,
79 |             width=w,
80 |             fx=intr[0, 0],
81 |             fy=intr[1, 1],
82 |             cx=intr[0, 2],
83 |             cy=intr[1, 2]
84 |         ),
85 |         extrinsic=np.linalg.inv(pose),
86 |     )
87 |
88 |   mesh = tsdf.extract_triangle_mesh()
89 |   o3d.io.write_triangle_mesh(join(scan_dir, 'mesh.ply'), mesh)
90 |
91 |
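# Usage sketch (added; the path below is hypothetical): fuse_mesh can also be
# called directly from Python instead of through the CLI defined below, e.g.
#
#   from utils_3d import fuse_mesh
#   fuse_mesh(scan_dir='/path/to/scan', sdf_trunc=0.04,
#             voxel_length=0.008, depth_trunc=3.0, depth_scale=1000.0)
#
# scan_dir must contain the color/, depth/, pose/ and intrinsic/ folders
# produced by the converter scripts above.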
92 | def arg_parser():
93 |   parser = argparse.ArgumentParser()
94 |   parser.add_argument("--workspace", type=str)
95 |   parser.add_argument("--sdf_trunc", type=float, default=0.04)
96 |   parser.add_argument("--voxel_length", type=float, default=0.008)
97 |   parser.add_argument("--depth_trunc", type=float, default=3.0)
98 |   parser.add_argument("--depth_scale", type=float, default=1000.0)
99 |   parser.add_argument('--config', help='Name of config file')
100 |
101 |   return parser.parse_args()
102 |
103 |
104 | if __name__ == "__main__":
105 |   args = arg_parser()
106 |   if args.config is not None:
107 |     gin.parse_config_file(args.config)
108 |   fuse_mesh(
109 |       scan_dir=args.workspace,
110 |       sdf_trunc=args.sdf_trunc,
111 |       voxel_length=args.voxel_length,
112 |       depth_trunc=args.depth_trunc,
113 |       depth_scale=args.depth_scale,
114 |   )
115 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from setuptools import find_packages, setup
3 |
4 | setup(
5 |     name='labelmaker',
6 |     version='0.1',
7 |     description='',
8 |     packages=find_packages(include=['labelmaker*', 'scripts*']),
9 |     install_requires=['numpy'],
10 |     package_data={'': ['*.csv', '*.sh']},
11 |     include_package_data=True,
12 | )
13 |
--------------------------------------------------------------------------------
/testing/test_models/test_cmx_00_omnidata.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from os.path import abspath, dirname, exists, join
4 | from pathlib import Path
5 |
6 | import cv2
7 | import numpy as np
8 |
9 | sys.path.append(abspath(join(dirname(__file__), '../../models')))
10 |
11 |
12 | def test_omnidata():
13 |   from omnidata_depth import run as run_omnidata
14 |
15 |   scene_dir = Path(abspath(join(dirname(__file__), '../test_scan')))
16 |   output_folder = 'intermediate/depth_omnidata_1'
17 |
18 |   run_omnidata(
19 |       scene_dir=scene_dir,
20 |       output_folder=output_folder,
21 |       device='cuda:0',
22 |   )
23 |
--------------------------------------------------------------------------------
/testing/test_models/test_cmx_01_hha.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from os.path import abspath, dirname, exists, join
4 | from pathlib import Path
5 |
6 | import cv2
7 | import numpy as np
8 |
9 | sys.path.append(abspath(join(dirname(__file__), '../../models')))
10 |
11 |
12 | def test_hha():
13 |   from hha_depth import run as run_hha
14 |
15 |   scene_dir = Path(abspath(join(dirname(__file__), '../test_scan')))
16 |   input_folder = 'intermediate/depth_omnidata_1'
17 |   output_folder = 'intermediate/hha'
18 |
19 |   run_hha(
20 |       scene_dir=scene_dir,
21 |       input_folder=input_folder,
22 |       output_folder=output_folder,
23 |       n_jobs=2,
24 |   )
25 |
--------------------------------------------------------------------------------
/testing/test_models/test_cmx_02_cmx.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from os.path import abspath, dirname, exists, join
4 | from pathlib import Path
5 |
6 | import cv2
7 | import numpy as np
8 |
9 | sys.path.append(abspath(join(dirname(__file__), '../../models')))
10 |
11 |
12 | def test_cmx():  # first drop omnidata entries that earlier tests appended to sys.path
13 |   sys.path
= [path for path in sys.path if 'omnidata' not in path] 14 | from cmx import run as run_cmx 15 | 16 | scene_dir = Path(abspath(join(dirname(__file__), '../test_scan'))) 17 | output_folder = 'intermediate/depth_omnidata_1' 18 | name = '000000.png' 19 | 20 | run_cmx( 21 | scene_dir=scene_dir, 22 | output_folder=output_folder, 23 | device='cuda:0', 24 | ) 25 | 26 | img_arr = cv2.imread(str(scene_dir / output_folder / name), 27 | cv2.IMREAD_UNCHANGED) 28 | assert np.unique(img_arr).shape[0] > 1 29 | -------------------------------------------------------------------------------- /testing/test_models/test_consensus.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from os.path import abspath, dirname, exists, join 4 | from pathlib import Path 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | def test_consensus(): 11 | from labelmaker.consensus import run as run_consensus 12 | 13 | scene_dir = Path(abspath(join(dirname(__file__), '../test_scan'))) 14 | output_folder = 'intermediate/consensus' 15 | name = '000000.png' 16 | 17 | run_consensus( 18 | scene_dir=scene_dir, 19 | output_folder=output_folder, 20 | ) 21 | 22 | img_arr = cv2.imread(str(scene_dir / output_folder / name), 23 | cv2.IMREAD_UNCHANGED) 24 | assert np.unique(img_arr).shape[0] > 1 25 | -------------------------------------------------------------------------------- /testing/test_models/test_grounded_sam.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from os.path import abspath, dirname, exists, join 4 | from pathlib import Path 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | sys.path.append(abspath(join(dirname(__file__), '../../models'))) 10 | 11 | 12 | def test_grounded_sam(): 13 | from grounded_sam import run as run_grounded_sam 14 | 15 | scene_dir = Path(abspath(join(dirname(__file__), '../test_scan'))) 16 | output_folder = 'intermediate/wordnet_groundedsam_1' 17 | name = '000000.png' 18 | 19 | run_grounded_sam( 20 | scene_dir=scene_dir, 21 | output_folder=output_folder, 22 | device='cuda:0', 23 | ) 24 | 25 | img_arr = cv2.imread(str(scene_dir / output_folder / name), 26 | cv2.IMREAD_UNCHANGED) 27 | assert np.unique(img_arr).shape[0] > 1 28 | -------------------------------------------------------------------------------- /testing/test_models/test_internimage.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from os.path import abspath, dirname, exists, join 4 | from pathlib import Path 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | sys.path.append(abspath(join(dirname(__file__), '../../models'))) 10 | 11 | 12 | def test_internimage(): 13 | from internimage import run as run_internimage 14 | 15 | scene_dir = Path(abspath(join(dirname(__file__), '../test_scan'))) 16 | output_folder = 'intermediate/ade20k_internimage_1' 17 | name = '000000.png' 18 | 19 | run_internimage( 20 | scene_dir=scene_dir, 21 | output_folder=output_folder, 22 | device='cuda:0', 23 | ) 24 | 25 | img_arr = cv2.imread(str(scene_dir / output_folder / name), 26 | cv2.IMREAD_UNCHANGED) 27 | assert np.unique(img_arr).shape[0] > 1 28 | -------------------------------------------------------------------------------- /testing/test_models/test_mask3d.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from os.path import abspath, dirname, exists, join 4 | from pathlib import Path 5 | 6 | 
import cv2 7 | import numpy as np 8 | 9 | sys.path.append(abspath(join(dirname(__file__), '../../models'))) 10 | 11 | 12 | def test_mask3d(): 13 | from mask3d_inst import run as run_mask3d_inst 14 | 15 | scene_dir = Path(abspath(join(dirname(__file__), '../test_scan'))) 16 | output_folder = 'intermediate/scannet200_mask3d_1' 17 | name = '000000.png' 18 | 19 | run_mask3d_inst( 20 | scene_dir=scene_dir, 21 | output_folder=output_folder, 22 | device='cuda:0', 23 | ) 24 | 25 | img_arr = cv2.imread(str(scene_dir / output_folder / name), 26 | cv2.IMREAD_UNCHANGED) 27 | assert np.unique(img_arr).shape[0] > 1 28 | -------------------------------------------------------------------------------- /testing/test_models/test_omnidata_normal.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from os.path import abspath, dirname, exists, join 4 | from pathlib import Path 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | sys.path.append(abspath(join(dirname(__file__), '../../models'))) 10 | 11 | 12 | def test_omnidata(): 13 | from omnidata_normal import run as run_omnidata 14 | 15 | scene_dir = Path(abspath(join(dirname(__file__), '../test_scan'))) 16 | output_folder = 'intermediate/normal_omnidata_1' 17 | 18 | run_omnidata( 19 | scene_dir=scene_dir, 20 | output_folder=output_folder, 21 | device='cuda:0', 22 | ) 23 | -------------------------------------------------------------------------------- /testing/test_models/test_ovseg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from os.path import abspath, dirname, exists, join 4 | from pathlib import Path 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | sys.path.append(abspath(join(dirname(__file__), '../../models'))) 10 | 11 | 12 | def test_ovseg(): 13 | from ovseg import run as run_ovseg 14 | 15 | scene_dir = Path(abspath(join(dirname(__file__), '../test_scan'))) 16 | output_folder = 'intermediate/wn_nodef_ovseg_1' 17 | name = '000000.png' 18 | 19 | run_ovseg( 20 | scene_dir=scene_dir, 21 | output_folder=output_folder, 22 | device='cuda:0', 23 | ) 24 | 25 | img_arr = cv2.imread(str(scene_dir / output_folder / name), 26 | cv2.IMREAD_UNCHANGED) 27 | assert np.unique(img_arr).shape[0] > 1 28 | -------------------------------------------------------------------------------- /testing/test_scan/color/000000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/LabelMaker/44ff92d42ae9b0579f016bb7bc5cd4bd09a613a3/testing/test_scan/color/000000.jpg -------------------------------------------------------------------------------- /testing/test_scan/depth/000000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/LabelMaker/44ff92d42ae9b0579f016bb7bc5cd4bd09a613a3/testing/test_scan/depth/000000.png -------------------------------------------------------------------------------- /testing/test_scan/intrinsic/000000.txt: -------------------------------------------------------------------------------- 1 | 5.354909999999999854e+02 0.000000000000000000e+00 3.216309999999999718e+02 2 | 0.000000000000000000e+00 5.354909999999999854e+02 2.394190000000000111e+02 3 | 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 4 | -------------------------------------------------------------------------------- /testing/test_scan/mesh.ply: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/LabelMaker/44ff92d42ae9b0579f016bb7bc5cd4bd09a613a3/testing/test_scan/mesh.ply -------------------------------------------------------------------------------- /testing/test_scan/pose/000000.txt: -------------------------------------------------------------------------------- 1 | 1.829560822640563869e-02 -9.986093787547937195e-01 -4.944268785766454261e-02 5.316176301353460476e-02 2 | -9.997274661290184161e-01 -1.755411944260264895e-02 -1.538981344419157286e-02 5.090203246895330658e-02 3 | 1.450048919443955656e-02 4.971077904800336178e-02 -9.986583871672847224e-01 -7.897646873520455107e-03 4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 5 | --------------------------------------------------------------------------------