├── .gitignore ├── .gitmodules ├── .vscode └── settings.json ├── LICENSE ├── README.md ├── docker ├── ubuntu16.04+miniconda.dockerfile └── ubuntu20.04+miniconda.dockerfile ├── env_v2 ├── README.md ├── download_checkpoints.sh ├── install_labelmaker_env.sh ├── install_sdfstudio_env.sh ├── requirements.txt ├── test_labelmaker_env.sh └── versions.py ├── environment.yml ├── labelmaker ├── __init__.py ├── consensus.py ├── label_data.py ├── label_mappings.py ├── lifting_3d │ ├── __init__.py │ ├── lifting.sh │ ├── lifting_points.py │ └── preprocessing.py ├── mappings │ ├── __init__.py │ └── label_mapping.csv ├── scannet_200_labels.py ├── visualisation.py └── visualization_3d.py ├── models ├── cmx.py ├── grounded_sam.py ├── hha_depth.py ├── internimage.py ├── mask3d_inst.py ├── omnidata_depth.py ├── omnidata_normal.py └── ovseg.py ├── notebooks ├── evaluation_labelmaker.ipynb ├── generate_3d_projections.ipynb ├── visualization.ipynb ├── visualization_lifting.ipynb ├── visualize_arkitscenes.ipynb └── visualize_labelmaker_3d.ipynb ├── scripts ├── __init__.py ├── arkitscenes2labelmaker.py ├── pipeline.sh ├── pipeline_arkit.sh ├── replica2labelmaker.py ├── replica_download.sh ├── replica_pipeline.sh ├── replica_singularity_slurm.sh ├── scanner3d2labelmaker.py ├── scannet2labelmaker.py ├── segmentation_eval.py └── utils_3d.py ├── setup.py └── testing ├── test_models ├── test_cmx_00_omnidata.py ├── test_cmx_01_hha.py ├── test_cmx_02_cmx.py ├── test_consensus.py ├── test_grounded_sam.py ├── test_internimage.py ├── test_mask3d.py ├── test_omnidata_normal.py └── test_ovseg.py └── test_scan ├── color └── 000000.jpg ├── depth └── 000000.png ├── intrinsic └── 000000.txt ├── mesh.ply └── pose └── 000000.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info/** 2 | **/__pycache__/** 3 | pose_refinement/notebooks/.ipynb_checkpoints/* 4 | pose_refinement/notebooks 5 | rsyncignore.txt 6 | pose_refinement/__pycache__ 7 | pose_refinement/output 8 | slurm* 9 | notebooks/** 10 | scripts_arkitscenes/.ipynb_checkpoints/* 11 | scripts_arkitscenes/*.ply 12 | transform_info_scene0575_00.npz 13 | saved/** 14 | checkpoints/ 15 | 3rdparty/nltk_data/ 16 | testing/test_scan/intermediate/ 17 | env_v2/INSTALLED_VERSIONS.sh 18 | wandb/ 19 | build/ 20 | *.simg 21 | data -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "mmseg/mmsegmentation"] 2 | path = 3rdparty/mmsegmentation 3 | url = https://github.com/open-mmlab/mmsegmentation.git 4 | [submodule "mmseg/InternImage"] 5 | path = 3rdparty/InternImage 6 | url = https://github.com/OpenGVLab/InternImage.git 7 | [submodule "3rdparty/omnidata"] 8 | path = 3rdparty/omnidata 9 | url = https://github.com/EPFL-VILAB/omnidata.git 10 | [submodule "3rdparty/ov-seg"] 11 | path = 3rdparty/ov-seg 12 | url = https://github.com/facebookresearch/ov-seg.git 13 | [submodule "3rdparty/detectron2"] 14 | path = 3rdparty/detectron2 15 | url = https://github.com/facebookresearch/detectron2.git 16 | [submodule "3rdparty/ARKitScenes"] 17 | path = 3rdparty/ARKitScenes 18 | url = https://github.com/apple/ARKitScenes.git 19 | [submodule "3rdparty/Mask3D"] 20 | path = 3rdparty/Mask3D 21 | url = https://github.com/cvg/Mask3D.git 22 | [submodule "3rdparty/RGBX_Semantic_Segmentation"] 23 | path = 3rdparty/RGBX_Semantic_Segmentation 24 | url = https://github.com/huaaaliu/RGBX_Semantic_Segmentation.git 25 | [submodule 
"3rdparty/Depth2HHA-python"] 26 | path = 3rdparty/Depth2HHA-python 27 | url = https://github.com/hermannsblum/Depth2HHA-python.git 28 | [submodule "3rdparty/Grounded-Segment-Anything"] 29 | path = 3rdparty/Grounded-Segment-Anything 30 | url = https://github.com/cvg/Grounded-Segment-Anything.git 31 | [submodule "3rdparty/recognize-anything"] 32 | path = 3rdparty/recognize-anything 33 | url = https://github.com/cvg/recognize-anything.git 34 | [submodule "3rdparty/sdfstudio"] 35 | path = 3rdparty/sdfstudio 36 | url = https://github.com/cvg/sdfstudio.git 37 | branch = devel 38 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[python]": { 3 | "editor.formatOnSaveMode": "file", 4 | "editor.formatOnSave": true, 5 | "editor.defaultFormatter": "eeyore.yapf" 6 | }, 7 | "python.defaultInterpreterPath": "${env:HOME}/.conda/envs/labelmaker/bin/python", 8 | "yapf.args": [ 9 | "--style", 10 | "{based_on_style: google, indent_width: 2}" 11 | ], 12 | "editor.tabSize": 2, 13 | "python.analysis.extraPaths": [ 14 | "./3rdparty/ov-seg", 15 | "./3rdparty/InternImage/segmentation", 16 | "./3rdparty/ov-seg/third_party/CLIP", 17 | "./3rdparty/omnidata/omnidata_tools/torch", 18 | "./3rdparty/RGBX_Semantic_Segmentation" 19 | ], 20 | "editor.detectIndentation": true, 21 | "editor.indentSize": 2, 22 | "workbench.tree.indent": 4, 23 | } 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, Computer Vision and Geometry Lab 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LabelMaker
2 |
3 | ![LabelMaker Pipeline Overview](https://labelmaker.org/static/images/labelmaker_teaser.png)
4 |
5 | ## Installation
6 |
7 | This is an example installation on Ubuntu 20.04 with CUDA 11.8.
8 |
9 | ### Environment for LabelMaker
10 | This environment is used to run the individual semantic segmentation models and to generate the consensus semantic labels.
11 |
12 | ```sh
13 | bash env_v2/install_labelmaker_env.sh 3.9 11.3 1.12.0 9.5.0
14 | ```
15 |
16 | This command creates a conda environment called `labelmaker` with Python 3.9, CUDA 11.3, PyTorch 1.12.0, and GCC 9.5.0. Here are possible sets of environment versions:
17 | | Python | CUDA toolkit | PyTorch | GCC |
18 | | ------ | ------------ | ------- | ------ |
19 | | 3.9 | 11.3 | 1.12.0 | 9.5.0 |
20 | | 3.9 | 11.6 | 1.13.0 | 10.4.0 |
21 | | 3.9 | 11.8 | 2.0.0 | 10.4.0 |
22 | | 3.10 | 11.8 | 2.0.0 | 10.4.0 |
23 |
24 | For Python 3.10, only the combination `3.10 11.8 2.0.0 10.4.0` has been tested; other combinations might also work.
25 |
26 | ```sh
27 | conda activate labelmaker
28 | ```
29 |
30 | ### Environment for SDFStudio
31 | This environment is used for generating view-consistent consensus semantic labels. It uses the per-frame consensus labels (together with the RGB-D data) to train a neural implicit surface and obtain a view-consistent consensus labeling. It relies on a modified version of SDFStudio. Because SDFStudio needs a specific version of PyTorch, it is installed as a separate environment. To install the environment, run
32 | ```sh
33 | bash env_v2/install_sdfstudio_env.sh 3.10 11.3
34 | ```
35 | Python 3.10 with CUDA toolkit 11.3 is the only tested combination. This version of SDFStudio requires torch==1.12.1, which only supports CUDA 11.3 and 11.6; it may therefore not run on newer GPUs.
36 |
37 | ```sh
38 | conda activate sdfstudio
39 | ```
40 |
41 | ### Download Model Checkpoints
42 |
43 | ```sh
44 | bash env_v2/download_checkpoints.sh
45 | ```
46 |
47 | ## Docker Image
48 |
49 | ### Docker image based on Ubuntu 16.04
50 | ```sh
51 | # Build
52 | docker build --tag labelmaker-env-16.04 -f docker/ubuntu16.04+miniconda.dockerfile .
53 |
54 | # Run
55 | docker run \
56 | --gpus all \
57 | -i --rm \
58 | -v ./env_v2:/LabelMaker/env_v2 \
59 | -v ./models:/LabelMaker/models \
60 | -v ./labelmaker:/LabelMaker/labelmaker \
61 | -v ./checkpoints:/LabelMaker/checkpoints \
62 | -v ./testing:/LabelMaker/testing \
63 | -v ./.gitmodules:/LabelMaker/.gitmodules \
64 | -t labelmaker-env-16.04 /bin/bash
65 | ```
66 |
67 | ### Docker image based on Ubuntu 20.04
68 |
69 | ```sh
70 | # Build
71 | docker build --tag labelmaker-env-20.04 -f docker/ubuntu20.04+miniconda.dockerfile .
72 |
73 | # Run
74 | docker run \
75 | --gpus all \
76 | -i --rm \
77 | -v ./env_v2:/LabelMaker/env_v2 \
78 | -v ./models:/LabelMaker/models \
79 | -v ./labelmaker:/LabelMaker/labelmaker \
80 | -v ./checkpoints:/LabelMaker/checkpoints \
81 | -v ./testing:/LabelMaker/testing \
82 | -v ./.gitmodules:/LabelMaker/.gitmodules \
83 | -t labelmaker-env-20.04 /bin/bash
84 | ```
85 |
86 |
87 | ## Setup Scene
88 |
89 | ### Download scene
90 |
91 | ```sh
92 | export TRAINING_OR_VALIDATION=Training
93 | export SCENE_ID=47333462
94 | python 3rdparty/ARKitScenes/download_data.py raw --split $TRAINING_OR_VALIDATION --video_id $SCENE_ID --download_dir /tmp/ARKitScenes/ --raw_dataset_assets lowres_depth confidence lowres_wide.traj lowres_wide lowres_wide_intrinsics vga_wide vga_wide_intrinsics
95 | ```
96 |
97 | ### Convert scene to LabelMaker workspace
98 |
99 | ```sh
100 | WORKSPACE_DIR=/home/weders/scratch/scratch/LabelMaker/arkitscenes/$SCENE_ID
101 | python scripts/arkitscenes2labelmaker.py --scan_dir /tmp/ARKitScenes/raw/$TRAINING_OR_VALIDATION/$SCENE_ID --target_dir $WORKSPACE_DIR
102 | ```
103 |
104 | ## Run Pipeline on Scene
105 |
106 | ### Run individual models
107 |
108 | 1. InternImage
109 |
110 | ```sh
111 | python models/internimage.py --workspace $WORKSPACE_DIR
112 | ```
113 |
114 | 2. OVSeg
115 |
116 | ```sh
117 | python models/ovseg.py --workspace $WORKSPACE_DIR
118 | ```
119 |
120 | 3. Grounded SAM
121 |
122 | ```sh
123 | python models/grounded_sam.py --workspace $WORKSPACE_DIR
124 | ```
125 |
126 | 4. CMX
127 |
128 | ```sh
129 | python models/omnidata_depth.py --workspace $WORKSPACE_DIR
130 | python models/hha_depth.py --workspace $WORKSPACE_DIR
131 | python models/cmx.py --workspace $WORKSPACE_DIR
132 | ```
133 |
134 | 5. Mask3D
135 |
136 | ```sh
137 | python models/mask3d_inst.py --workspace $WORKSPACE_DIR
138 | ```
139 |
140 | 6. OmniData normal (used for NeuS)
141 | ```sh
142 | python models/omnidata_normal.py --workspace $WORKSPACE_DIR
143 | ```
144 |
145 | ## Run consensus voting
146 |
147 | ```sh
148 | python labelmaker/consensus.py --workspace $WORKSPACE_DIR
149 | ```
150 |
151 |
152 | ## Run 3D Lifting
153 |
154 | Point-based lifting
155 | ```sh
156 | python -m labelmaker.lifting_3d.lifting_points --workspace $WORKSPACE_DIR
157 | ```
158 |
159 |
160 | NeRF-based lifting (required for dense 2D labels)
161 | ```sh
162 | bash labelmaker/lifting_3d/lifting.sh $WORKSPACE_DIR
163 | ```
164 |
165 | ## Visualization
166 |
167 | Visualize 3D point labels (after running point-based lifting)
168 | ```sh
169 | python -m labelmaker.visualization_3d --workspace $WORKSPACE_DIR
170 | ```
171 |
172 | # Capture your own data
173 | With any LiDAR-enabled iOS device, we provide a [script](https://github.com/cvg/LabelMaker/blob/main/scripts/scanner3d2labelmaker.py) to convert the posed keyframes and scanned mesh from the [3D Scanner App](https://apps.apple.com/de/app/3d-scanner-app/id1419913995) into the LabelMaker data format. In the app, use "export all" to export both the mesh and the RGB images with their respective poses. After running the script on the exported data, the whole pipeline can be run as described above.
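For example (a sketch, not the script's documented interface: the `--scan_dir` and `--target_dir` flags are assumed by analogy with the ARKitScenes converter above — check `python scripts/scanner3d2labelmaker.py --help` for the exact flag names):

```sh
python scripts/scanner3d2labelmaker.py --scan_dir /path/to/3d_scanner_app_export --target_dir $WORKSPACE_DIR
```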
174 |
175 |
176 | # Bibtex
177 |
178 | When using LabelMaker in academic works, please use the following reference:
179 |
180 | ```
181 | @inproceedings{Weder2024labelmaker,
182 | title = {{LabelMaker: Automatic Semantic Label Generation from RGB-D Trajectories}},
183 | author={Weder, Silvan and Blum, Hermann and Engelmann, Francis and Pollefeys, Marc},
184 | booktitle = {International Conference on 3D Vision (3DV)},
185 | year = {2024}
186 | }
187 | ```
188 |
189 | # License
190 |
191 | LabelMaker itself is released under the BSD 3-Clause License. However, individual models that can be used as part of LabelMaker may have more restrictive licenses. If a user is prohibited by license from using a specific model, they can simply leave it out of the pipeline. Here are the models and the licenses they use:
192 | - ARKitScenes: CC BY-NC-SA 4.0 license
193 | - InternImage: MIT
194 | - Mask3D: MIT
195 | - GSAM: Apache-2.0
196 | - OpenAI CLIP: MIT
197 | - Grounding DINO: Apache-2.0
198 | - Omnidata: custom license, [view](https://github.com/EPFL-VILAB/omnidata?tab=License-1-ov-file#License-1-ov-file)
199 | - CMX: MIT
200 | - OVSeg: Attribution-NonCommercial 4.0 International, [view](https://github.com/facebookresearch/ov-seg?tab=License-1-ov-file#License-1-ov-file)
201 | -------------------------------------------------------------------------------- /docker/ubuntu16.04+miniconda.dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04
2 | WORKDIR /
3 | ENV TZ=Europe/Zurich
4 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone
5 | RUN apt-get update &&\
6 | apt-get -y install git curl wget make nano libgl1 libglib2.0-0 ffmpeg libsm6 libxext6 && \
7 | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
8 | chmod +x /Miniconda3-latest-Linux-x86_64.sh && \
9 | /Miniconda3-latest-Linux-x86_64.sh -b -p /miniconda3 && \
10 | rm -rf /Miniconda3-latest-Linux-x86_64.sh && \
11 | /miniconda3/bin/conda init bash && \
12 | chmod -R 777 /miniconda3
13 | RUN export PATH="/miniconda3/bin:$PATH" && conda config --set auto_activate_base false
14 | COPY ./.git /LabelMaker/.git
15 | COPY ./.gitmodules /LabelMaker/.gitmodules
16 | COPY ./3rdparty /LabelMaker/3rdparty
17 | COPY ./env_v2 /LabelMaker/env_v2
18 | COPY ./labelmaker /LabelMaker/labelmaker
19 | COPY ./setup.py /LabelMaker/setup.py
20 | WORKDIR /LabelMaker
21 | RUN export PATH="/miniconda3/bin:$PATH" && \
22 | bash env_v2/install_labelmaker_env.sh 3.9 11.3 1.12.0 9.5.0 && \
23 | rm -rf /root/.cache/* && \
24 | chmod -R 777 /miniconda3/envs/labelmaker
25 | RUN export PATH="/miniconda3/bin:$PATH" && \
26 | bash env_v2/install_sdfstudio_env.sh 3.10 11.3 && \
27 | rm -rf /root/.cache/* && \
28 | chmod -R 777 /miniconda3/envs/sdfstudio
29 | -------------------------------------------------------------------------------- /docker/ubuntu20.04+miniconda.dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04
2 | WORKDIR /
3 | ENV TZ=Europe/Zurich
4 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone
5 | RUN apt-get update && \
6 | apt-get -y install git curl wget make nano ffmpeg libsm6 libxext6 unzip && \
7 | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
8 | chmod +x /Miniconda3-latest-Linux-x86_64.sh && \
9 | /Miniconda3-latest-Linux-x86_64.sh -b -p /miniconda3 && \
10 | rm -rf /Miniconda3-latest-Linux-x86_64.sh && \
11 |
/miniconda3/bin/conda init bash && \
12 | chmod -R 777 /miniconda3
13 | RUN export PATH="/miniconda3/bin:$PATH" && conda config --set auto_activate_base false
14 | COPY ./.git /LabelMaker/.git
15 | COPY ./.gitmodules /LabelMaker/.gitmodules
16 | COPY ./3rdparty /LabelMaker/3rdparty
17 | COPY ./env_v2 /LabelMaker/env_v2
18 | COPY ./labelmaker /LabelMaker/labelmaker
19 | COPY ./scripts /LabelMaker/scripts
20 | COPY ./setup.py /LabelMaker/setup.py
21 | WORKDIR /LabelMaker
22 | RUN export PATH="/miniconda3/bin:$PATH" && \
23 | bash env_v2/install_labelmaker_env.sh 3.9 11.3 1.12.0 9.5.0 && \
24 | rm -rf /root/.cache/* && \
25 | chmod -R 777 /miniconda3/envs/labelmaker
26 | RUN export PATH="/miniconda3/bin:$PATH" && \
27 | bash env_v2/install_sdfstudio_env.sh 3.10 11.3 && \
28 | rm -rf /root/.cache/* && \
29 | chmod -R 777 /miniconda3/envs/sdfstudio
30 | -------------------------------------------------------------------------------- /env_v2/README.md: -------------------------------------------------------------------------------- 1 | On Euler, to use the headless-rendering build of Open3D, build OSMesa according to https://github.com/quantaji/open3d-manylinux2014/blob/main/osmesa_euler_build_install.sh, load the LLVM module on Euler, and add OSMesa to LD_LIBRARY_PATH:
2 | ```sh
3 | export LD_LIBRARY_PATH=${HOME}/osmesa/lib:$LD_LIBRARY_PATH
4 | ```
5 |
6 |
7 | ## Install Grounded SAM
8 | First, update all submodules
9 | ```sh
10 | git submodule update --init --recursive
11 | ```
12 |
13 | Install PyTorch and the other required packages
14 | ```sh
15 | pip install -r ./env_v2/02_grounded_sam.txt
16 | ```
17 | Install RAM
18 | ```sh
19 | pip install ./3rdparty/recognize-anything/
20 | ```
21 |
22 | Install SAM
23 | ```sh
24 | pip install ./3rdparty/Grounded-Segment-Anything/segment_anything
25 | ```
26 |
27 | Install Grounding DINO
28 | ```sh
29 | export CUDA_HOST_COMPILER="${HOME}/.conda/envs/labelmaker/bin/gcc"
30 | export CUDA_PATH="${HOME}/.conda/envs/labelmaker"
31 | export CUDA_HOME=$CUDA_PATH
32 | pip install ./3rdparty/Grounded-Segment-Anything/GroundingDINO
33 | ```
34 |
35 | Install MinkowskiEngine
36 | First install OpenBLAS
37 | ```sh
38 | conda install -c anaconda openblas=0.3.20
39 | ```
40 | then, from the MinkowskiEngine source directory, run
41 | ```sh
42 | export CUDA_HOST_COMPILER="${HOME}/.conda/envs/labelmaker/bin/gcc"
43 | export CUDA_PATH="${HOME}/.conda/envs/labelmaker"
44 | export CUDA_HOME=$CUDA_PATH
45 | python setup.py install --force_cuda --blas=openblas
46 | ```
47 | -------------------------------------------------------------------------------- /env_v2/download_checkpoints.sh: -------------------------------------------------------------------------------- 1 | env_name=labelmaker
2 | dir_name="$(pwd)/$(dirname "$0")"
3 | eval "$(conda shell.bash hook)"
4 | conda activate $env_name
5 |
6 | echo $dir_name
7 | mkdir -p $dir_name/../checkpoints
8 |
9 | # ovseg https://drive.google.com/file/d/1cn-ohxgXDrDfkzC1QdO-fi8IjbjXmgKy/view?pli=1
10 | gdown "1cn-ohxgXDrDfkzC1QdO-fi8IjbjXmgKy" -O $dir_name/../checkpoints/ovseg_swinbase_vitL14_ft_mpt.pth
11 |
12 | # recognize-anything https://huggingface.co/spaces/xinyu1205/recognize-anything/blob/main/ram_swin_large_14m.pth
13 | gdown "https://huggingface.co/spaces/xinyu1205/recognize-anything/resolve/main/ram_swin_large_14m.pth" -O $dir_name/../checkpoints/ram_swin_large_14m.pth
14 |
15 | # grounding dino https://github.com/IDEA-Research/Grounded-Segment-Anything/tree/main/GroundingDINO#checkpoints
16 |
"https://huggingface.co/ShilongLiu/GroundingDINO/resolve/main/groundingdino_swint_ogc.pth" -O $dir_name/../checkpoints/groundingdino_swint_ogc.pth 17 | 18 | # sam-hq https://drive.google.com/file/d/1qobFYrI4eyIANfBSmYcGuWRaSIXfMOQ8/view?usp=sharing 19 | gdown 1qobFYrI4eyIANfBSmYcGuWRaSIXfMOQ8 -O $dir_name/../checkpoints/sam_hq_vit_h.pth 20 | 21 | # cmx https://drive.google.com/file/d/1hlyglGnEB0pnWXfHPtBtCGGlKMDh2K--/view 22 | gdown 1hlyglGnEB0pnWXfHPtBtCGGlKMDh2K-- -O $dir_name/../checkpoints/NYUDV2_CMX+Segformer-B2.pth 23 | 24 | # InternImage https://huggingface.co/OpenGVLab/InternImage/blob/main/upernet_internimage_h_896_160k_ade20k.pth 25 | gdown https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask2former_internimage_h_896_80k_cocostuff2ade20k.pth -O $dir_name/../checkpoints/mask2former_internimage_h_896_80k_cocostuff2ade20k.pth 26 | 27 | # Mask3D https://zenodo.org/records/10422707/files/mask3d_scannet200_demo.ckpt 28 | wget "https://zenodo.org/records/10422707/files/mask3d_scannet200_demo.ckpt?download=1" -O $dir_name/../checkpoints/mask3d_scannet200_demo.ckpt 29 | 30 | # omnidata https://drive.google.com/file/d/1Jrh-bRnJEjyMCS7f-WsaFlccfPjJPPHI/view 31 | # too many download 32 | gdown "1Jrh-bRnJEjyMCS7f-WsaFlccfPjJPPHI" -O $dir_name/../checkpoints/omnidata_dpt_depth_v2.ckpt 33 | 34 | # omnidata normal model https://drive.google.com/file/d/1wNxVO4vVbDEMEpnAi_jwQObf2MFodcBR/view 35 | gdown "1wNxVO4vVbDEMEpnAi_jwQObf2MFodcBR&confirm=t" -O $dir_name/../checkpoints/omnidata_dpt_normal_v2.ckpt 36 | -------------------------------------------------------------------------------- /env_v2/install_labelmaker_env.sh: -------------------------------------------------------------------------------- 1 | # exit when any command fails 2 | set -e 3 | 4 | # make sure submodules are updated 5 | git submodule update --init --recursive 6 | 7 | env_name=labelmaker 8 | dir_name="$(pwd)/$(dirname "$0")" 9 | 10 | echo $dir_name 11 | 12 | # decide which version of python cuda pytorch torchvision to use 13 | if [ -z "$1" ]; then 14 | target_python_version="3.10" 15 | else 16 | target_python_version=$1 17 | fi 18 | 19 | if [ -z "$2" ]; then 20 | target_cuda_version="unset" 21 | else 22 | target_cuda_version=$2 23 | fi 24 | 25 | if [ -z "$3" ]; then 26 | target_torch_version="unset" 27 | else 28 | target_torch_version=$3 29 | fi 30 | 31 | if [ -z "$4" ]; then 32 | target_gcc_version="unset" 33 | else 34 | target_gcc_version=$4 35 | fi 36 | 37 | # create env, install gcc cuda and openblas 38 | conda create --name $env_name --yes python=$target_python_version 39 | eval "$(conda shell.bash hook)" 40 | conda activate $env_name 41 | 42 | pip install packaging 43 | python $dir_name/versions.py --target_cuda_version $target_cuda_version --target_torch_version $target_torch_version --target_gcc_version $target_gcc_version 44 | 45 | source $dir_name/INSTALLED_VERSIONS.sh 46 | echo $INSTALLED_CUDA_VERSION 47 | echo $INSTALLED_CUDA_ABBREV 48 | echo $INSTALLED_PYTORCH_VERSION 49 | echo $INSTALLED_GCC_VERSION 50 | echo $INSTALLED_TORCHVISION_VERSION 51 | echo $INSTALLED_OPEN3D_URL 52 | 53 | conda install -y -c "conda-forge" gxx=$INSTALLED_GCC_VERSION 54 | conda install -y -c conda-forge sysroot_linux-64=2.17 55 | conda install -y -c "nvidia/label/cuda-$INSTALLED_CUDA_VERSION" cuda 56 | conda install -y -c anaconda openblas=0.3.20 57 | 58 | conda deactivate 59 | conda activate $env_name 60 | 61 | conda_home="$(conda info | grep "active env location : " | cut -d ":" -f2-)" 62 | 
conda_home="${conda_home#"${conda_home%%[![:space:]]*}"}" 63 | 64 | echo $conda_home 65 | 66 | which python 67 | which pip 68 | which nvcc 69 | 70 | # add cuda compiler to path 71 | export CUDA_HOST_COMPILER="$conda_home/bin/gcc" 72 | export CUDA_PATH="$conda_home" 73 | export CUDA_HOME=$CUDA_PATH 74 | export TORCH_CUDA_ARCH_LIST="6.0 6.1 6.2 7.0 7.2 7.5 8.0 8.6" 75 | export MAX_JOBS=6 76 | export AM_I_DOCKER=1 77 | export BUILD_WITH_CUDA=1 78 | export FORCE_CUDA=1 79 | 80 | # specify NLTK download location 81 | export NLTK_DATA="$dir_name/../3rdparty/nltk_data" 82 | mkdir -p $NLTK_DATA 83 | 84 | # TODO add git checkout of all repository to keep version consistent 85 | 86 | # install all dependency from pypi 87 | pip install -r "$dir_name/requirements.txt" 88 | 89 | # install open3d 90 | pip install $INSTALLED_OPEN3D_URL 91 | 92 | # install torch and torch-scater, they are cuda-version dependent 93 | # Pytorch 94 | pip install torch==$INSTALLED_PYTORCH_VERSION+$INSTALLED_CUDA_ABBREV torchvision==$INSTALLED_TORCHVISION_VERSION+$INSTALLED_CUDA_ABBREV --index-url https://download.pytorch.org/whl/$INSTALLED_CUDA_ABBREV 95 | # torch-scatter 96 | pip install torch-scatter --index-url "" -f "https://data.pyg.org/whl/torch-${INSTALLED_PYTORCH_VERSION}%2B${INSTALLED_CUDA_ABBREV}.html" 97 | pip install mmcv-full==1.6.2 -f https://download.openmmlab.com/mmcv/dist/${INSTALLED_CUDA_ABBREV}/torch${INSTALLED_PYTORCH_VERSION}/index.html 98 | 99 | # install mask3d 100 | # Step 1: install detectron 2 and minkowskiengine 101 | pip install "git+https://github.com/facebookresearch/detectron2.git@710e7795d0eeadf9def0e7ef957eea13532e34cf" 102 | cd $dir_name/../3rdparty/Mask3D/third_party 103 | rm -rf MinkowskiEngine 104 | git clone --recursive "https://github.com/NVIDIA/MinkowskiEngine" 105 | cd MinkowskiEngine 106 | git checkout 02fc608bea4c0549b0a7b00ca1bf15dee4a0b228 107 | python setup.py install --force_cuda --blas=openblas 108 | # Step 2: install scannet segmentor 109 | cd $dir_name/../3rdparty/Mask3D/third_party 110 | rm -rf ScanNet 111 | git clone https://github.com/ScanNet/ScanNet.git 112 | cd ScanNet/Segmentator 113 | git checkout 3e5726500896748521a6ceb81271b0f5b2c0e7d2 114 | make 115 | ## Step 3: pointnet2 116 | cd $dir_name/../3rdparty/Mask3D/third_party/pointnet2 117 | python setup.py install 118 | ## Step 4: install mask3d package 119 | cd $dir_name/../3rdparty/Mask3D 120 | pip install . 121 | pip install --no-deps --force-reinstall --upgrade omegaconf==2.2.0 hydra-core==1.0.5 122 | 123 | # install omnidata + hha + cmx 124 | # Step 1: create folder and install omnidata # might be deprecated as weight will be stored at other path 125 | mkdir -p $dir_name/../3rdparty/omnidata/omnidata_tools/torch/pretrained_models/ 126 | # Step 2: install HHA 127 | cd $dir_name/../3rdparty/Depth2HHA-python 128 | pip install . 129 | # Step 3: install cmx 130 | cd $dir_name/../3rdparty/mmsegmentation 131 | pip install -v -e . 
132 | # Step 4: create an empty txt for cmx eval configuration
133 | cd $dir_name/../3rdparty/RGBX_Semantic_Segmentation
134 | touch empty.txt
135 | # Step 5: replace collections.Iterable with collections.abc.Iterable
136 | sed -i 's/collections.Iterable/collections.abc.Iterable/g' $dir_name/../3rdparty/RGBX_Semantic_Segmentation/utils/transforms.py
137 |
138 | # install grounded sam
139 | pip install $dir_name/../3rdparty/recognize-anything/
140 | pip install $dir_name/../3rdparty/Grounded-Segment-Anything/segment_anything
141 | pip install $dir_name/../3rdparty/Grounded-Segment-Anything/GroundingDINO
142 |
143 | # install ovseg; ovseg customizes CLIP, so reinstall it after grounded sam
144 | cd $dir_name/../3rdparty/ov-seg/third_party/CLIP
145 | python -m pip install -Ue .
146 | python -m nltk.downloader -d $NLTK_DATA wordnet
147 |
148 | # install internimage
149 | # avoid an error when no cuda runtime is available
150 | sed -i 's/torch.cuda.is_available()/True/g' $dir_name/../3rdparty/InternImage/segmentation/ops_dcnv3/setup.py
151 | cd $dir_name/../3rdparty/InternImage/segmentation/ops_dcnv3
152 | sh ./make.sh
153 |
154 | # install labelmaker
155 | pip install -e $dir_name/../.
156 | -------------------------------------------------------------------------------- /env_v2/install_sdfstudio_env.sh: -------------------------------------------------------------------------------- 1 | # exit when any command fails
2 | set -e
3 |
4 | # make sure submodules are updated
5 | git submodule update --init --recursive
6 |
7 | env_name=sdfstudio
8 | dir_name="$(pwd)/$(dirname "$0")"
9 |
10 | echo $dir_name
11 |
12 | # decide which versions of python and cuda to use
13 | if [ -z "$1" ]; then
14 | target_python_version="3.10"
15 | else
16 | target_python_version=$1
17 | fi
18 |
19 | if [ -z "$2" ]; then
20 | target_cuda_version="unset"
21 | else
22 | target_cuda_version=$2
23 | fi
24 |
25 | # create env, install gcc, cuda and openblas
26 | conda create --name $env_name --yes python=$target_python_version
27 | eval "$(conda shell.bash hook)"
28 | conda activate $env_name
29 |
30 | pip install packaging
31 | python $dir_name/versions.py --target_cuda_version $target_cuda_version --target_torch_version 1.12.1 --target_gcc_version 9.5.0
32 |
33 | source $dir_name/INSTALLED_VERSIONS.sh
34 | echo $INSTALLED_CUDA_VERSION
35 | echo $INSTALLED_CUDA_ABBREV
36 | echo $INSTALLED_PYTORCH_VERSION
37 | echo $INSTALLED_GCC_VERSION
38 | echo $INSTALLED_TORCHVISION_VERSION
39 | echo $INSTALLED_OPEN3D_URL
40 |
41 | conda install -y -c "conda-forge" gxx=$INSTALLED_GCC_VERSION
42 | conda install -y -c conda-forge sysroot_linux-64=2.17
43 | conda install -y -c "nvidia/label/cuda-$INSTALLED_CUDA_VERSION" cuda
44 | conda install -y -c anaconda openblas=0.3.20
45 |
46 | conda deactivate
47 | conda activate $env_name
48 |
49 | conda_home="$(conda info | grep "active env location : " | cut -d ":" -f2-)"
50 | conda_home="${conda_home#"${conda_home%%[![:space:]]*}"}"
51 |
52 | echo $conda_home
53 |
54 | which python
55 | which pip
56 | which nvcc
57 |
58 | # add cuda compiler to path
59 | export CUDA_HOST_COMPILER="$conda_home/bin/gcc"
60 | export CUDA_PATH="$conda_home"
61 | export CUDA_HOME=$CUDA_PATH
62 | export LD_LIBRARY_PATH=$conda_home/lib:$LD_LIBRARY_PATH
63 | export LIBRARY_PATH="$conda_home/lib/stubs:$LIBRARY_PATH"
64 | export TCNN_CUDA_ARCHITECTURES=75
65 | export AM_I_DOCKER=1
66 | export BUILD_WITH_CUDA=1
67 | export FORCE_CUDA=1
68 |
69 | # install open3d before sdfstudio
70 |
pip install $INSTALLED_OPEN3D_URL
71 |
72 | # install other packages
73 | pip install gin-config pandas
74 |
75 | # remove open3d dependency
76 | sed -i 's/"open3d>=0.16.0"/#"open3d>=0.16.0"/g' $dir_name/../3rdparty/sdfstudio/pyproject.toml
77 |
78 | # install sdfstudio
79 | pip install $dir_name/../3rdparty/sdfstudio
80 | # ns-install-cli
81 |
82 | # install labelmaker also
83 | pip install -e $dir_name/..
84 |
85 | pip install torch==$INSTALLED_PYTORCH_VERSION+$INSTALLED_CUDA_ABBREV torchvision==$INSTALLED_TORCHVISION_VERSION+$INSTALLED_CUDA_ABBREV --index-url https://download.pytorch.org/whl/$INSTALLED_CUDA_ABBREV
86 |
87 | # install tcnn
88 | conda install -y -c anaconda git
89 | pip install "git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch"
90 | -------------------------------------------------------------------------------- /env_v2/requirements.txt: -------------------------------------------------------------------------------- 1 | yapf
2 | tqdm
3 | ninja
4 | gin-config
5 | ipykernel
6 | pytest
7 |
8 | gdown # for downloading checkpoints
9 |
10 | # ARKitScenes
11 | pandas
12 | scipy # for pose interpolation
13 |
14 | # mask3d
15 | albumentations
16 | loguru
17 |
18 | # ovseg
19 | nltk
20 | cython
21 | shapely
22 | timm
23 | h5py
24 | fire
25 | opencv-python
26 | pillow==9.5.0
27 | wandb
28 |
29 | # omnidata + hha + cmx
30 | pytorch-lightning
31 | joblib
32 | easydict
33 |
34 | # grounded sam
35 | fairscale
36 | scikit-image
37 |
38 | # sdfstudio
39 | pyquaternion
40 |
41 | # internimage
42 | mmdet==2.26.0
43 | -------------------------------------------------------------------------------- /env_v2/test_labelmaker_env.sh: -------------------------------------------------------------------------------- 1 | # exit when any command fails
2 | set -e
3 |
4 | env_name=labelmaker
5 | dir_name="$(pwd)/$(dirname "$0")"
6 |
7 | echo $dir_name
8 |
9 | eval "$(conda shell.bash hook)"
10 | conda activate $env_name
11 |
12 | conda_home="$(conda info | grep "active env location : " | cut -d ":" -f2-)"
13 | conda_home="${conda_home#"${conda_home%%[![:space:]]*}"}"
14 |
15 | echo $conda_home
16 |
17 | which python
18 | which pip
19 | which nvcc
20 |
21 | # add cuda compiler to path
22 | export CUDA_HOST_COMPILER="$conda_home/bin/gcc"
23 | export CUDA_PATH="$conda_home"
24 | export CUDA_HOME=$CUDA_PATH
25 | export TORCH_CUDA_ARCH_LIST="6.0 6.1 6.2 7.0 7.2 7.5 8.0 8.6"
26 | export MAX_JOBS=6
27 |
28 | # specify NLTK download location
29 | export NLTK_DATA="$dir_name/../3rdparty/nltk_data"
30 |
31 | # testing
32 | rm -rf $dir_name/../testing/test_scan/intermediate
33 | cd $dir_name/../testing/test_models
34 | pytest test_cmx_00_omnidata.py
35 | pytest test_cmx_01_hha.py
36 | pytest test_cmx_02_cmx.py
37 | pytest test_grounded_sam.py
38 | pytest test_internimage.py
39 | pytest test_mask3d.py
40 | pytest test_ovseg.py
41 | pytest test_omnidata_normal.py
42 | pytest test_consensus.py
43 | rm -rf $dir_name/../testing/test_scan/intermediate
44 | -------------------------------------------------------------------------------- /env_v2/versions.py: -------------------------------------------------------------------------------- 1 | # This code gives a valid set of cuda, pytorch, torchvision and gcc versions.
2 | # It takes the desired cuda version, the desired pytorch version, and the current nvidia driver's cuda version as input.
3 | import argparse
4 | import os
5 | import re
6 | import sys
7 |
8 | from packaging.version import parse
9 |
10 | CUDA_VERSIONS = ["11.3", "11.5", "11.6", "11.7", "11.8",
"12.1"] 11 | PYTORCH_VERSIONS = [ 12 | "1.10.0", "1.10.1", "1.10.2", "1.11.0", "1.12.0", "1.12.1", "1.13.0", 13 | "1.13.1", "2.0.0", "2.0.1", "2.1.0" 14 | ] 15 | CUDA_PYTORCH_COMPATIBILITY = { 16 | "11.3": [ 17 | "1.10.0", # need python 3.9 18 | "1.10.1", # does not have mmcv-full 19 | "1.10.2", # does not have mmcv-full 20 | "1.11.0", 21 | "1.12.0", 22 | "1.12.1", # does not have mmcv-full 23 | ], 24 | "11.5": ["1.11.0"], 25 | "11.6": [ 26 | "1.12.0", # does not have mmcv-full 27 | "1.12.1", # does not have mmcv-full 28 | "1.13.0", # mmcv-full no 1.6.2 29 | "1.13.1", # does not have mmcv-full 30 | ], 31 | "11.7": [ 32 | "1.13.0", # does not have mmcv-full 33 | "1.13.1", # does not have mmcv-full 34 | "2.0.0", # does not have mmcv-full 35 | "2.0.1", # does not have mmcv-full 36 | ], 37 | "11.8": [ 38 | "2.0.0", 39 | "2.0.1", 40 | "2.1.0", 41 | ], 42 | "12.1": ["2.1.0"], 43 | } 44 | PTTORCH_TORCHVISION_CORRESPONDENCE = { 45 | "2.1.0": "0.16.0", 46 | "2.0.1": "0.15.2", 47 | "2.0.0": "0.15.0", 48 | "1.13.1": "0.14.1", 49 | "1.13.0": "0.14.0", 50 | "1.12.1": "0.13.1", 51 | "1.12.0": "0.13.0", 52 | "1.11.0": "0.12.0", 53 | "1.10.2": "0.11.3", 54 | "1.10.1": "0.11.2", 55 | "1.10.0": "0.11.0", 56 | } 57 | CUDA_MAX_GCC_VERSION = { 58 | "11.3": "10.4.0", 59 | "11.5": "11.4.0", 60 | "11.6": "11.4.0", 61 | "11.7": "11.4.0", 62 | "11.8": "11.4.0", 63 | "12.1": "12.2.0", 64 | } 65 | CUDA_MIN_GCC_VERSION = "8.5.0" 66 | CONDA_AVAIL_CUDA_MAPPING = { # use the higher version 67 | "11.3": "11.3.1", 68 | "11.5": "11.5.1", 69 | "11.6": "11.6.2", 70 | "11.7": "11.7.1", 71 | "11.8": "11.8.0", 72 | "12.1": "12.1.1", 73 | } 74 | CONDA_AVAIL_GCC_VERSION = [ 75 | "12.2.0", "12.1.0", "11.4.0", "11.3.0", "11.2.0", "11.1.0", "10.4.0", 76 | "10.3.0", "9.5.0", "9.4.0", "8.5.0" 77 | ] 78 | CONDA_AVAIL_OPENBLAS_VERSION = [ 79 | "0.3.21", "0.3.20", "0.3.18", "0.3.17", "0.3.13", "0.3.10", "0.3.6", 80 | "0.3.3", "0.3.2", "0.2.20" 81 | ] 82 | 83 | OPEN3D_URLS = { 84 | "3.6": 85 | "https://github.com/cvg/open3d-manylinux2014/releases/download/0.17.0/open3d_cpu-0.17.0-cp36-cp36m-manylinux_2_17_x86_64.whl", 86 | "3.7": 87 | "https://github.com/cvg/open3d-manylinux2014/releases/download/0.17.0/open3d_cpu-0.17.0-cp37-cp37m-manylinux_2_17_x86_64.whl", 88 | "3.8": 89 | "https://github.com/cvg/open3d-manylinux2014/releases/download/0.17.0/open3d_cpu-0.17.0-cp38-cp38-manylinux_2_17_x86_64.whl", 90 | "3.9": 91 | "https://github.com/cvg/open3d-manylinux2014/releases/download/0.17.0/open3d_cpu-0.17.0-cp39-cp39-manylinux_2_17_x86_64.whl", 92 | "3.10": 93 | "https://github.com/cvg/open3d-manylinux2014/releases/download/0.17.0/open3d_cpu-0.17.0-cp310-cp310-manylinux_2_17_x86_64.whl", 94 | } 95 | 96 | if __name__ == "__main__": 97 | 98 | try: 99 | output_stream = os.popen('nvidia-smi | grep "CUDA Version:"') 100 | driver_cuda_version = parse( 101 | re.search(r"CUDA Version:( )*[0-9]+\.[0-9]", 102 | output_stream.read()).group().split(':')[-1].strip()) 103 | except: 104 | driver_cuda_version = None 105 | 106 | print(f"Found nvidia driver's cuda version: {driver_cuda_version} .") 107 | 108 | parser = argparse.ArgumentParser() 109 | parser.add_argument("--target_cuda_version", type=str) 110 | parser.add_argument("--target_torch_version", type=str) 111 | parser.add_argument("--target_gcc_version", type=str) 112 | args = parser.parse_args() 113 | 114 | print(args) 115 | 116 | # check CUDA 117 | target_cuda_version: str = None 118 | if args.target_cuda_version != 'unset': 119 | try: 120 | parse(args.target_cuda_version) 121 | except: 122 | raise 
ValueError("The cuda version should be in format of x.x !") 123 | 124 | assert args.target_cuda_version in CUDA_VERSIONS, f"The specified cuda version {args.target_cuda_version} is not supported, please use CUDA: {', '.join(CUDA_VERSIONS)}" 125 | 126 | target_cuda_version = args.target_cuda_version 127 | 128 | else: 129 | if driver_cuda_version is None: 130 | raise ValueError( 131 | "No CUDA driver detected on your machine, and no target cuda toolkit specified!" 132 | ) 133 | 134 | for ver in CUDA_VERSIONS[::-1]: 135 | if parse(ver) <= driver_cuda_version: 136 | print(f"CUDA version not specified, using highes possible cuda: {ver}") 137 | target_cuda_version = ver 138 | break 139 | 140 | if target_cuda_version is None: 141 | raise NotImplementedError( 142 | f"The cuda version ({driver_cuda_version}) of this machine is too old!" 143 | ) 144 | 145 | # check pytorch 146 | target_torch_version: str = None 147 | if args.target_torch_version != 'unset': 148 | try: 149 | parse(args.target_torch_version) 150 | except: 151 | raise ValueError("The pytorch version should be in format of x.x !") 152 | 153 | assert args.target_torch_version in PYTORCH_VERSIONS, f"The specified torch version {args.target_torch_version} is not supported, please use PyTorch: {', '.join(PYTORCH_VERSIONS)}" 154 | 155 | assert args.target_torch_version in CUDA_PYTORCH_COMPATIBILITY[ 156 | target_cuda_version], f"The specified torch version {args.target_torch_version} is not supported by the selected version of cuda {target_cuda_version}, please use PyTorch: {', '.join(CUDA_PYTORCH_COMPATIBILITY[target_cuda_version])}" 157 | 158 | target_torch_version = args.target_torch_version 159 | 160 | else: 161 | for ver in PYTORCH_VERSIONS[::-1]: 162 | if ver in CUDA_PYTORCH_COMPATIBILITY[target_cuda_version]: 163 | print(f"PyTorch version not specified, using highes possible: {ver}") 164 | target_torch_version = ver 165 | break 166 | 167 | # check gcc 168 | target_gcc_version: str = None 169 | if args.target_gcc_version != 'unset': 170 | try: 171 | parse(args.target_gcc_version) 172 | except: 173 | raise ValueError("The GCC version should be in format of x.x !") 174 | 175 | assert parse(args.target_gcc_version) >= parse( 176 | CUDA_MIN_GCC_VERSION 177 | ), f"The target GCC compiler version {args.target_gcc_version} should be higher than {CUDA_MIN_GCC_VERSION}" 178 | 179 | assert parse(args.target_gcc_version) <= parse( 180 | CUDA_MAX_GCC_VERSION[target_cuda_version] 181 | ), f"The target GCC compiler version {args.target_gcc_version} should be lower than {CUDA_MAX_GCC_VERSION[target_cuda_version]}" 182 | 183 | assert args.target_gcc_version in CONDA_AVAIL_GCC_VERSION, f"The target GCC compiler version {args.target_gcc_version} should be one of {CONDA_AVAIL_GCC_VERSION}" 184 | 185 | target_gcc_version = args.target_gcc_version 186 | 187 | else: 188 | # use the highest possible gcc compiler 189 | target_gcc_version = CUDA_MAX_GCC_VERSION[target_cuda_version] 190 | 191 | target_torchvision_version = PTTORCH_TORCHVISION_CORRESPONDENCE[ 192 | target_torch_version] 193 | 194 | # detect python version 195 | python_version = str(sys.version_info.major) + '.' 
+ str( 196 | sys.version_info.minor) 197 | target_open3d_url = OPEN3D_URLS[python_version] 198 | 199 | with open(os.path.join(os.path.dirname(__file__), 'INSTALLED_VERSIONS.sh'), 200 | 'w') as f: 201 | f.write( 202 | f'export INSTALLED_CUDA_VERSION={CONDA_AVAIL_CUDA_MAPPING[target_cuda_version]}\n' 203 | ) 204 | f.write( 205 | f'export INSTALLED_CUDA_ABBREV={"cu" + "".join(target_cuda_version.split("."))}\n' 206 | ) 207 | f.write(f'export INSTALLED_PYTORCH_VERSION={target_torch_version}\n') 208 | f.write(f'export INSTALLED_GCC_VERSION={target_gcc_version}\n') 209 | f.write( 210 | f'export INSTALLED_TORCHVISION_VERSION={target_torchvision_version}\n') 211 | f.write(f'export INSTALLED_OPEN3D_URL={target_open3d_url}\n') 212 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: scannetter 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=conda_forge 7 | - _openmp_mutex=4.5=2_gnu 8 | - appdirs=1.4.4=pyh9f0ad1d_0 9 | - brotlipy=0.7.0=py38h0a891b7_1005 10 | - bzip2=1.0.8=h7f98852_4 11 | - ca-certificates=2022.12.7=ha878542_0 12 | - certifi=2022.12.7=pyhd8ed1ab_0 13 | - cffi=1.15.1=py38h4a40e3a_3 14 | - charset-normalizer=2.1.1=pyhd8ed1ab_0 15 | - colorama=0.4.6=pyhd8ed1ab_0 16 | - cryptography=39.0.1=py38h3d167d9_0 17 | - freetype=2.12.1=hca18f0e_1 18 | - idna=3.4=pyhd8ed1ab_0 19 | - jpeg=9e=h166bdaf_2 20 | - lcms2=2.14=hfd0df8a_1 21 | - ld_impl_linux-64=2.40=h41732ed_0 22 | - lerc=4.0.0=h27087fc_0 23 | - libblas=3.9.0=16_linux64_openblas 24 | - libcblas=3.9.0=16_linux64_openblas 25 | - libdeflate=1.17=h0b41bf4_0 26 | - libffi=3.4.2=h7f98852_5 27 | - libgcc-ng=12.2.0=h65d4601_19 28 | - libgfortran-ng=12.2.0=h69a702a_19 29 | - libgfortran5=12.2.0=h337968e_19 30 | - libgomp=12.2.0=h65d4601_19 31 | - liblapack=3.9.0=16_linux64_openblas 32 | - libnsl=2.0.0=h7f98852_0 33 | - libopenblas=0.3.21=pthreads_h78a6416_3 34 | - libpng=1.6.39=h753d276_0 35 | - libsqlite=3.40.0=h753d276_0 36 | - libstdcxx-ng=12.2.0=h46fd767_19 37 | - libtiff=4.5.0=h6adf6a1_2 38 | - libuuid=2.32.1=h7f98852_1000 39 | - libwebp-base=1.2.4=h166bdaf_0 40 | - libxcb=1.13=h7f98852_1004 41 | - libzlib=1.2.13=h166bdaf_4 42 | - ncurses=6.3=h27087fc_1 43 | - numpy=1.24.2=py38h10c12cc_0 44 | - openjpeg=2.5.0=hfec8fc6_2 45 | - openssl=3.0.8=h0b41bf4_0 46 | - packaging=23.0=pyhd8ed1ab_0 47 | - pillow=9.4.0=py38hde6dc18_1 48 | - pip=23.0=pyhd8ed1ab_0 49 | - pooch=1.6.0=pyhd8ed1ab_0 50 | - pthread-stubs=0.4=h36c2ea0_1001 51 | - pycparser=2.21=pyhd8ed1ab_0 52 | - pyopenssl=23.0.0=pyhd8ed1ab_0 53 | - pyquaternion=0.9.9=pyhd8ed1ab_1 54 | - pysocks=1.7.1=pyha2e5f31_6 55 | - python=3.8.16=he550d4f_1_cpython 56 | - python_abi=3.8=3_cp38 57 | - readline=8.1.2=h0f457ee_0 58 | - requests=2.28.2=pyhd8ed1ab_0 59 | - setuptools=67.1.0=pyhd8ed1ab_0 60 | - tk=8.6.12=h27826a3_0 61 | - tqdm=4.64.1=pyhd8ed1ab_0 62 | - urllib3=1.26.14=pyhd8ed1ab_0 63 | - wheel=0.38.4=pyhd8ed1ab_0 64 | - xorg-libxau=1.0.9=h7f98852_0 65 | - xorg-libxdmcp=1.1.3=h7f98852_0 66 | - xz=5.2.6=h166bdaf_0 67 | - zstd=1.5.2=h3eb15da_6 68 | - pip: 69 | - addict==2.4.0 70 | - aiohttp==3.8.4 71 | - aiosignal==1.3.1 72 | - anyio==3.6.2 73 | - argon2-cffi==21.3.0 74 | - argon2-cffi-bindings==21.2.0 75 | - arrow==1.2.3 76 | - asttokens==2.2.1 77 | - async-timeout==4.0.2 78 | - attrs==22.2.0 79 | - backcall==0.2.0 80 | - beautifulsoup4==4.11.2 81 | - bleach==6.0.0 82 | - click==8.1.3 83 | - coloredlogs==15.0.1 84 | - 
comm==0.1.2 85 | - configargparse==1.5.3 86 | - contourpy==1.0.7 87 | - cycler==0.11.0 88 | - dash==2.8.1 89 | - dash-core-components==2.0.0 90 | - dash-html-components==2.0.0 91 | - dash-table==5.0.0 92 | - debugpy==1.6.6 93 | - decorator==5.1.1 94 | - defusedxml==0.7.1 95 | - docker-pycreds==0.4.0 96 | - executing==1.2.0 97 | - fastjsonschema==2.16.3 98 | - filelock==3.9.0 99 | - flask==2.2.3 100 | - fonttools==4.39.0 101 | - fqdn==1.5.1 102 | - frozenlist==1.3.3 103 | - fsspec==2023.3.0 104 | - gdown==4.6.4 105 | - gitdb==4.0.10 106 | - gitpython==3.1.31 107 | - h5py==3.8.0 108 | - humanfriendly==10.0 109 | - imageio==2.26.0 110 | - importlib-metadata==6.0.0 111 | - importlib-resources==5.12.0 112 | - ipykernel==6.21.3 113 | - ipython==8.11.0 114 | - ipython-genutils==0.2.0 115 | - ipywidgets==8.0.4 116 | - isoduration==20.11.0 117 | - itsdangerous==2.1.2 118 | - jedi==0.18.2 119 | - jinja2==3.1.2 120 | - joblib==1.2.0 121 | - jsonpointer==2.3 122 | - jsonschema==4.17.3 123 | - jupyter==1.0.0 124 | - jupyter-client==8.0.3 125 | - jupyter-console==6.6.3 126 | - jupyter-core==5.2.0 127 | - jupyter-events==0.6.3 128 | - jupyter-server==2.4.0 129 | - jupyter-server-terminals==0.4.4 130 | - jupyterlab-pygments==0.2.2 131 | - jupyterlab-widgets==3.0.5 132 | - kiwisolver==1.4.4 133 | - kornia==0.6.10 134 | - lazy-loader==0.1 135 | - lightning-utilities==0.8.0 136 | - markupsafe==2.1.2 137 | - matplotlib==3.7.1 138 | - matplotlib-inline==0.1.6 139 | - mistune==2.0.5 140 | - multidict==6.0.4 141 | - nbclassic==0.5.3 142 | - nbclient==0.7.2 143 | - nbconvert==7.2.9 144 | - nbformat==5.5.0 145 | - nest-asyncio==1.5.6 146 | - networkx==3.0 147 | - ninja==1.10.2.3 148 | - notebook==6.5.3 149 | - notebook-shim==0.2.2 150 | - nvidia-cublas-cu11==11.10.3.66 151 | - nvidia-cuda-nvrtc-cu11==11.7.99 152 | - nvidia-cuda-runtime-cu11==11.7.99 153 | - nvidia-cudnn-cu11==8.5.0.96 154 | - open3d==0.16.0 155 | - opencv-python==4.7.0.72 156 | - pandas==1.5.3 157 | - pandocfilters==1.5.0 158 | - parso==0.8.3 159 | - pathtools==0.1.2 160 | - pexpect==4.8.0 161 | - pickleshare==0.7.5 162 | - pkgutil-resolve-name==1.3.10 163 | - platformdirs==3.1.0 164 | - plotly==5.13.1 165 | - prometheus-client==0.16.0 166 | - prompt-toolkit==3.0.38 167 | - protobuf==4.22.1 168 | - psutil==5.9.4 169 | - ptyprocess==0.7.0 170 | - pure-eval==0.2.2 171 | - pycolmap==0.3.0 172 | - pygments==2.14.0 173 | - pyparsing==3.0.9 174 | - pyrsistent==0.19.3 175 | - python-dateutil==2.8.2 176 | - python-json-logger==2.0.7 177 | - pytorch-lightning==1.9.4 178 | - pytz==2022.7.1 179 | - pywavelets==1.4.1 180 | - pyyaml==6.0 181 | - pyzmq==25.0.0 182 | - qtconsole==5.4.0 183 | - qtpy==2.3.0 184 | - rfc3339-validator==0.1.4 185 | - rfc3986-validator==0.1.1 186 | - scikit-image==0.20.0 187 | - scikit-learn==1.2.2 188 | - scipy==1.9.1 189 | - send2trash==1.8.0 190 | - sentry-sdk==1.16.0 191 | - setproctitle==1.3.2 192 | - six==1.16.0 193 | - smmap==5.0.0 194 | - sniffio==1.3.0 195 | - soupsieve==2.4 196 | - stack-data==0.6.2 197 | - tenacity==8.2.2 198 | - terminado==0.17.1 199 | - threadpoolctl==3.1.0 200 | - tifffile==2023.2.28 201 | - tinycss2==1.2.1 202 | - torch==1.13.1 203 | - torchmetrics==0.11.3 204 | - torchvision==0.14.1 205 | - tornado==6.2 206 | - traitlets==5.9.0 207 | - trimesh==3.11.2 208 | - typing-extensions==4.5.0 209 | - uri-template==1.2.0 210 | - wandb==0.13.11 211 | - wcwidth==0.2.6 212 | - webcolors==1.12 213 | - webencodings==0.5.1 214 | - websocket-client==1.5.1 215 | - werkzeug==2.2.3 216 | - widgetsnbextension==4.0.5 217 | 
- yarl==1.8.2 218 | - zipp==3.15.0 219 | prefix: /home/weders/anaconda3/envs/scannetter 220 | -------------------------------------------------------------------------------- /labelmaker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/LabelMaker/44ff92d42ae9b0579f016bb7bc5cd4bd09a613a3/labelmaker/__init__.py -------------------------------------------------------------------------------- /labelmaker/consensus.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import logging 4 | from tqdm import tqdm 5 | import cv2 6 | import numpy as np 7 | import shutil 8 | from joblib import Parallel, delayed 9 | 10 | from labelmaker.label_mappings import LabelMatcher 11 | 12 | # clean up imports 13 | import gin 14 | from typing import Union 15 | from pathlib import Path 16 | 17 | logging.basicConfig(level="INFO") 18 | log = logging.getLogger('Segmentation Consensus') 19 | 20 | 21 | class PredictorVoting: 22 | 23 | def __init__(self, output_space='wn199-merged-v2'): 24 | #assert output_space == 'wn199' 25 | matcher_ade150 = LabelMatcher('ade20k', output_space) 26 | matcher_nyu40 = LabelMatcher('nyu40id', output_space) 27 | matcher_wn199 = LabelMatcher('wn199', output_space) 28 | matcher_scannet = LabelMatcher('id', output_space) 29 | self.output_space = output_space 30 | # build lookup tables for predictor voting 31 | # some class spaces vote for multiple options in the wordnet output space 32 | self.output_size = max(matcher_ade150.right_ids) + 1 33 | output_ids = np.arange(self.output_size) 34 | self.votes_from_ade150 = np.zeros((150, self.output_size), dtype=np.uint8) 35 | for ade150_id in range(150): 36 | multihot_matches = matcher_ade150.match( 37 | ade150_id * np.ones_like(output_ids), output_ids) 38 | multihot_matches[multihot_matches == -1] = 0 39 | multihot_matches[multihot_matches == -2] = 0 40 | self.votes_from_ade150[ade150_id] = multihot_matches 41 | 42 | self.votes_from_nyu40 = np.zeros((41, self.output_size), dtype=np.uint8) 43 | for nyu40_id in range(1, 41): 44 | multihot_matches = matcher_nyu40.match( 45 | nyu40_id * np.ones_like(output_ids), output_ids) 46 | multihot_matches[multihot_matches == -1] = 0 47 | multihot_matches[multihot_matches == -2] = 0 48 | self.votes_from_nyu40[nyu40_id] = multihot_matches 49 | 50 | self.votes_from_wn199 = np.zeros((200, self.output_size), dtype=np.uint8) 51 | for wn199_id in range(1, 189): 52 | multihot_matches = matcher_wn199.match( 53 | wn199_id * np.ones_like(output_ids), output_ids) 54 | multihot_matches[multihot_matches == -1] = 0 55 | multihot_matches[multihot_matches == -2] = 0 56 | self.votes_from_wn199[wn199_id] = multihot_matches 57 | 58 | scannet_dimensionality = max(matcher_scannet.left_ids) + 1 59 | self.votes_from_scannet = np.zeros( 60 | (scannet_dimensionality, self.output_size), dtype=np.uint8) 61 | for scannet_id in range(scannet_dimensionality): 62 | multihot_matches = matcher_scannet.match( 63 | scannet_id * np.ones_like(output_ids), output_ids) 64 | multihot_matches[multihot_matches == -1] = 0 65 | multihot_matches[multihot_matches == -2] = 0 66 | self.votes_from_scannet[scannet_id] = multihot_matches 67 | 68 | def voting(self, 69 | ade20k_predictions=[], 70 | nyu40_predictions=[], 71 | wn199_predictions=[], 72 | scannet_predictions=[]): 73 | """Voting scheme for combining multiple segmentation predictors. 
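Each input prediction votes for every output-space class its label can map to (via the precomputed multi-hot lookup tables); the class with the most votes wins.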
74 |
75 |     Args:
76 |       ade20k_predictions (list): list of ADE20K predictions
77 |       nyu40_predictions (list): list of NYU40 predictions
78 |       wn199_predictions (list): list of WordNet-199 predictions
79 |       scannet_predictions (list): list of ScanNet predictions
80 |
81 |     Returns:
82 |       tuple(np.ndarray, np.ndarray): per-pixel number of votes for the winning class, and the consensus prediction in the output space
83 |     """
84 |     shape = None
85 |     if len(ade20k_predictions) > 0:
86 |       shape = ade20k_predictions[0].shape[:2]
87 |     elif len(nyu40_predictions) > 0:
88 |       shape = nyu40_predictions[0].shape[:2]
89 |     elif len(wn199_predictions) > 0:
90 |       shape = wn199_predictions[0].shape[:2]
91 |     elif len(scannet_predictions) > 0:
92 |       shape = scannet_predictions[0].shape[:2]
93 |
94 |     # build consensus prediction
95 |     # first, each prediction votes for classes in the output space
96 |     votes = np.zeros((shape[0], shape[1], self.output_size), dtype=np.uint8)
97 |     for pred in wn199_predictions:
98 |       vote = self.votes_from_wn199[pred]
99 |       vote[pred == -1] = 0
100 |       votes += vote
101 |     for pred in ade20k_predictions:
102 |       votes += self.votes_from_ade150[pred]
103 |     for pred in nyu40_predictions:
104 |       votes += self.votes_from_nyu40[pred]
105 |
106 |     for pred in scannet_predictions:
107 |       votes += self.votes_from_scannet[pred]
108 |
109 |     pred_vote = np.argmax(votes, axis=2)
110 |     n_votes = votes[np.arange(shape[0])[:, None],
111 |                     np.arange(shape[1]), pred_vote]
112 |     #n_votes = np.amax(votes, axis=2)
113 |     # # fastest check for ambiguous prediction: take the argmax in reverse order
114 |     # alt_pred = (self.output_size - 1) - np.argmax(votes[:, :, ::-1],
115 |     #                                               axis=2)
116 |     # pred_vote[pred_vote != alt_pred] = -1
117 |     return n_votes, pred_vote
118 |
119 |
120 | VALID_LABEL_SPACES = ['ade20k', 'nyu40', 'scannet200', 'wordnet', 'scannet']
121 |
122 |
123 | def consensus(k, folders, output_dir, min_votes):
124 |
125 |   votebox = PredictorVoting(output_space='wn199-merged-v2')
126 |
127 |   predictions = {label_space: [] for label_space in VALID_LABEL_SPACES}
128 |
129 |   for folder in folders:
130 |     assert folder.exists() and folder.is_dir()
131 |
132 |     label_space = folder.name.split('_')[0]
133 |     pred = cv2.imread(str(folder / f'{k}.png'), cv2.IMREAD_UNCHANGED)
134 |     predictions[label_space].append(pred.copy())
135 |
136 |   n_votes, pred_vote = votebox.voting(
137 |       ade20k_predictions=predictions['ade20k'],
138 |       nyu40_predictions=predictions['nyu40'],
139 |       wn199_predictions=predictions['wordnet'],
140 |       scannet_predictions=predictions['scannet200']
141 |   )  # double even without flipping
142 |
143 |   pred_vote[n_votes < min_votes] = 0
144 |   pred_vote[pred_vote == -1] = 0
145 |
146 |   cv2.imwrite(str(output_dir / f'{k}.png'), pred_vote)
147 |
148 |
149 | # this is needed for parallel execution
150 | def wrapper_consensus(k, input_folders_str, output_dir_str, min_votes):
151 |   input_folders = [Path(s) for s in input_folders_str]
152 |   output_dir = Path(output_dir_str)
153 |   consensus(k, input_folders, output_dir, min_votes)
154 |   return 1
155 |
156 |
157 | @gin.configurable
158 | def run(scene_dir: Union[str, Path],
159 |         output_folder: Union[str, Path],
160 |         n_jobs=-1,
161 |         min_votes=2):
162 |
163 |   scene_dir = Path(scene_dir)
164 |   output_folder = Path(output_folder)
165 |
166 |   assert scene_dir.exists() and scene_dir.is_dir()
167 |
168 |   output_dir = scene_dir / output_folder
169 |   # remove any previous output directory, then create a fresh one
170 |   shutil.rmtree(output_dir, ignore_errors=True)
171 |   os.makedirs(str(output_dir), exist_ok=False)
172 |
173 |   log.info('[consensus] loading model predictions')
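  # gather per-model prediction folders; names follow $labelspace_$model_$version,
  # so the label space is read from the prefix before the first underscore
174 |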
input_folders = [
175 |       scene_dir / 'intermediate' / folder
176 |       for folder in os.listdir(scene_dir / 'intermediate')
177 |       if folder.split('_')[0] in VALID_LABEL_SPACES
178 |   ]
179 |
180 |   # assert that all folders have the same number of files
181 |   n_files = None
182 |   for folder in input_folders:
183 |     files = [
184 |         f for f in os.listdir(folder)
185 |         if f.endswith('.png')
186 |     ]
187 |     if n_files is None:
188 |       n_files = len(files)
189 |     else:
190 |       assert n_files == len(
191 |           files
192 |       ), f'Number of files in {folder} does not match: {n_files} vs. {len(files)}'
193 |
194 |   keys = sorted([s.stem for s in (scene_dir / 'color').iterdir()])
195 |
196 |   input_folders_str = [str(f) for f in input_folders]
197 |   output_dir_str = str(output_dir)
198 |
199 |   # Using Parallel to run the function in parallel
200 |   results = Parallel(n_jobs=n_jobs)(delayed(wrapper_consensus)(
201 |       k, input_folders_str, output_dir_str, min_votes) for k in tqdm(keys))
202 |
203 |
204 | def arg_parser():
205 |   parser = argparse.ArgumentParser(description='Run consensus segmentation')
206 |   parser.add_argument(
207 |       '--workspace',
208 |       type=str,
209 |       required=True,
210 |       help='Path to workspace directory. There should be a "color" folder.',
211 |   )
212 |   parser.add_argument(
213 |       '--output',
214 |       type=str,
215 |       default='intermediate/consensus',
216 |       help=
217 |       'Name of the output directory inside the workspace directory "intermediate". Has to follow the pattern $labelspace_$model_$version',
218 |   )
219 |   parser.add_argument("--n_jobs", type=int, default=-1)
220 |   parser.add_argument('--config', help='Name of config file')
221 |
222 |   return parser.parse_args()
223 |
224 |
225 | if __name__ == '__main__':
226 |   args = arg_parser()
227 |   if args.config is not None:
228 |     gin.parse_config_file(args.config)
229 |   run(scene_dir=args.workspace, output_folder=args.output, n_jobs=args.n_jobs)
230 | -------------------------------------------------------------------------------- /labelmaker/lifting_3d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/LabelMaker/44ff92d42ae9b0579f016bb7bc5cd4bd09a613a3/labelmaker/lifting_3d/__init__.py -------------------------------------------------------------------------------- /labelmaker/lifting_3d/lifting.sh: -------------------------------------------------------------------------------- 1 | # exit when any command fails
2 | set -e
3 |
4 | dir_name="$(pwd)/$(dirname "$0")"
5 | repo_dir="$dir_name/../.."
6 |
7 | # activate environment
8 | env_name=sdfstudio
9 | eval "$(conda shell.bash hook)"
10 | conda activate $env_name
11 |
12 | conda_home="$(conda info | grep "active env location : " | cut -d ":" -f2-)"
13 | conda_home="${conda_home#"${conda_home%%[![:space:]]*}"}"
14 |
15 | echo $conda_home
16 |
17 | which python
18 | which pip
19 | which nvcc
20 |
21 | # add cuda compiler to path
22 | export CUDA_HOST_COMPILER="$conda_home/bin/gcc"
23 | export CUDA_PATH="$conda_home"
24 | export CUDA_HOME=$CUDA_PATH
25 | export LD_LIBRARY_PATH=$conda_home/lib:$LD_LIBRARY_PATH
26 | export LIBRARY_PATH="$conda_home/lib/stubs:$LIBRARY_PATH"
27 | export TCNN_CUDA_ARCHITECTURES=75
28 |
29 | wandb online
30 |
31 | # get scene folder
32 | if [ -z "$1" ]; then
33 | echo "Workspace directory not specified!!!"
34 | exit 1
35 | else
36 | WORKSPACE=$1
37 | fi
38 | echo "Workspace is $WORKSPACE"
39 | 
40 | 
41 | 
42 | # preprocessing
43 | python "$repo_dir"/labelmaker/lifting_3d/preprocessing.py \
44 | --sampling 1 \
45 | --workspace $WORKSPACE
46 | 
47 | # train
48 | method=neus-facto
49 | experiment_name=sdfstudio_train
50 | output_dir=${WORKSPACE}/intermediate/${experiment_name}
51 | preprocess_data_dir=${WORKSPACE}/intermediate/sdfstudio_preprocessing
52 | 
53 | export WANDB_MODE=online
54 | wandb online
55 | 
56 | # about 26G gpu memory, 1207.58s
57 | # semantic loss is enabled (semantic-loss-mult 0.1, include-semantics True); mono priors (normal, depth) are loaded (include-mono-prior True), but only the normal prior contributes (mono-normal-loss-mult 0.02, mono-depth-loss-mult 0.0)
58 | ns-train ${method} \
59 | --experiment-name $experiment_name \
60 | --pipeline.model.sdf-field.use-grid-feature True \
61 | --pipeline.model.sdf-field.hidden-dim 256 \
62 | --pipeline.model.sdf-field.num-layers 2 \
63 | --pipeline.model.sdf-field.num-layers-color 2 \
64 | --pipeline.model.sdf-field.semantic-num-layers 4 \
65 | --pipeline.model.sdf-field.semantic_layer_width 512 \
66 | --pipeline.model.sdf-field.use-appearance-embedding False \
67 | --pipeline.model.sdf-field.geometric-init True \
68 | --pipeline.model.sdf-field.inside-outside True \
69 | --pipeline.model.sdf-field.bias 0.8 \
70 | --pipeline.model.sdf-field.beta-init 0.3 \
71 | --pipeline.model.sensor-depth-l1-loss-mult 10.0 \
72 | --pipeline.model.sensor-depth-sdf-loss-mult 6000.0 \
73 | --pipeline.model.sensor-depth-freespace-loss-mult 10.0 \
74 | --pipeline.model.sensor-depth-truncation 0.015 \
75 | --pipeline.model.mono-normal-loss-mult 0.02 \
76 | --pipeline.model.mono-depth-loss-mult 0.00 \
77 | --pipeline.model.semantic-loss-mult 0.1 \
78 | --pipeline.model.semantic-patch-loss-mult 0.00 \
79 | --pipeline.model.semantic-patch-loss-min-step 1000 \
80 | --pipeline.model.semantic-ignore-label 0 \
81 | --trainer.steps-per-eval-image 1000 \
82 | --trainer.steps-per-eval-all-images 100000 \
83 | --trainer.steps-per-save 10000 \
84 | --trainer.max-num-iterations 20001 \
85 | --pipeline.datamanager.train-num-rays-per-batch 2048 \
86 | --pipeline.model.eikonal-loss-mult 0.1 \
87 | --pipeline.model.background-model none \
88 | --output-dir ${WORKSPACE}/intermediate \
89 | --vis wandb \
90 | sdfstudio-data \
91 | --data ${preprocess_data_dir} \
92 | --include-sensor-depth True \
93 | --include-semantics True \
94 | --include-mono-prior True
95 | 
96 | # the job below may sometimes OOM, so we can wait here until all GPU memory is free
97 | # sleep 60
98 | 
99 | # locate results
100 | results_dir=${output_dir}/$(ls $output_dir)
101 | train_id=$(ls $results_dir)
102 | 
103 | config=$results_dir/$train_id/config.yml
104 | 
105 | # extract mesh
106 | ns-extract-mesh \
107 | --load-config $config \
108 | --create-visibility-mask True \
109 | --output-path $results_dir/$train_id/mesh_visible.ply \
110 | --resolution 512
111 | # sleep 60
112 | 
113 | # render class labels
114 | render_dir=${WORKSPACE}/neus_lifted
115 | mkdir -p $render_dir
116 | ns-render --camera-path-filename $preprocess_data_dir/camera_path.json \
117 | --traj filename \
118 | --output-format images \
119 | --rendered-output-names semantics \
120 | --output-path $render_dir \
121 | --load-config $config
122 | 
--------------------------------------------------------------------------------
/labelmaker/lifting_3d/lifting_points.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import os
4 | from pathlib import Path
5 | from typing import Union
6 | 
7 | import cv2
8 | import gin
9 | import numpy as np
10 | import open3d as o3d
11 | from PIL import Image
12 | from tqdm import tqdm
13 | 
14 | from labelmaker.label_data import get_wordnet
15 | 
16 | logging.basicConfig(level="INFO")
17 | log = logging.getLogger('3D Point Lifting')
18 | 
19 | 
20 | def project_pointcloud(points, pose, intrinsics):
21 | 
22 | points_h = np.hstack((points, np.ones_like(points[:, 0:1])))
23 | points_c = np.linalg.inv(pose) @ points_h.T
24 | points_c = points_c.T
25 | 
26 | if intrinsics.shape[-1] == 3:
27 | intrinsics = np.hstack((intrinsics, np.zeros((3, 1))))
28 | intrinsics = np.vstack((intrinsics, np.zeros((1, 4))))
29 | intrinsics[-1, -1] = 1.
30 | 
31 | points_p = intrinsics @ points_c.T
32 | points_p = points_p.T[:, :3]
33 | 
34 | points_p[:, 0] /= (points_p[:, -1] + 1.e-6)
35 | points_p[:, 1] /= (points_p[:, -1] + 1.e-6)
36 | 
37 | return points_p
38 | 
39 | 
40 | @gin.configurable
41 | def main(
42 | scene_dir: Union[str, Path],
43 | label_folder: Union[str, Path],
44 | output_file: Union[str, Path],
45 | output_mesh: Union[str, Path],
46 | maximum_label: int,
47 | ):
48 | scene_dir = Path(scene_dir)
49 | label_folder = Path(label_folder)
50 | output_file = Path(output_file)
51 | output_mesh = Path(output_mesh)
52 | 
53 | # check if scene_dir exists
54 | assert scene_dir.exists() and scene_dir.is_dir()
55 | 
56 | # define all paths
57 | input_color_dir = scene_dir / 'color'
58 | assert input_color_dir.exists() and input_color_dir.is_dir()
59 | 
60 | input_depth_dir = scene_dir / 'depth'
61 | assert input_depth_dir.exists() and input_depth_dir.is_dir()
62 | 
63 | input_intrinsic_dir = scene_dir / 'intrinsic'
64 | assert input_intrinsic_dir.exists() and input_intrinsic_dir.is_dir()
65 | 
66 | input_pose_dir = scene_dir / 'pose'
67 | assert input_pose_dir.exists() and input_pose_dir.is_dir()
68 | 
69 | input_label_dir = scene_dir / label_folder
70 | assert input_label_dir.exists() and input_label_dir.is_dir()
71 | 
72 | input_mesh_path = scene_dir / 'mesh.ply'
73 | assert input_mesh_path.exists() and input_mesh_path.is_file()
74 | 
75 | log.info('Processing {} using labels from {}'.format(
76 | str(scene_dir),
77 | str(input_label_dir),
78 | ))
79 | 
80 | # load mesh and extract vertices
81 | mesh = o3d.io.read_triangle_mesh(str(input_mesh_path))
82 | vertices = np.asarray(mesh.vertices)
83 | 
84 | # init label container
85 | labels_3d = np.zeros((vertices.shape[0], maximum_label + 1))
86 | 
87 | files = input_label_dir.glob('*.png')
88 | files = sorted(files, key=lambda x: int(x.stem.split('.')[0]))
89 | resize_image = False
90 | 
91 | for idx, file in tqdm(enumerate(files), total=len(files)):
92 | 
93 | frame_key = file.stem
94 | 
95 | intrinsics = np.loadtxt(str(input_intrinsic_dir / f'{frame_key}.txt'))
96 | image = np.asarray(Image.open(str(input_color_dir /
97 | f'{frame_key}.jpg'))).astype(np.uint8)
98 | depth = np.asarray(Image.open(str(
99 | input_depth_dir / f'{frame_key}.png'))).astype(np.float32) / 1000.
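# editorial note: depth PNGs are assumed to store depth in millimeters, so the
# division by 1000. above yields meters, matching the projected z values that
# are compared against a 0.1 m threshold below.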
100 | labels = np.asarray(Image.open(str(file)))
101 | 
102 | max_label = np.max(labels)
103 | if max_label > labels_3d.shape[-1] - 1:
104 | raise ValueError(
105 | f'Label {max_label} is outside the label range [0, {labels_3d.shape[-1] - 1}]'
106 | )
107 | 
108 | if resize_image:
109 | h, w = depth.shape
110 | image = cv2.resize(image, (w, h))
111 | labels = cv2.resize(labels, (w, h))
112 | else:
113 | h, w, _ = image.shape
114 | depth = cv2.resize(depth, (w, h))
115 | 
116 | pose_file = input_pose_dir / f'{frame_key}.txt'
117 | pose = np.loadtxt(str(pose_file))
118 | 
119 | points_p = project_pointcloud(vertices, pose, intrinsics)
120 | 
121 | xx = points_p[:, 0].astype(int)
122 | yy = points_p[:, 1].astype(int)
123 | zz = points_p[:, 2]
124 | 
125 | valid_mask = (xx >= 0) & (yy >= 0) & (xx < w) & (yy < h)
126 | 
127 | d = depth[yy[valid_mask], xx[valid_mask]]
128 | 
129 | valid_mask[valid_mask] = (zz[valid_mask] > 0) & (np.abs(zz[valid_mask] - d)
130 | <= 0.1)
131 | 
132 | labels_2d = labels[yy[valid_mask], xx[valid_mask]]
133 | labels_3d[valid_mask, labels_2d] += 1
134 | 
135 | # extract labels
136 | labels_3d = np.argmax(labels_3d, axis=-1)
137 | 
138 | # save output
139 | np.savetxt(str(scene_dir / output_file), labels_3d, fmt='%i')
140 | 
141 | # save colored mesh
142 | color_map = np.zeros(shape=(maximum_label + 1, 3), dtype=np.uint8)
143 | for item in get_wordnet():
144 | color_map[item['id']] = item['color']
145 | label_mesh_color = color_map[labels_3d]
146 | 
147 | label_mesh = o3d.geometry.TriangleMesh()
148 | label_mesh.vertices = mesh.vertices
149 | label_mesh.triangles = mesh.triangles
150 | 
151 | label_mesh.vertex_colors = o3d.utility.Vector3dVector(
152 | label_mesh_color.astype(float) / 255)
153 | 
154 | o3d.io.write_triangle_mesh(str(scene_dir / output_mesh), label_mesh)
155 | 
156 | 
157 | def arg_parser():
158 | parser = argparse.ArgumentParser(
159 | description=
160 | 'Project mesh vertices into the 2D label images, aggregate the per-frame labels, and save them as a label txt file'
161 | )
162 | parser.add_argument(
163 | '--workspace',
164 | type=str,
165 | required=True,
166 | help=
167 | 'Path to workspace directory. There should be a "color" folder inside.',
168 | )
169 | parser.add_argument(
170 | '--output',
171 | type=str,
172 | default='labels.txt',
173 | help='Name of the file to save the labels',
174 | )
175 | parser.add_argument(
176 | '--output_mesh',
177 | type=str,
178 | default='point_lifted_mesh.ply',
179 | help='Name of the file to save the colored label mesh',
180 | )
181 | parser.add_argument('--label_folder', default='intermediate/consensus')
182 | parser.add_argument(
183 | '--max_label',
184 | type=int,
185 | default=2000,
186 | help='Max label value',
187 | )
188 | parser.add_argument('--config', help='Name of config file')
189 | return parser.parse_args()
190 | 
191 | 
192 | if __name__ == '__main__':
193 | args = arg_parser()
194 | if args.config is not None:
195 | gin.parse_config_file(args.config)
196 | main(
197 | scene_dir=args.workspace,
198 | label_folder=args.label_folder,
199 | output_file=args.output,
200 | output_mesh=args.output_mesh,
201 | maximum_label=args.max_label,
202 | )
203 | 
--------------------------------------------------------------------------------
/labelmaker/mappings/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvg/LabelMaker/44ff92d42ae9b0579f016bb7bc5cd4bd09a613a3/labelmaker/mappings/__init__.py
--------------------------------------------------------------------------------
/labelmaker/scannet_200_labels.py:
--------------------------------------------------------------------------------
1 | ### ScanNet Benchmark constants ###
2 | VALID_CLASS_IDS_20 = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
3 | 34, 36, 39)
4 | 
5 | CLASS_LABELS_20 = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table',
6 | 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk',
7 | 'curtain', 'refrigerator', 'shower curtain', 'toilet',
8 | 'sink', 'bathtub', 'otherfurniture')
9 | 
10 | SCANNET_COLOR_MAP_20 = {
11 | 0: (0., 0., 0.),
12 | 1: (174., 199., 232.),
13 | 2: (152., 223., 138.),
14 | 3: (31., 119., 180.),
15 | 4: (255., 187., 120.),
16 | 5: (188., 189., 34.),
17 | 6: (140., 86., 75.),
18 | 7: (255., 152., 150.),
19 | 8: (214., 39., 40.),
20 | 9: (197., 176., 213.),
21 | 10: (148., 103., 189.),
22 | 11: (196., 156., 148.),
23 | 12: (23., 190., 207.),
24 | 14: (247., 182., 210.),
25 | 15: (66., 188., 102.),
26 | 16: (219., 219., 141.),
27 | 17: (140., 57., 197.),
28 | 18: (202., 185., 52.),
29 | 19: (51., 176., 203.),
30 | 20: (200., 54., 131.),
31 | 21: (92., 193., 61.),
32 | 22: (78., 71., 183.),
33 | 23: (172., 114., 82.),
34 | 24: (255., 127., 14.),
35 | 25: (91., 163., 138.),
36 | 26: (153., 98., 156.),
37 | 27: (140., 153., 101.),
38 | 28: (158., 218., 229.),
39 | 29: (100., 125., 154.),
40 | 30: (178., 127., 135.),
41 | 32: (146., 111., 194.),
42 | 33: (44., 160., 44.),
43 | 34: (112., 128., 144.),
44 | 35: (96., 207., 209.),
45 | 36: (227., 119., 194.),
46 | 37: (213., 92., 176.),
47 | 38: (94., 106., 211.),
48 | 39: (82., 84., 163.),
49 | 40: (100., 85., 144.),
50 | }
51 | 
52 | ### ScanNet200 Benchmark constants ###
53 | VALID_CLASS_IDS_200 = (
54 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23,
55 | 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45, 46,
56 | 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 62, 63, 64, 65, 66, 67, 68,
57 | 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 84, 86, 87, 88, 89, 90,
58 | 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 110, 112,
59 | 115, 116, 118, 120, 121, 122,
125, 128, 130, 131, 132, 134, 136, 138, 139, 60 | 140, 141, 145, 148, 154, 155, 156, 157, 159, 161, 163, 165, 166, 168, 169, 61 | 170, 177, 180, 185, 188, 191, 193, 195, 202, 208, 213, 214, 221, 229, 230, 62 | 232, 233, 242, 250, 261, 264, 276, 283, 286, 300, 304, 312, 323, 325, 331, 63 | 342, 356, 370, 392, 395, 399, 408, 417, 488, 540, 562, 570, 572, 581, 609, 64 | 748, 776, 1156, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 65 | 1173, 1174, 1175, 1176, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 66 | 1186, 1187, 1188, 1189, 1190, 1191) 67 | 68 | CLASS_LABELS_200 = ( 69 | 'wall', 'chair', 'floor', 'table', 'door', 'couch', 'cabinet', 'shelf', 70 | 'desk', 'office chair', 'bed', 'pillow', 'sink', 'picture', 'window', 71 | 'toilet', 'bookshelf', 'monitor', 'curtain', 'book', 'armchair', 72 | 'coffee table', 'box', 'refrigerator', 'lamp', 'kitchen cabinet', 'towel', 73 | 'clothes', 'tv', 'nightstand', 'counter', 'dresser', 'stool', 'cushion', 74 | 'plant', 'ceiling', 'bathtub', 'end table', 'dining table', 'keyboard', 75 | 'bag', 'backpack', 'toilet paper', 'printer', 'tv stand', 'whiteboard', 76 | 'blanket', 'shower curtain', 'trash can', 'closet', 'stairs', 'microwave', 77 | 'stove', 'shoe', 'computer tower', 'bottle', 'bin', 'ottoman', 'bench', 78 | 'board', 'washing machine', 'mirror', 'copier', 'basket', 'sofa chair', 79 | 'file cabinet', 'fan', 'laptop', 'shower', 'paper', 'person', 80 | 'paper towel dispenser', 'oven', 'blinds', 'rack', 'plate', 'blackboard', 81 | 'piano', 'suitcase', 'rail', 'radiator', 'recycling bin', 'container', 82 | 'wardrobe', 'soap dispenser', 'telephone', 'bucket', 'clock', 'stand', 83 | 'light', 'laundry basket', 'pipe', 'clothes dryer', 'guitar', 84 | 'toilet paper holder', 'seat', 'speaker', 'column', 'bicycle', 'ladder', 85 | 'bathroom stall', 'shower wall', 'cup', 'jacket', 'storage bin', 86 | 'coffee maker', 'dishwasher', 'paper towel roll', 'machine', 'mat', 87 | 'windowsill', 'bar', 'toaster', 'bulletin board', 'ironing board', 88 | 'fireplace', 'soap dish', 'kitchen counter', 'doorframe', 89 | 'toilet paper dispenser', 'mini fridge', 'fire extinguisher', 'ball', 'hat', 90 | 'shower curtain rod', 'water cooler', 'paper cutter', 'tray', 'shower door', 91 | 'pillar', 'ledge', 'toaster oven', 'mouse', 'toilet seat cover dispenser', 92 | 'furniture', 'cart', 'storage container', 'scale', 'tissue box', 93 | 'light switch', 'crate', 'power outlet', 'decoration', 'sign', 'projector', 94 | 'closet door', 'vacuum cleaner', 'candle', 'plunger', 'stuffed animal', 95 | 'headphones', 'dish rack', 'broom', 'guitar case', 'range hood', 'dustpan', 96 | 'hair dryer', 'water bottle', 'handicap bar', 'purse', 'vent', 97 | 'shower floor', 'water pitcher', 'mailbox', 'bowl', 'paper bag', 98 | 'alarm clock', 'music stand', 'projector screen', 'divider', 99 | 'laundry detergent', 'bathroom counter', 'object', 'bathroom vanity', 100 | 'closet wall', 'laundry hamper', 'bathroom stall door', 'ceiling light', 101 | 'trash bin', 'dumbbell', 'stair rail', 'tube', 'bathroom cabinet', 102 | 'cd case', 'closet rod', 'coffee kettle', 'structure', 'shower head', 103 | 'keyboard piano', 'case of water bottles', 'coat rack', 'storage organizer', 104 | 'folded chair', 'fire alarm', 'power strip', 'calendar', 'poster', 105 | 'potted plant', 'luggage', 'mattress') 106 | 107 | SCANNET_COLOR_MAP_200 = { 108 | 0: (0., 0., 0.), 109 | 1: (174., 199., 232.), 110 | 2: (188., 189., 34.), 111 | 3: (152., 223., 138.), 112 | 4: (255., 152., 150.), 113 | 5: (214., 39., 40.), 114 | 
6: (91., 135., 229.), 115 | 7: (31., 119., 180.), 116 | 8: (229., 91., 104.), 117 | 9: (247., 182., 210.), 118 | 10: (91., 229., 110.), 119 | 11: (255., 187., 120.), 120 | 13: (141., 91., 229.), 121 | 14: (112., 128., 144.), 122 | 15: (196., 156., 148.), 123 | 16: (197., 176., 213.), 124 | 17: (44., 160., 44.), 125 | 18: (148., 103., 189.), 126 | 19: (229., 91., 223.), 127 | 21: (219., 219., 141.), 128 | 22: (192., 229., 91.), 129 | 23: (88., 218., 137.), 130 | 24: (58., 98., 137.), 131 | 26: (177., 82., 239.), 132 | 27: (255., 127., 14.), 133 | 28: (237., 204., 37.), 134 | 29: (41., 206., 32.), 135 | 31: (62., 143., 148.), 136 | 32: (34., 14., 130.), 137 | 33: (143., 45., 115.), 138 | 34: (137., 63., 14.), 139 | 35: (23., 190., 207.), 140 | 36: (16., 212., 139.), 141 | 38: (90., 119., 201.), 142 | 39: (125., 30., 141.), 143 | 40: (150., 53., 56.), 144 | 41: (186., 197., 62.), 145 | 42: (227., 119., 194.), 146 | 44: (38., 100., 128.), 147 | 45: (120., 31., 243.), 148 | 46: (154., 59., 103.), 149 | 47: (169., 137., 78.), 150 | 48: (143., 245., 111.), 151 | 49: (37., 230., 205.), 152 | 50: (14., 16., 155.), 153 | 51: (196., 51., 182.), 154 | 52: (237., 80., 38.), 155 | 54: (138., 175., 62.), 156 | 55: (158., 218., 229.), 157 | 56: (38., 96., 167.), 158 | 57: (190., 77., 246.), 159 | 58: (208., 49., 84.), 160 | 59: (208., 193., 72.), 161 | 62: (55., 220., 57.), 162 | 63: (10., 125., 140.), 163 | 64: (76., 38., 202.), 164 | 65: (191., 28., 135.), 165 | 66: (211., 120., 42.), 166 | 67: (118., 174., 76.), 167 | 68: (17., 242., 171.), 168 | 69: (20., 65., 247.), 169 | 70: (208., 61., 222.), 170 | 71: (162., 62., 60.), 171 | 72: (210., 235., 62.), 172 | 73: (45., 152., 72.), 173 | 74: (35., 107., 149.), 174 | 75: (160., 89., 237.), 175 | 76: (227., 56., 125.), 176 | 77: (169., 143., 81.), 177 | 78: (42., 143., 20.), 178 | 79: (25., 160., 151.), 179 | 80: (82., 75., 227.), 180 | 82: (253., 59., 222.), 181 | 84: (240., 130., 89.), 182 | 86: (123., 172., 47.), 183 | 87: (71., 194., 133.), 184 | 88: (24., 94., 205.), 185 | 89: (134., 16., 179.), 186 | 90: (159., 32., 52.), 187 | 93: (213., 208., 88.), 188 | 95: (64., 158., 70.), 189 | 96: (18., 163., 194.), 190 | 97: (65., 29., 153.), 191 | 98: (177., 10., 109.), 192 | 99: (152., 83., 7.), 193 | 100: (83., 175., 30.), 194 | 101: (18., 199., 153.), 195 | 102: (61., 81., 208.), 196 | 103: (213., 85., 216.), 197 | 104: (170., 53., 42.), 198 | 105: (161., 192., 38.), 199 | 106: (23., 241., 91.), 200 | 107: (12., 103., 170.), 201 | 110: (151., 41., 245.), 202 | 112: (133., 51., 80.), 203 | 115: (184., 162., 91.), 204 | 116: (50., 138., 38.), 205 | 118: (31., 237., 236.), 206 | 120: (39., 19., 208.), 207 | 121: (223., 27., 180.), 208 | 122: (254., 141., 85.), 209 | 125: (97., 144., 39.), 210 | 128: (106., 231., 176.), 211 | 130: (12., 61., 162.), 212 | 131: (124., 66., 140.), 213 | 132: (137., 66., 73.), 214 | 134: (250., 253., 26.), 215 | 136: (55., 191., 73.), 216 | 138: (60., 126., 146.), 217 | 139: (153., 108., 234.), 218 | 140: (184., 58., 125.), 219 | 141: (135., 84., 14.), 220 | 145: (139., 248., 91.), 221 | 148: (53., 200., 172.), 222 | 154: (63., 69., 134.), 223 | 155: (190., 75., 186.), 224 | 156: (127., 63., 52.), 225 | 157: (141., 182., 25.), 226 | 159: (56., 144., 89.), 227 | 161: (64., 160., 250.), 228 | 163: (182., 86., 245.), 229 | 165: (139., 18., 53.), 230 | 166: (134., 120., 54.), 231 | 168: (49., 165., 42.), 232 | 169: (51., 128., 133.), 233 | 170: (44., 21., 163.), 234 | 177: (232., 93., 193.), 235 | 180: (176., 102., 54.), 236 | 185: 
(116., 217., 17.), 237 | 188: (54., 209., 150.), 238 | 191: (60., 99., 204.), 239 | 193: (129., 43., 144.), 240 | 195: (252., 100., 106.), 241 | 202: (187., 196., 73.), 242 | 208: (13., 158., 40.), 243 | 213: (52., 122., 152.), 244 | 214: (128., 76., 202.), 245 | 221: (187., 50., 115.), 246 | 229: (180., 141., 71.), 247 | 230: (77., 208., 35.), 248 | 232: (72., 183., 168.), 249 | 233: (97., 99., 203.), 250 | 242: (172., 22., 158.), 251 | 250: (155., 64., 40.), 252 | 261: (118., 159., 30.), 253 | 264: (69., 252., 148.), 254 | 276: (45., 103., 173.), 255 | 283: (111., 38., 149.), 256 | 286: (184., 9., 49.), 257 | 300: (188., 174., 67.), 258 | 304: (53., 206., 53.), 259 | 312: (97., 235., 252.), 260 | 323: (66., 32., 182.), 261 | 325: (236., 114., 195.), 262 | 331: (241., 154., 83.), 263 | 342: (133., 240., 52.), 264 | 356: (16., 205., 144.), 265 | 370: (75., 101., 198.), 266 | 392: (237., 95., 251.), 267 | 395: (191., 52., 49.), 268 | 399: (227., 254., 54.), 269 | 408: (49., 206., 87.), 270 | 417: (48., 113., 150.), 271 | 488: (125., 73., 182.), 272 | 540: (229., 32., 114.), 273 | 562: (158., 119., 28.), 274 | 570: (60., 205., 27.), 275 | 572: (18., 215., 201.), 276 | 581: (79., 76., 153.), 277 | 609: (134., 13., 116.), 278 | 748: (192., 97., 63.), 279 | 776: (108., 163., 18.), 280 | 1156: (95., 220., 156.), 281 | 1163: (98., 141., 208.), 282 | 1164: (144., 19., 193.), 283 | 1165: (166., 36., 57.), 284 | 1166: (212., 202., 34.), 285 | 1167: (23., 206., 34.), 286 | 1168: (91., 211., 236.), 287 | 1169: (79., 55., 137.), 288 | 1170: (182., 19., 117.), 289 | 1171: (134., 76., 14.), 290 | 1172: (87., 185., 28.), 291 | 1173: (82., 224., 187.), 292 | 1174: (92., 110., 214.), 293 | 1175: (168., 80., 171.), 294 | 1176: (197., 63., 51.), 295 | 1178: (175., 199., 77.), 296 | 1179: (62., 180., 98.), 297 | 1180: (8., 91., 150.), 298 | 1181: (77., 15., 130.), 299 | 1182: (154., 65., 96.), 300 | 1183: (197., 152., 11.), 301 | 1184: (59., 155., 45.), 302 | 1185: (12., 147., 145.), 303 | 1186: (54., 35., 219.), 304 | 1187: (210., 73., 181.), 305 | 1188: (221., 124., 77.), 306 | 1189: (149., 214., 66.), 307 | 1190: (72., 185., 134.), 308 | 1191: (42., 94., 198.), 309 | } 310 | 311 | ### For instance segmentation the non-object categories ### 312 | VALID_PANOPTIC_IDS = (1, 3) 313 | 314 | CLASS_LABELS_PANOPTIC = ('wall', 'floor') 315 | -------------------------------------------------------------------------------- /labelmaker/visualization_3d.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import numpy as np 5 | import open3d as o3d 6 | 7 | from labelmaker.label_data import get_nyu40, get_scannet200, get_wordnet, get_ade150 8 | 9 | COLOR_MAPS = { 10 | 'ade20k': get_ade150, 11 | 'scannet200': get_scannet200, 12 | 'nyu40': get_nyu40, 13 | 'wordnet': get_wordnet, 14 | 'consensus': get_wordnet, 15 | "sdfstudio": get_wordnet, 16 | } 17 | 18 | def read_mesh(scene_path): 19 | mesh_path = os.path.join(scene_path, 'mesh.ply') 20 | mesh = o3d.io.read_triangle_mesh(mesh_path) 21 | return mesh 22 | 23 | def save_mesh(mesh, scene_path): 24 | mesh_path = os.path.join(scene_path, 'mesh_colored.ply') 25 | o3d.io.write_triangle_mesh(mesh_path, mesh) 26 | return mesh 27 | 28 | def read_labels(scene_path): 29 | labels = np.loadtxt(os.path.join(scene_path, 'labels.txt')) 30 | return labels 31 | 32 | def colorize_labels(labels, color_map='consensus'): 33 | n = labels.shape[0] 34 | colors = np.zeros((n, 3)) 35 | cmap = COLOR_MAPS[color_map]() 36 | 37 | 
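# editorial note: each get_* color map is assumed to be a list of dicts with
# 'id' and 'color' entries whose list index equals the label id, so
# cmap[int(i)]['color'] below resolves to an (r, g, b) tuple in 0..255.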
for i in np.unique(labels):
38 | colors[labels == i] = cmap[int(i)]['color']
39 | 
40 | colors = colors / 255.
41 | return colors
42 | 
43 | def colorize_mesh(mesh, colors):
44 | mesh.vertex_colors = o3d.utility.Vector3dVector(colors)
45 | return mesh
46 | 
47 | def main(args):
48 | mesh = read_mesh(args.workspace)
49 | os.makedirs(os.path.join(args.workspace, args.output), exist_ok=True)
50 | labels = read_labels(args.workspace)
51 | colors = colorize_labels(labels)
52 | mesh = colorize_mesh(mesh, colors)
53 | save_mesh(mesh, os.path.join(args.workspace, args.output))
54 | 
55 | def arg_parser():
56 | parser = argparse.ArgumentParser(description='Visualize 3D labels as a colored mesh')
57 | parser.add_argument('--workspace', type=str)
58 | parser.add_argument('--output', type=str, default='vis_3d')
59 | return parser.parse_args()
60 | 
61 | if __name__ == '__main__':
62 | args = arg_parser()
63 | main(args)
--------------------------------------------------------------------------------
/models/cmx.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import os
4 | import random
5 | import shutil
6 | import sys
7 | from os.path import abspath, dirname, join
8 | from pathlib import Path
9 | from typing import Union
10 | 
11 | import cv2
12 | import gin
13 | import matplotlib.pyplot as plt
14 | import mmcv
15 | import numpy as np
16 | import torch
17 | import torch.backends.cudnn as cudnn
18 | from mmseg.apis import inference_segmentor, init_segmentor
19 | from mmseg.core import get_classes, get_palette
20 | from tqdm import tqdm
21 | 
22 | sys.path.append(
23 | os.path.join(os.path.dirname(__file__), '../3rdparty',
24 | 'RGBX_Semantic_Segmentation'))
25 | 
26 | from config import config
27 | from dataloader.dataloader import ValPre
28 | from dataloader.RGBXDataset import RGBXDataset
29 | from engine.evaluator import Evaluator
30 | from engine.logger import get_logger
31 | from utils.metric import compute_score, hist_info
32 | from utils.pyt_utils import ensure_dir, link_file, load_model, parse_devices
33 | from utils.visualize import print_iou, show_img
34 | 
35 | from models.builder import EncoderDecoder as segmodel
36 | 
37 | logging.basicConfig(level="INFO")
38 | log = logging.getLogger('CMX Segmentation')
39 | 
40 | 
41 | def setup_seeds(seed):
42 | 
43 | random.seed(seed)
44 | np.random.seed(seed)
45 | torch.manual_seed(seed)
46 | 
47 | cudnn.benchmark = False
48 | cudnn.deterministic = True
49 | 
50 | 
51 | def load_cmx(device: Union[str, torch.device] = 'cuda:0'):
52 | device = str(device)
53 | assert device[:4] == 'cuda'
54 | try:
55 | device_id = str(int(device.split(':')[-1]))
56 | except ValueError:
57 | raise ValueError("device should be a cuda device in the format 'cuda:<id>'.")
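# editorial example (hypothetical call): load_cmx(device='cuda:0') parses
# device_id='0'; splitting on ':' also covers multi-digit ids such as 'cuda:10'.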
58 | 
59 | log.info('loading model')
60 | checkpoint_file = abspath(
61 | join(dirname(__file__), '../checkpoints/NYUDV2_CMX+Segformer-B2.pth'))
62 | network = segmodel(cfg=config,
63 | criterion=None,
64 | norm_layer=torch.nn.BatchNorm2d)
65 | eval_source = abspath(
66 | join(dirname(__file__),
67 | '../3rdparty/RGBX_Semantic_Segmentation/empty.txt'))
68 | data_setting = {
69 | 'rgb_root': config.rgb_root_folder,
70 | 'rgb_format': config.rgb_format,
71 | 'gt_root': config.gt_root_folder,
72 | 'gt_format': config.gt_format,
73 | 'transform_gt': config.gt_transform,
74 | 'x_root': config.x_root_folder,
75 | 'x_format': config.x_format,
76 | 'x_single_channel': config.x_is_single_channel,
77 | 'class_names': config.class_names,
78 | 'train_source': config.train_source,
79 | 'eval_source': eval_source,
80 | 
81 | }
82 | val_pre = ValPre()
83 | dataset = RGBXDataset(data_setting, 'val', val_pre)
84 | evaluator = Evaluator(
85 | dataset=dataset,
86 | class_num=40,
87 | norm_mean=config.norm_mean,
88 | norm_std=config.norm_std,
89 | network=network,
90 | multi_scales=config.eval_scale_array,
91 | is_flip=config.eval_flip,
92 | devices=parse_devices(device_id),
93 | )
94 | evaluator.compute_metric = lambda x: str()
95 | evaluator.run('mmseg', checkpoint_file, '/dev/null', '/tmp/fakelog')
96 | return evaluator
97 | 
98 | 
99 | @gin.configurable
100 | def run(
101 | scene_dir: Union[str, Path],
102 | output_folder: Union[str, Path],
103 | device: Union[str, torch.device] = 'cuda:0',
104 | confidence_threshold: float = 0.995,
105 | flip: bool = False,
106 | ):
107 | 
108 | scene_dir = Path(scene_dir)
109 | output_folder = Path(output_folder)
110 | 
111 | assert scene_dir.exists() and scene_dir.is_dir()
112 | 
113 | input_hha_dir = scene_dir / 'intermediate/hha'
114 | assert input_hha_dir.exists() and input_hha_dir.is_dir()
115 | 
116 | input_color_dir = scene_dir / 'color'
117 | assert input_color_dir.exists() and input_color_dir.is_dir()
118 | 
119 | assert len(list(input_hha_dir.iterdir())) == len(
120 | list(input_color_dir.iterdir()))
121 | 
122 | output_dir = scene_dir / output_folder
123 | output_dir = Path(str(output_dir) + '_flip') if flip else output_dir
124 | shutil.rmtree(output_dir, ignore_errors=True)
125 | os.makedirs(str(output_dir), exist_ok=False)
126 | 
127 | evaluator = load_cmx(device=device)
128 | log.info('[cmx] running inference')
129 | 
130 | keys = [p.stem for p in input_color_dir.glob('*.jpg')]
131 | for k in tqdm(keys):
132 | img = cv2.imread(str(input_color_dir / f'{k}.jpg'))[..., ::-1]
133 | hha = cv2.imread(str(input_hha_dir / f'{k}.png'))
134 | 
135 | if flip:
136 | img = img[:, ::-1]
137 | hha = hha[:, ::-1]
138 | pred = evaluator.sliding_eval_rgbX(
139 | img,
140 | hha,
141 | config.eval_crop_size,
142 | config.eval_stride_rate,
143 | device=device,
144 | )
145 | 
146 | pred = pred + 1
147 | if flip:
148 | pred = pred[:, ::-1]
149 | cv2.imwrite(str(output_dir / f'{k}.png'), pred.astype(np.uint16))
150 | 
151 | 
152 | def arg_parser():
153 | parser = argparse.ArgumentParser(description='CMX Segmentation')
154 | parser.add_argument(
155 | '--workspace',
156 | type=str,
157 | required=True,
158 | help=
159 | 'Path to workspace directory. There should be "color" and "intermediate/hha" folder inside.',
160 | )
161 | parser.add_argument(
162 | '--output',
163 | type=str,
164 | default='intermediate/nyu40_cmx_1',
165 | help=
166 | 'Name of output directory in the workspace directory intermediate. Has to follow the pattern $labelspace_$model_$version'
167 | )
168 | parser.add_argument('--seed', type=int, default=42, help='random seed')
169 | parser.add_argument(
170 | '--flip',
171 | action="store_true",
172 | help='Flip the input image, this is part of test time augmentation.',
173 | )
174 | parser.add_argument('--config', help='Name of config file')
175 | return parser.parse_args()
176 | 
177 | 
178 | if __name__ == "__main__":
179 | args = arg_parser()
180 | if args.config is not None:
181 | gin.parse_config_file(args.config)
182 | 
183 | setup_seeds(seed=args.seed)
184 | run(scene_dir=args.workspace, output_folder=args.output, flip=args.flip)
185 | 
--------------------------------------------------------------------------------
/models/hha_depth.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import os
4 | import random
5 | import shutil
6 | import sys
7 | from pathlib import Path
8 | from typing import Union
9 | 
10 | import cv2
11 | import gin
12 | import numpy as np
13 | import torch
14 | import torch.backends.cudnn as cudnn
15 | from hha.getHHA import getHHA
16 | from joblib import Parallel, delayed
17 | from tqdm import tqdm
18 | 
19 | logging.basicConfig(level="INFO")
20 | log = logging.getLogger('Depth to HHA conversion')
21 | 
22 | 
23 | def setup_seeds(seed):
24 | 
25 | random.seed(seed)
26 | np.random.seed(seed)
27 | torch.manual_seed(seed)
28 | 
29 | cudnn.benchmark = False
30 | cudnn.deterministic = True
31 | 
32 | 
33 | @gin.configurable
34 | def run(
35 | scene_dir: Union[str, Path],
36 | input_folder: Union[str, Path],
37 | output_folder: Union[str, Path],
38 | n_jobs=8,
39 | ):
40 | 
41 | scene_dir = Path(scene_dir)
42 | input_folder = Path(input_folder)
43 | output_folder = Path(output_folder)
44 | 
45 | assert scene_dir.exists() and scene_dir.is_dir()
46 | 
47 | input_depth_dir = scene_dir / 'depth'
48 | assert input_depth_dir.exists() and input_depth_dir.is_dir()
49 | 
50 | input_intrinsic_dir = scene_dir / 'intrinsic'
51 | assert input_intrinsic_dir.exists() and input_intrinsic_dir.is_dir()
52 | 
53 | omnidata_depth_dir = scene_dir / input_folder
54 | assert omnidata_depth_dir.exists() and omnidata_depth_dir.is_dir()
55 | 
56 | assert len(list(input_depth_dir.iterdir())) == len(
57 | list(omnidata_depth_dir.iterdir()))
58 | 
59 | output_dir = scene_dir / output_folder
60 | shutil.rmtree(output_dir, ignore_errors=True)
61 | os.makedirs(str(output_dir), exist_ok=False)
62 | 
63 | log.info(f'running depth to hha conversion for scene {scene_dir}')
64 | 
65 | def depth_to_hha(k):
66 | intrinsics = np.loadtxt(str(input_intrinsic_dir / f'{k}.txt'))[:3, :3]
67 | orig_depth = cv2.imread(str(input_depth_dir / f'{k}.png'),
68 | cv2.IMREAD_UNCHANGED) / 1000
69 | omni_depth = cv2.imread(str(omnidata_depth_dir / f'{k}.png'),
70 | cv2.IMREAD_UNCHANGED) / 1000
71 | hha = getHHA(intrinsics, omni_depth, orig_depth)
72 | cv2.imwrite(str(output_dir / f'{k}.png'), hha)
73 | 
74 | keys = [p.stem for p in (scene_dir / 'depth').glob('*.png')]
75 | if n_jobs > 1:
76 | Parallel(n_jobs=n_jobs)(delayed(depth_to_hha)(k) for k in tqdm(keys))
77 | else:
78 | for k in tqdm(keys):
79 | depth_to_hha(k)
80 | 
81 | 
82 | def arg_parser():
83 | parser = argparse.ArgumentParser(description='HHA')
84 | parser.add_argument(
85 | '--workspace',
86 | type=str,
87 | required=True,
88 | help=
There should be a "depth" and "instrinsic" folder', 90 | ) 91 | parser.add_argument( 92 | '--input', 93 | type=str, 94 | default='intermediate/depth_omnidata_1', 95 | help='Name of input directory in the workspace directory', 96 | ) 97 | parser.add_argument( 98 | '--output', 99 | type=str, 100 | default='intermediate/hha', 101 | help= 102 | 'Name of output directory in the workspace directory intermediate. Has to follow the pattern $labelspace_$model_$version', 103 | ) 104 | parser.add_argument('--config', help='Name of config file') 105 | parser.add_argument( 106 | '--n_jobs', 107 | type=int, 108 | default=8, 109 | help='Number of parallel jobs', 110 | ) 111 | return parser.parse_args() 112 | 113 | 114 | if __name__ == "__main__": 115 | args = arg_parser() 116 | if args.config is not None: 117 | gin.parse_config_file(args.config) 118 | run( 119 | scene_dir=args.workspace, 120 | input_folder=args.input, 121 | output_folder=args.output, 122 | n_jobs=args.n_jobs, 123 | ) 124 | -------------------------------------------------------------------------------- /models/internimage.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import random 5 | import shutil 6 | import sys 7 | from pathlib import Path 8 | from typing import Union 9 | 10 | import cv2 11 | import gin 12 | import mmcv 13 | import numpy as np 14 | import torch 15 | import torch.backends.cudnn as cudnn 16 | from mmcv.runner import load_checkpoint 17 | from mmseg.apis import inference_segmentor, init_segmentor 18 | from mmseg.core import get_classes, get_palette 19 | from tqdm import tqdm 20 | 21 | sys.path.append( 22 | os.path.join(os.path.dirname(__file__), '..', '3rdparty', 'InternImage', 23 | 'segmentation')) 24 | 25 | import mmcv_custom # this is actually needed for correct model registry 26 | import mmseg_custom 27 | 28 | logging.basicConfig(level="INFO") 29 | log = logging.getLogger('InternImage Segmentation') 30 | 31 | 32 | def setup_seeds(seed): 33 | 34 | random.seed(seed) 35 | np.random.seed(seed) 36 | torch.manual_seed(seed) 37 | 38 | cudnn.benchmark = False 39 | cudnn.deterministic = True 40 | 41 | 42 | def load_internimage(device: Union[str, torch.device],): 43 | 44 | config_file = os.path.join( 45 | os.path.dirname(__file__), '..', '3rdparty', 'InternImage', 46 | 'segmentation', 'configs', 'ade20k', 47 | 'mask2former_internimage_h_896_80k_cocostuff2ade20k_ss.py') 48 | checkpoint_file = os.path.join( 49 | os.path.dirname(__file__), '..', 'checkpoints', 50 | 'mask2former_internimage_h_896_80k_cocostuff2ade20k.pth') 51 | 52 | # build the model from a config file and a checkpoint file 53 | model = init_segmentor(config_file, checkpoint=None, device=device) 54 | checkpoint = load_checkpoint(model, checkpoint_file, map_location='cpu') 55 | 56 | if 'CLASSES' in checkpoint.get('meta', {}): 57 | model.CLASSES = checkpoint['meta']['CLASSES'] 58 | else: 59 | print('"CLASSES" not found in meta, use dataset.CLASSES instead') 60 | model.CLASSES = get_classes('ade20k') 61 | 62 | if 'PALETTE' in checkpoint.get('meta', {}): 63 | model.PALETTE = checkpoint['meta']['PALETTE'] 64 | else: 65 | print('"PALETTE" not found in meta, use dataset.PALETTE instead') 66 | model.PALETTE = get_palette('ade20k') 67 | 68 | return model 69 | 70 | 71 | @gin.configurable 72 | def run( 73 | scene_dir: Union[str, Path], 74 | output_folder: Union[str, Path], 75 | device: Union[str, torch.device] = 'cuda:0', 76 | flip: bool = False, 77 | ): 78 | # convert str to Path 
object 79 | scene_dir = Path(scene_dir) 80 | output_folder = Path(output_folder) 81 | 82 | assert scene_dir.exists() and scene_dir.is_dir() 83 | 84 | input_color_dir = scene_dir / 'color' 85 | assert input_color_dir.exists() and input_color_dir.is_dir() 86 | 87 | output_dir = scene_dir / output_folder 88 | output_dir = Path(str(output_dir) + '_flip') if flip else output_dir 89 | 90 | # check if output directory exists 91 | shutil.rmtree(output_dir, ignore_errors=True) 92 | os.makedirs(str(output_dir), exist_ok=False) 93 | 94 | log.info('[internimage] loading model') 95 | model = load_internimage(device=device) 96 | log.info(f'[internimage] running inference in {str(input_color_dir)}') 97 | print(f'[internimage] running inference in {str(input_color_dir)}', 98 | flush=True) 99 | 100 | input_files = input_color_dir.glob('*') 101 | input_files = sorted(input_files, key=lambda x: int(x.stem.split('_')[-1])) 102 | 103 | for file in tqdm(input_files): 104 | img = mmcv.imread(file) 105 | 106 | if flip: 107 | img = img[:, ::-1] 108 | 109 | result = inference_segmentor(model, img)[0] 110 | if flip: 111 | result = result[:, ::-1] 112 | 113 | cv2.imwrite(str(output_dir / f'{file.stem}.png'), result.astype(np.uint16)) 114 | 115 | 116 | # all models should have this command line interface 117 | def arg_parser(): 118 | parser = argparse.ArgumentParser(description='InternImage Segmentation') 119 | parser.add_argument( 120 | '--workspace', 121 | type=str, 122 | required=True, 123 | help='Path to workspace directory. There should be a "color" folder.', 124 | ) 125 | parser.add_argument( 126 | '--output', 127 | type=str, 128 | default='intermediate/ade20k_internimage_1', 129 | help= 130 | 'Name of output directory in the workspace directory intermediate. Has to follow the pattern $labelspace_$model_$version', 131 | ) 132 | parser.add_argument('--seed', type=int, default=42, help='random seed') 133 | parser.add_argument( 134 | '--flip', 135 | action="store_true", 136 | help='Flip the input image, this is part of test time augmentation.', 137 | ) 138 | parser.add_argument('--config', help='Name of config file') 139 | return parser.parse_args() 140 | 141 | 142 | if __name__ == '__main__': 143 | args = arg_parser() 144 | 145 | if args.config is not None: 146 | gin.parse_config_file(args.config) 147 | 148 | setup_seeds(seed=args.seed) 149 | run(scene_dir=args.workspace, output_folder=args.output, flip=args.flip) 150 | -------------------------------------------------------------------------------- /models/omnidata_depth.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import random 5 | import shutil 6 | import sys 7 | from pathlib import Path 8 | from typing import Union 9 | 10 | import cv2 11 | import gin 12 | import matplotlib.pyplot as plt 13 | import mmcv 14 | import numpy as np 15 | import PIL 16 | import torch 17 | import torch.backends.cudnn as cudnn 18 | import torch.nn.functional as F 19 | from joblib import Parallel, delayed 20 | from PIL import Image 21 | from torchvision import transforms 22 | from tqdm import tqdm 23 | 24 | sys.path.insert( 25 | 0, 26 | os.path.abspath( 27 | os.path.join(os.path.dirname(__file__), '..', '3rdparty', 'omnidata', 28 | 'omnidata_tools', 'torch'))) 29 | 30 | from data.transforms import get_transform 31 | from modules.midas.dpt_depth import DPTDepthModel 32 | from modules.unet import UNet 33 | 34 | logging.basicConfig(level="INFO") 35 | log = logging.getLogger('Omnidata Depth') 36 
| 37 | 38 | def setup_seeds(seed): 39 | 40 | random.seed(seed) 41 | np.random.seed(seed) 42 | torch.manual_seed(seed) 43 | 44 | cudnn.benchmark = False 45 | cudnn.deterministic = True 46 | 47 | 48 | def load_omnidepth(device: Union[str, torch.device] = 'cuda:0',): 49 | log.info('loading model') 50 | pretrained_weights_path = Path(os.path.abspath(os.path.dirname( 51 | __file__))) / '..' / 'checkpoints' / 'omnidata_dpt_depth_v2.ckpt' 52 | model = DPTDepthModel(backbone='vitb_rn50_384') 53 | checkpoint = torch.load(pretrained_weights_path, map_location=device) 54 | if 'state_dict' in checkpoint: 55 | state_dict = {} 56 | for k, v in checkpoint['state_dict'].items(): 57 | state_dict[k[6:]] = v 58 | else: 59 | state_dict = checkpoint 60 | model.load_state_dict(state_dict) 61 | model.to(device) 62 | return model 63 | 64 | 65 | def omnidepth_completion( 66 | scene_dir: Union[str, Path], 67 | output_folder: Union[str, Path], 68 | patch_size=32, 69 | ): 70 | # convert str to Path object 71 | scene_dir = Path(scene_dir) 72 | output_folder = Path(output_folder) 73 | 74 | assert scene_dir.exists() and scene_dir.is_dir() 75 | 76 | input_depth_dir = scene_dir / 'depth' 77 | assert input_depth_dir.exists() and input_depth_dir.is_dir() 78 | 79 | output_dir = scene_dir / output_folder 80 | assert (output_dir).exists() 81 | 82 | log.info('[omnidepth] running completion') 83 | 84 | def depth_completion(k): 85 | orig_depth = cv2.imread(str(input_depth_dir / f'{k}.png'), 86 | cv2.IMREAD_UNCHANGED) 87 | omnidepth = cv2.imread(str(output_dir / f'{k}.png'), cv2.IMREAD_UNCHANGED) 88 | 89 | # now complete the original depth with omnidepth predictions, fitted to scale 90 | # within a patch around each missing pixel 91 | fused_depth = orig_depth.copy() 92 | coords_u, coords_v = np.where(fused_depth == 0) 93 | for i in range(len(coords_u)): 94 | u = coords_u[i] 95 | v = coords_v[i] 96 | window_u = max(0, u - patch_size), min(fused_depth.shape[0], 97 | u + patch_size) 98 | window_v = max(0, v - patch_size), min(fused_depth.shape[1], 99 | v + patch_size) 100 | target = orig_depth[window_u[0]:window_u[1], window_v[0]:window_v[1]] 101 | source = omnidepth[window_u[0]:window_u[1], window_v[0]:window_v[1]] 102 | source = source[target != 0] 103 | target = target[target != 0] 104 | a, b = np.linalg.lstsq(np.stack([source, np.ones_like(source)], axis=-1), 105 | target, 106 | rcond=None)[0] 107 | # for some areas this will completely break the geometry, we need to revert to omnidepth 108 | if a < 0.5 or a > 2: 109 | fused_depth[u, v] = omnidepth[u, v] 110 | else: 111 | fused_depth[u, v] = a * omnidepth[u, v] + b 112 | fused_depth[fused_depth == 0] = omnidepth[fused_depth == 0] 113 | cv2.imwrite(str(output_dir / f'{k}.png'), fused_depth) 114 | 115 | keys = [p.stem for p in input_depth_dir.glob('*.png')] 116 | Parallel(n_jobs=8)(delayed(depth_completion)(k) for k in tqdm(keys)) 117 | 118 | 119 | @gin.configurable 120 | def run( 121 | scene_dir: Union[str, Path], 122 | output_folder: Union[str, Path], 123 | device: Union[str, torch.device] = 'cuda:0', 124 | depth_size=(480, 640), 125 | completion=True, 126 | ): 127 | scene_dir = Path(scene_dir) 128 | output_folder = Path(output_folder) 129 | 130 | assert scene_dir.exists() and scene_dir.is_dir() 131 | 132 | input_color_dir = scene_dir / 'color' 133 | assert input_color_dir.exists() and input_color_dir.is_dir() 134 | 135 | input_depth_dir = scene_dir / 'depth' 136 | assert input_depth_dir.exists() and input_depth_dir.is_dir() 137 | 138 | output_dir = scene_dir / output_folder 139 
| 140 | log.info('[omnidepth] loading model') 141 | model = load_omnidepth(device=device) 142 | trans_totensor = transforms.Compose([ 143 | transforms.Resize((384, 384), interpolation=PIL.Image.BILINEAR), 144 | transforms.ToTensor(), 145 | transforms.Normalize(mean=0.5, std=0.5) 146 | ]) 147 | 148 | log.info('[omnidepth] running inference') 149 | 150 | shutil.rmtree(output_dir, ignore_errors=True) 151 | os.makedirs(str(output_dir), exist_ok=False) 152 | 153 | keys = [p.stem for p in input_color_dir.glob('*.jpg')] 154 | 155 | for k in tqdm(keys): 156 | 157 | img = Image.open(str(input_color_dir / f'{k}.jpg')) 158 | with torch.no_grad(): 159 | img_tensor = trans_totensor(img)[:3].unsqueeze(0).to(device) 160 | if img_tensor.shape[1] == 1: 161 | img_tensor = img_tensor.repeat_interleave(3, 1) 162 | output = model(img_tensor).clamp(min=0, max=1) 163 | output = F.interpolate(output.unsqueeze(0), depth_size, 164 | mode='bicubic').squeeze(0) 165 | output = output.clamp(0, 1) 166 | omnidepth = output.detach().cpu().squeeze().numpy() 167 | 168 | # find a linear scaling a * depth + b to fit to original depth 169 | orig_depth = cv2.imread(str(input_depth_dir / f'{k}.png'), 170 | cv2.IMREAD_UNCHANGED) 171 | targets = orig_depth[orig_depth != 0] 172 | source = omnidepth[orig_depth != 0] 173 | a, b = np.linalg.lstsq(np.stack([source, np.ones_like(source)], axis=-1), 174 | targets, 175 | rcond=None)[0] 176 | omnidepth = (a * omnidepth + b).astype(orig_depth.dtype) 177 | cv2.imwrite(str(output_dir / f'{k}.png'), omnidepth) 178 | if completion: 179 | omnidepth_completion(scene_dir=scene_dir, output_folder=output_folder) 180 | 181 | 182 | def arg_parser(): 183 | parser = argparse.ArgumentParser(description='Omnidata Depth Estimation') 184 | parser.add_argument( 185 | '--workspace', 186 | type=str, 187 | required=True, 188 | help= 189 | 'Path to workspace directory. There should be "color" and "depth" folder inside.', 190 | ) 191 | parser.add_argument( 192 | '--output', 193 | type=str, 194 | default='intermediate/depth_omnidata_1', 195 | help= 196 | 'Name of output directory in the workspace directory intermediate. 
Has to follow the pattern $labelspace_$model_$version', 197 | ) 198 | parser.add_argument('--seed', type=int, default=42, help='random seed') 199 | parser.add_argument('--config', help='Name of config file') 200 | return parser.parse_args() 201 | 202 | 203 | if __name__ == "__main__": 204 | args = arg_parser() 205 | if args.config is not None: 206 | gin.parse_config_file(args.config) 207 | setup_seeds(seed=args.seed) 208 | run(scene_dir=args.workspace, output_folder=args.output) 209 | -------------------------------------------------------------------------------- /models/omnidata_normal.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import random 5 | import shutil 6 | import sys 7 | from pathlib import Path 8 | from typing import Union 9 | 10 | import gin 11 | import numpy as np 12 | import PIL 13 | import torch 14 | import torch.backends.cudnn as cudnn 15 | import torch.nn.functional as F 16 | from PIL import Image 17 | from torchvision import transforms 18 | from tqdm import tqdm 19 | 20 | sys.path.insert( 21 | 0, 22 | os.path.abspath( 23 | os.path.join(os.path.dirname(__file__), "..", "3rdparty", "omnidata", 24 | "omnidata_tools", "torch"))) 25 | 26 | from data.transforms import get_transform 27 | from modules.midas.dpt_depth import DPTDepthModel 28 | from modules.unet import UNet 29 | 30 | logging.basicConfig(level="INFO") 31 | log = logging.getLogger("Omnidata Normal") 32 | 33 | 34 | def setup_seeds(seed): 35 | 36 | random.seed(seed) 37 | np.random.seed(seed) 38 | torch.manual_seed(seed) 39 | 40 | cudnn.benchmark = False 41 | cudnn.deterministic = True 42 | 43 | 44 | def load_omninormal(device: Union[str, torch.device] = 'cuda:0',): 45 | log.info('loading model') 46 | pretrained_weights_path = Path(os.path.abspath(os.path.dirname( 47 | __file__))) / '..' 
/ 'checkpoints' / 'omnidata_dpt_normal_v2.ckpt' 48 | model = DPTDepthModel(backbone='vitb_rn50_384', num_channels=3) 49 | map_location = (lambda storage, loc: storage.cuda(device=device)) 50 | checkpoint = torch.load(pretrained_weights_path, map_location=map_location) 51 | 52 | if 'state_dict' in checkpoint: 53 | state_dict = {} 54 | for k, v in checkpoint['state_dict'].items(): 55 | state_dict[k[6:]] = v 56 | else: 57 | state_dict = checkpoint 58 | 59 | model.load_state_dict(state_dict) 60 | model.to(device) 61 | return model 62 | 63 | 64 | @gin.configurable 65 | def run( 66 | scene_dir: Union[str, Path], 67 | output_folder: Union[str, Path], 68 | device: Union[str, torch.device] = 'cuda:0', 69 | size=(480, 640), 70 | ): 71 | scene_dir = Path(scene_dir) 72 | output_folder = Path(output_folder) 73 | 74 | assert scene_dir.exists() and scene_dir.is_dir() 75 | 76 | input_color_dir = scene_dir / 'color' 77 | assert input_color_dir.exists() and input_color_dir.is_dir() 78 | 79 | output_dir = scene_dir / output_folder 80 | 81 | log.info('[omninormal] loading model') 82 | model = load_omninormal(device=device) 83 | trans_totensor = transforms.Compose([ 84 | transforms.Resize((384, 384), interpolation=PIL.Image.BILINEAR), 85 | transforms.CenterCrop(384), 86 | get_transform('rgb', image_size=None) 87 | ]) 88 | 89 | log.info('[omninormal] running inference') 90 | 91 | shutil.rmtree(output_dir, ignore_errors=True) 92 | os.makedirs(str(output_dir), exist_ok=False) 93 | 94 | keys = [p.stem for p in input_color_dir.glob('*.jpg')] 95 | 96 | for k in tqdm(keys): 97 | img = Image.open(str(input_color_dir / f'{k}.jpg')) 98 | 99 | with torch.no_grad(): 100 | img_tensor = trans_totensor(img)[:3].unsqueeze(0).to(device) 101 | 102 | if img_tensor.shape[1] == 1: 103 | img_tensor = img_tensor.repeat_interleave(3, 1) 104 | 105 | output = model(img_tensor).clamp(min=0, max=1) # (1, 3, 384, 384) 106 | output = F.interpolate( 107 | output, 108 | size, 109 | mode='nearest', 110 | ).squeeze(0) # (3, H, W) 111 | 112 | omninormal = output.detach().cpu().squeeze().numpy() # (3, H, W) 113 | omninormal = omninormal.transpose(1, 2, 0) # (H, W, 3) 114 | 115 | np.save(str(output_dir / f'{k}.npy'), omninormal) 116 | 117 | 118 | def arg_parser(): 119 | parser = argparse.ArgumentParser(description='Omnidata Normal Estimation') 120 | parser.add_argument( 121 | '--workspace', 122 | type=str, 123 | required=True, 124 | help='Path to workspace directory. There should be "color" folder inside.', 125 | ) 126 | parser.add_argument( 127 | '--output', 128 | type=str, 129 | default='intermediate/normal_omnidata_1', 130 | help= 131 | 'Name of output directory in the workspace directory intermediate. 
Has to follow the pattern $labelspace_$model_$version', 132 | ) 133 | parser.add_argument('--config', help='Name of config file') 134 | return parser.parse_args() 135 | 136 | 137 | if __name__ == "__main__": 138 | args = arg_parser() 139 | if args.config is not None: 140 | gin.parse_config_file(args.config) 141 | run(scene_dir=args.workspace, output_folder=args.output) 142 | -------------------------------------------------------------------------------- /models/ovseg.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # change default download location for nltk 4 | os.environ['NLTK_DATA'] = os.path.abspath( 5 | os.path.join(os.path.dirname(__file__), '..', '3rdparty', 'nltk_data')) 6 | 7 | import argparse 8 | import logging 9 | import random 10 | import shutil 11 | import sys 12 | from pathlib import Path 13 | from typing import Union 14 | 15 | import cv2 16 | import gin 17 | import numpy as np 18 | import torch 19 | import torch.backends.cudnn as cudnn 20 | from detectron2.config import get_cfg 21 | from detectron2.data.detection_utils import read_image 22 | from detectron2.projects.deeplab import add_deeplab_config 23 | from detectron2.utils.logger import setup_logger 24 | from nltk.corpus import wordnet as wn 25 | from tqdm import tqdm 26 | 27 | from labelmaker.label_data import get_ade150, get_replica, get_wordnet 28 | 29 | sys.path.append( 30 | os.path.join(os.path.dirname(__file__), '..', '3rdparty', 'ov-seg')) 31 | from open_vocab_seg import add_ovseg_config 32 | from open_vocab_seg.utils import VisualizationDemo 33 | 34 | logging.basicConfig(level="INFO") 35 | log = logging.getLogger('OV-Seg Segmentation') 36 | 37 | 38 | def setup_seeds(seed): 39 | 40 | random.seed(seed) 41 | np.random.seed(seed) 42 | torch.manual_seed(seed) 43 | 44 | cudnn.benchmark = False 45 | cudnn.deterministic = True 46 | 47 | 48 | class WordnetPromptTemplate: 49 | 50 | def __init__(self, template, add_synonyms=True): 51 | self.template = template 52 | self.add_synonyms = add_synonyms 53 | 54 | def format(self, noun): 55 | synset = wn.synset(noun) 56 | prompt = self.template.format(noun=synset.name().split('.')[0], 57 | definition=synset.definition()) 58 | if self.add_synonyms and len(synset.lemma_names()) > 1: 59 | prompt += " It can also be called {}".format(", ".join( 60 | synset.lemma_names()[1:])) 61 | return prompt 62 | 63 | def __str__(self): 64 | return str(self.template) 65 | 66 | 67 | def load_ovseg( 68 | device: Union[str, torch.device], 69 | custom_templates=None, 70 | ): 71 | cfg = get_cfg() 72 | add_deeplab_config(cfg) 73 | add_ovseg_config(cfg) 74 | cfg.merge_from_file( 75 | str( 76 | Path(__file__).parent / '..' / '3rdparty' / 'ov-seg' / 'configs' / 77 | 'ovseg_swinB_vitL_demo.yaml')) 78 | cfg.merge_from_list([ 79 | 'MODEL.WEIGHTS', 80 | str( 81 | Path(__file__).parent / '..' 
/ 'checkpoints' / 82 | 'ovseg_swinbase_vitL14_ft_mpt.pth') 83 | ]) 84 | 85 | # add device information 86 | cfg.MODEL.DEVICE = str(device) 87 | 88 | if custom_templates is not None: 89 | cfg.MODEL.CLIP_ADAPTER.TEXT_TEMPLATES = "predefined" 90 | cfg.MODEL.CLIP_ADAPTER.PREDEFINED_PROMPT_TEMPLATES = custom_templates 91 | cfg.freeze() 92 | demo = VisualizationDemo(cfg) 93 | return demo 94 | 95 | 96 | def process_image( 97 | model, 98 | img_path, 99 | class_names, 100 | id_map, 101 | threshold=0.7, 102 | flip=False, 103 | ): 104 | # use PIL, to be consistent with evaluation 105 | img = read_image(img_path, format="BGR") 106 | if flip: 107 | img = img[:, ::-1] 108 | predictions = model.predictor(img, class_names) 109 | blank_area = (predictions['sem_seg'][0] == 0).to('cpu').numpy() 110 | product, pred = torch.max(predictions['sem_seg'], dim=0) 111 | 112 | # map unknown region to last_id + 1 113 | pred[product < threshold] = len(class_names) 114 | pred[blank_area] = len(class_names) 115 | 116 | pred = pred.to('cpu').numpy().astype(int) 117 | 118 | if flip: 119 | pred = pred[:, ::-1] 120 | 121 | # map to corresponding label space 122 | pred = id_map[pred] 123 | 124 | return pred 125 | 126 | 127 | def get_id_map(classes): 128 | """ 129 | In ovseg, the unknown class is not specified in class_names, it is temporarily mapped to the last_id + 1. However, depending on the starting point of each label scheme its mapping may be different. 130 | """ 131 | if classes == 'ade150': 132 | id_map = [x['id'] for x in get_ade150()] + [150] 133 | elif classes == 'replica': 134 | id_map = [x['id'] for x in get_replica()] + [0] 135 | elif classes in ['wordnet', 'wn_nosyn', 'wn_nodef', 'wn_nosyn_nodef']: 136 | id_map = [x['id'] for x in get_wordnet()[1:]] + [0] 137 | else: 138 | raise ValueError(f'Unknown class set {classes}') 139 | 140 | return np.array(id_map) 141 | 142 | 143 | def get_templates(classes): 144 | templates = None 145 | if classes == 'ade150': 146 | class_names = [x['name'] for x in get_ade150()] 147 | elif classes == 'replica': 148 | class_names = [x['name'] for x in get_replica()] 149 | elif classes == 'wordnet': 150 | sizeless_templates = [ 151 | "a photo of a {size}{noun}, which is {definition}.", 152 | "a photo of a {size}{noun}, which can be defined as {definition}.", 153 | "a photo of a {size}{noun}, as in {definition}.", 154 | "This is a photo of a {size}{noun}, which is {definition}", 155 | "This is a photo of a {size}{noun}, which can be defined as {definition}", 156 | "This is a photo of a {size}{noun}, as in {definition}", 157 | "There is a {size}{noun} in the scene", 158 | "There is a {size}{definition} in the scene", 159 | "There is the {size}{noun} in the scene", 160 | "There is the {size}{definition} in the scene", 161 | "a photo of a {size}{noun} in the scene", 162 | "a photo of a {size}{definition} in the scene", 163 | ] 164 | templates = [] 165 | for t in sizeless_templates: 166 | for s in ["", "small ", "medium ", "large "]: 167 | templates.append( 168 | WordnetPromptTemplate( 169 | t.format(size=s, noun="{noun}", definition="{definition}"))) 170 | # the first class is the background class 171 | class_names = [x['name'] for x in get_wordnet()[1:]] 172 | elif classes == 'wn_nosyn': 173 | sizeless_templates = [ 174 | "a photo of a {size}{noun}, which is {definition}.", 175 | "a photo of a {size}{noun}, which can be defined as {definition}.", 176 | "a photo of a {size}{noun}, as in {definition}.", 177 | "This is a photo of a {size}{noun}, which is {definition}", 178 | "This is a photo 
of a {size}{noun}, which can be defined as {definition}", 179 | "This is a photo of a {size}{noun}, as in {definition}", 180 | "There is a {size}{noun} in the scene", 181 | "There is a {size}{definition} in the scene", 182 | "There is the {size}{noun} in the scene", 183 | "There is the {size}{definition} in the scene", 184 | "a photo of a {size}{noun} in the scene", 185 | "a photo of a {size}{definition} in the scene", 186 | ] 187 | templates = [] 188 | for t in sizeless_templates: 189 | for s in ["", "small ", "medium ", "large "]: 190 | templates.append( 191 | WordnetPromptTemplate(t.format(size=s, 192 | noun="{noun}", 193 | definition="{definition}"), 194 | add_synonyms=False)) 195 | # the first class is the background class 196 | class_names = [x['name'] for x in get_wordnet()[1:]] 197 | elif classes == 'wn_nodef': 198 | sizeless_templates = [ 199 | "a photo of a {size}{noun}", 200 | "a photo of a {size}{noun}", 201 | "a photo of a {size}{noun}", 202 | "This is a photo of a {size}{noun}.", 203 | "This is a photo of a {size}{noun}.", 204 | "This is a photo of a {size}{noun}.", 205 | "There is a {size}{noun} in the scene", 206 | "There is the {size}{noun} in the scene", 207 | "a photo of a {size}{noun} in the scene", 208 | ] 209 | templates = [] 210 | for t in sizeless_templates: 211 | for s in ["", "small ", "medium ", "large "]: 212 | templates.append(WordnetPromptTemplate(t.format(size=s, noun="{noun}"))) 213 | # the first class is the background class 214 | class_names = [x['name'] for x in get_wordnet()[1:]] 215 | elif classes == 'wn_nosyn_nodef': 216 | sizeless_templates = [ 217 | "a photo of a {size}{noun}", 218 | "a photo of a {size}{noun}", 219 | "a photo of a {size}{noun}", 220 | "This is a photo of a {size}{noun}.", 221 | "This is a photo of a {size}{noun}.", 222 | "This is a photo of a {size}{noun}.", 223 | "There is a {size}{noun} in the scene", 224 | "There is the {size}{noun} in the scene", 225 | "a photo of a {size}{noun} in the scene", 226 | ] 227 | templates = [] 228 | for t in sizeless_templates: 229 | for s in ["", "small ", "medium ", "large "]: 230 | templates.append( 231 | WordnetPromptTemplate(t.format(size=s, noun="{noun}"), 232 | add_synonyms=False)) 233 | # the first class is the background class 234 | class_names = [x['name'] for x in get_wordnet()[1:]] 235 | else: 236 | raise ValueError(f'Unknown class set {classes}') 237 | 238 | return templates, class_names 239 | 240 | 241 | @gin.configurable 242 | def run( 243 | scene_dir: Union[str, Path], 244 | output_folder: Union[str, Path], 245 | device: Union[ 246 | str, torch. 247 | device] = 'cuda:0', # changing this to cuda default as all of us have it available. 
Otherwise, it will fail on machines without CUDA. 248 | classes='wn_nodef', 249 | flip=False, 250 | ): 251 | scene_dir = Path(scene_dir) 252 | output_folder = Path(output_folder) 253 | 254 | # check if scene_dir exists 255 | assert scene_dir.exists() and scene_dir.is_dir() 256 | 257 | input_color_dir = scene_dir / 'color' 258 | assert input_color_dir.exists() and input_color_dir.is_dir() 259 | 260 | output_dir = scene_dir / output_folder 261 | output_dir = Path(str(output_dir) + '_flip') if flip else output_dir 262 | if classes != 'wn_nodef': 263 | output_dir = Path(str(output_dir).replace('wn_nodef', classes)) 264 | 265 | # start from a clean output directory 266 | shutil.rmtree(output_dir, ignore_errors=True) 267 | os.makedirs(str(output_dir), exist_ok=False) 268 | 269 | input_files = input_color_dir.glob('*') 270 | input_files = sorted(input_files, key=lambda x: int(x.stem.split('_')[-1])) 271 | 272 | log.info(f'[ov-seg] using {classes} classes') 273 | log.info(f'[ov-seg] inference in {str(input_color_dir)}') 274 | 275 | templates, class_names = get_templates(classes) 276 | id_map = get_id_map(classes) 277 | 278 | log.info('[ov-seg] loading model') 279 | model = load_ovseg(device=device, custom_templates=templates) 280 | 281 | log.info('[ov-seg] inference') 282 | 283 | for file in tqdm(input_files): 284 | result = process_image(model, file, class_names, id_map, flip=flip) 285 | cv2.imwrite( 286 | str(output_dir / f'{file.stem}.png'), 287 | result.astype(np.uint16), 288 | ) 289 | 290 | 291 | def arg_parser(): 292 | parser = argparse.ArgumentParser(description='OVSeg Segmentation') 293 | parser.add_argument( 294 | '--workspace', 295 | type=str, 296 | required=True, 297 | help= 298 | 'Path to workspace directory. There should be a "color" folder inside.', 299 | ) 300 | parser.add_argument( 301 | '--output', 302 | type=str, 303 | default='intermediate/wordnet_ovseg_1', 304 | help= 305 | 'Name of output directory in the workspace directory intermediate. 
Has to follow the pattern $labelspace_$model_$version', 306 | ) 307 | parser.add_argument('--seed', type=int, default=42, help='random seed') 308 | parser.add_argument( 309 | '--flip', 310 | action="store_true", 311 | help='Flip the input image, this is part of test time augmentation.', 312 | ) 313 | parser.add_argument('--config', help='Name of config file') 314 | return parser.parse_args() 315 | 316 | 317 | if __name__ == '__main__': 318 | args = arg_parser() 319 | if args.config is not None: 320 | gin.parse_config_file(args.config) 321 | 322 | setup_seeds(seed=args.seed) 323 | run(scene_dir=args.workspace, output_folder=args.output, flip=args.flip) 324 | -------------------------------------------------------------------------------- /notebooks/generate_3d_projections.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "7d36b077", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import os\n", 11 | "import cv2\n", 12 | "\n", 13 | "import open3d as o3d\n", 14 | "import numpy as np\n", 15 | "\n", 16 | "from tqdm import tqdm\n", 17 | "from PIL import Image\n", 18 | "\n", 19 | "import matplotlib.pyplot as plt\n" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "id": "b015fd4d", 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "PATH = '/home/weders/scratch/scratch/scannetter/arkit/raw/Validation'\n", 30 | "SCENE = 'scene0458_00'\n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "id": "40012878", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "label_name = ''\n", 41 | "\n", 42 | "scene_path = os.path.join(PATH, SCENE)\n", 43 | "image_path = os.path.join(scene_path, 'color')\n", 44 | "depth_path = os.path.join(scene_path, 'depth')\n", 45 | "intrinsics_path = os.path.join(scene_path, 'intrinsic')\n", 46 | "pose_path = os.path.join(scene_path, 'pose')\n", 47 | "# label_path = os.path.join(scene_path, 'label-proc')\n", 48 | "\n", 49 | "mesh_path = os.path.join(scene_path, f'{SCENE}_vh_clean.ply')\n" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "id": "b44e1ac0", 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "# scannet label coloring\n", 60 | "def create_color_palette():\n", 61 | " return [\n", 62 | " (0, 0, 0),\n", 63 | " (174, 199, 232), # wall\n", 64 | " (152, 223, 138), # floor\n", 65 | " (31, 119, 180), # cabinet\n", 66 | " (255, 187, 120), # bed\n", 67 | " (188, 189, 34), # chair\n", 68 | " (140, 86, 75), # sofa\n", 69 | " (255, 152, 150), # table\n", 70 | " (214, 39, 40), # door\n", 71 | " (197, 176, 213), # window\n", 72 | " (148, 103, 189), # bookshelf\n", 73 | " (196, 156, 148), # picture\n", 74 | " (23, 190, 207), # counter\n", 75 | " (178, 76, 76),\n", 76 | " (247, 182, 210), # desk\n", 77 | " (66, 188, 102),\n", 78 | " (219, 219, 141), # curtain\n", 79 | " (140, 57, 197),\n", 80 | " (202, 185, 52),\n", 81 | " (51, 176, 203),\n", 82 | " (200, 54, 131),\n", 83 | " (92, 193, 61),\n", 84 | " (78, 71, 183),\n", 85 | " (172, 114, 82),\n", 86 | " (255, 127, 14), # refrigerator\n", 87 | " (91, 163, 138),\n", 88 | " (153, 98, 156),\n", 89 | " (140, 153, 101),\n", 90 | " (158, 218, 229), # shower curtain\n", 91 | " (100, 125, 154),\n", 92 | " (178, 127, 135),\n", 93 | " (120, 185, 128),\n", 94 | " (146, 111, 194),\n", 95 | " (44, 160, 44), # toilet\n", 96 | " (112, 128, 144), # sink\n", 97 | " (96, 207, 
209),\n", 98 | " (227, 119, 194), # bathtub\n", 99 | " (213, 92, 176),\n", 100 | " (94, 106, 211),\n", 101 | " (82, 84, 163), # otherfurn\n", 102 | " (100, 85, 144)\n", 103 | " ]\n", 104 | "\n", 105 | "\n", 106 | "def colorize_semantic_pointcloud(labels):\n", 107 | " colors = 255 * np.ones((labels.shape[0], 3))\n", 108 | " color_palette = np.asarray(create_color_palette())\n", 109 | "\n", 110 | " for l in np.unique(labels):\n", 111 | " colors[labels == l] = color_palette[l, :]\n", 112 | "\n", 113 | " return colors\n", 114 | "\n", 115 | "\n", 116 | "# color by label\n", 117 | "def visualize_label_image(image):\n", 118 | " height = image.shape[0]\n", 119 | " width = image.shape[1]\n", 120 | " vis_image = np.zeros([height, width, 3], dtype=np.uint8)\n", 121 | " color_palette = create_color_palette()\n", 122 | " for idx, color in enumerate(color_palette):\n", 123 | " vis_image[image == idx] = color\n", 124 | " return vis_image\n" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "id": "763a433d", 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "# load mesh\n", 135 | "mesh = o3d.io.read_triangle_mesh(mesh_path)\n", 136 | "vertices = np.asarray(mesh.vertices)\n", 137 | "colors = np.asarray(mesh.vertex_colors)\n", 138 | "labels_3d = np.zeros((vertices.shape[0], 2000))\n" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "id": "20493f14", 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "def project_pointcloud(points, pose, intrinsics):\n", 149 | "\n", 150 | " points_h = np.hstack((points, np.ones_like(points[:, 0:1])))\n", 151 | " points_c = np.linalg.inv(pose) @ points_h.T\n", 152 | " points_c = points_c.T\n", 153 | " points_p = intrinsics @ points_c.T\n", 154 | " points_p = points_p.T[:, :3]\n", 155 | "\n", 156 | " points_p[:, 0] /= (points_p[:, -1] + 1.e-6)\n", 157 | " points_p[:, 1] /= (points_p[:, -1] + 1.e-6)\n", 158 | "\n", 159 | " return points_p\n", 160 | "\n", 161 | "\n", 162 | "files = [f for f in os.listdir(label_path) if f.endswith('png')]\n", 163 | "files = sorted(files, key=lambda x: int(x.split('.')[0]))\n", 164 | "resize_image = False\n", 165 | "subsampling = 1\n", 166 | "\n", 167 | "for idx, file in tqdm(enumerate(files), total=len(files)):\n", 168 | "\n", 169 | " if idx % 10 != 0:\n", 170 | " continue\n", 171 | "\n", 172 | " frame_key = int(file.split('.')[0]) * subsampling\n", 173 | "\n", 174 | " image = np.asarray(Image.open(os.path.join(\n", 175 | " image_path, f'{frame_key}.jpg'))).astype(np.uint8)\n", 176 | " depth = np.asarray(Image.open(os.path.join(\n", 177 | " depth_path, f'{frame_key}.png'))).astype(np.float32) / 1000.\n", 178 | " labels = np.asarray(Image.open(os.path.join(label_path, file)))\n", 179 | "\n", 180 | " if resize_image:\n", 181 | " h, w = depth.shape\n", 182 | " image = cv2.resize(image, (w, h))\n", 183 | " labels = cv2.resize(labels, (w, h))\n", 184 | " else:\n", 185 | " h, w, _ = image.shape\n", 186 | " depth = cv2.resize(depth, (w, h))\n", 187 | "\n", 188 | " if not intrinsics_loaded:\n", 189 | " intrinsics = np.loadtxt(intrinsics_path + '/intrinsic_color.txt')\n", 190 | " # intrinsics = o3d.camera.PinholeCameraIntrinsic(width=w, height=h, fx=intrinsics[0, 0], fy=intrinsics[1, 1], cx=intrinsics[0, 2], cy=intrinsics[1, 2])\n", 191 | " intrinsics_loaded = False\n", 192 | "\n", 193 | " pose_file = os.path.join(pose_path, f'{frame_key}.txt')\n", 194 | " pose = np.loadtxt(pose_file)\n", 195 | "\n", 196 | " points_p = 
project_pointcloud(vertices, pose, intrinsics)\n", 197 | "\n", 198 | "    xx = points_p[:, 0].astype(int)\n", 199 | "    yy = points_p[:, 1].astype(int)\n", 200 | "    zz = points_p[:, 2]\n", 201 | "\n", 202 | "    valid_mask = (xx >= 0) & (yy >= 0) & (xx < w) & (yy < h)\n", 203 | "\n", 204 | "    d = depth[yy[valid_mask], xx[valid_mask]]\n", 205 | "\n", 206 | "    valid_mask[valid_mask] = (zz[valid_mask] > 0) & (np.abs(zz[valid_mask] - d)\n", 207 | "                                                     <= 0.1)\n", 208 | "\n", 209 | "    image_rendered = np.zeros_like(image)\n", 210 | "    image_rendered[yy[valid_mask], xx[valid_mask], :] = colors[valid_mask] * 255\n", 211 | "    print(labels.shape)\n", 212 | "    labels_2d = labels[yy[valid_mask], xx[valid_mask]]\n", 213 | "    labels_3d[valid_mask, labels_2d] += 1\n", 214 | "\n", 215 | "    fig, ax = plt.subplots(1, 3)\n", 216 | "    ax[0].imshow(image)\n", 217 | "    ax[1].imshow(image_rendered)\n", 218 | "    ax[2].imshow(visualize_label_image(labels))\n", 219 | "    plt.show()\n" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "id": "4aa91cb5", 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "labels_3d = np.argmax(labels_3d, axis=-1)\n", 230 | "label_colors = colorize_semantic_pointcloud(labels_3d)\n" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "id": "b20a4d9f", 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "from copy import deepcopy\n" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "id": "ef45acd2", 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "label_colors = label_colors.astype(np.float32) / 255.\n", 251 | "mesh_colored = deepcopy(mesh)\n", 252 | "mesh_colored.vertex_colors = o3d.utility.Vector3dVector(label_colors)\n" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "id": "37121f49", 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "o3d.io.write_triangle_mesh('label_mesh.ply', mesh_colored)\n" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "id": "31f1878f", 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "tsdf = o3d.pipelines.integration.ScalableTSDFVolume(\n", 273 | "    sdf_trunc=0.06,\n", 274 | "    voxel_length=0.02,\n", 275 | "    color_type=o3d.pipelines.integration.TSDFVolumeColorType.RGB8)\n", 276 | "\n", 277 | "intrinsics_loaded = False\n", 278 | "\n", 279 | "files = sorted(os.listdir(depth_path), key=lambda x: int(x.split('.')[0]))\n", 280 | "pcds = None\n", 281 | "resize_image = False\n", 282 | "\n", 283 | "for idx, file in tqdm(enumerate(files), total=len(files)):\n", 284 | "\n", 285 | "    if idx not in [136, 137]:\n", 286 | "        continue\n", 287 | "\n", 288 | "    if not os.path.exists(os.path.join(image_path, file.replace('.png', '.jpg'))):\n", 289 | "        print(file, 'not found')\n", 290 | "        continue\n", 291 | "\n", 292 | "    image = np.asarray(\n", 293 | "        Image.open(os.path.join(image_path,\n", 294 | "                                file.replace('.png', '.jpg')))).astype(np.uint8)\n", 295 | "    depth = np.asarray(Image.open(os.path.join(depth_path, file))).astype(\n", 296 | "        np.float32) / 1000.\n", 297 | "\n", 298 | "    if resize_image:\n", 299 | "        h, w = depth.shape\n", 300 | "        image = cv2.resize(image, (w, h))\n", 301 | "    else:\n", 302 | "        h, w, _ = image.shape\n", 303 | "        depth = cv2.resize(depth, (w, h))\n", 304 | "\n", 305 | "    if not intrinsics_loaded:\n", 306 | "        intrinsics = np.loadtxt(intrinsics_path + '/intrinsic_depth.txt')\n", 307 | "        intrinsics = 
o3d.camera.PinholeCameraIntrinsic(width=w,\n", 308 | "                                                       height=h,\n", 309 | "                                                       fx=intrinsics[0, 0],\n", 310 | "                                                       fy=intrinsics[1, 1],\n", 311 | "                                                       cx=intrinsics[0, 2],\n", 312 | "                                                       cy=intrinsics[1, 2])\n", 313 | "        intrinsics_loaded = True\n", 314 | "\n", 315 | "    pose_file = os.path.join(pose_path, file.replace('.png', '.txt'))\n", 316 | "    pose = np.loadtxt(pose_file)\n", 317 | "\n", 318 | "    image = o3d.geometry.Image(image)\n", 319 | "    depth = o3d.geometry.Image(depth)\n", 320 | "\n", 321 | "    rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(\n", 322 | "        image,\n", 323 | "        depth,\n", 324 | "        depth_scale=1.0,\n", 325 | "        depth_trunc=3.,\n", 326 | "        convert_rgb_to_intensity=False)\n", 327 | "\n", 328 | "    if pcds is None:\n", 329 | "\n", 330 | "        pcds = o3d.geometry.PointCloud.create_from_rgbd_image(\n", 331 | "            rgbd, intrinsics, np.linalg.inv(pose)).voxel_down_sample(0.04)\n", 332 | "\n", 333 | "    else:\n", 334 | "        pcds = pcds + o3d.geometry.PointCloud.create_from_rgbd_image(\n", 335 | "            rgbd, intrinsics, np.linalg.inv(pose))\n", 336 | "        pcds = pcds.voxel_down_sample(0.01)\n", 337 | "\n", 338 | "o3d.io.write_point_cloud(f'{SCENE}.ply', pcds)\n" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "id": "30a65e47", 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [] 348 | } 349 | ], 350 | "metadata": { 351 | "kernelspec": { 352 | "display_name": "scannetter", 353 | "language": "python", 354 | "name": "scannetter" 355 | }, 356 | "language_info": { 357 | "codemirror_mode": { 358 | "name": "ipython", 359 | "version": 3 360 | }, 361 | "file_extension": ".py", 362 | "mimetype": "text/x-python", 363 | "name": "python", 364 | "nbconvert_exporter": "python", 365 | "pygments_lexer": "ipython3", 366 | "version": "3.8.16" 367 | } 368 | }, 369 | "nbformat": 4, 370 | "nbformat_minor": 5 371 | } 372 | -------------------------------------------------------------------------------- /notebooks/visualization_lifting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import cv2\n", 10 | "import os\n", 11 | "import glob\n", 12 | "\n", 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "\n", 16 | "from pathlib import Path\n", 17 | "from typing import List, Tuple\n", 18 | "from PIL import Image\n", 19 | "\n", 20 | "from labelmaker.consensus import VALID_LABEL_SPACES\n", 21 | "from labelmaker.label_data import get_nyu40, get_scannet200, get_wordnet, get_ade150" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "SCENE_ID=47333462\n", 31 | "WORKSPACE_DIR = Path(f'/home/weders/scratch/scratch/LabelMaker/arkitscenes/{SCENE_ID}')\n", 32 | "# WORKSPACE_DIR = '/scratch/quanta/Experiments/LabelMaker/arkit_test_scene'\n", 33 | "LIFTING_DATA = WORKSPACE_DIR / 'intermediate/sdfstudio_preprocessing'\n" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "files = glob.glob(os.path.join(LIFTING_DATA, '*_rgb.png'))\n", 43 | "files = sorted(files, key=lambda x: int(x.split('/')[-1].split('_')[0]))" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "for idx, f in enumerate(files):\n", 53 | "    if idx > 200:\n", 54 | "        break\n", 55 | "    
\n", 56 | " \n", 57 | " image = Image.open(f)\n", 58 | " image = np.asarray(image)\n", 59 | " \n", 60 | " depth = np.load(f.replace('_rgb.png', '_sensor_depth.npy'))\n", 61 | "\n", 62 | " fig, ax = plt.subplots(1, 2)\n", 63 | " ax[0].imshow(image)\n", 64 | " ax[0].set_xticks([])\n", 65 | " ax[0].set_yticks([])\n", 66 | " ax[1].imshow(depth)\n", 67 | " ax[1].set_xticks([])\n", 68 | " ax[1].set_yticks([])\n", 69 | " plt.tight_layout()\n", 70 | " plt.show()" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "labelmaker", 84 | "language": "python", 85 | "name": "python3" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.9.18" 98 | } 99 | }, 100 | "nbformat": 4, 101 | "nbformat_minor": 2 102 | } 103 | -------------------------------------------------------------------------------- /notebooks/visualize_arkitscenes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%load_ext autoreload\n", 12 | "%autoreload 2\n", 13 | "\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import numpy as np\n", 16 | "import pandas as pd\n", 17 | "import cv2\n", 18 | "import os\n", 19 | "import sys\n", 20 | "sys.path.append('../..')\n", 21 | "\n", 22 | "from PIL import Image\n", 23 | "from segmentation_tools.visualisation import draw_sem_seg, VisImage\n", 24 | "from segmentation_tools.label_data import get_ade150, get_nyu40, get_scannet_all, get_wordnet, get_replica\n", 25 | "from segmentation_tools.label_mappings import set_ids_according_to_names, \\\n", 26 | " match_scannet_ade150, \\\n", 27 | " match_scannet_nyu40, \\\n", 28 | " match_ade150_nyu40, \\\n", 29 | " match_scannet_wordnet199, \\\n", 30 | " LabelMatcher, \\\n", 31 | " set_colors\n", 32 | "from pathlib import Path\n", 33 | "import skimage\n", 34 | "\n", 35 | "# scene 42445991\n", 36 | "# frames:\n", 37 | "# sdfstudio_path: pred_sdfstudio_2023-08-02_230529\n", 38 | "\n", 39 | "# scene 42897688\n", 40 | "# frames: 0, 74\n", 41 | "# sdfstudio_path: pred_sdfstudio_2023-08-02_230607\n", 42 | "\n", 43 | "scene = 42897688\n", 44 | "key = 50\n", 45 | "sdfstudio_path = 'pred_sdfstudio_2023-08-02_230607'\n", 46 | "\n", 47 | "scene_dir = Path(f'/home/weders/scratch/scratch/scannetter/arkit/raw/Validation/{scene}')\n", 48 | "img = cv2.imread(f'{scene_dir}/color/{key}.jpg')[..., ::-1]\n", 49 | "\n", 50 | "# label = cv2.imread(f'{scene_dir}/label-filt/{key}.png',\n", 51 | "# cv2.IMREAD_UNCHANGED)\n", 52 | "# our_label = cv2.imread(f'{scene_dir}/label_agile3d/{key}.png', cv2.IMREAD_UNCHANGED)\n", 53 | "\n", 54 | "consensus = cv2.imread(f'{scene_dir}/pred_consensus_noscannet_new/{key}.png',\n", 55 | " cv2.IMREAD_UNCHANGED)\n", 56 | "# sdfstudio = cv2.imread(f'{scene_dir}/{sdfstudio_path}/{(key):05d}.png',\n", 57 | "# cv2.IMREAD_UNCHANGED)\n", 58 | "\n", 59 | "\n", 60 | "\n", 61 | "classid2wn = {x['id']: x['name'] for x in get_wordnet(label_key='wn199-merged-v2')}\n", 62 | "scannet_id_to_name = {x['id'] : x['name'] for x in get_scannet_all()}\n", 63 | "scannet_id_to_color = 
{x['id'] : x['color'] for x in get_scannet_all()}\n", 64 | "\n", 65 | "\n", 66 | "keys = sorted(\n", 67 | " int(x.name.split('.')[0])\n", 68 | " for x in (scene_dir / 'color_old').iterdir())\n", 69 | "label_template = 'label-filt/{k}.png'\n", 70 | "label_space = 'id'\n", 71 | "\n", 72 | "\n", 73 | "vis_labelmaker = VisImage(img)\n", 74 | "draw_sem_seg(sdfstudio, vis_labelmaker, \n", 75 | " classes=[x['name'] for x in sorted(get_wordnet(), key=lambda x: x['id'])],\n", 76 | " colors=[x['color'] for x in sorted(get_wordnet(), key=lambda x: x['id'])])\n", 77 | "\n", 78 | "vis_consensus = VisImage(img)\n", 79 | "draw_sem_seg(consensus, vis_consensus, \n", 80 | " classes=[x['name'] for x in sorted(get_wordnet(), key=lambda x: x['id'])],\n", 81 | " colors=[x['color'] for x in sorted(get_wordnet(), key=lambda x: x['id'])])\n", 82 | "\n", 83 | "\n", 84 | "_, plots = plt.subplots(1, 2, figsize=(20, 10))\n", 85 | "\n", 86 | "plots[0].imshow(img)\n", 87 | "plots[0].axis('off')\n", 88 | "plots[0].set_title('LabelMaker3D')\n", 89 | "\n", 90 | "\n", 91 | "# plots[1].imshow(vis_labelmaker.get_image())\n", 92 | "# plots[1].axis('off')\n", 93 | "# plots[1].set_title('LabelMaker3D')\n", 94 | "\n", 95 | "plots[1].imshow(vis_consensus.get_image())\n", 96 | "plots[1].axis('off')\n", 97 | "plots[1].set_title('Consensus')\n", 98 | "plt.show()\n", 99 | "\n", 100 | "\n", 101 | "plt.figure(figsize=(20, 10))\n", 102 | "plt.imshow(img)\n", 103 | "plt.gca().axis('off')\n", 104 | "plt.show()\n", 105 | "\n", 106 | "# plt.figure(figsize=(20, 10))\n", 107 | "# plt.imshow(vis_labelmaker.get_image())\n", 108 | "# plt.gca().axis('off')\n", 109 | "# plt.show()\n", 110 | "\n", 111 | "plt.figure(figsize=(20, 10))\n", 112 | "plt.imshow(vis_consensus.get_image())\n", 113 | "plt.gca().axis('off')\n", 114 | "plt.show()\n", 115 | "\n" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "scrolled": false 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "# iterate over all frames for scene\n", 127 | "\n", 128 | "scene = '42897688'\n", 129 | "scene_dir = Path(\n", 130 | " f'/home/weders/scratch/scratch/scannetter/arkit/raw/Validation/{scene}')\n", 131 | "# sdfstudio_pred = 'pred_sdfstudio_2023-08-02_230529'\n", 132 | "# sdfstudio_pred = 'pred_sdfstudio_2023-08-02_230607'\n", 133 | "keys = sorted([\n", 134 | " int(k.split('/')[-1].replace('.jpg', ''))\n", 135 | " for k in os.listdir((scene_dir / 'color'))\n", 136 | "])\n", 137 | "# keys_rendering = sorted([int(k.split('/')[-1].replace('.png', '')) for k in os.listdir((scene_dir / sdfstudio_pred))])\n", 138 | "\n", 139 | "rgb_keys = sorted([\n", 140 | " round(float(k.split('_')[-1].replace('.png', '')), 3)\n", 141 | " for k in os.listdir((scene_dir / 'vga_wide'))\n", 142 | "])\n", 143 | "depth_keys = sorted([\n", 144 | " round(float(k.split('_')[-1].replace('.png', '')), 3)\n", 145 | " for k in os.listdir((scene_dir / 'highres_depth'))\n", 146 | "])\n", 147 | "print(depth_keys)\n", 148 | "\n", 149 | "\n", 150 | "def get_rgb_key(depth_key):\n", 151 | " for cj, k in enumerate(rgb_keys):\n", 152 | " if k >= depth_key:\n", 153 | " key_before = rgb_keys[cj - 1]\n", 154 | " key_after = k\n", 155 | " break\n", 156 | "\n", 157 | " delta_before = abs(key_before - depth_key)\n", 158 | " delta_after = abs(key_after - depth_key)\n", 159 | "\n", 160 | " if delta_before >= delta_after:\n", 161 | " return key_after\n", 162 | " else:\n", 163 | " return key_before\n", 164 | "\n", 165 | "\n", 166 | "for idx, key in enumerate(keys):\n", 167 | " 
print(idx, key)\n", 168 | "\n", 169 | " # rgb_key = get_rgb_key(depth_keys[idx])\n", 170 | " # print(round(rgb_key, 3))\n", 171 | " if idx % 2 != 0:\n", 172 | " continue\n", 173 | "\n", 174 | "# # print(f'{scene}_{rgb_key:.3f}.jpg')\n", 175 | "# key_rendering = keys_rendering[idx // 2]\n", 176 | "\n", 177 | " img = cv2.imread(f'{scene_dir}/color/{key}.jpg')[..., ::-1]\n", 178 | " # img = cv2.imread(f'{scene_dir}/vga_wide/{scene}_{rgb_key:.3f}.png')[..., ::-1]\n", 179 | "\n", 180 | " # label = cv2.imread(f'{scene_dir}/label-filt/{key}.png',\n", 181 | " # cv2.IMREAD_UNCHANGED)\n", 182 | " # our_label = cv2.imread(f'{scene_dir}/label_agile3d/{key}.png', cv2.IMREAD_UNCHANGED)\n", 183 | "\n", 184 | " consensus = cv2.imread(f'{scene_dir}/pred_consensus_noscannet_new/{key}.png',\n", 185 | " cv2.IMREAD_UNCHANGED)\n", 186 | " # sdfstudio = cv2.imread(f'{scene_dir}/{sdfstudio_pred}/{(key):05d}.png',\n", 187 | " # cv2.IMREAD_UNCHANGED)\n", 188 | "\n", 189 | " classid2wn = {x['id']: x['name'] for x in get_wordnet()}\n", 190 | " scannet_id_to_name = {x['id']: x['name'] for x in get_scannet_all()}\n", 191 | " scannet_id_to_color = {x['id']: x['color'] for x in get_scannet_all()}\n", 192 | "\n", 193 | " classes = [\n", 194 | " x['name'] for x in sorted(get_wordnet(label_key='wn199-merged-v2'),\n", 195 | " key=lambda x: x['id'])\n", 196 | " ]\n", 197 | "\n", 198 | " keys = sorted(\n", 199 | " int(x.name.split('.')[0]) for x in (scene_dir / 'color').iterdir())\n", 200 | " label_template = 'label-filt/{k}.png'\n", 201 | " label_space = 'id'\n", 202 | " # plt.figure(figsize=(20, 10))\n", 203 | " # plt.imshow(img)\n", 204 | " # plt.gca().axis('off')\n", 205 | " # plt.show()\n", 206 | " _, plots = plt.subplots(1, 3, figsize=(40, 10))\n", 207 | "\n", 208 | " plots[0].imshow(img)\n", 209 | " plots[0].axis('off')\n", 210 | " plots[0].set_title('Image')\n", 211 | "\n", 212 | " # vis = VisImage(img)\n", 213 | " # draw_sem_seg(our_label, vis,\n", 214 | " # classes={x['id']: x['name'] for x in sorted(get_wordnet(label_key='wn199-merged-v2'), key=lambda x: x['id'])},\n", 215 | " # colors=[x['color'] for x in sorted(get_wordnet(label_key='wn199-merged-v2'), key=lambda x: x['id'])])\n", 216 | " # plots[1].imshow(vis.get_image())\n", 217 | " # plots[1].axis('off')\n", 218 | " # plots[1].set_title('Ground Truth')\n", 219 | " # vis = VisImage(img)\n", 220 | " # draw_sem_seg(label, vis,\n", 221 | " # classes=[scannet_id_to_name[i] if i in scannet_id_to_name else 'unknown' for i in range(2000)],\n", 222 | " # colors=[scannet_id_to_color[i] if i in scannet_id_to_name else [0, 0, 0] for i in range(2000)],)\n", 223 | " # plots[2].imshow(vis.get_image())\n", 224 | " # plots[2].axis('off')\n", 225 | " # plots[2].set_title('ScanNet')\n", 226 | " vis = VisImage(img)\n", 227 | " draw_sem_seg(consensus,\n", 228 | " vis,\n", 229 | " classes=[\n", 230 | " x['name']\n", 231 | " for x in sorted(get_wordnet(label_key='wn199-merged-v2'),\n", 232 | " key=lambda x: x['id'])\n", 233 | " ],\n", 234 | " colors=[\n", 235 | " x['color']\n", 236 | " for x in sorted(get_wordnet(label_key='wn199-merged-v2'),\n", 237 | " key=lambda x: x['id'])\n", 238 | " ])\n", 239 | " plots[1].imshow(vis.get_image())\n", 240 | " plots[1].axis('off')\n", 241 | " plots[1].set_title('LabelMaker3D')\n", 242 | " vis = VisImage(img)\n", 243 | " # draw_sem_seg(consensus, vis,\n", 244 | " # classes=[x['name'] for x in sorted(get_wordnet(label_key='wn199-merged-v2'), key=lambda x: x['id'])],\n", 245 | " # colors=[x['color'] for x in 
sorted(get_wordnet(label_key='wn199-merged-v2'), key=lambda x: x['id'])])\n", 246 | " #plots[2].imshow(vis.get_image())\n", 247 | " #plots[2].axis('off')\n", 248 | " #plots[2].set_title('Consensus')\n", 249 | " #plt.tight_layout()\n", 250 | " plt.show()\n" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": { 257 | "scrolled": true 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "# colorize labelmaker arkit pointcloud\n", 262 | "# iterate over all frames for scene\n", 263 | "\n", 264 | "import open3d as o3d\n", 265 | "\n", 266 | "scenes = ['42445991', '42446527', '42897688']\n", 267 | "os.makedirs('colored_meshes', exist_ok=True)\n", 268 | "for sc in scenes:\n", 269 | " scene_dir = Path(\n", 270 | " f'/home/weders/scratch/scratch/scannetter/arkit/raw/Validation/{sc}')\n", 271 | "\n", 272 | " label_file = next(\n", 273 | " iter([f for f in scene_dir.iterdir() if 'labels_3d' in str(f)]))\n", 274 | " mesh_file = next(iter([f for f in scene_dir.iterdir() if '.ply' in str(f)]))\n", 275 | " mesh = o3d.io.read_triangle_mesh(str(mesh_file))\n", 276 | "\n", 277 | " labels_3d = np.loadtxt(label_file)\n", 278 | " mesh_colors = np.asarray(mesh.vertex_colors)\n", 279 | " colors = np.zeros_like(mesh_colors)\n", 280 | "\n", 281 | " id_to_color = {\n", 282 | " x['id']: x['color'] for x in sorted(get_wordnet(), key=lambda x: x['id'])\n", 283 | " }\n", 284 | "\n", 285 | " for l in np.unique(labels_3d):\n", 286 | " colors[labels_3d == l] = id_to_color[int(l)]\n", 287 | "\n", 288 | " colors = colors / 255.\n", 289 | " mesh.vertex_colors = o3d.utility.Vector3dVector(colors)\n", 290 | " o3d.io.write_triangle_mesh(f'colored_meshes/{sc}_label_color.ply', mesh)\n" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [ 299 | "# colorize mask3d meshes\n", 300 | "scenes = ['42445991', '42446527', '42897688']\n", 301 | "\n", 302 | "for sc in scenes:\n", 303 | " scene_dir = Path(\n", 304 | " f'/home/weders/scratch/scratch/scannetter/arkit/raw/Validation/{sc}')\n", 305 | " mask3d_dir = scene_dir / 'pred_mask3d'\n", 306 | "\n", 307 | " pred_file = next(\n", 308 | " iter([f for f in mask3d_dir.iterdir() if '3dod_mesh.txt' in str(f)]))\n", 309 | "\n", 310 | " mask_paths = []\n", 311 | " confidences = []\n", 312 | " classes = []\n", 313 | "\n", 314 | " with open(pred_file, 'r') as file:\n", 315 | " for line in file:\n", 316 | " m_file, l, c = line.rstrip().split(' ')\n", 317 | " mask_paths.append(m_file)\n", 318 | " classes.append(int(l))\n", 319 | " confidences.append(float(c))\n", 320 | "\n", 321 | " sorting_indices = np.argsort(np.asarray(confidences))[::-1]\n", 322 | "\n", 323 | " mesh_file = next(iter([f for f in scene_dir.iterdir() if '.ply' in str(f)]))\n", 324 | " mesh = o3d.io.read_triangle_mesh(str(mesh_file))\n", 325 | " mesh_colors = np.asarray(mesh.vertex_colors)\n", 326 | " colors = np.zeros_like(mesh_colors)\n", 327 | " colored_mask = np.zeros_like(colors[:, 0])\n", 328 | " id_to_color = {\n", 329 | " x['id']: x['color'] for x in sorted(get_wordnet(), key=lambda x: x['id'])\n", 330 | " }\n", 331 | "\n", 332 | " for idx in sorting_indices:\n", 333 | " m = np.loadtxt(mask3d_dir / mask_paths[idx])\n", 334 | " l = classes[idx]\n", 335 | "\n", 336 | " m = (m == 1) & (colored_mask == 0)\n", 337 | " colored_mask[m] = 1\n", 338 | "\n", 339 | " colors[m] = scannet_id_to_color[l]\n", 340 | "\n", 341 | " colors = colors / 255.\n", 342 | " mesh.vertex_colors = 
o3d.utility.Vector3dVector(colors)\n", 343 | " o3d.io.write_triangle_mesh(f'colored_meshes/{sc}_mask3d_color.ply', mesh)\n" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [] 352 | } 353 | ], 354 | "metadata": { 355 | "kernelspec": { 356 | "display_name": "scannetter", 357 | "language": "python", 358 | "name": "scannetter" 359 | }, 360 | "language_info": { 361 | "codemirror_mode": { 362 | "name": "ipython", 363 | "version": 3 364 | }, 365 | "file_extension": ".py", 366 | "mimetype": "text/x-python", 367 | "name": "python", 368 | "nbconvert_exporter": "python", 369 | "pygments_lexer": "ipython3", 370 | "version": "3.8.16" 371 | }, 372 | "vscode": { 373 | "interpreter": { 374 | "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" 375 | } 376 | } 377 | }, 378 | "nbformat": 4, 379 | "nbformat_minor": 2 380 | } 381 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/LabelMaker/44ff92d42ae9b0579f016bb7bc5cd4bd09a613a3/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/arkitscenes2labelmaker.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import os 5 | import shutil 6 | import sys 7 | from os.path import abspath, dirname, exists, join 8 | 9 | import cv2 10 | import gin 11 | import numpy as np 12 | from PIL import Image 13 | from scipy.interpolate import CubicSpline 14 | from scipy.spatial.transform import Rotation, RotationSpline 15 | from tqdm import trange 16 | 17 | sys.path.append(abspath(join(dirname(__file__), '..'))) 18 | from utils_3d import fuse_mesh 19 | 20 | 21 | def get_closest_timestamp(reference_timestamps: np.ndarray, 22 | target_timestamps: np.ndarray): 23 | """ 24 | This function returns: 25 | min_time_delta: for each time in reference_timetamps, the minimum time difference (dt) w.r.t target_timestamps 26 | target_index: the index of element in target_timestamps that gives minimum dt 27 | minimum_margin: the time difference of minimum timestamps and second minimum, used for checking uniqueness of minima 28 | """ 29 | time_delta = np.abs( 30 | reference_timestamps.reshape(-1, 1) - target_timestamps.reshape(1, -1)) 31 | 32 | min_two_idx = time_delta.argsort(axis=1)[:, :2] 33 | target_index = min_two_idx[:, 0] 34 | min_time_delta = time_delta[np.arange(target_index.shape[0]), target_index] 35 | minimum_margin = time_delta[np.arange(target_index.shape[0]), 36 | min_two_idx[:, 1]] - min_time_delta 37 | 38 | return min_time_delta, target_index, minimum_margin 39 | 40 | 41 | def load_intrinsics(file): 42 | # as define here https://github.com/apple/ARKitScenes/blob/951af73d20406acf608061c16774f770c61b1405/threedod/benchmark_scripts/utils/tenFpsDataLoader.py#L46 43 | w, h, fx, fy, hw, hh = np.loadtxt(file) 44 | return np.asarray([[fx, 0, hw], [0, fy, hh], [0, 0, 1]]) 45 | 46 | 47 | @gin.configurable 48 | def process_arkit( 49 | scan_dir: str, 50 | target_dir: str, 51 | sdf_trunc: float, 52 | voxel_length: float, 53 | depth_trunc: float, 54 | ): 55 | 56 | logger = logging.getLogger('ARKitProcess') 57 | logger.setLevel(logging.DEBUG) 58 | consoleHeader = logging.StreamHandler() 59 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 
60 | consoleHeader.setFormatter(formatter) 61 | logger.addHandler(consoleHeader) 62 | 63 | logger.info( 64 | "Processing ARKitScene scan to LabelMaker format, from {} to {}...". 65 | format(scan_dir, target_dir)) 66 | 67 | color_dir = join(scan_dir, 'vga_wide') 68 | intrinsic_dir = join(scan_dir, 'vga_wide_intrinsics') 69 | 70 | depth_dir = join(scan_dir, 'lowres_depth') 71 | confidence_dir = join(scan_dir, 'confidence') 72 | 73 | trajectory_file = join(scan_dir, 'lowres_wide.traj') 74 | 75 | assert exists(color_dir), "vga_wide attribute not downloaded!" 76 | assert exists(depth_dir), "lowres_depth attribute not downloaded!" 77 | assert exists(confidence_dir), "confidence attribute not downloaded!" 78 | assert exists(intrinsic_dir), "vga_wide_intrinsics attribute not downloaded!" 79 | assert exists(trajectory_file), "lowres_wide.traj attribute not downloaded!" 80 | 81 | color_file_list = os.listdir(color_dir) 82 | depth_file_list = os.listdir(depth_dir) 83 | confidence_file_list = os.listdir(confidence_dir) 84 | intr_file_list = os.listdir(intrinsic_dir) 85 | 86 | # ts stands for timestamps, inv stands for inverse 87 | color_ts, color_inv = np.unique( 88 | np.array([ 89 | float(name.split('_')[1].split('.png')[0]) for name in color_file_list 90 | ]), 91 | return_index=True, 92 | ) 93 | depth_ts, depth_inv = np.unique( 94 | np.array([ 95 | float(name.split('_')[1].split('.png')[0]) for name in depth_file_list 96 | ]), 97 | return_index=True, 98 | ) 99 | confidence_ts, confidence_inv = np.unique( 100 | np.array([ 101 | float(name.split('_')[1].split('.png')[0]) 102 | for name in confidence_file_list 103 | ]), 104 | return_index=True, 105 | ) 106 | intrinsic_ts, intrinsic_inv = np.unique( 107 | np.array([ 108 | float(name.split('_')[1].split('.pincam')[0]) 109 | for name in intr_file_list 110 | ]), 111 | return_index=True, 112 | ) 113 | 114 | # load trajactory 115 | trajectory_data = np.loadtxt(trajectory_file, delimiter=' ') 116 | trajectory_ts = trajectory_data[:, 0] # already sorted 117 | 118 | # synchronization 119 | logger.info("Synchronizing timestamps...") 120 | dt_max = 1 / 60 / 2 # half of frame time step 121 | 122 | # we compare all with respect to color, as color folder is sparser 123 | # if the matched timestamp and second matched timestamp have difference less than 1 milisecond, 124 | # we regard this case as the matching is not unique, and throw a warning. 125 | margin_threshold = 1e-3 126 | depth_dt, depth_idx, depth_margin = get_closest_timestamp(color_ts, depth_ts) 127 | if depth_margin.min() < margin_threshold: 128 | logger.warn( 129 | "Found multiple color timestamps matching in timestamps: {}".format( 130 | color_ts[depth_margin < margin_threshold].tolist())) 131 | 132 | confidence_dt, confidence_idx, confidence_margin = get_closest_timestamp( 133 | color_ts, confidence_ts) 134 | if confidence_margin.min() < margin_threshold: 135 | logger.warn( 136 | "Found multiple confidence timestamps matching in timestamps: {}". 
137 | format(color_ts[confidence_margin < margin_threshold].tolist())) 138 | 139 | intrinsic_dt, intrinsic_idx, intrinsic_margin = get_closest_timestamp( 140 | color_ts, intrinsic_ts) 141 | if intrinsic_margin.min() < margin_threshold: 142 | logger.warn( 143 | "Found multiple intrinsic timestamps matching in timestamps: {}".format( 144 | color_ts[intrinsic_margin < margin_threshold].tolist())) 145 | 146 | color_idx = np.arange(color_ts.shape[0]) 147 | 148 | # we also want to interpolate pose, so we have to filter out times outside trajectory timestamp 149 | timestamp_filter = (depth_dt < dt_max) * (confidence_dt < dt_max) * ( 150 | intrinsic_dt < dt_max) * (color_ts >= trajectory_ts.min()) * ( 151 | color_ts <= trajectory_ts.max()) 152 | 153 | timestamp = color_ts[timestamp_filter] 154 | logger.info("Synchronization finished!") 155 | 156 | if depth_dt[timestamp_filter].max( 157 | ) > 1e-8 or confidence_dt[timestamp_filter].max( 158 | ) > 1e-8 or intrinsic_dt[timestamp_filter].max() > 1e-8: 159 | 160 | depth_unmatched = depth_dt[timestamp_filter].max() > 1e-8 161 | intrinsic_unmatched = intrinsic_dt[timestamp_filter].max() > 1e-8 162 | confidence_unmatched = confidence_dt[timestamp_filter].max() > 1e-8 163 | 164 | unmatched_timestamp = timestamp[depth_unmatched + intrinsic_unmatched + 165 | confidence_unmatched].tolist() 166 | logger.info("There are not perfectly matched timestamps: {}".format( 167 | unmatched_timestamp)) 168 | 169 | # interpolate pose 170 | logger.info("Interpolating poses...") 171 | rots = Rotation.from_rotvec(trajectory_data[:, 1:4]) 172 | rot_spline = RotationSpline(trajectory_ts, rots) 173 | 174 | x_spline = CubicSpline(trajectory_ts, trajectory_data[:, 4]) 175 | y_spline = CubicSpline(trajectory_ts, trajectory_data[:, 5]) 176 | z_spline = CubicSpline(trajectory_ts, trajectory_data[:, 6]) 177 | 178 | num_frame = timestamp_filter.sum() 179 | 180 | extrinsics_mat = np.zeros(shape=(num_frame, 4, 4)) 181 | extrinsics_mat[:, 3, 3] = 1.0 182 | extrinsics_mat[:, :3, :3] = rot_spline(timestamp).as_matrix() 183 | extrinsics_mat[:, :3, 3] = np.stack( 184 | [x_spline(timestamp), 185 | y_spline(timestamp), 186 | z_spline(timestamp)], axis=1) 187 | pose_mat = np.linalg.inv(extrinsics_mat) 188 | logger.info("Pose interpolation finished!") 189 | 190 | # get correspondence to original file 191 | rows = [] 192 | for i in range(num_frame): 193 | frame_id = '{:06d}'.format(i) 194 | color_pth = color_file_list[color_inv[color_idx[timestamp_filter][i]]] 195 | depth_pth = depth_file_list[depth_inv[depth_idx[timestamp_filter][i]]] 196 | confdc_pth = confidence_file_list[confidence_inv[ 197 | confidence_idx[timestamp_filter][i]]] 198 | intr_pth = intr_file_list[intrinsic_inv[intrinsic_idx[timestamp_filter][i]]] 199 | rows.append([frame_id, color_pth, depth_pth, confdc_pth, intr_pth]) 200 | 201 | # write to new file 202 | shutil.rmtree(target_dir, ignore_errors=True) 203 | os.makedirs(target_dir, exist_ok=True) 204 | os.makedirs(join(target_dir, 'color'), exist_ok=True) 205 | os.makedirs(join(target_dir, 'depth'), exist_ok=True) 206 | os.makedirs(join(target_dir, 'intrinsic'), exist_ok=True) 207 | os.makedirs(join(target_dir, 'pose'), exist_ok=True) 208 | 209 | # first write correspondence list 210 | fields = [ 211 | 'frame_id', 'original_color_path', 'original_depth_path', 212 | 'original_confidence_path', 'original_intrinsic_path' 213 | ] 214 | correspondence_list = [dict(zip(fields, row)) for row in rows] 215 | json_object = json.dumps(correspondence_list, indent=4) 216 | with 
open(join(target_dir, 'correspondence.json'), 'w') as jsonfile: 217 | jsonfile.write(json_object) 218 | logger.info("Saved old and new files correspondence to {}.".format( 219 | join(target_dir, 'correspondence.json'))) 220 | 221 | logger.info("Transferring files...") 222 | for idx in trange(num_frame): 223 | frame_id, color_pth, depth_pth, confdc_pth, intr_pth = rows[idx] 224 | 225 | # save color 226 | tgt_color_pth = join(target_dir, 'color', 227 | frame_id + '.jpg') # png -> jpg, compressed 228 | color_img = Image.open(join(color_dir, color_pth)) 229 | color_img.save(tgt_color_pth) 230 | h, w, _ = np.asarray(color_img).shape 231 | 232 | # save pose 233 | tgt_pose_pth = join(target_dir, 'pose', frame_id + '.txt') 234 | np.savetxt(tgt_pose_pth, pose_mat[idx]) 235 | 236 | # process and save intr 237 | tgt_intrinsic_pth = join(target_dir, 'intrinsic', frame_id + '.txt') 238 | np.savetxt(tgt_intrinsic_pth, load_intrinsics(join(intrinsic_dir, 239 | intr_pth))) 240 | 241 | # process and save depth 242 | depth = cv2.imread(join(depth_dir, depth_pth), cv2.IMREAD_UNCHANGED) 243 | confdc = cv2.imread(join(confidence_dir, confdc_pth), cv2.IMREAD_UNCHANGED) 244 | 245 | depth[confdc < 2] = 0 246 | depth = cv2.resize(depth, (w, h), interpolation=cv2.INTER_NEAREST) 247 | 248 | tgt_depth_pth = join(target_dir, 'depth', frame_id + '.png') 249 | cv2.imwrite(tgt_depth_pth, depth) 250 | 251 | logger.info("File transfer finished!") 252 | 253 | logger.info("Fusing RGBD images into TSDF volume...") 254 | fuse_mesh( 255 | scan_dir=target_dir, 256 | sdf_trunc=sdf_trunc, 257 | voxel_length=voxel_length, 258 | depth_trunc=depth_trunc, 259 | depth_scale=1000.0, 260 | ) # depth_scale is fixed in ARKitScenes, so it is not exposed as a CLI argument 261 | logger.info("Fusion finished! Mesh saved to {}".format( 262 | join(target_dir, 'mesh.ply'))) 263 | 264 | 265 | def arg_parser(): 266 | parser = argparse.ArgumentParser() 267 | parser.add_argument("--scan_dir", type=str) 268 | parser.add_argument("--target_dir", type=str) 269 | parser.add_argument("--sdf_trunc", type=float, default=0.04) 270 | parser.add_argument("--voxel_length", type=float, default=0.008) 271 | parser.add_argument("--depth_trunc", type=float, default=3.0) 272 | parser.add_argument('--config', help='Name of config file') 273 | 274 | return parser.parse_args() 275 | 276 | 277 | if __name__ == "__main__": 278 | args = arg_parser() 279 | if args.config is not None: 280 | gin.parse_config_file(args.config) 281 | process_arkit( 282 | scan_dir=args.scan_dir, 283 | target_dir=args.target_dir, 284 | sdf_trunc=args.sdf_trunc, 285 | voxel_length=args.voxel_length, 286 | depth_trunc=args.depth_trunc, 287 | ) 288 | -------------------------------------------------------------------------------- /scripts/pipeline.sh: -------------------------------------------------------------------------------- 1 | # this script runs the whole LabelMaker pipeline on a preprocessed workspace: all individual models, the consensus step, and then 2D and 3D lifting (preprocessing from ARKitScenes into our custom format is handled by pipeline_arkit.sh). 
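# example invocation (illustrative path; the workspace must already be in our
# custom format, i.e. produced by one of the scripts/*2labelmaker.py converters,
# and contain color/, depth/, intrinsic/, pose/ and mesh.ply):
#   bash scripts/pipeline.sh /path/to/workspace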
2 | # downloading the raw data is not included 3 | # this bash file is not written for other datasets; porting it requires further modification 4 | # it also does not expose any tuning configuration yet 5 | env_name=labelmaker 6 | eval "$(conda shell.bash hook)" 7 | conda activate $env_name 8 | 9 | conda_home="$(conda info | grep "active env location : " | cut -d ":" -f2-)" 10 | conda_home="${conda_home#"${conda_home%%[![:space:]]*}"}" 11 | 12 | echo $conda_home 13 | 14 | which python 15 | which pip 16 | which nvcc 17 | 18 | # add cuda compiler to path 19 | export CUDA_HOST_COMPILER="$conda_home/bin/gcc" 20 | export CUDA_PATH="$conda_home" 21 | export CUDA_HOME=$CUDA_PATH 22 | export LD_LIBRARY_PATH=$conda_home/lib:$LD_LIBRARY_PATH 23 | export LIBRARY_PATH="$conda_home/lib/stubs:$LIBRARY_PATH" 24 | export TCNN_CUDA_ARCHITECTURES=75 25 | 26 | if [ -z "$1" ]; then 27 | echo "No target directory specified!" 28 | exit 1 29 | else 30 | target_dir=$1 31 | fi 32 | 33 | # extract mask3D 34 | python models/mask3d_inst.py \ 35 | --seed 42 \ 36 | --workspace ${target_dir} 37 | 38 | python models/mask3d_inst.py \ 39 | --seed 43 \ 40 | --output intermediate/scannet200_mask3d_2 \ 41 | --workspace ${target_dir} 42 | 43 | # extract omnidata normal 44 | python models/omnidata_normal.py \ 45 | --workspace ${target_dir} 46 | 47 | python models/omnidata_depth.py \ 48 | --workspace ${target_dir} 49 | 50 | # extract hha depth; a higher --n_jobs may lead to failure 51 | python models/hha_depth.py \ 52 | --n_jobs 4 \ 53 | --workspace ${target_dir} 54 | 55 | # cmx 56 | python models/cmx.py \ 57 | --workspace ${target_dir} 58 | 59 | python models/cmx.py --flip \ 60 | --workspace ${target_dir} 61 | 62 | # internimage 63 | python models/internimage.py \ 64 | --workspace ${target_dir} 65 | 66 | python models/internimage.py --flip \ 67 | --workspace ${target_dir} 68 | 69 | # grounded sam 70 | python models/grounded_sam.py \ 71 | --workspace ${target_dir} 72 | 73 | python models/grounded_sam.py --flip \ 74 | --workspace ${target_dir} 75 | 76 | # ovseg 77 | python models/ovseg.py \ 78 | --workspace ${target_dir} 79 | 80 | python models/ovseg.py --flip \ 81 | --workspace ${target_dir} 82 | 83 | # consensus 84 | python labelmaker/consensus.py \ 85 | --workspace ${target_dir} --n_jobs 8 86 | 87 | # point lifting 88 | python labelmaker/lifting_3d/lifting_points.py \ 89 | --workspace ${target_dir} 90 | 91 | conda deactivate 92 | 93 | # 3D lifting, mesh extraction, and rendering 94 | bash labelmaker/lifting_3d/lifting.sh ${target_dir} 95 | -------------------------------------------------------------------------------- /scripts/pipeline_arkit.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | env_name=labelmaker 4 | eval "$(conda shell.bash hook)" 5 | conda activate $env_name 6 | 7 | if [ -z "$1" ]; then 8 | echo "No ARKitScenes directory specified!" 9 | exit 1 10 | else 11 | original_dir=$1 12 | fi 13 | 14 | if [ -z "$2" ]; then 15 | echo "No target directory specified!" 
16 | exit 1 17 | else 18 | target_dir=$2 19 | fi 20 | 21 | # preprocessing 22 | python scripts/arkitscenes2labelmaker.py \ 23 | --scan_dir ${original_dir} \ 24 | --target_dir ${target_dir} 25 | 26 | # now run pipeline.sh 27 | scripts/pipeline.sh ${target_dir} 28 | 29 | -------------------------------------------------------------------------------- /scripts/replica2labelmaker.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import math 5 | import os 6 | import shutil 7 | import sys 8 | from os.path import abspath, dirname, exists, join 9 | from pathlib import Path 10 | 11 | import cv2 12 | import gin 13 | import numpy as np 14 | from PIL import Image 15 | from scipy.interpolate import CubicSpline 16 | from scipy.spatial.transform import Rotation, RotationSpline 17 | from tqdm import trange 18 | 19 | sys.path.append(abspath(join(dirname(__file__), '..'))) 20 | from utils_3d import fuse_mesh 21 | 22 | 23 | def get_intrinsics(H: int, W: int): 24 | # defined here: https://github.com/Harry-Zhi/semantic_nerf/blob/b79f9c3640b62350e9c167a66c273c2121428ce1/SSR/training/trainer.py#L55C1-L55C1 25 | # replica use a 90 degree fov camera 26 | 27 | hfov = 90 28 | 29 | fx = W / 2.0 / math.tan(math.radians(hfov / 2.0)) 30 | fy = fx 31 | cx = (W - 1.0) / 2.0 32 | cy = (H - 1.0) / 2.0 33 | 34 | return np.asarray([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) 35 | 36 | 37 | @gin.configurable 38 | def process_replica( 39 | scan_dir: str, 40 | target_dir: str, 41 | sdf_trunc: float, 42 | voxel_length: float, 43 | depth_trunc: float, 44 | ): 45 | logger = logging.getLogger('Replica Process') 46 | logger.setLevel(logging.DEBUG) 47 | consoleHeader = logging.StreamHandler() 48 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 49 | consoleHeader.setFormatter(formatter) 50 | logger.addHandler(consoleHeader) 51 | 52 | logger.info( 53 | "Processing Replica scan to LabelMaker format, from {} to {}...".format( 54 | scan_dir, target_dir)) 55 | 56 | color_dir = join(scan_dir, 'rgb') 57 | depth_dir = join(scan_dir, 'depth') 58 | label_dir = join(scan_dir, 'semantic_class') 59 | pose_file = join(scan_dir, 'traj_w_c.txt') 60 | 61 | assert exists(color_dir), "rgb attribute not downloaded!" 62 | assert exists(depth_dir), "depth attribute not downloaded!" 63 | assert exists(label_dir), "semantic_class attribute not downloaded!" 64 | assert exists(pose_file), "traj_w_c.txt attribute not downloaded!" 
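  # Worked example (illustration only, not executed by the script): with hfov
  # fixed at 90 degrees, get_intrinsics is fully determined by the image size.
  # For a 640x480 frame: fx = fy = 640 / 2 / tan(45 deg) = 320.0,
  # cx = (640 - 1) / 2 = 319.5, cy = (480 - 1) / 2 = 239.5, so
  # get_intrinsics(480, 640) yields
  #   [[320.,   0., 319.5],
  #    [  0., 320., 239.5],
  #    [  0.,   0.,   1. ]]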
65 | 66 | color_file_list = os.listdir(color_dir) 67 | depth_file_list = os.listdir(depth_dir) 68 | label_file_list = Path(label_dir).glob('semantic_class_*.png') 69 | label_file_list = [str(pth) for pth in label_file_list] 70 | 71 | # get file name idx 72 | color_idx, color_inv = np.unique( 73 | np.array([ 74 | int(name.split('_')[1].split('.png')[0]) for name in color_file_list 75 | ]), 76 | return_index=True, 77 | ) 78 | depth_idx, depth_inv = np.unique( 79 | np.array([ 80 | int(name.split('_')[1].split('.png')[0]) for name in depth_file_list 81 | ]), 82 | return_index=True, 83 | ) 84 | label_idx, label_inv = np.unique( 85 | np.array([ 86 | int(name.split('_')[-1].split('.png')[0]) for name in label_file_list 87 | ]), 88 | return_index=True, 89 | ) 90 | 91 | # load poses 92 | poses = np.loadtxt(pose_file, delimiter=' ').reshape(-1, 4, 4) 93 | 94 | # check if indexes are the same and the number is the same as poses 95 | assert (color_idx == depth_idx).all() and ( 96 | color_idx == label_idx).all() and color_idx.shape[0] == poses.shape[0] 97 | 98 | # get correspondence to original file 99 | num_frame = int(poses.shape[0]) 100 | rows = [] 101 | for i in range(num_frame): 102 | frame_id = '{:06d}'.format(i) 103 | color_pth = color_file_list[color_inv[i]] 104 | depth_pth = depth_file_list[depth_inv[i]] 105 | label_pth = label_file_list[label_inv[i]] 106 | rows.append([frame_id, color_pth, depth_pth, label_pth]) 107 | 108 | # write to new file 109 | shutil.rmtree(target_dir, ignore_errors=True) 110 | os.makedirs(target_dir, exist_ok=True) 111 | os.makedirs(join(target_dir, 'color'), exist_ok=True) 112 | os.makedirs(join(target_dir, 'depth'), exist_ok=True) 113 | os.makedirs(join(target_dir, 'intrinsic'), exist_ok=True) 114 | os.makedirs(join(target_dir, 'pose'), exist_ok=True) 115 | os.makedirs(join(target_dir, 'gt_label'), exist_ok=True) 116 | 117 | # first write correspondence list 118 | fields = [ 119 | 'frame_id', 120 | 'original_color_path', 121 | 'original_depth_path', 122 | 'original_ground_truth_label_path', 123 | ] 124 | correspondence_list = [dict(zip(fields, row)) for row in rows] 125 | json_object = json.dumps(correspondence_list, indent=4) 126 | with open(join(target_dir, 'correspondence.json'), 'w') as jsonfile: 127 | jsonfile.write(json_object) 128 | logger.info("Saved old and new files correspondence to {}.".format( 129 | join(target_dir, 'correspondence.json'))) 130 | 131 | logger.info("Transfering files...") 132 | for idx in trange(num_frame): 133 | frame_id, color_pth, depth_pth, label_pth = rows[idx] 134 | 135 | # save color 136 | tgt_color_pth = join(target_dir, 'color', frame_id + '.jpg') 137 | color_img = Image.open(join(color_dir, color_pth)) 138 | color_img.save(tgt_color_pth) 139 | h, w, _ = np.asarray(color_img).shape 140 | 141 | # save pose 142 | tgt_pose_pth = join(target_dir, 'pose', frame_id + '.txt') 143 | np.savetxt(tgt_pose_pth, poses[idx]) 144 | 145 | # process and save intr 146 | tgt_intrinsic_pth = join(target_dir, 'intrinsic', frame_id + '.txt') 147 | intrinsic = get_intrinsics(h, w) 148 | np.savetxt(tgt_intrinsic_pth, intrinsic) 149 | 150 | # process and save depth 151 | depth = cv2.imread(join(depth_dir, depth_pth), cv2.IMREAD_UNCHANGED) 152 | depth = cv2.resize(depth, (w, h), interpolation=cv2.INTER_NEAREST) 153 | tgt_depth_pth = join(target_dir, 'depth', frame_id + '.png') 154 | cv2.imwrite(tgt_depth_pth, depth) 155 | 156 | # process and save ground truth label 157 | label = cv2.imread(join(label_dir, label_pth), cv2.IMREAD_UNCHANGED) 158 | label = 
cv2.resize(label, (w, h), interpolation=cv2.INTER_NEAREST) 159 | tgt_label_pth = join(target_dir, 'gt_label', frame_id + '.png') 160 | cv2.imwrite(tgt_label_pth, label) 161 | 162 | logger.info("File transfer finished!") 163 | 164 | logger.info("Fusing RGBD images into TSDF volume...") 165 | fuse_mesh( 166 | scan_dir=target_dir, 167 | sdf_trunc=sdf_trunc, 168 | voxel_length=voxel_length, 169 | depth_trunc=depth_trunc, 170 | depth_scale=1000.0, 171 | ) # depth_scale is fixed in Replica, so it is not exposed as a CLI argument 172 | logger.info("Fusion finished! Mesh saved to {}".format( 173 | join(target_dir, 'mesh.ply'))) 174 | 175 | 176 | def arg_parser(): 177 | parser = argparse.ArgumentParser() 178 | parser.add_argument("--scan_dir", type=str) 179 | parser.add_argument("--target_dir", type=str) 180 | parser.add_argument("--sdf_trunc", type=float, default=0.04) 181 | parser.add_argument("--voxel_length", type=float, default=0.008) 182 | parser.add_argument("--depth_trunc", type=float, default=3.0) 183 | parser.add_argument('--config', help='Name of config file') 184 | 185 | return parser.parse_args() 186 | 187 | 188 | if __name__ == "__main__": 189 | args = arg_parser() 190 | if args.config is not None: 191 | gin.parse_config_file(args.config) 192 | process_replica( 193 | scan_dir=args.scan_dir, 194 | target_dir=args.target_dir, 195 | sdf_trunc=args.sdf_trunc, 196 | voxel_length=args.voxel_length, 197 | depth_trunc=args.depth_trunc, 198 | ) 199 | -------------------------------------------------------------------------------- /scripts/replica_download.sh: -------------------------------------------------------------------------------- 1 | # if no download directory is specified, use the current directory 2 | if [ -z "$1" ]; then 3 | download_dir='.' 4 | else 5 | download_dir=$1 6 | fi 7 | 8 | cd $download_dir 9 | 10 | wget -O replica_semantic_nerf.zip "https://www.dropbox.com/sh/9yu1elddll00sdl/AAC-rSJdLX0C6HhKXGKMOIija?dl=0" 11 | UNZIP_DISABLE_ZIPBOMB_DETECTION=TRUE unzip replica_semantic_nerf.zip && rm replica_semantic_nerf.zip 12 | mv Replica_Dataset Replica_Dataset_Semantic_Nerf 13 | cd Replica_Dataset_Semantic_Nerf 14 | unzip \*.zip && rm -rf *.zip 15 | 16 | cd -- 17 | cd -- 18 | -------------------------------------------------------------------------------- /scripts/replica_pipeline.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | env_name=labelmaker 4 | eval "$(conda shell.bash hook)" 5 | conda activate $env_name 6 | 7 | conda_home="$(conda info | grep "active env location : " | cut -d ":" -f2-)" 8 | conda_home="${conda_home#"${conda_home%%[![:space:]]*}"}" 9 | 10 | echo $conda_home 11 | 12 | which python 13 | which pip 14 | which nvcc 15 | 16 | # add cuda compiler to path 17 | export CUDA_HOST_COMPILER="$conda_home/bin/gcc" 18 | export CUDA_PATH="$conda_home" 19 | export CUDA_HOME=$CUDA_PATH 20 | export LD_LIBRARY_PATH=$conda_home/lib:$LD_LIBRARY_PATH 21 | export LIBRARY_PATH="$conda_home/lib/stubs:$LIBRARY_PATH" 22 | export TCNN_CUDA_ARCHITECTURES=75 23 | 24 | if [ -z "$1" ]; then 25 | echo "No Replica scan directory specified!" 26 | exit 1 27 | else 28 | original_dir=$1 29 | fi 30 | 31 | if [ -z "$2" ]; then 32 | echo "No target directory specified!" 
33 | exit 1 34 | else 35 | target_dir=$2 36 | fi 37 | 38 | # preprocessing 39 | python scripts/replica2labelmaker.py \ 40 | --scan_dir ${original_dir} \ 41 | --target_dir ${target_dir} 42 | 43 | # extract mask3D 44 | python models/mask3d_inst.py \ 45 | --seed 42 \ 46 | --workspace ${target_dir} 47 | 48 | python models/mask3d_inst.py \ 49 | --seed 43 \ 50 | --output intermediate/scannet200_mask3d_2 \ 51 | --workspace ${target_dir} 52 | 53 | # extract omnidata normal 54 | python models/omnidata_normal.py \ 55 | --workspace ${target_dir} 56 | 57 | python models/omnidata_depth.py \ 58 | --workspace ${target_dir} 59 | 60 | # extract hha depth, higher jobs may lead to failure 61 | python models/hha_depth.py \ 62 | --n_jobs 4 \ 63 | --workspace ${target_dir} 64 | 65 | # internimage 66 | python models/internimage.py \ 67 | --workspace ${target_dir} 68 | 69 | python models/internimage.py --flip \ 70 | --workspace ${target_dir} 71 | 72 | # grounded sam 73 | python models/grounded_sam.py \ 74 | --workspace ${target_dir} 75 | 76 | python models/grounded_sam.py --flip \ 77 | --workspace ${target_dir} 78 | 79 | # ovseg 80 | python models/ovseg.py \ 81 | --workspace ${target_dir} 82 | 83 | python models/ovseg.py --flip \ 84 | --workspace ${target_dir} 85 | 86 | # consensus 87 | python labelmaker/consensus.py \ 88 | --workspace ${target_dir} --n_jobs 8 89 | 90 | # point lifting 91 | python labelmaker/lifting_3d/lifting_points.py \ 92 | --workspace ${target_dir} 93 | 94 | conda deactivate 95 | 96 | # 3D lifting, mesh extracting, and rendering 97 | bash labelmaker/lifting_3d/lifting.sh ${target_dir} 98 | 99 | # rename to non_cmx versoin 100 | mv $target_dir/labels.txt $target_dir/labels_no_cmx.txt 101 | mv $target_dir/point_lifted_mesh.ply $target_dir/point_lifted_mesh_no_cmx.ply 102 | mv $target_dir/neus_lifted $target_dir/neus_lifted_no_cmx 103 | mv $target_dir/intermediate/consensus $target_dir/intermediate/consensus_no_cmx 104 | mv $target_dir/intermediate/sdfstudio_preprocessing $target_dir/intermediate/sdfstudio_preprocessing_no_cmx 105 | mv $target_dir/intermediate/sdfstudio_train $target_dir/intermediate/sdfstudio_train_no_cmx 106 | 107 | env_name=labelmaker 108 | eval "$(conda shell.bash hook)" 109 | conda activate $env_name 110 | 111 | conda_home="$(conda info | grep "active env location : " | cut -d ":" -f2-)" 112 | conda_home="${conda_home#"${conda_home%%[![:space:]]*}"}" 113 | 114 | echo $conda_home 115 | 116 | which python 117 | which pip 118 | which nvcc 119 | 120 | # add cuda compiler to path 121 | export CUDA_HOST_COMPILER="$conda_home/bin/gcc" 122 | export CUDA_PATH="$conda_home" 123 | export CUDA_HOME=$CUDA_PATH 124 | export LD_LIBRARY_PATH=$conda_home/lib:$LD_LIBRARY_PATH 125 | export LIBRARY_PATH="$conda_home/lib/stubs:$LIBRARY_PATH" 126 | export TCNN_CUDA_ARCHITECTURES=75 127 | 128 | # cmx 129 | python models/cmx.py \ 130 | --workspace ${target_dir} 131 | 132 | python models/cmx.py --flip \ 133 | --workspace ${target_dir} 134 | 135 | # consensus 136 | python labelmaker/consensus.py \ 137 | --workspace ${target_dir} --n_jobs 8 138 | 139 | # point lifting 140 | python labelmaker/lifting_3d/lifting_points.py \ 141 | --workspace ${target_dir} 142 | 143 | conda deactivate 144 | 145 | # 3D lifting, mesh extracting, and rendering 146 | bash labelmaker/lifting_3d/lifting.sh ${target_dir} 147 | -------------------------------------------------------------------------------- /scripts/replica_singularity_slurm.sh: -------------------------------------------------------------------------------- 1 
10 | module load gcc/11.4.0 cuda/12.1.1 eth_proxy
11 |
12 | LABELMAKER_REPO=/cluster/home/guanji/LabelMaker # path to your checkout of the LabelMaker repo; the code base is bind-mounted into the container below
13 |
14 | # the dataset is downloaded to scratch storage (valid for 14 days);
15 | # modify the source and target directories as needed
16 | scene=room_0
17 | sequence=1
18 | source_dir=/cluster/scratch/guanji/Replica_Dataset_Semantic_Nerf/${scene}/Sequence_${sequence}
19 | target_dir=$SCRATCH/replica_${scene}_${sequence}
20 | mkdir -p $target_dir
21 |
22 | # use wandb to monitor sdfstudio training
23 | WANDB_API_KEY="6b447b1218e7f042525c176c16b0cd32d3e58956"
24 | WANDB_ENTITY="labelmaker-sdfstudio"
25 |
26 | # make temporary directory for processing
27 | mkdir -p $TMPDIR/.cache
28 |
29 | singularity exec --nv \
30 |   --bind /cluster/project/cvg/labelmaker/checkpoints:/LabelMaker/checkpoints \
31 |   --bind $LABELMAKER_REPO/env_v2:/LabelMaker/env_v2 \
32 |   --bind $LABELMAKER_REPO/labelmaker:/LabelMaker/labelmaker \
33 |   --bind $LABELMAKER_REPO/testing:/LabelMaker/testing \
34 |   --bind $LABELMAKER_REPO/models:/LabelMaker/models \
35 |   --bind $LABELMAKER_REPO/scripts:/LabelMaker/scripts \
36 |   --bind $LABELMAKER_REPO/.gitmodules:/LabelMaker/.gitmodules \
37 |   --bind $TMPDIR/.cache:$HOME/.cache \
38 |   --bind $source_dir:/source \
39 |   --bind $target_dir:/target \
40 |   --env WANDB_ENTITY=$WANDB_ENTITY \
41 |   --env WANDB_API_KEY=$WANDB_API_KEY \
42 |   /cluster/project/cvg/labelmaker/labelmaker.simg \
43 |   bash -c "cd /LabelMaker && export PATH=/miniconda3/condabin:$PATH && bash ./scripts/replica_pipeline.sh /source /target"
--------------------------------------------------------------------------------
/scripts/scanner3d2labelmaker.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import logging
4 | import os
5 | import shutil
6 | import sys
7 | import glob
8 | from os.path import abspath, dirname, exists, join
9 | from typing import List, Optional
10 |
11 | import cv2
12 | import gin
13 | import numpy as np
14 | import open3d as o3d
15 |
16 | from PIL import Image
17 | from scipy.interpolate import CubicSpline
18 | from scipy.spatial.transform import Rotation, RotationSpline
19 | from tqdm import trange
20 | import matplotlib.pyplot as plt
21 |
22 | from copy import copy
23 |
24 | sys.path.append(abspath(join(dirname(__file__), '..')))
25 | from utils_3d import fuse_mesh
26 |
27 |
28 | def extract_pose(data):
29 |   return np.asarray(data['cameraPoseARFrame'])
30 |
31 | def extract_intrinsics(data):
32 |   return np.asarray(data['intrinsics'])
33 |
34 | def render_depth(world_to_cam, intrinsics, mesh, resolution):
35 |   rays = o3d.t.geometry.RaycastingScene.create_rays_pinhole(
36 |       width_px=resolution[1],
37 |       height_px=resolution[0],
38 |       intrinsic_matrix=intrinsics[:3, :3],
39 |       extrinsic_matrix=world_to_cam,  # world to camera
40 |   )
41 |
42 |   scene = o3d.t.geometry.RaycastingScene()
43 |   scene.add_triangles(mesh)
44 |   vis = scene.cast_rays(rays)
45 |
46 |   depth = vis['t_hit'].numpy()
47 |   return depth
48 |
49 |
50 | @gin.configurable
51 | def process_scanner3d(
52 |     scan_dir: str,
53 |     target_dir: str,
54 |     sdf_trunc: float,
55 |     voxel_length: float,
56 |     depth_trunc: float,
57 |     resize: Optional[List] = None,
58 | ):
59 |
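  # Overview (added comments): this converter
  #   1. reads the per-frame JSON files for camera pose and intrinsics,
  #   2. re-orients each pose into the LabelMaker camera convention,
  #   3. renders a per-frame depth map by raycasting the exported mesh, and
  #   4. writes color/depth/pose/intrinsic files and fuses a TSDF mesh.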
60 |   logger = logging.getLogger('Scanner3DProcess')
61 |   logger.setLevel(logging.DEBUG)
62 |   console_handler = logging.StreamHandler()
63 |   formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
64 |   console_handler.setFormatter(formatter)
65 |   logger.addHandler(console_handler)
66 |
67 |   logger.info(
68 |       "Processing Scanner3D scan to LabelMaker format, from {} to {}...".
69 |       format(scan_dir, target_dir))
70 |
71 |   color_dir = join(scan_dir)
72 |
73 |   color_file_list = glob.glob(join(color_dir, 'frame_*.jpg'))
74 |   color_file_list = sorted([os.path.basename(f) for f in color_file_list], key=lambda x: int(x.split('_')[1].split('.jpg')[0]))
75 |
76 |   traj_file_list = glob.glob(join(color_dir, 'frame_*.json'))
77 |   traj_file_list = sorted([os.path.basename(f) for f in traj_file_list], key=lambda x: int(x.split('_')[1].split('.json')[0]))
78 |
79 |   # write to new file
80 |   shutil.rmtree(target_dir, ignore_errors=True)
81 |   os.makedirs(target_dir, exist_ok=True)
82 |   os.makedirs(join(target_dir, 'color'), exist_ok=True)
83 |   os.makedirs(join(target_dir, 'depth'), exist_ok=True)
84 |   os.makedirs(join(target_dir, 'intrinsic'), exist_ok=True)
85 |   os.makedirs(join(target_dir, 'pose'), exist_ok=True)
86 |
87 |   # load scanner 3d mesh
88 |   mesh = o3d.io.read_triangle_mesh(scan_dir + '/export.obj')
89 |   mesh.compute_vertex_normals()
90 |   mesh = o3d.t.geometry.TriangleMesh.from_legacy(mesh)
91 |
92 |   num_frame = len(color_file_list)
93 |   logger.info("Transferring files...")
94 |   for idx in trange(num_frame):
95 |     color_pth, traj_pth = color_file_list[idx], traj_file_list[idx]
96 |
97 |     traj_data = json.load(open(join(color_dir, traj_pth)))
98 |
99 |     pose_mat = extract_pose(traj_data).reshape((4, 4))
100 |
101 |     rotation = pose_mat[:3, :3]
102 |
103 |     # rotate the camera and flip the axes into the LabelMaker convention
104 |     rotation[2, :] = -rotation[2, :]
105 |     rotation[1, :] = -rotation[1, :]
106 |     rotation[0, :] = -rotation[0, :]
107 |     pose_mat[:3, :3] = rotation
108 |     pose_mat[:, 0] = -pose_mat[:, 0]
109 |
110 |     R_x_90 = np.array([[1, 0, 0, 0],
111 |                        [0, np.cos(np.radians(90)), -np.sin(np.radians(90)), 0],
112 |                        [0, np.sin(np.radians(90)), np.cos(np.radians(90)), 0],
113 |                        [0, 0, 0, 1]])
114 |
115 |     R_z_90 = np.array([[np.cos(np.radians(-90)), -np.sin(np.radians(-90)), 0, 0],
116 |                        [np.sin(np.radians(-90)), np.cos(np.radians(-90)), 0, 0],
117 |                        [0, 0, 1, 0],
118 |                        [0, 0, 0, 1]])
119 |
120 |     R_z_x = np.dot(R_z_90, R_x_90)
121 |     pose_mat = np.dot(R_z_x, pose_mat)
122 |
123 |     # save color
124 |     tgt_color_pth = join(target_dir, 'color',
125 |                          str(idx) + '.jpg')  # png -> jpg, compressed
126 |     color_img = Image.open(join(color_dir, color_pth))
127 |     if resize is not None:
128 |       original_size = (color_img.width, color_img.height)
129 |       color_img = color_img.resize(resize)
130 |     color_img.save(tgt_color_pth)
131 |
132 |     intr = extract_intrinsics(traj_data).reshape((3, 3))
133 |
134 |     if resize is not None:
135 |       w, h = resize
136 |       width_factor = w / float(original_size[0])
137 |       height_factor = h / float(original_size[1])
138 |       intr[0, 0] *= width_factor
139 |       intr[1, 1] *= height_factor
140 |       intr[0, 2] *= width_factor
141 |       intr[1, 2] *= height_factor
142 |     else:
143 |       h, w, _ = np.asarray(color_img).shape
144 |     depth = render_depth(np.linalg.inv(pose_mat), intr, mesh, (h, w))
145 |     depth = depth * 1000  # meters -> millimeters
146 |     depth = depth.astype(np.uint16)
147 |
148 |     # save pose
149 |     tgt_pose_pth = join(target_dir, 'pose', str(idx) + '.txt')
150 |     np.savetxt(tgt_pose_pth, pose_mat)
151 |
152 |     # process and save intr
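    # LabelMaker consumes one intrinsic .txt per frame (compare
    # testing/test_scan/intrinsic/), so the (possibly rescaled) 3x3 matrix
    # is written out for every frame index. (added comment)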
153 |     tgt_intrinsic_pth = join(target_dir, 'intrinsic', str(idx) + '.txt')
154 |     np.savetxt(tgt_intrinsic_pth, intr)
155 |
156 |     tgt_depth_pth = join(target_dir, 'depth', str(idx) + '.png')
157 |     cv2.imwrite(tgt_depth_pth, depth)
158 |
159 |   logger.info("File transfer finished!")
160 |
161 |   logger.info("Fusing RGBD images into TSDF Volume...")
162 |   fuse_mesh(
163 |       scan_dir=target_dir,
164 |       sdf_trunc=sdf_trunc,
165 |       voxel_length=voxel_length,
166 |       depth_trunc=depth_trunc,
167 |       depth_scale=1000.0,
168 |   )  # depth_scale is fixed to 1000.0 here (depth is stored in millimeters), so it is not exposed as a CLI argument
169 |   logger.info("Fusion finished! Saving to file as {}".format(
170 |       join(target_dir, 'mesh.ply')))
171 |
172 |
173 | def arg_parser():
174 |   parser = argparse.ArgumentParser()
175 |   parser.add_argument("--scan_dir", type=str)
176 |   parser.add_argument("--target_dir", type=str)
177 |   parser.add_argument("--sdf_trunc", type=float, default=0.04)
178 |   parser.add_argument("--voxel_length", type=float, default=0.008)
179 |   parser.add_argument("--depth_trunc", type=float, default=3.0)
180 |   parser.add_argument('--config', help='Name of config file')
181 |
182 |   return parser.parse_args()
183 |
184 |
185 | if __name__ == "__main__":
186 |   args = arg_parser()
187 |   if args.config is not None:
188 |     gin.parse_config_file(args.config)
189 |   process_scanner3d(
190 |       scan_dir=args.scan_dir,
191 |       target_dir=args.target_dir,
192 |       sdf_trunc=args.sdf_trunc,
193 |       voxel_length=args.voxel_length,
194 |       depth_trunc=args.depth_trunc,
195 |       resize=[640, 480],
196 |   )
197 |
--------------------------------------------------------------------------------
/scripts/scannet2labelmaker.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os, sys
3 |
4 | # from SensorData import SensorData
5 |
6 |
7 | import os, struct
8 | import numpy as np
9 | import shutil
10 | import zlib
11 | # import imageio
12 | import imageio.v2 as imageio
13 | import cv2
14 | import png
15 |
16 | COMPRESSION_TYPE_COLOR = {-1: 'unknown', 0: 'raw', 1: 'png', 2: 'jpeg'}
17 | COMPRESSION_TYPE_DEPTH = {-1: 'unknown', 0: 'raw_ushort', 1: 'zlib_ushort', 2: 'occi_ushort'}
18 |
19 | class RGBDFrame():
20 |   def load(self, file_handle):
21 |     self.camera_to_world = np.asarray(struct.unpack('f'*16, file_handle.read(16*4)), dtype=np.float32).reshape(4, 4)
22 |     self.timestamp_color = struct.unpack('Q', file_handle.read(8))[0]
23 |     self.timestamp_depth = struct.unpack('Q', file_handle.read(8))[0]
24 |     self.color_size_bytes = struct.unpack('Q', file_handle.read(8))[0]
25 |     self.depth_size_bytes = struct.unpack('Q', file_handle.read(8))[0]
26 |     self.color_data = b''.join(struct.unpack('c'*self.color_size_bytes, file_handle.read(self.color_size_bytes)))
27 |     self.depth_data = b''.join(struct.unpack('c'*self.depth_size_bytes, file_handle.read(self.depth_size_bytes)))
28 |
29 |
30 |   def decompress_depth(self, compression_type):
31 |     if compression_type == 'zlib_ushort':
32 |       return self.decompress_depth_zlib()
33 |     else:
34 |       raise ValueError('unsupported depth compression type: ' + compression_type)
35 |
36 |
37 |   def decompress_depth_zlib(self):
38 |     return zlib.decompress(self.depth_data)
39 |
40 |
41 |   def decompress_color(self, compression_type):
42 |     if compression_type == 'jpeg':
43 |       return self.decompress_color_jpeg()
44 |     else:
45 |       raise ValueError('unsupported color compression type: ' + compression_type)
46 |
47 |
48 |   def decompress_color_jpeg(self):
49 |     return imageio.imread(self.color_data)
50 |
51 |
52 | class SensorData:
53 |
54 |   def __init__(self, filename):
55 |     self.version = 4
56 |     self.load(filename)
57 |
58 |
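  # .sens layout, as parsed below (added comment): format version, sensor
  # name, color/depth intrinsics and extrinsics (4x4 each), compression
  # types, image sizes, the depth shift, and finally the RGBD frames.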
59 |   def load(self, filename):
60 |     with open(filename, 'rb') as f:
61 |       version = struct.unpack('I', f.read(4))[0]
62 |       assert self.version == version
63 |       strlen = struct.unpack('Q', f.read(8))[0]
64 |       # self.sensor_name = ''.join(struct.unpack('c'*strlen, f.read(strlen)))
65 |       self.sensorname = f.read(strlen)
66 |       self.intrinsic_color = np.asarray(struct.unpack('f'*16, f.read(16*4)), dtype=np.float32).reshape(4, 4)
67 |       self.extrinsic_color = np.asarray(struct.unpack('f'*16, f.read(16*4)), dtype=np.float32).reshape(4, 4)
68 |       self.intrinsic_depth = np.asarray(struct.unpack('f'*16, f.read(16*4)), dtype=np.float32).reshape(4, 4)
69 |       self.extrinsic_depth = np.asarray(struct.unpack('f'*16, f.read(16*4)), dtype=np.float32).reshape(4, 4)
70 |       self.color_compression_type = COMPRESSION_TYPE_COLOR[struct.unpack('i', f.read(4))[0]]
71 |       self.depth_compression_type = COMPRESSION_TYPE_DEPTH[struct.unpack('i', f.read(4))[0]]
72 |       self.color_width = struct.unpack('I', f.read(4))[0]
73 |       self.color_height = struct.unpack('I', f.read(4))[0]
74 |       self.depth_width = struct.unpack('I', f.read(4))[0]
75 |       self.depth_height = struct.unpack('I', f.read(4))[0]
76 |       self.depth_shift = struct.unpack('f', f.read(4))[0]
77 |       num_frames = struct.unpack('Q', f.read(8))[0]
78 |       self.frames = []
79 |       for i in range(num_frames):
80 |         frame = RGBDFrame()
81 |         frame.load(f)
82 |         self.frames.append(frame)
83 |
84 |
85 |   def export_depth_images(self, output_path, image_size=None, frame_skip=1):
86 |     # if not os.path.exists(output_path):
87 |     shutil.rmtree(output_path, ignore_errors=True)
88 |     os.makedirs(output_path)
89 |     print('exporting', len(self.frames)//frame_skip, ' depth frames to', output_path)
90 |     for f in range(0, len(self.frames), frame_skip):
91 |       depth_data = self.frames[f].decompress_depth(self.depth_compression_type)
92 |       depth = np.frombuffer(depth_data, dtype=np.uint16).reshape(self.depth_height, self.depth_width)
93 |       if image_size is not None:
94 |         depth = cv2.resize(depth, (image_size[1], image_size[0]), interpolation=cv2.INTER_NEAREST)
95 |       #imageio.imwrite(os.path.join(output_path, str(f) + '.png'), depth)
96 |       with open(os.path.join(output_path, str(f).zfill(6) + '.png'), 'wb') as fout:  # write 16-bit png; do not shadow the loop variable f
97 |         writer = png.Writer(width=depth.shape[1], height=depth.shape[0], bitdepth=16)
98 |         depth = depth.reshape(-1, depth.shape[1]).tolist()
99 |         writer.write(fout, depth)
100 |
101 |   def export_color_images(self, output_path, image_size=None, frame_skip=1):
102 |     # if not os.path.exists(output_path):
103 |     shutil.rmtree(output_path, ignore_errors=True)
104 |     os.makedirs(output_path)
105 |     print('exporting', len(self.frames)//frame_skip, 'color frames to', output_path)
106 |     for f in range(0, len(self.frames), frame_skip):
107 |       color = self.frames[f].decompress_color(self.color_compression_type)
108 |       if image_size is not None:
109 |         color = cv2.resize(color, (image_size[1], image_size[0]), interpolation=cv2.INTER_NEAREST)
110 |       imageio.imwrite(os.path.join(output_path, str(f).zfill(6) + '.jpg'), color)
111 |
112 |
113 |   def save_mat_to_file(self, matrix, filename):
114 |     with open(filename, 'w') as f:
115 |       for line in matrix:
116 |         np.savetxt(f, line[np.newaxis], fmt='%f')
117 |
118 |
119 |   def export_poses(self, output_path, frame_skip=1):
120 |     # if not os.path.exists(output_path):
121 |     shutil.rmtree(output_path, ignore_errors=True)
122 |     os.makedirs(output_path)
123 |     print('exporting', len(self.frames)//frame_skip, 'camera poses to', output_path)
124 |     for f in range(0, len(self.frames), frame_skip):
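      # each pose is the frame's 4x4 camera-to-world matrix, saved as one
      # plain-text file per frame (added comment)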
125 |       self.save_mat_to_file(self.frames[f].camera_to_world, os.path.join(output_path, str(f).zfill(6) + '.txt'))
126 |
127 |
128 |   def export_intrinsics(self, output_path, original_intrinsic=None, resize=None):
129 |     # if not os.path.exists(output_path):
130 |     shutil.rmtree(output_path, ignore_errors=True)
131 |     os.makedirs(output_path)
132 |     print('exporting camera intrinsics to', output_path)
133 |     if resize is None:
134 |       self.save_mat_to_file(self.intrinsic_color, os.path.join(output_path, 'intrinsic_color.txt'))
135 |       self.save_mat_to_file(self.extrinsic_color, os.path.join(output_path, 'extrinsic_color.txt'))
136 |       self.save_mat_to_file(self.intrinsic_depth, os.path.join(output_path, 'intrinsic_depth.txt'))
137 |       self.save_mat_to_file(self.extrinsic_depth, os.path.join(output_path, 'extrinsic_depth.txt'))
138 |     else:
139 |       # if not os.path.exists(original_intrinsic):
140 |       shutil.rmtree(original_intrinsic, ignore_errors=True)
141 |       os.makedirs(original_intrinsic)
142 |       self.save_mat_to_file(self.intrinsic_color, os.path.join(original_intrinsic, 'intrinsic_color.txt'))
143 |       self.save_mat_to_file(self.extrinsic_color, os.path.join(original_intrinsic, 'extrinsic_color.txt'))
144 |       self.save_mat_to_file(self.intrinsic_depth, os.path.join(original_intrinsic, 'intrinsic_depth.txt'))
145 |       self.save_mat_to_file(self.extrinsic_depth, os.path.join(original_intrinsic, 'extrinsic_depth.txt'))
146 |       w = resize[1]/1296  # ScanNet color images are 1296x968
147 |       h = resize[0]/968
148 |       intrinsic_color = self.intrinsic_color[:3, :3]
149 |       scaled_intrinsic_color = np.diag([w, h, 1]) @ intrinsic_color
150 |       for i in range(0, len(self.frames)):
151 |         target = os.path.join(output_path, str(i).zfill(6) + '.txt')
152 |         # print(target)
153 |         self.save_mat_to_file(scaled_intrinsic_color, target)
154 |
155 |
156 | def arg_parser():
157 |   parser = argparse.ArgumentParser()
158 |   # data paths
159 |   parser.add_argument('--scan_dir', required=True, help='path to scan to read')
160 |   parser.add_argument('--target_dir', required=True, help='path to output folder')
161 |   parser.add_argument('--export_depth_images', dest='export_depth_images')
162 |   parser.add_argument('--export_color_images', dest='export_color_images')
163 |   parser.add_argument('--export_poses', dest='export_poses')
164 |   parser.add_argument('--export_intrinsics', dest='export_intrinsics')
165 |   parser.set_defaults(export_depth_images=True, export_color_images=True, export_poses=True, export_intrinsics=True)
166 |   return parser.parse_args()
167 |
168 |
169 | def main():
170 |
171 |   args = arg_parser()
172 |   if not os.path.exists(args.target_dir):
173 |     os.makedirs(args.target_dir)
174 |   # load the data
175 |   filename = os.path.join(args.scan_dir, str(os.path.basename(args.scan_dir.rstrip(os.sep))) + '.sens')
176 |   sys.stdout.write('loading %s...' % filename)
177 |   sd = SensorData(filename)
178 |   sys.stdout.write('loaded!\n')
179 |
180 |   # copy RGB mesh file
181 |   source_mesh = os.path.join(args.scan_dir, os.path.basename(args.scan_dir.rstrip(os.sep)) + '_vh_clean.ply')
182 |   destination_mesh = os.path.join(args.target_dir, 'mesh.ply')
183 |   try:
184 |     with open(source_mesh, 'rb') as src:
185 |       with open(destination_mesh, 'wb') as dst:
186 |         dst.write(src.read())
187 |     print(f"file {source_mesh} was copied to {destination_mesh}")
188 |   except FileNotFoundError:
189 |     print(f"{source_mesh} not found")
190 |
191 |   # NOTE: resize images to the resolution LabelMaker expects
192 |   resize = [480, 640]
193 |
194 |   if args.export_depth_images:
195 |     sd.export_depth_images(os.path.join(args.target_dir, 'depth'))
196 |   if args.export_color_images:
197 |     sd.export_color_images(os.path.join(args.target_dir, 'color'), image_size=resize)
198 |   if args.export_poses:
199 |     sd.export_poses(os.path.join(args.target_dir, 'pose'))
200 |   if args.export_intrinsics:
201 |     sd.export_intrinsics(os.path.join(args.target_dir, 'intrinsic'), os.path.join(args.target_dir, 'original_intrinsic'), resize=resize)
202 |
203 |
204 |
205 |
206 | if __name__ == '__main__':
207 |   main()
208 |
209 |
210 |
211 |
--------------------------------------------------------------------------------
/scripts/segmentation_eval.py:
--------------------------------------------------------------------------------
1 | import sys, os
2 | import argparse
3 | import logging
4 | from pathlib import Path
5 | from tqdm import tqdm
6 | import cv2
7 | import numpy as np
8 | from glob import glob
9 | import re
10 | from joblib import Parallel, delayed
11 | from scipy.sparse import coo_matrix
12 |
13 | from labelmaker.label_mappings import LabelMatcher
14 |
15 | logging.basicConfig(level="INFO")
16 | log = logging.getLogger('Segmentation Evaluation')
17 |
18 |
19 | def _dist_get_matcher_confmat(scene_dir, keys, pred_space, label_space,
20 |                               pred_template, label_template):
21 |   matcher = LabelMatcher(pred_space, label_space)
22 |   confmat = np.zeros((len(matcher.right_ids), len(matcher.right_ids)),
23 |                      dtype=np.int64)
24 |   for k in tqdm(keys):
25 |     pred = cv2.imread(str(scene_dir / pred_template.format(k=k)),
26 |                       cv2.IMREAD_UNCHANGED)
27 |     label = cv2.imread(str(scene_dir / label_template.format(k=k)),
28 |                        cv2.IMREAD_UNCHANGED)
29 |     if pred.shape[0] != label.shape[0] or pred.shape[1] != label.shape[1]:
30 |       pred = cv2.resize(pred, (label.shape[1], label.shape[0]),
31 |                         interpolation=cv2.INTER_NEAREST)
32 |     confmat += matcher.confusion_matrix(pred, label)
33 |   return confmat
34 |
35 |
36 | def _dist_get_unmatched_confmat(scene_dir, keys, pred_space, label_space,
37 |                                 pred_template, label_template, subsampling):
38 |   matcher = LabelMatcher(pred_space, label_space)
39 |   confmat = np.zeros((len(matcher.left_ids) + 1, len(matcher.right_ids) + 1),
40 |                      dtype=np.int64)
41 |   # we do not know whether all predictions or labels actually contain only the ids listed,
42 |   # or whether there are gaps in the data.
43 |   # Therefore, we keep 0 in each dimension as a "not in list" category.
44 |   left_id_to_confmat_idx = np.zeros(max(matcher.left_ids) + 1, dtype=np.int64)
45 |   for i, left_id in enumerate(matcher.left_ids):
46 |     left_id_to_confmat_idx[left_id] = i + 1
47 |   right_id_to_confmat_idx = np.zeros(max(matcher.right_ids) + 1, dtype=np.int64)
48 |   for i, right_id in enumerate(matcher.right_ids):
49 |     right_id_to_confmat_idx[right_id] = i + 1
50 |   for k in tqdm(keys):
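    # predictions may exist only for every subsampling-th frame, so the
    # prediction file index is derived from the label key via
    # k // subsampling (added comment)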
51 |     pred = cv2.imread(
52 |         str(scene_dir / pred_template.format(k=(k // subsampling))),
53 |         cv2.IMREAD_UNCHANGED)
54 |     label = cv2.imread(str(scene_dir / label_template.format(k=k)),
55 |                        cv2.IMREAD_UNCHANGED)
56 |     if pred.shape[0] != label.shape[0] or pred.shape[1] != label.shape[1]:
57 |       pred = cv2.resize(pred, (label.shape[1], label.shape[0]),
58 |                         interpolation=cv2.INTER_NEAREST)
59 |     sample_weights = np.ones_like(label.flatten(), dtype=np.int64)
60 |     left = left_id_to_confmat_idx[pred.flatten()]
61 |     right = right_id_to_confmat_idx[label.flatten()]
62 |     confmat += coo_matrix((sample_weights, (left, right)),
63 |                           shape=confmat.shape,
64 |                           dtype=np.int64).toarray()
65 |   return confmat
66 |
67 |
68 | """
69 | def _get_confmat(scene_dir,
70 |                  keys,
71 |                  pred_space,
72 |                  label_space,
73 |                  pred_template,
74 |                  label_template,
75 |                  n_jobs=8):
76 |   confmat_path = scene_dir / pred_template.split(
77 |       '/')[0] / f'confmat_{label_space}.txt'
78 |   if confmat_path.exists():
79 |     log.info(f'using cached {confmat_path}')
80 |     return np.loadtxt(str(confmat_path)).astype(np.int64)
81 |   # split keys into chunks for parallel execution
82 |   keys = np.array_split(keys, n_jobs)
83 |   confmats = Parallel(n_jobs=n_jobs)(
84 |       delayed(_dist_get_confmat)(scene_dir, keys[i], pred_space, label_space,
85 |                                  pred_template, label_template)
86 |       for i in range(n_jobs))
87 |   confmat = np.sum(confmats, axis=0)
88 |   np.savetxt(str(confmat_path), confmat)
89 |   return confmat.astype(np.int64)
90 | """
91 |
92 |
93 | def metrics_from_confmat(confmat):
94 |   assert confmat.shape[0] == confmat.shape[1]
95 |   assert confmat[:, 0].sum() == 0
96 |   float_confmat = confmat.astype(float)
97 |   metrics = {
98 |       'iou':
99 |           np.diag(float_confmat) /
100 |           (float_confmat.sum(axis=1) + float_confmat.sum(axis=0) -
101 |            np.diag(float_confmat)),
102 |       'acc':
103 |           np.diag(float_confmat) / (float_confmat.sum(0)),
104 |   }
105 |
106 |   nan_mask_c = confmat[1:, :].sum(axis=1) == 0  # no prediction for this class
107 |   nan_mask_r = confmat[:, 1:].sum(axis=0) == 0  # no groundtruth for this class
108 |
109 |   nan_mask = np.logical_and(nan_mask_c, nan_mask_r)
110 |   nan_mask = nan_mask_r  # only missing ground truth excludes a class from the mean
111 |
112 |   acc = np.nan_to_num(metrics['acc'][1:], nan=0.0)  # fill NaNs with 0
113 |   iou = np.nan_to_num(metrics['iou'][1:], nan=0.0)  # fill NaNs with 0
114 |
115 |   metrics['mIoU'] = (iou * (1 - nan_mask)).sum() / (1 - nan_mask).sum()
116 |   metrics['mAcc'] = (acc * (1 - nan_mask)).sum() / (1 - nan_mask).sum()
117 |
118 |   # metrics['mIoU'] = iou.mean()
119 |   # metrics['mAcc'] = acc.mean()
120 |
121 |   metrics['tAcc'] = np.diag(float_confmat).sum() / float_confmat.sum()
122 |
123 |   acc[nan_mask == 1] = np.nan
124 |   iou[nan_mask == 1] = np.nan
125 |
126 |   metrics['acc'] = acc.copy()
127 |   metrics['iou'] = iou.copy()
128 |
129 |   return metrics
130 |
131 |
132 | def _get_confmat(scene_dir,
133 |                  keys,
134 |                  pred_space,
135 |                  label_space,
136 |                  pred_template,
137 |                  label_template,
138 |                  subsampling=1,
139 |                  overwrite_confmat=False,
140 |                  n_jobs=8):
141 |   confmat_path = scene_dir / pred_template.split(
142 |       '/')[0] / f'confmat_{pred_space}_{label_space}.txt'
143 |   if confmat_path.exists() and not overwrite_confmat:
144 |     confmat = np.loadtxt(str(confmat_path)).astype(np.int64)
145 |   else:
146 |     # split keys into chunks for parallel execution
147 |     keys = np.array_split(keys, n_jobs)
148 |     confmats = Parallel(n_jobs=n_jobs)(delayed(_dist_get_unmatched_confmat)(
149 |         scene_dir, keys[i], pred_space, label_space, pred_template,
150 |         label_template, subsampling) for i in range(n_jobs))
151 |     confmat = np.sum(confmats, axis=0)
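    # cache the raw (unmatched) confusion matrix next to the predictions so
    # that repeated evaluations can skip the expensive recomputation
    # (added comment)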
152 |     np.savetxt(str(confmat_path), confmat)
153 |   matcher = LabelMatcher(pred_space, label_space)
154 |   return matcher.match_confmat(confmat)
155 |
156 |
157 | def evaluate_scene(scene_dir,
158 |                    pred_space,
159 |                    label_space,
160 |                    keys=None,
161 |                    subsampling=1,
162 |                    pred_template='pred/{k}.png',
163 |                    pred_template_glob='pred/{k}.png',
164 |                    label_template='label_filt/{k}.png',
165 |                    label_template_glob='label_filt/{k}.png',
166 |                    overwrite_confmat=False,
167 |                    n_jobs=8):
168 |   scene_dir = Path(scene_dir)
169 |   if keys is None:
170 |
171 |     files = glob(str(scene_dir / label_template_glob.format(k='*')),
172 |                  recursive=True)
173 |     keys = sorted(
174 |         int(re.search(label_template_glob.format(k=r'(\d+)'), x).group(1))
175 |         for x in files)
176 |     keys = keys[::subsampling]
177 |
178 |   log.info(f"getting confmat for {pred_template.split('/')[0]} in {scene_dir}")
179 |   confmat = _get_confmat(scene_dir,
180 |                          keys,
181 |                          pred_space,
182 |                          label_space,
183 |                          pred_template,
184 |                          label_template,
185 |                          subsampling=subsampling,
186 |                          overwrite_confmat=overwrite_confmat,
187 |                          n_jobs=n_jobs)
188 |   metrics = metrics_from_confmat(confmat)
189 |   return metrics, confmat
190 |
191 |
192 | def evaluate_scenes(scene_dirs,
193 |                     pred_space,
194 |                     label_space,
195 |                     subsampling=1,
196 |                     pred_template='pred/{k}.png',
197 |                     pred_template_glob='pred/{k}.png',
198 |                     label_template_glob='label_filt/{k}.png',
199 |                     label_template='label_filt/{k}.png',
200 |                     overwrite_confmat=False,
201 |                     n_jobs=8):
202 |   confmat = None
203 |   for k, scene_dir in enumerate(scene_dirs):
204 |
205 |     _, c = evaluate_scene(scene_dir,
206 |                           pred_space,
207 |                           label_space,
208 |                           pred_template=pred_template[k]
209 |                           if type(pred_template) is list else pred_template,
210 |                           pred_template_glob=pred_template_glob,
211 |                           label_template=label_template,
212 |                           label_template_glob=label_template_glob,
213 |                           subsampling=subsampling,
214 |                           overwrite_confmat=overwrite_confmat,
215 |                           n_jobs=n_jobs)
216 |     if confmat is None:
217 |       confmat = c
218 |     else:
219 |       confmat += c
220 |   metrics = metrics_from_confmat(confmat)
221 |   return metrics, confmat
222 |
223 |
224 | if __name__ == '__main__':
225 |   parser = argparse.ArgumentParser()
226 |   parser.add_argument('scene')
227 |   parser.add_argument('--replica', action='store_true')
228 |   parser.add_argument('--j', default=8)
229 |   flags = parser.parse_args()
230 |   scene_dir = Path(flags.scene)
231 |   assert scene_dir.exists() and scene_dir.is_dir()
232 |   if flags.replica:
233 |     label_template = 'semantic_class/semantic_class_{k}.png'
234 |     label_space = 'replicaid'
235 |   else:
236 |     label_template = 'label_agile3d/{k}.png'
237 |     label_space = 'wn199'
238 |
239 |   # check which predictors are present
240 |   for subdir in scene_dir.iterdir():
241 |     if subdir.is_dir():
242 |       if subdir.name == 'pred_internimage':
243 |         pred_space = 'ade20k'
244 |         pred_template = 'pred_internimage/{k}.png'
245 |       elif subdir.name == 'pred_cmx':
246 |         pred_space = 'nyu40id'
247 |         pred_template = 'pred_cmx/{k}.png'
248 |       elif subdir.name == 'pred_consensus':
249 |         if flags.replica:
250 |           pred_space = 'replicaid'
251 |         else:
252 |           pred_space = 'wn199'
253 |         pred_template = 'pred_consensus/{k}.png'
254 |       elif subdir.name == 'pred_wn_consensus':
255 |         pred_space = 'wn199'
256 |         pred_template = 'pred_wn_consensus/{k}.png'
257 |       elif subdir.name == 'pred_ovseg_replica':
258 |         pred_space = 'replicaid'
259 |         pred_template = 'pred_ovseg_replica/{k}.png'
260 |       elif subdir.name.startswith('pred_ovseg_w'):
261 |         pred_space = 'wn199'
262 |         pred_template = subdir.name + '/{k}.png'
263 |       elif subdir.name == 'label-filt':
264 |         pred_space = 'id'
265 |         pred_template = 'label-filt/{k}.png'
266 |       elif subdir.name == 'nerf':
267 |         pred_space = 'replicaid'
268 |         pred_template = 'nerf/pred_nerf_{k}.png'
269 |       elif subdir.name == 'pred_mask3d_rendered':
270 |         pred_space = 'id'
271 |         pred_template = 'pred_mask3d_rendered/{k}.png'
272 |       elif subdir.name.startswith('pred_sdfstudio'):
273 |         if flags.replica:
274 |           pred_space = 'replicaid'
275 |         else:
276 |           pred_space = 'wn199'
277 |         pred_template = subdir.name + '/{k:05d}.png'
278 |       else:
279 |         continue
280 |       metrics, confmat = evaluate_scene(scene_dir,
281 |                                         pred_space,
282 |                                         label_space,
283 |                                         pred_template=pred_template,
284 |                                         label_template=label_template,
285 |                                         n_jobs=int(flags.j))
--------------------------------------------------------------------------------
/scripts/utils_3d.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | from os.path import exists, join
4 |
5 | import cv2
6 | import gin
7 | import numpy as np
8 | import open3d as o3d
9 | from PIL import Image
10 | from tqdm import tqdm
11 |
12 |
13 | @gin.configurable
14 | def fuse_mesh(
15 |     scan_dir: str,
16 |     sdf_trunc: float = 0.06,
17 |     voxel_length: float = 0.02,
18 |     depth_trunc: float = 3.0,
19 |     depth_scale: float = 1000.0,
20 | ):
21 |
22 |   color_dir = join(scan_dir, 'color')
23 |   depth_dir = join(scan_dir, 'depth')
24 |   pose_dir = join(scan_dir, 'pose')
25 |   intrinsic_dir = join(scan_dir, 'intrinsic')
26 |
27 |   assert exists(color_dir)
28 |   assert exists(depth_dir)
29 |   assert exists(pose_dir)
30 |   assert exists(intrinsic_dir)
31 |
32 |   color_list = os.listdir(color_dir)
33 |   color_list.sort(key=lambda e: int(e[:-4]))
34 |
35 |   depth_list = os.listdir(depth_dir)
36 |   depth_list.sort(key=lambda e: int(e[:-4]))
37 |
38 |   pose_list = os.listdir(pose_dir)
39 |   pose_list.sort(key=lambda e: int(e[:-4]))
40 |
41 |   intr_list = os.listdir(intrinsic_dir)
42 |   intr_list.sort(key=lambda e: int(e[:-4]))
43 |
44 |   # check that every frame has matching color, depth, pose, and intrinsic files
45 |   assert all(
46 |       (a[:-4] == b[:-4]) and (a[:-4] == c[:-4]) and (a[:-4] == d[:-4])
47 |       for a, b, c, d in zip(color_list, depth_list, pose_list, intr_list))
48 |
49 |   tsdf = o3d.pipelines.integration.ScalableTSDFVolume(
50 |       sdf_trunc=sdf_trunc,
51 |       voxel_length=voxel_length,
52 |       color_type=o3d.pipelines.integration.TSDFVolumeColorType.RGB8)
53 |
54 |   for color_f, depth_f, pose_f, intr_f in tqdm(
55 |       zip(color_list, depth_list, pose_list, intr_list),
56 |       total=len(color_list),
57 |   ):
58 |
59 |     intr = np.loadtxt(join(intrinsic_dir, intr_f))
60 |     pose = np.loadtxt(join(pose_dir, pose_f))
61 |     color = np.asanyarray(Image.open(join(color_dir, color_f))).astype(np.uint8)
62 |     depth = np.asarray(Image.open(join(depth_dir, depth_f))).astype(np.uint16)
63 |
64 |     h, w, _ = color.shape
65 |     color = o3d.geometry.Image(color)
66 |     depth = o3d.geometry.Image(depth)
67 |
68 |     rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
69 |         color=color,
70 |         depth=depth,
71 |         depth_scale=depth_scale,
72 |         depth_trunc=depth_trunc,
73 |         convert_rgb_to_intensity=False)
74 |
75 |     tsdf.integrate(
76 |         image=rgbd,
77 |         intrinsic=o3d.camera.PinholeCameraIntrinsic(
78 |             height=h,
79 |             width=w,
80 |             fx=intr[0, 0],
81 |             fy=intr[1, 1],
82 |             cx=intr[0, 2],
83 |             cy=intr[1, 2]
84 |         ),
85 |         extrinsic=np.linalg.inv(pose),
86 |     )
87 |
88 |   mesh = tsdf.extract_triangle_mesh()
89 |   o3d.io.write_triangle_mesh(join(scan_dir, 'mesh.ply'), mesh)
90 |
91 |
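# Usage sketch (added; the path below is hypothetical): fuse_mesh can also be
# called directly from Python instead of through the CLI defined below, e.g.
#
#   from utils_3d import fuse_mesh
#   fuse_mesh(scan_dir='/path/to/scan', sdf_trunc=0.04,
#             voxel_length=0.008, depth_trunc=3.0, depth_scale=1000.0)
#
# scan_dir must contain the color/, depth/, pose/ and intrinsic/ folders
# produced by the converter scripts above.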
92 | def arg_parser():
93 |   parser = argparse.ArgumentParser()
94 |   parser.add_argument("--workspace", type=str)
95 |   parser.add_argument("--sdf_trunc", type=float, default=0.04)
96 |   parser.add_argument("--voxel_length", type=float, default=0.008)
97 |   parser.add_argument("--depth_trunc", type=float, default=3.0)
98 |   parser.add_argument("--depth_scale", type=float, default=1000.0)
99 |   parser.add_argument('--config', help='Name of config file')
100 |
101 |   return parser.parse_args()
102 |
103 |
104 | if __name__ == "__main__":
105 |   args = arg_parser()
106 |   if args.config is not None:
107 |     gin.parse_config_file(args.config)
108 |   fuse_mesh(
109 |       scan_dir=args.workspace,
110 |       sdf_trunc=args.sdf_trunc,
111 |       voxel_length=args.voxel_length,
112 |       depth_trunc=args.depth_trunc,
113 |       depth_scale=args.depth_scale,
114 |   )
115 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from setuptools import find_packages, setup
3 |
4 | setup(
5 |     name='labelmaker',
6 |     version='0.1',
7 |     description='',
8 |     packages=find_packages(include=['labelmaker*', 'scripts*']),
9 |     install_requires=['numpy'],
10 |     package_data={'': ['*.csv', '*.sh']},
11 |     include_package_data=True,
12 | )
13 |
--------------------------------------------------------------------------------
/testing/test_models/test_cmx_00_omnidata.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from os.path import abspath, dirname, exists, join
4 | from pathlib import Path
5 |
6 | import cv2
7 | import numpy as np
8 |
9 | sys.path.append(abspath(join(dirname(__file__), '../../models')))
10 |
11 |
12 | def test_omnidata():
13 |   from omnidata_depth import run as run_omnidata
14 |
15 |   scene_dir = Path(abspath(join(dirname(__file__), '../test_scan')))
16 |   output_folder = 'intermediate/depth_omnidata_1'
17 |
18 |   run_omnidata(
19 |       scene_dir=scene_dir,
20 |       output_folder=output_folder,
21 |       device='cuda:0',
22 |   )
23 |
--------------------------------------------------------------------------------
/testing/test_models/test_cmx_01_hha.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from os.path import abspath, dirname, exists, join
4 | from pathlib import Path
5 |
6 | import cv2
7 | import numpy as np
8 |
9 | sys.path.append(abspath(join(dirname(__file__), '../../models')))
10 |
11 |
12 | def test_hha():
13 |   from hha_depth import run as run_hha
14 |
15 |   scene_dir = Path(abspath(join(dirname(__file__), '../test_scan')))
16 |   input_folder = 'intermediate/depth_omnidata_1'
17 |   output_folder = 'intermediate/hha'
18 |
19 |   run_hha(
20 |       scene_dir=scene_dir,
21 |       input_folder=input_folder,
22 |       output_folder=output_folder,
23 |       n_jobs=2,
24 |   )
25 |
--------------------------------------------------------------------------------
/testing/test_models/test_cmx_02_cmx.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from os.path import abspath, dirname, exists, join
4 | from pathlib import Path
5 |
6 | import cv2
7 | import numpy as np
8 |
9 | sys.path.append(abspath(join(dirname(__file__), '../../models')))
10 |
11 |
12 | def test_cmx():  # first drop omnidata entries that earlier tests appended to sys.path
13 |   sys.path
= [path for path in sys.path if 'omnidata' not in path] 14 | from cmx import run as run_cmx 15 | 16 | scene_dir = Path(abspath(join(dirname(__file__), '../test_scan'))) 17 | output_folder = 'intermediate/depth_omnidata_1' 18 | name = '000000.png' 19 | 20 | run_cmx( 21 | scene_dir=scene_dir, 22 | output_folder=output_folder, 23 | device='cuda:0', 24 | ) 25 | 26 | img_arr = cv2.imread(str(scene_dir / output_folder / name), 27 | cv2.IMREAD_UNCHANGED) 28 | assert np.unique(img_arr).shape[0] > 1 29 | -------------------------------------------------------------------------------- /testing/test_models/test_consensus.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from os.path import abspath, dirname, exists, join 4 | from pathlib import Path 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | def test_consensus(): 11 | from labelmaker.consensus import run as run_consensus 12 | 13 | scene_dir = Path(abspath(join(dirname(__file__), '../test_scan'))) 14 | output_folder = 'intermediate/consensus' 15 | name = '000000.png' 16 | 17 | run_consensus( 18 | scene_dir=scene_dir, 19 | output_folder=output_folder, 20 | ) 21 | 22 | img_arr = cv2.imread(str(scene_dir / output_folder / name), 23 | cv2.IMREAD_UNCHANGED) 24 | assert np.unique(img_arr).shape[0] > 1 25 | -------------------------------------------------------------------------------- /testing/test_models/test_grounded_sam.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from os.path import abspath, dirname, exists, join 4 | from pathlib import Path 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | sys.path.append(abspath(join(dirname(__file__), '../../models'))) 10 | 11 | 12 | def test_grounded_sam(): 13 | from grounded_sam import run as run_grounded_sam 14 | 15 | scene_dir = Path(abspath(join(dirname(__file__), '../test_scan'))) 16 | output_folder = 'intermediate/wordnet_groundedsam_1' 17 | name = '000000.png' 18 | 19 | run_grounded_sam( 20 | scene_dir=scene_dir, 21 | output_folder=output_folder, 22 | device='cuda:0', 23 | ) 24 | 25 | img_arr = cv2.imread(str(scene_dir / output_folder / name), 26 | cv2.IMREAD_UNCHANGED) 27 | assert np.unique(img_arr).shape[0] > 1 28 | -------------------------------------------------------------------------------- /testing/test_models/test_internimage.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from os.path import abspath, dirname, exists, join 4 | from pathlib import Path 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | sys.path.append(abspath(join(dirname(__file__), '../../models'))) 10 | 11 | 12 | def test_internimage(): 13 | from internimage import run as run_internimage 14 | 15 | scene_dir = Path(abspath(join(dirname(__file__), '../test_scan'))) 16 | output_folder = 'intermediate/ade20k_internimage_1' 17 | name = '000000.png' 18 | 19 | run_internimage( 20 | scene_dir=scene_dir, 21 | output_folder=output_folder, 22 | device='cuda:0', 23 | ) 24 | 25 | img_arr = cv2.imread(str(scene_dir / output_folder / name), 26 | cv2.IMREAD_UNCHANGED) 27 | assert np.unique(img_arr).shape[0] > 1 28 | -------------------------------------------------------------------------------- /testing/test_models/test_mask3d.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from os.path import abspath, dirname, exists, join 4 | from pathlib import Path 5 | 6 | 
import cv2 7 | import numpy as np 8 | 9 | sys.path.append(abspath(join(dirname(__file__), '../../models'))) 10 | 11 | 12 | def test_mask3d(): 13 | from mask3d_inst import run as run_mask3d_inst 14 | 15 | scene_dir = Path(abspath(join(dirname(__file__), '../test_scan'))) 16 | output_folder = 'intermediate/scannet200_mask3d_1' 17 | name = '000000.png' 18 | 19 | run_mask3d_inst( 20 | scene_dir=scene_dir, 21 | output_folder=output_folder, 22 | device='cuda:0', 23 | ) 24 | 25 | img_arr = cv2.imread(str(scene_dir / output_folder / name), 26 | cv2.IMREAD_UNCHANGED) 27 | assert np.unique(img_arr).shape[0] > 1 28 | -------------------------------------------------------------------------------- /testing/test_models/test_omnidata_normal.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from os.path import abspath, dirname, exists, join 4 | from pathlib import Path 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | sys.path.append(abspath(join(dirname(__file__), '../../models'))) 10 | 11 | 12 | def test_omnidata(): 13 | from omnidata_normal import run as run_omnidata 14 | 15 | scene_dir = Path(abspath(join(dirname(__file__), '../test_scan'))) 16 | output_folder = 'intermediate/normal_omnidata_1' 17 | 18 | run_omnidata( 19 | scene_dir=scene_dir, 20 | output_folder=output_folder, 21 | device='cuda:0', 22 | ) 23 | -------------------------------------------------------------------------------- /testing/test_models/test_ovseg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from os.path import abspath, dirname, exists, join 4 | from pathlib import Path 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | sys.path.append(abspath(join(dirname(__file__), '../../models'))) 10 | 11 | 12 | def test_ovseg(): 13 | from ovseg import run as run_ovseg 14 | 15 | scene_dir = Path(abspath(join(dirname(__file__), '../test_scan'))) 16 | output_folder = 'intermediate/wn_nodef_ovseg_1' 17 | name = '000000.png' 18 | 19 | run_ovseg( 20 | scene_dir=scene_dir, 21 | output_folder=output_folder, 22 | device='cuda:0', 23 | ) 24 | 25 | img_arr = cv2.imread(str(scene_dir / output_folder / name), 26 | cv2.IMREAD_UNCHANGED) 27 | assert np.unique(img_arr).shape[0] > 1 28 | -------------------------------------------------------------------------------- /testing/test_scan/color/000000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/LabelMaker/44ff92d42ae9b0579f016bb7bc5cd4bd09a613a3/testing/test_scan/color/000000.jpg -------------------------------------------------------------------------------- /testing/test_scan/depth/000000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/LabelMaker/44ff92d42ae9b0579f016bb7bc5cd4bd09a613a3/testing/test_scan/depth/000000.png -------------------------------------------------------------------------------- /testing/test_scan/intrinsic/000000.txt: -------------------------------------------------------------------------------- 1 | 5.354909999999999854e+02 0.000000000000000000e+00 3.216309999999999718e+02 2 | 0.000000000000000000e+00 5.354909999999999854e+02 2.394190000000000111e+02 3 | 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 4 | -------------------------------------------------------------------------------- /testing/test_scan/mesh.ply: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/LabelMaker/44ff92d42ae9b0579f016bb7bc5cd4bd09a613a3/testing/test_scan/mesh.ply -------------------------------------------------------------------------------- /testing/test_scan/pose/000000.txt: -------------------------------------------------------------------------------- 1 | 1.829560822640563869e-02 -9.986093787547937195e-01 -4.944268785766454261e-02 5.316176301353460476e-02 2 | -9.997274661290184161e-01 -1.755411944260264895e-02 -1.538981344419157286e-02 5.090203246895330658e-02 3 | 1.450048919443955656e-02 4.971077904800336178e-02 -9.986583871672847224e-01 -7.897646873520455107e-03 4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 5 | --------------------------------------------------------------------------------